Project import generated by Copybara.

GitOrigin-RevId: b695dda274aa3ac3c7d054e150bd9eb5c1285b19
MediaPipe Team 2020-01-17 14:36:18 -08:00 committed by chris
parent 66b377c825
commit dd02df1dbe
39 changed files with 692 additions and 203 deletions

View File

@ -38,6 +38,8 @@ namespace mediapipe {
// }
// }
// }
// Optionally, you can pass in a side packet that overrides the `max_vec_size`
// specified in the options.
template <typename T>
class ClipVectorSizeCalculator : public CalculatorBase {
public:
@ -53,6 +55,10 @@ class ClipVectorSizeCalculator : public CalculatorBase {
cc->Inputs().Index(0).Set<std::vector<T>>();
cc->Outputs().Index(0).Set<std::vector<T>>();
// Optional input side packet that determines `max_vec_size`.
if (cc->InputSidePackets().NumEntries() > 0) {
cc->InputSidePackets().Index(0).Set<int>();
}
return ::mediapipe::OkStatus();
}
@ -61,6 +67,11 @@ class ClipVectorSizeCalculator : public CalculatorBase {
cc->SetOffset(TimestampDiff(0));
max_vec_size_ = cc->Options<::mediapipe::ClipVectorSizeCalculatorOptions>()
.max_vec_size();
// Override `max_vec_size` if passed as side packet.
if (cc->InputSidePackets().NumEntries() > 0 &&
!cc->InputSidePackets().Index(0).IsEmpty()) {
max_vec_size_ = cc->InputSidePackets().Index(0).Get<int>();
}
return ::mediapipe::OkStatus();
}
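
As an illustration of how the optional side packet would be supplied when running a graph, here is a minimal sketch (not part of this commit) assuming a graph with an input side packet named `max_vec_size`, using the standard `CalculatorGraph` API:

```c++
#include <map>
#include <string>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/packet.h"

// Hypothetical helper: feed the optional side packet so it overrides the
// max_vec_size value configured in the calculator options.
::mediapipe::Status RunWithSidePacket(mediapipe::CalculatorGraph* graph) {
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets["max_vec_size"] = mediapipe::MakePacket<int>(2);
  return graph->StartRun(side_packets);
}
```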

View File

@ -176,4 +176,31 @@ TEST(TestClipUniqueIntPtrVectorSizeCalculatorTest, ConsumeOneTimestamp) {
}
}
TEST(TestClipIntVectorSizeCalculatorTest, SidePacket) {
CalculatorGraphConfig::Node node_config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
calculator: "TestClipIntVectorSizeCalculator"
input_stream: "input_vector"
input_side_packet: "max_vec_size"
output_stream: "output_vector"
options {
[mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 1 }
}
)");
CalculatorRunner runner(node_config);
// This should override the default of 1 set in the options.
runner.MutableSidePackets()->Index(0) = Adopt(new int(2));
std::vector<int> input = {0, 1, 2, 3};
AddInputVector(input, /*timestamp=*/1, &runner);
MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
EXPECT_EQ(1, outputs.size());
EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
const std::vector<int>& output = outputs[0].Get<std::vector<int>>();
EXPECT_EQ(2, output.size());
std::vector<int> expected_vector = {0, 1};
EXPECT_EQ(expected_vector, output);
}
} // namespace mediapipe

View File

@ -35,6 +35,16 @@ namespace mediapipe {
typedef ConcatenateVectorCalculator<float> ConcatenateFloatVectorCalculator;
REGISTER_CALCULATOR(ConcatenateFloatVectorCalculator);
// Example config:
// node {
// calculator: "ConcatenateInt32VectorCalculator"
// input_stream: "int32_vector_1"
// input_stream: "int32_vector_2"
// output_stream: "concatenated_int32_vector"
// }
typedef ConcatenateVectorCalculator<int32> ConcatenateInt32VectorCalculator;
REGISTER_CALCULATOR(ConcatenateInt32VectorCalculator);
// Example config:
// node {
// calculator: "ConcatenateTfLiteTensorVectorCalculator"

View File

@ -138,8 +138,7 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
// Note: To enable horizontal or vertical flipping, specify them in the
// calculator options. Flipping is applied after rotation.
//
// Note: Only scale mode STRETCH is currently supported on CPU,
// and flipping is not yet supported either.
// Note: Only scale mode STRETCH is currently supported on CPU.
//
class ImageTransformationCalculator : public CalculatorBase {
public:
@ -316,6 +315,11 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
cv::Mat input_mat = formats::MatView(&input_img);
cv::Mat scaled_mat;
if (!output_height_ || !output_width_) {
output_height_ = input_height;
output_width_ = input_width;
}
if (scale_mode_ == mediapipe::ScaleMode_Mode_STRETCH) {
cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_));
} else {
@ -367,10 +371,21 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
cv::Mat rotation_mat = cv::getRotationMatrix2D(src_center, angle, 1.0);
cv::warpAffine(scaled_mat, rotated_mat, rotation_mat, scaled_mat.size());
cv::Mat flipped_mat;
if (options_.flip_horizontally() || options_.flip_vertically()) {
const int flip_code =
options_.flip_horizontally() && options_.flip_vertically()
? -1
: options_.flip_horizontally();
cv::flip(rotated_mat, flipped_mat, flip_code);
} else {
flipped_mat = rotated_mat;
}
std::unique_ptr<ImageFrame> output_frame(
new ImageFrame(input_img.Format(), output_width, output_height));
cv::Mat output_mat = formats::MatView(output_frame.get());
rotated_mat.copyTo(output_mat);
flipped_mat.copyTo(output_mat);
cc->Outputs().Tag("IMAGE").Add(output_frame.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
@ -440,9 +455,8 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
cc->InputSidePackets().Tag("ROTATION_DEGREES").Get<int>());
}
static mediapipe::FrameScaleMode scale_mode =
mediapipe::FrameScaleModeFromProto(scale_mode_,
mediapipe::FrameScaleMode::kStretch);
mediapipe::FrameScaleMode scale_mode = mediapipe::FrameScaleModeFromProto(
scale_mode_, mediapipe::FrameScaleMode::kStretch);
mediapipe::FrameRotation rotation =
mediapipe::FrameRotationFromDegrees(RotationModeToDegrees(rotation_));
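
The `flip_code` computed in the CPU path above follows OpenCV's `cv::flip` convention. A tiny standalone sketch of that mapping (illustrative only, not part of the commit):

```c++
#include <opencv2/core.hpp>

int main() {
  cv::Mat input = cv::Mat::eye(4, 4, CV_8UC1);
  cv::Mat flipped;
  // flipCode > 0 flips around the y-axis (horizontal flip),
  // flipCode == 0 flips around the x-axis (vertical flip),
  // flipCode < 0 flips around both axes;
  // hence `-1` when both options are set, and `flip_horizontally()` (0 or 1)
  // otherwise in the calculator above.
  cv::flip(input, flipped, /*flipCode=*/-1);
  return 0;
}
```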

View File

@ -34,6 +34,7 @@ namespace mediapipe {
const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
const char kImageTag[] = "IMAGE";
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
const char kBBoxTag[] = "BBOX";
@ -145,6 +146,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
}
cc->Inputs().Tag(tag).Set<std::vector<Detection>>();
}
if (absl::StartsWith(tag, kFloatContextFeaturePrefixTag)) {
cc->Inputs().Tag(tag).Set<std::vector<float>>();
}
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
cc->Inputs().Tag(tag).Set<std::vector<float>>();
}
@ -344,6 +348,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
sequence_.get());
}
}
if (absl::StartsWith(tag, kFloatContextFeaturePrefixTag) &&
!cc->Inputs().Tag(tag).IsEmpty()) {
std::string key =
tag.substr(sizeof(kFloatContextFeaturePrefixTag) /
sizeof(*kFloatContextFeaturePrefixTag) -
1);
RET_CHECK_EQ(cc->InputTimestamp(), Timestamp::PostStream());
mpms::SetContextFeatureFloats(
key, cc->Inputs().Tag(tag).Get<std::vector<float>>(),
sequence_.get());
}
if (absl::StartsWith(tag, kFloatFeaturePrefixTag) &&
!cc->Inputs().Tag(tag).IsEmpty()) {
std::string key = tag.substr(sizeof(kFloatFeaturePrefixTag) /
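
The `sizeof(kTag) / sizeof(*kTag) - 1` expression above computes, at compile time, the length of the prefix string literal without its trailing NUL, so `tag.substr(...)` strips the prefix and leaves only the feature key. A minimal illustration (hypothetical names, not part of the commit):

```c++
#include <iostream>
#include <string>

constexpr char kPrefix[] = "FLOAT_CONTEXT_FEATURE_";

int main() {
  // sizeof(kPrefix) includes the trailing '\0', so subtracting 1 yields the
  // length of the literal; dividing by sizeof(*kPrefix) (== 1 for char) is a
  // no-op kept for symmetry with the calculator code.
  const std::string tag = "FLOAT_CONTEXT_FEATURE_TEST";
  const std::string key = tag.substr(sizeof(kPrefix) / sizeof(*kPrefix) - 1);
  std::cout << key << std::endl;  // prints "TEST"
  return 0;
}
```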

View File

@ -194,6 +194,38 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
}
}
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
SetUpCalculator(
{"FLOAT_CONTEXT_FEATURE_TEST:test", "FLOAT_CONTEXT_FEATURE_OTHER:test2"},
{}, false, true);
auto input_sequence = absl::make_unique<tf::SequenceExample>();
auto vf_ptr = absl::make_unique<std::vector<float>>(2, 3);
runner_->MutableInputs()
->Tag("FLOAT_CONTEXT_FEATURE_TEST")
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
vf_ptr = absl::make_unique<std::vector<float>>(2, 4);
runner_->MutableInputs()
->Tag("FLOAT_CONTEXT_FEATURE_OTHER")
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>();
ASSERT_THAT(mpms::GetContextFeatureFloats("TEST", output_sequence),
testing::ElementsAre(3, 3));
ASSERT_THAT(mpms::GetContextFeatureFloats("OTHER", output_sequence),
testing::ElementsAre(4, 4));
}
TEST_F(PackMediaSequenceCalculatorTest, PacksAdditionalContext) {
tf::Features context;
(*context.mutable_feature())["TEST"].mutable_bytes_list()->add_value("YES");

View File

@ -508,6 +508,17 @@ proto_library(
],
)
proto_library(
name = "timed_box_list_to_render_data_calculator_proto",
srcs = ["timed_box_list_to_render_data_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util:color_proto",
"//mediapipe/util:render_data_proto",
],
)
proto_library(
name = "labels_to_render_data_calculator_proto",
srcs = ["labels_to_render_data_calculator.proto"],
@ -651,6 +662,37 @@ cc_library(
alwayslink = 1,
)
mediapipe_cc_proto_library(
name = "timed_box_list_to_render_data_calculator_cc_proto",
srcs = ["timed_box_list_to_render_data_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":timed_box_list_to_render_data_calculator_proto"],
)
cc_library(
name = "timed_box_list_to_render_data_calculator",
srcs = ["timed_box_list_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":timed_box_list_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"//mediapipe/util/tracking:tracking_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
cc_library(
name = "labels_to_render_data_calculator",
srcs = ["labels_to_render_data_calculator.cc"],

View File

@ -37,6 +37,8 @@ namespace mediapipe {
// }
// }
// }
// Optionally, a side packet can be used to override the `min_size` specified
// in the calculator options.
template <typename IterableT>
class CollectionHasMinSizeCalculator : public CalculatorBase {
public:
@ -54,6 +56,10 @@ class CollectionHasMinSizeCalculator : public CalculatorBase {
cc->Inputs().Tag("ITERABLE").Set<IterableT>();
cc->Outputs().Index(0).Set<bool>();
// Optional input side packet that determines `min_size_`.
if (cc->InputSidePackets().NumEntries() > 0) {
cc->InputSidePackets().Index(0).Set<int>();
}
return ::mediapipe::OkStatus();
}
@ -62,6 +68,11 @@ class CollectionHasMinSizeCalculator : public CalculatorBase {
min_size_ =
cc->Options<::mediapipe::CollectionHasMinSizeCalculatorOptions>()
.min_size();
// Override `min_size` if passed as side packet.
if (cc->InputSidePackets().NumEntries() > 0 &&
!cc->InputSidePackets().Index(0).IsEmpty()) {
min_size_ = cc->InputSidePackets().Index(0).Get<int>();
}
return ::mediapipe::OkStatus();
}

View File

@ -0,0 +1,146 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/calculators/util/timed_box_list_to_render_data_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
#include "mediapipe/util/tracking/tracking.pb.h"
namespace mediapipe {
namespace {
constexpr char kTimedBoxListTag[] = "BOX_LIST";
constexpr char kRenderDataTag[] = "RENDER_DATA";
void AddTimedBoxProtoToRenderData(
const TimedBoxProto& box_proto,
const TimedBoxListToRenderDataCalculatorOptions& options,
RenderData* render_data) {
if (box_proto.has_quad() && box_proto.quad().vertices_size() > 0 &&
box_proto.quad().vertices_size() % 2 == 0) {
const int num_corners = box_proto.quad().vertices_size() / 2;
for (int i = 0; i < num_corners; ++i) {
const int next_corner = (i + 1) % num_corners;
auto* line_annotation = render_data->add_render_annotations();
line_annotation->mutable_color()->set_r(options.box_color().r());
line_annotation->mutable_color()->set_g(options.box_color().g());
line_annotation->mutable_color()->set_b(options.box_color().b());
line_annotation->set_thickness(options.thickness());
RenderAnnotation::Line* line = line_annotation->mutable_line();
line->set_x_start(box_proto.quad().vertices(i * 2));
line->set_y_start(box_proto.quad().vertices(i * 2 + 1));
line->set_x_end(box_proto.quad().vertices(next_corner * 2));
line->set_y_end(box_proto.quad().vertices(next_corner * 2 + 1));
}
} else {
auto* rect_annotation = render_data->add_render_annotations();
rect_annotation->mutable_color()->set_r(options.box_color().r());
rect_annotation->mutable_color()->set_g(options.box_color().g());
rect_annotation->mutable_color()->set_b(options.box_color().b());
rect_annotation->set_thickness(options.thickness());
RenderAnnotation::Rectangle* rect = rect_annotation->mutable_rectangle();
rect->set_normalized(true);
rect->set_left(box_proto.left());
rect->set_right(box_proto.right());
rect->set_top(box_proto.top());
rect->set_bottom(box_proto.bottom());
rect->set_rotation(box_proto.rotation());
}
}
} // namespace
// A calculator that converts a TimedBoxProtoList proto to a RenderData proto
// for visualization. If the input TimedBoxProto contains the `quad` field, this
// calculator draws a quadrilateral based on it. Otherwise it draws a rotated
// rectangle based on the `top`, `bottom`, `left`, `right` and `rotation`
// fields.
//
// Example config:
// node {
// calculator: "TimedBoxListToRenderDataCalculator"
// input_stream: "BOX_LIST:landmarks"
// output_stream: "RENDER_DATA:render_data"
// options {
// [TimedBoxListToRenderDataCalculatorOptions.ext] {
// box_color { r: 0 g: 255 b: 0 }
// thickness: 4.0
// }
// }
// }
class TimedBoxListToRenderDataCalculator : public CalculatorBase {
public:
TimedBoxListToRenderDataCalculator() {}
~TimedBoxListToRenderDataCalculator() override {}
TimedBoxListToRenderDataCalculator(
const TimedBoxListToRenderDataCalculator&) = delete;
TimedBoxListToRenderDataCalculator& operator=(
const TimedBoxListToRenderDataCalculator&) = delete;
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
TimedBoxListToRenderDataCalculatorOptions options_;
};
REGISTER_CALCULATOR(TimedBoxListToRenderDataCalculator);
::mediapipe::Status TimedBoxListToRenderDataCalculator::GetContract(
CalculatorContract* cc) {
if (cc->Inputs().HasTag(kTimedBoxListTag)) {
cc->Inputs().Tag(kTimedBoxListTag).Set<TimedBoxProtoList>();
}
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status TimedBoxListToRenderDataCalculator::Open(
CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<TimedBoxListToRenderDataCalculatorOptions>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status TimedBoxListToRenderDataCalculator::Process(
CalculatorContext* cc) {
auto render_data = absl::make_unique<RenderData>();
if (cc->Inputs().HasTag(kTimedBoxListTag)) {
const auto& box_list =
cc->Inputs().Tag(kTimedBoxListTag).Get<TimedBoxProtoList>();
for (const auto& box : box_list.box()) {
AddTimedBoxProtoToRenderData(box, options_, render_data.get());
}
}
cc->Outputs()
.Tag(kRenderDataTag)
.Add(render_data.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,32 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";
message TimedBoxListToRenderDataCalculatorOptions {
extend CalculatorOptions {
optional TimedBoxListToRenderDataCalculatorOptions ext = 289899854;
}
// Color of boxes.
optional Color box_color = 1;
// Thickness of the drawing of boxes.
optional double thickness = 2 [default = 1.0];
}

View File

@ -66,6 +66,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_detection/face_de
--calculator_graph_config_file=mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt
```
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
#### Graph
![graph visualization](images/face_detection_desktop.png)

View File

@ -2,6 +2,7 @@
- [Overview](#overview)
- [OpenGL Support](#opengl-support)
- [Desktop GPUs](#desktop-gpu-linux)
- [Life of a GPU calculator](#life-of-a-gpu-calculator)
- [GpuBuffer to ImageFrame converters](#gpubuffer-to-imageframe-converters)
- [Disable GPU support](#disable-gpu-support)
@ -22,13 +23,60 @@ Below are the design principles for GPU support in MediaPipe
* Because different platforms may require different techniques for best performance, the API should allow flexibility in the way things are implemented behind the scenes.
* A calculator should be allowed maximum flexibility in using the GPU for all or part of its operation, combining it with the CPU if necessary.
### OpenGL support
### OpenGL Support
MediaPipe supports OpenGL ES up to version 3.2 on Android and up to ES 3.0 on iOS. In addition, MediaPipe also supports Metal on iOS.
MediaPipe supports OpenGL ES up to version 3.2 on Android/Linux and up to ES 3.0
on iOS. In addition, MediaPipe also supports Metal on iOS.
* MediaPipe allows graphs to run OpenGL in multiple GL contexts. For example, this can be very useful in graphs that combine a slower GPU inference path (eg, at 10 FPS) with a faster GPU rendering path (eg, at 30 FPS): since one GL context corresponds to one sequential command queue, using the same context for both tasks would reduce the rendering frame rate. One challenge MediaPipe's use of multiple contexts solves is the ability to communicate across them. An example scenario is one with an input video that is sent to both the rendering and inferences paths, and rendering needs to have access to the latest output from inference.
OpenGL ES 3.1 or greater is required (on Android/Linux systems) for running
machine learning inference calculators and graphs.
* An OpenGL context cannot be accessed by multiple threads at the same time. Furthermore, switching the active GL context on the same thread can be slow on some Android devices. Therefore, our approach is to have one dedicated thread per context. Each thread issues GL commands, building up a serial command queue on its context, which is then executed by the GPU asynchronously.
MediaPipe allows graphs to run OpenGL in multiple GL contexts. For example, this
can be very useful in graphs that combine a slower GPU inference path (eg, at 10
FPS) with a faster GPU rendering path (eg, at 30 FPS): since one GL context
corresponds to one sequential command queue, using the same context for both
tasks would reduce the rendering frame rate.
One challenge MediaPipe's use of multiple contexts solves is the ability to
communicate across them. An example scenario is one with an input video that is
sent to both the rendering and inferences paths, and rendering needs to have
access to the latest output from inference.
An OpenGL context cannot be accessed by multiple threads at the same time.
Furthermore, switching the active GL context on the same thread can be slow on
some Android devices. Therefore, our approach is to have one dedicated thread
per context. Each thread issues GL commands, building up a serial command queue
on its context, which is then executed by the GPU asynchronously.
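
To make the threading model concrete, the following is a rough sketch of the typical shape of a GPU calculator. It assumes the `GlCalculatorHelper` API from `mediapipe/gpu` and a hypothetical pass-through calculator; it is not part of this change.

```c++
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"

namespace mediapipe {

// Hypothetical calculator used only to illustrate the per-context thread:
// everything inside RunInGlContext() executes on the GL context's dedicated
// thread, in order, and is queued to the GPU asynchronously.
class GpuPassThroughCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).Set<GpuBuffer>();
    cc->Outputs().Index(0).Set<GpuBuffer>();
    return GlCalculatorHelper::UpdateContract(cc);
  }

  ::mediapipe::Status Open(CalculatorContext* cc) override {
    return gpu_helper_.Open(cc);
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override {
    return gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status {
      // GL commands issued here join this context's serial command queue.
      cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
      return ::mediapipe::OkStatus();
    });
  }

 private:
  GlCalculatorHelper gpu_helper_;
};
REGISTER_CALCULATOR(GpuPassThroughCalculator);

}  // namespace mediapipe
```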
#### Desktop GPU (Linux)
MediaPipe GPU can run on Linux systems with video cards that support OpenGL ES
3.1 and up.
To check if your Linux desktop GPU can run MediaPipe:
```bash
$ sudo apt-get install mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev
$ sudo apt-get install mesa-utils
$ glxinfo | grep -i opengl
```
My Linux box prints:
```bash
$ glxinfo | grep -i opengl
...
OpenGL ES profile version string: OpenGL ES 3.2 NVIDIA 430.50
OpenGL ES profile shading language version string: OpenGL ES GLSL ES 3.20
OpenGL ES profile extensions:
```
*^notice the OpenGL ES 3.2 text^*
To run MediaPipe GPU on desktop, the output should report OpenGL ES 3.1 or
greater. If OpenGL ES is not listed, or the version is below 3.1, GPU inference
will not run.
### Life of a GPU calculator

View File

@ -40,6 +40,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair
--calculator_graph_config_file=mediapipe/graphs/hair_segmentation/hair_segmentation_mobile_gpu.pbtxt
```
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
#### Graph
![hair_segmentation_mobile_gpu_graph](images/mobile/hair_segmentation_mobile_gpu.png)

View File

@ -61,6 +61,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tra
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
```
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
#### Graph
![graph visualization](images/hand_tracking_desktop.png)

View File

@ -231,7 +231,7 @@ To build and run iOS apps:
* Install [Homebrew](https://brew.sh).
* Install [Xcode](https://developer.apple.com/xcode/) and its Command Line
Tools by `xcode-select install`.
Tools by `xcode-select --install`.
2. Checkout MediaPipe repository.
@ -331,7 +331,7 @@ To build and run iOS apps:
```
5. Make sure that Python 3 and Python "six" library is installed.
5. Make sure that Python 3 and the Python "six" library are installed.
```
$ brew install python
@ -574,6 +574,7 @@ This will use a Docker image that will isolate mediapipe's installation from the
Requirements:
* Java Runtime.
* Android SDK release 28.0.3 and above.
* Android NDK r17c and above.

View File

@ -61,6 +61,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/mu
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt
```
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
#### Graph
![graph visualization](images/multi_hand_tracking_desktop.png)

View File

@ -214,7 +214,6 @@ To build and run the TensorFlow Lite example on desktop (CPU) with Webcam, run:
# Video from webcam running on desktop CPU
$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/object_detection:object_detection_cpu
# It should print:
#Target //mediapipe/examples/desktop/object_detection:object_detection_cpu up-to-date:
# bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu

View File

@ -149,12 +149,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -180,12 +180,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -158,12 +158,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -151,12 +151,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override

View File

@ -52,15 +52,24 @@ struct PolynomialResidual {
const double out_;
};
// Computes the amount of delta position change along the fitted polynomial
// curve, translates the delta from being relative to the origin of the original
// dimension to being relative to the center of the original dimension, and then
// regulates the delta to avoid moving the camera off the frame boundaries.
float ComputeDelta(const float in, const int original_dimension,
const int output_dimension, const double a, const double b,
const double c, const double d, const double k) {
// The value `out` here represents a normalized distance between the center of
// the output window and the origin of the original window.
float out =
a * in + b * in * in + c * in * in * in + d * in * in * in * in + k;
float delta = (out - 0.5) * 2 * output_dimension;
const float max_delta = (original_dimension - output_dimension) / 2.0f;
// Translate `out` to a pixel distance between the center of the output window
// and the center of the original window. This value can be negative, 0, or
// positive.
float delta = (out - 0.5) * original_dimension;
// Make sure delta doesn't move the camera off the frame boundary.
const float max_delta = (original_dimension - output_dimension) / 2.0f;
if (delta > max_delta) {
delta = max_delta;
} else if (delta < -max_delta) {
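
A standalone arithmetic check of the clamping behavior, using the numbers from the `SuccessWhenFocusPointCloseToBoundary` test added below (illustrative sketch only):

```c++
#include <algorithm>
#include <cstdio>

int main() {
  // For a stationary focus point at ~0.99 with a 500-px frame and 100-px crop,
  // the fitted value `out` is approximately the focus point itself.
  const float out = 0.99f;
  const int original_dimension = 500;  // frame width
  const int output_dimension = 100;    // crop width
  float delta = (out - 0.5f) * original_dimension;                         // 245
  const float max_delta = (original_dimension - output_dimension) / 2.0f;  // 200
  delta = std::min(max_delta, std::max(-max_delta, delta));
  std::printf("delta = %.0f\n", delta);  // clamped to 200, matching the test
  return 0;
}
```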

View File

@ -140,7 +140,7 @@ constexpr double prediction[] = {
-24.550379, -24.06503,
};
void GenerateDataPoints(
void GenerateDataPointsFromRealVideo(
const int focus_point_frames_length,
const int prior_focus_point_frames_length,
std::vector<FocusPointFrame>* focus_point_frames,
@ -163,14 +163,15 @@ void GenerateDataPoints(
}
}
TEST(PolynomialRegressionPathSolverTest, Success) {
TEST(PolynomialRegressionPathSolverTest, SuccessInTrackingCameraMode) {
PolynomialRegressionPathSolver solver;
std::vector<FocusPointFrame> focus_point_frames;
std::vector<FocusPointFrame> prior_focus_point_frames;
std::vector<cv::Mat> all_xforms;
GenerateDataPoints(/* focus_point_frames_length = */ 100,
/* prior_focus_point_frames_length = */ 100,
&focus_point_frames, &prior_focus_point_frames);
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 100,
/* prior_focus_point_frames_length = */ 100,
&focus_point_frames,
&prior_focus_point_frames);
constexpr int kFrameWidth = 200;
constexpr int kFrameHeight = 300;
constexpr int kCropWidth = 100;
@ -185,14 +186,86 @@ TEST(PolynomialRegressionPathSolverTest, Success) {
}
}
TEST(PolynomialRegressionPathSolverTest, SuccessInStationaryCameraMode) {
PolynomialRegressionPathSolver solver;
std::vector<FocusPointFrame> focus_point_frames;
std::vector<FocusPointFrame> prior_focus_point_frames;
std::vector<cv::Mat> all_xforms;
constexpr int kFocusPointFramesLength = 100;
constexpr float kNormStationaryFocusPointX = 0.34f;
for (int i = 0; i < kFocusPointFramesLength; i++) {
FocusPoint sp;
// Add a fixed normalized focus point location.
sp.set_norm_point_x(kNormStationaryFocusPointX);
FocusPointFrame spf;
*spf.add_point() = sp;
focus_point_frames.push_back(spf);
}
constexpr int kFrameWidth = 300;
constexpr int kFrameHeight = 300;
constexpr int kCropWidth = 100;
constexpr int kCropHeight = 300;
MP_ASSERT_OK(solver.ComputeCameraPath(
focus_point_frames, prior_focus_point_frames, kFrameWidth, kFrameHeight,
kCropWidth, kCropHeight, &all_xforms));
ASSERT_EQ(all_xforms.size(), kFocusPointFramesLength);
constexpr int kExpectedShift = -48;
for (int i = 0; i < all_xforms.size(); i++) {
cv::Mat mat = all_xforms[i];
EXPECT_FLOAT_EQ(mat.at<float>(0, 2), kExpectedShift);
}
}
// Test the case where focus points are so close to the boundary that the
// computed shift would move the camera outside the frame. In this case, the
// solver should regulate the camera position shift so that the crop window
// stays inside the frame.
TEST(PolynomialRegressionPathSolverTest, SuccessWhenFocusPointCloseToBoundary) {
PolynomialRegressionPathSolver solver;
std::vector<FocusPointFrame> focus_point_frames;
std::vector<FocusPointFrame> prior_focus_point_frames;
std::vector<cv::Mat> all_xforms;
constexpr int kFocusPointFramesLength = 100;
constexpr float kNormStationaryFocusPointX = 0.99f;
for (int i = 0; i < kFocusPointFramesLength; i++) {
FocusPoint sp;
// Add a fixed normalized focus point location.
sp.set_norm_point_x(kNormStationaryFocusPointX);
FocusPointFrame spf;
*spf.add_point() = sp;
focus_point_frames.push_back(spf);
}
constexpr int kFrameWidth = 500;
constexpr int kFrameHeight = 300;
constexpr int kCropWidth = 100;
constexpr int kCropHeight = 300;
MP_ASSERT_OK(solver.ComputeCameraPath(
focus_point_frames, prior_focus_point_frames, kFrameWidth, kFrameHeight,
kCropWidth, kCropHeight, &all_xforms));
ASSERT_EQ(all_xforms.size(), kFocusPointFramesLength);
// Regulate max delta change = (500 - 100) / 2.
constexpr int kExpectedShift = 200;
for (int i = 0; i < all_xforms.size(); i++) {
cv::Mat mat = all_xforms[i];
EXPECT_FLOAT_EQ(mat.at<float>(0, 2), kExpectedShift);
}
}
TEST(PolynomialRegressionPathSolverTest, FewFramesShouldWork) {
PolynomialRegressionPathSolver solver;
std::vector<FocusPointFrame> focus_point_frames;
std::vector<FocusPointFrame> prior_focus_point_frames;
std::vector<cv::Mat> all_xforms;
GenerateDataPoints(/* focus_point_frames_length = */ 1,
/* prior_focus_point_frames_length = */ 1,
&focus_point_frames, &prior_focus_point_frames);
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 1,
/* prior_focus_point_frames_length = */ 1,
&focus_point_frames,
&prior_focus_point_frames);
constexpr int kFrameWidth = 200;
constexpr int kFrameHeight = 300;
constexpr int kCropWidth = 100;
@ -208,9 +281,10 @@ TEST(PolynomialRegressionPathSolverTest, OneCurrentFrameShouldWork) {
std::vector<FocusPointFrame> focus_point_frames;
std::vector<FocusPointFrame> prior_focus_point_frames;
std::vector<cv::Mat> all_xforms;
GenerateDataPoints(/* focus_point_frames_length = */ 1,
/* prior_focus_point_frames_length = */ 0,
&focus_point_frames, &prior_focus_point_frames);
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 1,
/* prior_focus_point_frames_length = */ 0,
&focus_point_frames,
&prior_focus_point_frames);
constexpr int kFrameWidth = 200;
constexpr int kFrameHeight = 300;
constexpr int kCropWidth = 100;
@ -226,9 +300,10 @@ TEST(PolynomialRegressionPathSolverTest, ZeroFrameShouldFail) {
std::vector<FocusPointFrame> focus_point_frames;
std::vector<FocusPointFrame> prior_focus_point_frames;
std::vector<cv::Mat> all_xforms;
GenerateDataPoints(/* focus_point_frames_length = */ 0,
/* prior_focus_point_frames_length = */ 0,
&focus_point_frames, &prior_focus_point_frames);
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 0,
/* prior_focus_point_frames_length = */ 0,
&focus_point_frames,
&prior_focus_point_frames);
constexpr int kFrameWidth = 200;
constexpr int kFrameHeight = 300;
constexpr int kCropWidth = 100;

View File

@ -371,11 +371,12 @@ void ImageFrame::CopyPixelData(ImageFormat::Format format, int width,
void ImageFrame::CopyToBuffer(uint8* buffer, int buffer_size) const {
CHECK(buffer);
CHECK_EQ(1, ByteDepth());
int data_size = width_ * height_ * NumberOfChannels() * ByteDepth();
const int data_size = width_ * height_ * NumberOfChannels();
CHECK_LE(data_size, buffer_size);
if (IsContiguous()) {
// The data is stored contiguously, we can just copy.
std::copy_n(pixel_data_.get(), data_size, buffer);
const uint8* src = reinterpret_cast<const uint8*>(pixel_data_.get());
std::copy_n(src, data_size, buffer);
} else {
InternalCopyToBuffer(0 /* contiguous storage */,
reinterpret_cast<char*>(buffer));

View File

@ -29,6 +29,9 @@ message Rect {
// Rotation angle is counter-clockwise in radian.
optional float rotation = 5 [default = 0.0];
// Optional unique id to help associate different Rects to each other.
optional int64 rect_id = 6;
}
// A rectangle with rotation in normalized coordinates. The values of box center
@ -45,4 +48,8 @@ message NormalizedRect {
// Rotation angle is counter-clockwise in radian.
optional float rotation = 5 [default = 0.0];
// Optional unique id to help associate different NormalizedRects to each
// other.
optional int64 rect_id = 6;
}

View File

@ -57,6 +57,9 @@ public abstract class CameraHelper {
*/
public abstract Size computeDisplaySizeFromViewSize(Size viewSize);
/** Returns true if the camera is in portrait mode, false if it is in landscape mode. */
public abstract boolean isCameraRotated();
public void setOnCameraStartedListener(@Nullable OnCameraStartedListener listener) {
onCameraStartedListener = listener;
}

View File

@ -16,13 +16,21 @@ package com.google.mediapipe.components;
import android.app.Activity;
import androidx.lifecycle.LifecycleOwner;
import android.content.Context;
import android.graphics.SurfaceTexture;
import android.hardware.camera2.CameraAccessException;
import android.hardware.camera2.CameraCharacteristics;
import android.hardware.camera2.CameraManager;
import android.hardware.camera2.CameraMetadata;
import android.hardware.camera2.params.StreamConfigurationMap;
import android.util.Log;
import android.util.Size;
import androidx.camera.core.CameraX;
import androidx.camera.core.CameraX.LensFacing;
import androidx.camera.core.Preview;
import androidx.camera.core.PreviewConfig;
import java.util.Arrays;
import java.util.List;
/**
* Uses CameraX APIs for camera setup and access.
@ -32,6 +40,9 @@ import androidx.camera.core.PreviewConfig;
public class CameraXPreviewHelper extends CameraHelper {
private static final String TAG = "CameraXPreviewHelper";
// Target frame and view resolution size in landscape.
private static final Size TARGET_SIZE = new Size(1280, 720);
private Preview preview;
// Size of the camera-preview frames from the camera.
@ -39,6 +50,10 @@ public class CameraXPreviewHelper extends CameraHelper {
// Rotation of the camera-preview frames in degrees.
private int frameRotation;
// Focal length resolved in pixels on the frame texture.
private float focalLengthPixels;
private CameraCharacteristics cameraCharacteristics = null;
@Override
@SuppressWarnings("RestrictTo") // See b/132705545.
public void startCamera(
@ -46,7 +61,10 @@ public class CameraXPreviewHelper extends CameraHelper {
LensFacing cameraLensFacing =
cameraFacing == CameraHelper.CameraFacing.FRONT ? LensFacing.FRONT : LensFacing.BACK;
PreviewConfig previewConfig =
new PreviewConfig.Builder().setLensFacing(cameraLensFacing).build();
new PreviewConfig.Builder()
.setLensFacing(cameraLensFacing)
.setTargetResolution(TARGET_SIZE)
.build();
preview = new Preview(previewConfig);
preview.setOnPreviewOutputUpdateListener(
@ -60,11 +78,22 @@ public class CameraXPreviewHelper extends CameraHelper {
return;
}
}
Integer selectedLensFacing =
cameraFacing == CameraHelper.CameraFacing.FRONT
? CameraMetadata.LENS_FACING_FRONT
: CameraMetadata.LENS_FACING_BACK;
calculateFocalLength(context, selectedLensFacing);
if (onCameraStartedListener != null) {
onCameraStartedListener.onCameraStarted(previewOutput.getSurfaceTexture());
}
});
CameraX.bindToLifecycle(/*lifecycleOwner=*/ (LifecycleOwner) context, preview);
}
@Override
public boolean isCameraRotated() {
return frameRotation % 180 == 90;
}
@Override
@ -75,28 +104,79 @@ public class CameraXPreviewHelper extends CameraHelper {
return null;
}
// Valid rotation values are 0, 90, 180 and 270.
// Frames are rotated relative to the device's "natural" landscape orientation. When in portrait
// mode, valid rotation values are 90 or 270, and the width/height should be swapped to
// calculate aspect ratio.
float frameAspectRatio =
frameRotation == 90 || frameRotation == 270
? frameSize.getHeight() / (float) frameSize.getWidth()
: frameSize.getWidth() / (float) frameSize.getHeight();
Size optimalSize = getOptimalViewSize(viewSize);
return optimalSize != null ? optimalSize : frameSize;
}
float viewAspectRatio = viewSize.getWidth() / (float) viewSize.getHeight();
private Size getOptimalViewSize(Size targetSize) {
if (cameraCharacteristics != null) {
StreamConfigurationMap map =
cameraCharacteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP);
Size[] outputSizes = map.getOutputSizes(SurfaceTexture.class);
// Match shortest sides together.
int scaledWidth;
int scaledHeight;
if (frameAspectRatio < viewAspectRatio) {
scaledWidth = viewSize.getWidth();
scaledHeight = Math.round(viewSize.getWidth() / frameAspectRatio);
} else {
scaledHeight = viewSize.getHeight();
scaledWidth = Math.round(viewSize.getHeight() * frameAspectRatio);
int selectedWidth = -1;
int selectedHeight = -1;
float selectedAspectRatioDifference = 1e3f;
float targetAspectRatio = targetSize.getWidth() / (float) targetSize.getHeight();
// Find the smallest size >= target size with the closest aspect ratio.
for (Size size : outputSizes) {
float aspectRatio = (float) size.getWidth() / size.getHeight();
float aspectRatioDifference = Math.abs(aspectRatio - targetAspectRatio);
if (aspectRatioDifference <= selectedAspectRatioDifference) {
if ((selectedWidth == -1 && selectedHeight == -1)
|| (size.getWidth() <= selectedWidth
&& size.getWidth() >= frameSize.getWidth()
&& size.getHeight() <= selectedHeight
&& size.getHeight() >= frameSize.getHeight())) {
selectedWidth = size.getWidth();
selectedHeight = size.getHeight();
selectedAspectRatioDifference = aspectRatioDifference;
}
}
}
if (selectedWidth != -1 && selectedHeight != -1) {
return new Size(selectedWidth, selectedHeight);
}
}
return null;
}
return new Size(scaledWidth, scaledHeight);
public float getFocalLengthPixels() {
return focalLengthPixels;
}
private void calculateFocalLength(Activity context, Integer lensFacing) {
CameraManager cameraManager = (CameraManager) context.getSystemService(Context.CAMERA_SERVICE);
try {
List<String> cameraList = Arrays.asList(cameraManager.getCameraIdList());
for (String availableCameraId : cameraList) {
CameraCharacteristics availableCameraCharacteristics =
cameraManager.getCameraCharacteristics(availableCameraId);
Integer availableLensFacing =
availableCameraCharacteristics.get(CameraCharacteristics.LENS_FACING);
if (availableLensFacing == null) {
continue;
}
if (availableLensFacing.equals(lensFacing)) {
cameraCharacteristics = availableCameraCharacteristics;
break;
}
}
// Focal length of the camera in millimeters.
// Note that CameraCharacteristics returns a list of focal lengths and there could be more
// than one focal length available if optical zoom is enabled or there are multiple physical
// cameras in the logical camera referenced here. A theoretically correct way of doing this
// would be to use the focal length set explicitly via the Camera2 API, as documented in
// https://developer.android.com/reference/android/hardware/camera2/CaptureRequest#LENS_FOCAL_LENGTH.
float focalLengthMm =
cameraCharacteristics.get(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)[0];
// Sensor width of the camera in millimeters.
float sensorWidthMm =
cameraCharacteristics.get(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE).getWidth();
focalLengthPixels = frameSize.getWidth() * focalLengthMm / sensorWidthMm;
} catch (CameraAccessException e) {
Log.e(TAG, "Accessing camera ID info got error: " + e);
}
}
}
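
The pixel focal length computed above is the physical focal length rescaled by the ratio of frame width to sensor width. A small numeric sketch with hypothetical (not measured) values:

```c++
#include <cstdio>

int main() {
  // Hypothetical values for illustration only.
  const float frame_width_px = 1280.0f;  // preview frame width
  const float focal_length_mm = 4.0f;    // LENS_INFO_AVAILABLE_FOCAL_LENGTHS[0]
  const float sensor_width_mm = 5.6f;    // SENSOR_INFO_PHYSICAL_SIZE width
  const float focal_length_px = frame_width_px * focal_length_mm / sensor_width_mm;
  std::printf("focal length: %.1f px\n", focal_length_px);  // ~914.3 px
  return 0;
}
```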

View File

@ -332,17 +332,10 @@ public class ExternalTextureConverter implements TextureFrameProducer {
bindFramebuffer(outputFrame.getTextureName(), destinationWidth, destinationHeight);
renderer.render(surfaceTexture);
// Populate frame timestamp with the System.nanoTime() timestamp after render() as renderer
// Populate frame timestamp with surface texture timestamp after render() as renderer
// ensures that surface texture has the up-to-date timestamp. (Also adjust |timestampOffset|
// to ensure that timestamps increase monotonically.)
// We assume that the camera timestamp is generated at the same time as this method is called
// and get the time via System.nanoTime(). This timestamp is aligned with the clock used by
// the microphone which returns timestamps aligned to the same time base as System.nanoTime().
// Data sent from camera and microphone should have timestamps aligned on the same clock and
// timebase so that the data can be processed by a MediaPipe graph simultaneously.
// Android's SurfaceTexture.getTimestamp() method is not aligned to the System.nanoTime()
// clock, so it cannot be used for texture timestamps in this method.
long textureTimestamp = System.nanoTime() / NANOS_PER_MICRO;
long textureTimestamp = surfaceTexture.getTimestamp() / NANOS_PER_MICRO;
if (previousTimestampValid && textureTimestamp + timestampOffset <= previousTimestamp) {
timestampOffset = previousTimestamp + 1 - textureTimestamp;
}
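
The `timestampOffset` adjustment keeps emitted timestamps strictly increasing even if the surface texture ever reports a non-monotonic value. A standalone sketch of the same logic (hypothetical values, not part of the commit):

```c++
#include <cstdint>
#include <cstdio>

int main() {
  int64_t previous = 0;
  int64_t offset = 0;
  bool previous_valid = false;
  // Hypothetical microsecond timestamps, with one out-of-order value (240).
  const int64_t raw[] = {100, 250, 240, 260};
  for (int64_t t : raw) {
    if (previous_valid && t + offset <= previous) {
      offset = previous + 1 - t;  // bump the offset to restore monotonicity
    }
    const int64_t emitted = t + offset;
    std::printf("%lld\n", static_cast<long long>(emitted));  // 100 250 251 271
    previous = emitted;
    previous_valid = true;
  }
  return 0;
}
```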

View File

@ -16,7 +16,6 @@ package com.google.mediapipe.components;
import android.content.Context;
import android.graphics.Bitmap;
import android.media.AudioFormat;
import android.util.Log;
import com.google.common.base.Preconditions;
import com.google.mediapipe.framework.AndroidAssetUtil;
@ -39,15 +38,12 @@ import javax.annotation.Nullable;
/**
* A {@link com.google.mediapipe.components.TextureFrameProcessor} that sends video frames through a
* MediaPipe graph and a {@link com.google.mediapipe.components.AudioDataProcessor} that sends audio
* data samples through a MediaPipe graph.
* MediaPipe graph.
*/
public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor {
public class FrameProcessor implements TextureFrameProcessor {
private static final String TAG = "FrameProcessor";
private static final int BYTES_PER_MONO_SAMPLE = 2; // 16 bit PCM encoding.
private List<TextureFrameConsumer> videoConsumers = new ArrayList<>();
private List<AudioDataConsumer> audioConsumers = new ArrayList<>();
private List<TextureFrameConsumer> consumers = new ArrayList<>();
private Graph mediapipeGraph;
private AndroidPacketCreator packetCreator;
private OnWillAddFrameListener addFrameListener;
@ -57,15 +53,6 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
private SurfaceOutput videoSurfaceOutput;
private final AtomicBoolean started = new AtomicBoolean(false);
private boolean hybridPath = false;
// Input stream of audio data. Can be null.
private String audioInputStream;
// Output stream of audio data. Can be null.
private String audioOutputStream;
// Number of channels of audio data read in the input stream. This can be only 1 or 2, as
// AudioRecord supports only AudioFormat.CHANNEL_IN_MONO and AudioFormat.CHANNEL_IN_STEREO.
private int numAudioChannels = 1;
// Sample rate of audio data sent to the MediaPipe graph.
private double audioSampleRate;
/**
* Constructor.
@ -104,7 +91,7 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
public void process(Packet packet) {
List<TextureFrameConsumer> currentConsumers;
synchronized (this) {
currentConsumers = videoConsumers;
currentConsumers = consumers;
}
for (TextureFrameConsumer consumer : currentConsumers) {
TextureFrame frame = PacketGetter.getTextureFrame(packet);
@ -128,54 +115,6 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
videoSurfaceOutput = mediapipeGraph.addSurfaceOutput(videoOutputStream);
}
/**
* Adds input streams to process audio data and output streams that output processed audio data.
*
* @param inputStream the graph input stream that will receive input audio samples.
* @param outputStream the output stream from which output audio samples will be produced.
* @param numChannels the number of audio channels in the input audio stream.
* @param audioSampleRateInHz the sample rate for audio samples in hertz (Hz).
*/
public void addAudioStreams(
@Nullable String inputStream,
@Nullable String outputStream,
int numChannels,
double audioSampleRateInHz) {
audioInputStream = inputStream;
audioOutputStream = outputStream;
numAudioChannels = numChannels;
audioSampleRate = audioSampleRateInHz;
if (audioInputStream != null) {
Packet audioHeader =
packetCreator.createTimeSeriesHeader(numAudioChannels, audioSampleRateInHz);
mediapipeGraph.setStreamHeader(audioInputStream, audioHeader);
}
if (audioOutputStream != null) {
AudioFormat audioFormat =
new AudioFormat.Builder()
.setSampleRate((int) audioSampleRateInHz)
.setChannelMask(numAudioChannels)
.build();
mediapipeGraph.addPacketCallback(
audioOutputStream,
new PacketCallback() {
@Override
public void process(Packet packet) {
List<AudioDataConsumer> currentAudioConsumers;
synchronized (this) {
currentAudioConsumers = audioConsumers;
}
for (AudioDataConsumer consumer : currentAudioConsumers) {
byte[] audioData = PacketGetter.getAudioByteData(packet);
consumer.onNewAudioData(audioData, packet.getTimestamp(), audioFormat);
}
}
});
}
}
/**
* Interface to be used so that this class can receive a callback when onNewFrame has determined
* it will process an input frame. Can be used to feed packets to accessory streams.
@ -195,16 +134,9 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
}
@Override
public void setConsumer(TextureFrameConsumer consumer) {
public void setConsumer(TextureFrameConsumer listener) {
synchronized (this) {
videoConsumers = Arrays.asList(consumer);
}
}
@Override
public void setAudioConsumer(AudioDataConsumer consumer) {
synchronized (this) {
audioConsumers = Arrays.asList(consumer);
consumers = Arrays.asList(listener);
}
}
@ -221,20 +153,20 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
mediapipeGraph.addPacketCallback(outputStream, callback);
}
public void addConsumer(TextureFrameConsumer consumer) {
public void addConsumer(TextureFrameConsumer listener) {
synchronized (this) {
List<TextureFrameConsumer> newConsumers = new ArrayList<>(videoConsumers);
newConsumers.add(consumer);
videoConsumers = newConsumers;
List<TextureFrameConsumer> newConsumers = new ArrayList<>(consumers);
newConsumers.add(listener);
consumers = newConsumers;
}
}
public boolean removeConsumer(TextureFrameConsumer listener) {
boolean existed;
synchronized (this) {
List<TextureFrameConsumer> newConsumers = new ArrayList<>(videoConsumers);
List<TextureFrameConsumer> newConsumers = new ArrayList<>(consumers);
existed = newConsumers.remove(listener);
videoConsumers = newConsumers;
consumers = newConsumers;
}
return existed;
}
@ -373,45 +305,4 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
private void startGraph() {
mediapipeGraph.startRunningGraph();
}
@Override
public void onNewAudioData(byte[] audioData, long timestampMicros, AudioFormat audioFormat) {
if (audioFormat.getChannelCount() != numAudioChannels
|| audioFormat.getSampleRate() != audioSampleRate) {
Log.e(TAG, "Producer's AudioFormat doesn't match FrameProcessor's AudioFormat");
return;
}
Preconditions.checkNotNull(audioInputStream);
int numSamples = audioData.length / BYTES_PER_MONO_SAMPLE / numAudioChannels;
byte[] data = audioData;
Packet audioPacket = packetCreator.createAudioPacket(data, numAudioChannels, numSamples);
try {
// addConsumablePacketToInputStream allows the graph to take exclusive ownership of the
// packet, which may allow for more memory optimizations.
mediapipeGraph.addConsumablePacketToInputStream(
audioInputStream, audioPacket, timestampMicros);
} catch (MediaPipeException e) {
Log.e(TAG, "Mediapipe error: ", e);
}
audioPacket.release();
}
public void addAudioConsumer(AudioDataConsumer consumer) {
synchronized (this) {
List<AudioDataConsumer> newConsumers = new ArrayList<>(audioConsumers);
newConsumers.add(consumer);
audioConsumers = newConsumers;
}
}
public boolean removeAudioConsumer(AudioDataConsumer consumer) {
boolean existed;
synchronized (this) {
List<AudioDataConsumer> newConsumers = new ArrayList<>(audioConsumers);
existed = newConsumers.remove(consumer);
audioConsumers = newConsumers;
}
return existed;
}
}

View File

@ -411,7 +411,7 @@ void Graph::SetPacketJavaClass(JNIEnv* env) {
// Set the timestamp of the packet in-place by calling the rvalue-reference
// version of At here.
packet = std::move(packet).At(Timestamp(timestamp));
packet = std::move(packet).At(Timestamp::CreateNoErrorChecking(timestamp));
// Then std::move it into the input stream.
return AddPacketToInputStream(stream_name, std::move(packet));

View File

@ -240,11 +240,11 @@ JNIEXPORT void JNICALL GRAPH_METHOD(nativeAddPacketToInputStream)(
mediapipe::android::Graph* mediapipe_graph =
reinterpret_cast<mediapipe::android::Graph*>(context);
// We push in a copy of the current packet at the given timestamp.
ThrowIfError(env,
mediapipe_graph->AddPacketToInputStream(
JStringToStdString(env, stream_name),
mediapipe::android::Graph::GetPacketFromHandle(packet).At(
mediapipe::Timestamp(timestamp))));
ThrowIfError(
env, mediapipe_graph->AddPacketToInputStream(
JStringToStdString(env, stream_name),
mediapipe::android::Graph::GetPacketFromHandle(packet).At(
mediapipe::Timestamp::CreateNoErrorChecking(timestamp))));
}
JNIEXPORT void JNICALL GRAPH_METHOD(nativeMovePacketToInputStream)(

View File

@ -203,14 +203,14 @@ SetClipStartTimestamp(1000000, &sequence);
SetClipEndTimestamp(6000000, &sequence);
// For an object tracking task with action labels:
std::vector<mediapipe::Locations> locations_on_frame_1;
std::vector<mediapipe::Location> locations_on_frame_1;
AddBBox(locations_on_frame_1, &sequence);
AddBBoxTimestamp(3000000, &sequence);
AddBBoxLabelIndex({4, 3}, &sequence);
AddBBoxLabelString({"run", "jump"}, &sequence);
AddBBoxTrackString({"id_0", "id_1"}, &sequence);
// AddBBoxClassString({"cls_0", "cls_0"}, &sequence); // if required
std::vector<mediapipe::Locations> locations_on_frame_2;
std::vector<mediapipe::Location> locations_on_frame_2;
AddBBox(locations_on_frame_2, &sequence);
AddBBoxTimestamp(5000000, &sequence);
AddBBoxLabelIndex({3}, &sequence);
@ -470,8 +470,8 @@ tasks and tracking (or class) fields for tracking information.
|`CLASS_SEGMENTATION/image/format`|context bytes|`set_class_segmentation_format` / `SetClassSegmentationFormat`|The encoding format of the class label images.|
|`CLASS_SEGMENTATION/image/height`|context int|`set_class_segmentation_height` / `SetClassSegmentationHeight`|The height of the image in pixels.|
|`CLASS_SEGMENTATION/image/width`|context int|`set_class_segmentation_width` / `SetClassSegmentationWidth`|The width of the image in pixels.|
|`CLASS_SEGMENTATION/image/class/ label/index`|context int list|`set_class_segmentation_label_index` / `SetClassSegmentationLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`CLASS_SEGMENTATION/image/class/ label/string`|context bytes list|`set_class_segmentation_label_string` / `SetClassSegmentationLabelString`|A mapping from values in the image to class labels.|
|`CLASS_SEGMENTATION/image/class/ label/index`|context int list|`set_class_segmentation_class_label_index` / `SetClassSegmentationClassLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`CLASS_SEGMENTATION/image/class/ label/string`|context bytes list|`set_class_segmentation_class_label_string` / `SetClassSegmentationClassLabelString`|A mapping from values in the image to class labels.|
### Keys related to image instance segmentation
| key | type | python call / c++ call | description |
@ -482,8 +482,8 @@ tasks and tracking (or class) fields for tracking information.
|`INSTANCE_SEGMENTATION/image/ format`|context bytes|`set_instance_segmentation_format` / `SetInstanceSegmentationFormat`|The encoding format of the object instance labels.|
|`INSTANCE_SEGMENTATION/image/ height`|context int|`set_instance_segmentation_height` / `SetInstanceSegmentationHeight`|The height of the image in pixels.|
|`INSTANCE_SEGMENTATION/image/ width`|context int|`set_instance_segmentation_width` / `SetInstanceSegmentationWidth`|The width of the image in pixels.|
|`INSTANCE_SEGMENTATION/image/ class/label/index`|context int list|`set_instance_segmentation_label_index` / `SetInstanceSegmentationLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ class/label/string`|context bytes list|`set_instance_segmentation_label_string` / `SetInstanceSegmentationLabelString`|A mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ class/label/index`|context int list|`set_instance_segmentation_class_label_index` / `SetInstanceSegmentationClassLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ class/label/string`|context bytes list|`set_instance_segmentation_class_label_string` / `SetInstanceSegmentationClassLabelString`|A mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ object/class/index`|context int|`set_instance_segmentation_object_class_index` / `SetInstanceSegmentationObjectClassIndex`|If necessary a mapping from values in the image to class indices.|
### Keys related to optical flow

View File

@ -563,6 +563,8 @@ const char kFeatureNumSamplesKey[] = "feature/num_samples";
const char kFeaturePacketRateKey[] = "feature/packet_rate";
// For audio, the original audio sampling rate the feature is derived from.
const char kFeatureAudioSampleRateKey[] = "feature/audio_sample_rate";
// The feature as a list of floats.
const char kContextFeatureFloatsKey[] = "context_feature/floats";
// Feature list keys:
// The feature as a list of floats.
@ -593,6 +595,8 @@ void AddAudioAsFeature(const std::string& prefix,
PREFIXED_VECTOR_INT64_CONTEXT_FEATURE(FeatureDimensions, kFeatureDimensionsKey);
PREFIXED_FLOAT_CONTEXT_FEATURE(FeatureRate, kFeatureRateKey);
PREFIXED_VECTOR_FLOAT_CONTEXT_FEATURE(ContextFeatureFloats,
kContextFeatureFloatsKey);
PREFIXED_BYTES_CONTEXT_FEATURE(FeatureBytesFormat, kFeatureBytesFormatKey);
PREFIXED_VECTOR_FLOAT_FEATURE_LIST(FeatureFloats, kFeatureFloatsKey);
PREFIXED_VECTOR_BYTES_FEATURE_LIST(FeatureBytes, kFeatureBytesKey);