diff --git a/Dockerfile b/Dockerfile index d622191c6..dc3b034a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,7 +54,7 @@ RUN pip3 install tf_slim RUN ln -s /usr/bin/python3 /usr/bin/python # Install bazel -ARG BAZEL_VERSION=3.0.0 +ARG BAZEL_VERSION=3.4.1 RUN mkdir /bazel && \ wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\ azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ diff --git a/WORKSPACE b/WORKSPACE index eb3efd275..395d5f52c 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -10,7 +10,7 @@ http_archive( sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0", ) load("@bazel_skylib//lib:versions.bzl", "versions") -versions.check(minimum_bazel_version = "2.0.0") +versions.check(minimum_bazel_version = "3.4.0") # ABSL cpp library lts_2020_02_25 @@ -324,8 +324,9 @@ maven_install( "androidx.lifecycle:lifecycle-common:2.2.0", "androidx.annotation:annotation:aar:1.1.0", "androidx.appcompat:appcompat:aar:1.1.0-rc01", - "androidx.camera:camera-core:aar:1.0.0-alpha06", - "androidx.camera:camera-camera2:aar:1.0.0-alpha06", + "androidx.camera:camera-core:1.0.0-beta10", + "androidx.camera:camera-camera2:1.0.0-beta10", + "androidx.camera:camera-lifecycle:1.0.0-beta10", "androidx.constraintlayout:constraintlayout:aar:1.1.3", "androidx.core:core:aar:1.1.0-rc03", "androidx.legacy:legacy-support-v4:aar:1.0.0", @@ -337,6 +338,7 @@ maven_install( "com.google.flogger:flogger-system-backend:0.3.1", "com.google.flogger:flogger:0.3.1", "com.google.guava:guava:27.0.1-android", + "com.google.guava:listenablefuture:1.0", "junit:junit:4.12", "org.hamcrest:hamcrest-library:1.3", ], diff --git a/docs/getting_started/hello_world_android.md b/docs/getting_started/hello_world_android.md index e4e8286f7..6bb98f671 100644 --- a/docs/getting_started/hello_world_android.md +++ b/docs/getting_started/hello_world_android.md @@ -446,8 +446,8 @@ visible so that we can start seeing frames from the `previewFrameTexture`. However, before starting the camera, we need to decide which camera we want to use. [`CameraXPreviewHelper`] inherits from [`CameraHelper`] which provides two options, `FRONT` and `BACK`. We can pass in the decision from the `BUILD` file -as metadata such that no code change is required to build a another version of -the app using a different camera. +as metadata such that no code change is required to build another version of the +app using a different camera. Assuming we want to use `BACK` camera to perform edge detection on a live scene that we view from the camera, add the metadata into `AndroidManifest.xml`: diff --git a/docs/getting_started/install.md b/docs/getting_started/install.md index b9be6e498..257c817a9 100644 --- a/docs/getting_started/install.md +++ b/docs/getting_started/install.md @@ -42,16 +42,16 @@ apps, see these [instructions](./building_examples.md#ios). Follow the official [Bazel documentation](https://docs.bazel.build/versions/master/install-ubuntu.html) - to install Bazel 2.0 or higher. + to install Bazel 3.4 or higher. For Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, Bazel needs to be built from source. 
```bash - # For Bazel 3.0.0 - wget https://github.com/bazelbuild/bazel/releases/download/3.0.0/bazel-3.0.0-dist.zip + # For Bazel 3.4.0 + wget https://github.com/bazelbuild/bazel/releases/download/3.4.0/bazel-3.4.0-dist.zip sudo apt-get install build-essential openjdk-8-jdk python zip unzip - unzip bazel-3.0.0-dist.zip + unzip bazel-3.4.0-dist.zip env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh sudo cp output/bazel /usr/local/bin/ ``` @@ -221,7 +221,7 @@ build issues. Follow the official [Bazel documentation](https://docs.bazel.build/versions/master/install-redhat.html) - to install Bazel 2.0 or higher. + to install Bazel 3.4 or higher. 3. Install OpenCV. @@ -356,7 +356,7 @@ build issues. Option 2. Follow the official [Bazel documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x) - to install Bazel 2.0 or higher. + to install Bazel 3.4 or higher. 4. Install OpenCV and FFmpeg. @@ -427,7 +427,6 @@ build issues. linkstatic = 1, visibility = ["//visibility:public"], ) - ``` 5. Make sure that Python 3 and the Python "six" library are installed. @@ -506,7 +505,7 @@ next section. Follow the official [Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html) - to install Bazel 2.0 or higher. + to install Bazel 3.4 or higher. 6. Set Bazel variables. @@ -567,7 +566,6 @@ next section. # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! # I20200514 20:43:12.280613 1200 hello_world.cc:56] Hello World! - ``` If you run into a build error, please read @@ -607,14 +605,14 @@ cameras. Alternatively, you use a video file as input. ```bash username@DESKTOP-TMVLBJ1:~$ curl -sLO --retry 5 --retry-max-time 10 \ - https://storage.googleapis.com/bazel/3.0.0/release/bazel-3.0.0-installer-linux-x86_64.sh && \ - sudo mkdir -p /usr/local/bazel/3.0.0 && \ - chmod 755 bazel-3.0.0-installer-linux-x86_64.sh && \ - sudo ./bazel-3.0.0-installer-linux-x86_64.sh --prefix=/usr/local/bazel/3.0.0 && \ - source /usr/local/bazel/3.0.0/lib/bazel/bin/bazel-complete.bash + https://storage.googleapis.com/bazel/3.4.0/release/bazel-3.4.0-installer-linux-x86_64.sh && \ + sudo mkdir -p /usr/local/bazel/3.4.0 && \ + chmod 755 bazel-3.4.0-installer-linux-x86_64.sh && \ + sudo ./bazel-3.4.0-installer-linux-x86_64.sh --prefix=/usr/local/bazel/3.4.0 && \ + source /usr/local/bazel/3.4.0/lib/bazel/bin/bazel-complete.bash - username@DESKTOP-TMVLBJ1:~$ /usr/local/bazel/3.0.0/lib/bazel/bin/bazel version && \ - alias bazel='/usr/local/bazel/3.0.0/lib/bazel/bin/bazel' + username@DESKTOP-TMVLBJ1:~$ /usr/local/bazel/3.4.0/lib/bazel/bin/bazel version && \ + alias bazel='/usr/local/bazel/3.4.0/lib/bazel/bin/bazel' ``` 6. Checkout MediaPipe repository. diff --git a/docs/tools/tracing_and_profiling.md b/docs/tools/tracing_and_profiling.md index 789071f3d..ed58eb61b 100644 --- a/docs/tools/tracing_and_profiling.md +++ b/docs/tools/tracing_and_profiling.md @@ -26,9 +26,10 @@ To enable tracing and profiling of a mediapipe graph: 1. The profiling library must be linked to the framework. 2. Tracing and profiling must be enabled in the graph configuration. -The profiling library is linked to the framework by default. If needed, -the profiling library can be omitted from the framework using the bazel -command line option: `--define MEDIAPIPE_PROFILING=0`. +The profiling library is linked to the framework by default for Desktop. 
+If needed, it can be omitted from the framework using the bazel command line +option: `--define MEDIAPIPE_PROFILING=0`. For other platforms, you can use the +bazel command line option `--define MEDIAPIPE_PROFILING=1` to link it. To enable tracing and profiling, the `CalculatorGraphConfig` (in [calculator.proto](https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator.proto)) @@ -38,6 +39,7 @@ is a simple setup that turns on tracing and keeps 100 seconds of timing events: ``` profiler_config { trace_enabled: true + enable_profiler: true trace_log_interval_count: 200 } ``` @@ -147,6 +149,7 @@ we record ten intervals of half a second each. This can be overridden by adding ```bash profiler_config { trace_enabled: true + enable_profiler: true trace_log_path: "/sdcard/profiles/" } ``` diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 9762f41ee..57d8da709 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -225,6 +225,15 @@ cc_library( name = "concatenate_vector_calculator", srcs = ["concatenate_vector_calculator.cc"], hdrs = ["concatenate_vector_calculator.h"], + copts = select({ + # Needed for "//mediapipe/framework/formats:tensor" compatibility on Apple + # platforms for Metal pulled in via the tensor.h header. + "//mediapipe:apple": [ + "-x objective-c++", + "-fobjc-arc", # enable reference-counting + ], + "//conditions:default": [], + }), visibility = ["//visibility:public"], deps = [ ":concatenate_vector_calculator_cc_proto", diff --git a/mediapipe/calculators/core/gate_calculator.cc b/mediapipe/calculators/core/gate_calculator.cc index 8d05c9268..9c142c70a 100644 --- a/mediapipe/calculators/core/gate_calculator.cc +++ b/mediapipe/calculators/core/gate_calculator.cc @@ -59,30 +59,16 @@ std::string ToString(GateState state) { // ALLOW or DISALLOW can also be specified as an input side packet. The rules // for evaluation remain the same as above. // -// If side_input_has_precedence isn't set in the calculator option, // ALLOW/DISALLOW inputs must be specified either using input stream or -// via input side packet but not both. Otherwise, both input stream and input -// side packet can be specified and the calculator will take one signal over the -// other based on the value of the side_input_has_precedence field. +// via input side packet but not both. // // Intended to be used with the default input stream handler, which synchronizes // all data input streams with the ALLOW/DISALLOW control input stream. 
// -// Example configs: +// Example config: // node { // calculator: "GateCalculator" -// input_stream: "input_stream0" -// input_stream: "input_stream1" -// input_stream: "input_streamN" // input_side_packet: "ALLOW:allow" or "DISALLOW:disallow" -// output_stream: "STATE_CHANGE:state_change" -// output_stream: "output_stream0" -// output_stream: "output_stream1" -// output_stream: "output_streamN" -// } -// -// node { -// calculator: "GateCalculator" // input_stream: "input_stream0" // input_stream: "input_stream1" // input_stream: "input_streamN" @@ -92,25 +78,6 @@ std::string ToString(GateState state) { // output_stream: "output_stream1" // output_stream: "output_streamN" // } -// -// With side_input_has_precedence: -// node { -// calculator: "GateCalculator" -// input_stream: "input_stream0" -// input_stream: "input_stream1" -// input_stream: "input_streamN" -// input_stream: "ALLOW:allow_stream" or "DISALLOW:disallow_stream" -// input_side_packet: "ALLOW:allow_packet" or "DISALLOW:disallow_packet" -// output_stream: "STATE_CHANGE:state_change" -// output_stream: "output_stream0" -// output_stream: "output_stream1" -// output_stream: "output_streamN" -// options: { -// [mediapipe.GateCalculatorOptions.ext] { -// side_input_has_precedence: true or false -// } -// } -// } class GateCalculator : public CalculatorBase { public: GateCalculator() {} @@ -121,15 +88,9 @@ class GateCalculator : public CalculatorBase { cc->InputSidePackets().HasTag("DISALLOW"); bool input_via_stream = cc->Inputs().HasTag("ALLOW") || cc->Inputs().HasTag("DISALLOW"); - const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>(); - if (options.has_side_input_has_precedence()) { - RET_CHECK(input_via_side_packet && input_via_stream); - } else { - // Only one of input_side_packet or input_stream may specify - // ALLOW/DISALLOW input when side_input_has_precedence is not set - // in the options. - RET_CHECK(input_via_side_packet ^ input_via_stream); - } + // Only one of input_side_packet or input_stream may specify ALLOW/DISALLOW + // input. 
+  RET_CHECK(input_via_side_packet ^ input_via_stream);
 
   if (input_via_side_packet) {
     RET_CHECK(cc->InputSidePackets().HasTag("ALLOW") ^
@@ -140,8 +101,7 @@ class GateCalculator : public CalculatorBase {
     } else {
       cc->InputSidePackets().Tag("DISALLOW").Set<bool>();
     }
-  }
-  if (input_via_stream) {
+  } else {
     RET_CHECK(cc->Inputs().HasTag("ALLOW") ^ cc->Inputs().HasTag("DISALLOW"));
     if (cc->Inputs().HasTag("ALLOW")) {
@@ -174,13 +134,19 @@ class GateCalculator : public CalculatorBase {
   }
 
   ::mediapipe::Status Open(CalculatorContext* cc) final {
-    bool use_side_packet_for_allow_disallow = false;
+    const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
+    use_calculator_option_for_allow_disallow_ =
+        options.has_allowance_override();
+    if (use_calculator_option_for_allow_disallow_) {
+      allow_by_calculator_option_ = options.allowance_override();
+    }
+
     if (cc->InputSidePackets().HasTag("ALLOW")) {
-      use_side_packet_for_allow_disallow = true;
+      use_side_packet_for_allow_disallow_ = true;
       allow_by_side_packet_decision_ =
           cc->InputSidePackets().Tag("ALLOW").Get<bool>();
     } else if (cc->InputSidePackets().HasTag("DISALLOW")) {
-      use_side_packet_for_allow_disallow = true;
+      use_side_packet_for_allow_disallow_ = true;
       allow_by_side_packet_decision_ =
           !cc->InputSidePackets().Tag("DISALLOW").Get<bool>();
     }
@@ -190,33 +156,28 @@ class GateCalculator : public CalculatorBase {
     last_gate_state_ = GATE_UNINITIALIZED;
     RET_CHECK_OK(CopyInputHeadersToOutputs(cc->Inputs(), &cc->Outputs()));
 
-    const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
     empty_packets_as_allow_ = options.empty_packets_as_allow();
 
-    if (!options.has_side_input_has_precedence()) {
-      side_input_has_precedence_ = use_side_packet_for_allow_disallow;
-    } else {
-      side_input_has_precedence_ = options.side_input_has_precedence();
-    }
-
     return ::mediapipe::OkStatus();
   }
 
   ::mediapipe::Status Process(CalculatorContext* cc) final {
-    bool allow_by_stream = empty_packets_as_allow_;
-    if (cc->Inputs().HasTag("ALLOW") && !cc->Inputs().Tag("ALLOW").IsEmpty()) {
-      allow_by_stream = cc->Inputs().Tag("ALLOW").Get<bool>();
-    }
-    if (cc->Inputs().HasTag("DISALLOW") &&
-        !cc->Inputs().Tag("DISALLOW").IsEmpty()) {
-      allow_by_stream = !cc->Inputs().Tag("DISALLOW").Get<bool>();
-    }
-    const bool allow_by_side_packet =
-        allow_by_side_packet_decision_ || empty_packets_as_allow_;
-    bool allow = false;
-    if (side_input_has_precedence_) {
-      allow = allow_by_side_packet;
-    } else {
-      allow = allow_by_stream;
+    // The allow/disallow signal in the calculator option has the highest
+    // priority. If it's not set, use the stream/side packet signal.
+    bool allow = allow_by_calculator_option_;
+    if (!use_calculator_option_for_allow_disallow_) {
+      allow = empty_packets_as_allow_;
+      if (use_side_packet_for_allow_disallow_) {
+        allow = allow_by_side_packet_decision_;
+      } else {
+        if (cc->Inputs().HasTag("ALLOW") &&
+            !cc->Inputs().Tag("ALLOW").IsEmpty()) {
+          allow = cc->Inputs().Tag("ALLOW").Get<bool>();
+        }
+        if (cc->Inputs().HasTag("DISALLOW") &&
+            !cc->Inputs().Tag("DISALLOW").IsEmpty()) {
+          allow = !cc->Inputs().Tag("DISALLOW").Get<bool>();
+        }
+      }
     }
     const GateState new_gate_state = allow ? GATE_ALLOW : GATE_DISALLOW;
@@ -251,9 +212,11 @@ class GateCalculator : public CalculatorBase {
 private:
   GateState last_gate_state_ = GATE_UNINITIALIZED;
   int num_data_streams_;
+  bool empty_packets_as_allow_ = false;
+  bool use_side_packet_for_allow_disallow_ = false;
   bool allow_by_side_packet_decision_ = false;
-  bool empty_packets_as_allow_;
-  bool side_input_has_precedence_;
+  bool use_calculator_option_for_allow_disallow_ = false;
+  bool allow_by_calculator_option_ = false;
 };
 
 REGISTER_CALCULATOR(GateCalculator);
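Taken together, the new precedence is: `allowance_override` (when present) beats both the ALLOW/DISALLOW stream and the side packet. As a minimal sketch of driving the gate purely from the new option — the stream names here are hypothetical and not part of this change:

```cpp
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Hypothetical node: allowance_override takes priority over the ALLOW
// stream, so every input packet passes through regardless of the signal.
mediapipe::CalculatorGraphConfig::Node MakeAlwaysOpenGateNode() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(
      R"pb(
        calculator: "GateCalculator"
        input_stream: "frames"
        input_stream: "ALLOW:allow_signal"
        output_stream: "gated_frames"
        options {
          [mediapipe.GateCalculatorOptions.ext] { allowance_override: true }
        }
      )pb");
}
```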
diff --git a/mediapipe/calculators/core/gate_calculator.proto b/mediapipe/calculators/core/gate_calculator.proto
index 777b6bc85..63850774a 100644
--- a/mediapipe/calculators/core/gate_calculator.proto
+++ b/mediapipe/calculators/core/gate_calculator.proto
@@ -28,11 +28,8 @@ message GateCalculatorOptions {
   // this option to true inverts that, allowing the data packets to go through.
   optional bool empty_packets_as_allow = 1;
 
-  // Input side packet and input stream are allowed to coexist only if this
-  // field is set. When it's set to true, the input side packet has higher
-  // precedence and the input stream signal will be ignored. When it's set to
-  // false, the input stream signal always overrides the input side packet
-  // signal.
-  //
-  optional bool side_input_has_precedence = 2;
+  // If set, the calculator will always allow (if set to true) or disallow (if
+  // set to false) the input streams to pass through, and ignore the ALLOW or
+  // DISALLOW input stream or input side packets.
+  optional bool allowance_override = 2;
 }
diff --git a/mediapipe/calculators/core/gate_calculator_test.cc b/mediapipe/calculators/core/gate_calculator_test.cc
index 07d20350c..d6d2c5530 100644
--- a/mediapipe/calculators/core/gate_calculator_test.cc
+++ b/mediapipe/calculators/core/gate_calculator_test.cc
@@ -330,45 +330,48 @@ TEST_F(GateCalculatorTest, AllowInitialNoStateTransition) {
   ASSERT_EQ(0, output.size());
 }
 
-TEST_F(GateCalculatorTest, TestOverrideDecisionBySidePacketSignal) {
+TEST_F(GateCalculatorTest,
+       TestCalculatorOptionDecisionOverrideOverStreamSignal) {
   SetRunner(R"(
         calculator: "GateCalculator"
         input_stream: "test_input"
         input_stream: "ALLOW:gating_stream"
-        input_side_packet: "ALLOW:gating_packet"
         output_stream: "test_output"
         options: {
          [mediapipe.GateCalculatorOptions.ext] {
-            side_input_has_precedence: true
+            allowance_override: false
           }
         }
   )");
 
   constexpr int64 kTimestampValue0 = 42;
-  runner()->MutableSidePackets()->Tag("ALLOW") = Adopt(new bool(false));
+  // The CalculatorOptions say disallow while the stream says allow. The
+  // calculator should follow the options' decision and output nothing.
   RunTimeStep(kTimestampValue0, "ALLOW", true);
 
   const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
   ASSERT_EQ(0, output.size());
 }
 
-TEST_F(GateCalculatorTest, TestOverrideDecisionByStreamSignal) {
+TEST_F(GateCalculatorTest,
+       TestCalculatorOptionDecisionOverrideOverSidePacketSignal) {
   SetRunner(R"(
         calculator: "GateCalculator"
         input_stream: "test_input"
-        input_stream: "ALLOW:gating_stream"
         input_side_packet: "ALLOW:gating_packet"
         output_stream: "test_output"
         options: {
          [mediapipe.GateCalculatorOptions.ext] {
-            side_input_has_precedence: false
+            allowance_override: true
           }
         }
   )");
 
   constexpr int64 kTimestampValue0 = 42;
+  // The CalculatorOptions say allow while the side packet says disallow. The
+  // calculator should follow the options' decision and output a packet.
runner()->MutableSidePackets()->Tag("ALLOW") = Adopt(new bool(false)); - RunTimeStep(kTimestampValue0, "ALLOW", true); + RunTimeStep(kTimestampValue0, true); const std::vector& output = runner()->Outputs().Get("", 0).packets; ASSERT_EQ(1, output.size()); diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index 6ad3be548..7328ec3d9 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -12,148 +12,78 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library") +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") licenses(["notice"]) package(default_visibility = ["//visibility:private"]) -exports_files(["LICENSE"]) - -proto_library( +mediapipe_proto_library( name = "opencv_image_encoder_calculator_proto", srcs = ["opencv_image_encoder_calculator.proto"], - visibility = ["//visibility:public"], - deps = ["//mediapipe/framework:calculator_proto"], + visibility = [ + "//visibility:public", + ], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], ) -proto_library( +mediapipe_proto_library( name = "scale_image_calculator_proto", srcs = ["scale_image_calculator.proto"], visibility = ["//visibility:public"], deps = [ + "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", "//mediapipe/framework/formats:image_format_proto", ], ) -proto_library( +mediapipe_proto_library( name = "set_alpha_calculator_proto", srcs = ["set_alpha_calculator.proto"], visibility = ["//visibility:public"], deps = [ + "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", ], ) -proto_library( +mediapipe_proto_library( name = "image_cropping_calculator_proto", srcs = ["image_cropping_calculator.proto"], visibility = ["//visibility:public"], deps = [ + "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", ], ) -proto_library( +mediapipe_proto_library( name = "bilateral_filter_calculator_proto", srcs = ["bilateral_filter_calculator.proto"], visibility = [ "//visibility:public", ], deps = [ + "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", ], ) -proto_library( +mediapipe_proto_library( name = "recolor_calculator_proto", srcs = ["recolor_calculator.proto"], visibility = ["//visibility:public"], deps = [ + "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", "//mediapipe/util:color_proto", ], ) -mediapipe_cc_proto_library( - name = "opencv_image_encoder_calculator_cc_proto", - srcs = ["opencv_image_encoder_calculator.proto"], - cc_deps = ["//mediapipe/framework:calculator_cc_proto"], - visibility = [ - "//visibility:public", - ], - deps = [":opencv_image_encoder_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "opencv_encoded_image_to_image_frame_calculator_cc_proto", - srcs = ["opencv_encoded_image_to_image_frame_calculator.proto"], - cc_deps = ["//mediapipe/framework:calculator_cc_proto"], - visibility = ["//visibility:public"], - deps = [":opencv_encoded_image_to_image_frame_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "mask_overlay_calculator_cc_proto", - srcs = ["mask_overlay_calculator.proto"], - cc_deps = ["//mediapipe/framework:calculator_cc_proto"], - visibility = ["//visibility:public"], - deps 
= [":mask_overlay_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "scale_image_calculator_cc_proto", - srcs = ["scale_image_calculator.proto"], - cc_deps = [ - "//mediapipe/framework:calculator_cc_proto", - "//mediapipe/framework/formats:image_format_cc_proto", - ], - visibility = ["//visibility:public"], - deps = [":scale_image_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "set_alpha_calculator_cc_proto", - srcs = ["set_alpha_calculator.proto"], - cc_deps = [ - "//mediapipe/framework:calculator_cc_proto", - ], - visibility = ["//visibility:public"], - deps = [":set_alpha_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "image_cropping_calculator_cc_proto", - srcs = ["image_cropping_calculator.proto"], - cc_deps = [ - "//mediapipe/framework:calculator_cc_proto", - ], - visibility = ["//visibility:public"], - deps = [":image_cropping_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "bilateral_filter_calculator_cc_proto", - srcs = ["bilateral_filter_calculator.proto"], - cc_deps = ["//mediapipe/framework:calculator_cc_proto"], - visibility = [ - "//visibility:public", - ], - deps = [":bilateral_filter_calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "recolor_calculator_cc_proto", - srcs = ["recolor_calculator.proto"], - cc_deps = [ - "//mediapipe/framework:calculator_cc_proto", - "//mediapipe/util:color_cc_proto", - ], - visibility = ["//visibility:public"], - deps = [":recolor_calculator_proto"], -) - cc_library( name = "color_convert_calculator", srcs = ["color_convert_calculator.cc"], @@ -550,32 +480,33 @@ cc_test( ], ) -proto_library( +mediapipe_proto_library( name = "mask_overlay_calculator_proto", srcs = ["mask_overlay_calculator.proto"], visibility = ["//visibility:public"], - deps = ["//mediapipe/framework:calculator_proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], ) -proto_library( +mediapipe_proto_library( name = "opencv_encoded_image_to_image_frame_calculator_proto", srcs = ["opencv_encoded_image_to_image_frame_calculator.proto"], visibility = ["//visibility:public"], - deps = ["//mediapipe/framework:calculator_proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], ) -proto_library( +mediapipe_proto_library( name = "feature_detector_calculator_proto", srcs = ["feature_detector_calculator.proto"], - deps = ["//mediapipe/framework:calculator_proto"], -) - -mediapipe_cc_proto_library( - name = "feature_detector_calculator_cc_proto", - srcs = ["feature_detector_calculator.proto"], - cc_deps = ["//mediapipe/framework:calculator_cc_proto"], - visibility = ["//visibility:public"], - deps = [":feature_detector_calculator_proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], ) cc_library( diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index 3daf3827f..fd07bbe34 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -311,7 +311,6 @@ cc_library( "@org_tensorflow//tensorflow/core:framework", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], }), alwayslink = 1, diff --git a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc index cf5635d3a..662d8fa16 100644 --- 
a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc
+++ b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc
@@ -184,6 +184,7 @@ class PackMediaSequenceCalculator : public CalculatorBase {
       features_present_[tag] = false;
     }
 
+    replace_keypoints_ = false;
     if (cc->Options<PackMediaSequenceCalculatorOptions>()
             .replace_data_instead_of_append()) {
       for (const auto& tag : cc->Inputs().GetTags()) {
@@ -212,6 +213,15 @@ class PackMediaSequenceCalculator : public CalculatorBase {
         }
         mpms::ClearBBox(key, sequence_.get());
         mpms::ClearBBoxTimestamp(key, sequence_.get());
+        mpms::ClearBBoxIsAnnotated(key, sequence_.get());
+        mpms::ClearBBoxNumRegions(key, sequence_.get());
+        mpms::ClearBBoxLabelString(key, sequence_.get());
+        mpms::ClearBBoxLabelIndex(key, sequence_.get());
+        mpms::ClearBBoxClassString(key, sequence_.get());
+        mpms::ClearBBoxClassIndex(key, sequence_.get());
+        mpms::ClearBBoxTrackString(key, sequence_.get());
+        mpms::ClearBBoxTrackIndex(key, sequence_.get());
+        mpms::ClearUnmodifiedBBoxTimestamp(key, sequence_.get());
       }
       if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
         std::string key = tag.substr(sizeof(kFloatFeaturePrefixTag) /
@@ -223,8 +233,7 @@ class PackMediaSequenceCalculator : public CalculatorBase {
       if (absl::StartsWith(tag, kKeypointsTag)) {
         std::string key =
             tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
-        mpms::ClearBBoxPoint(key, sequence_.get());
-        mpms::ClearBBoxTimestamp(key, sequence_.get());
+        replace_keypoints_ = true;
       }
     }
     if (cc->Inputs().HasTag(kForwardFlowEncodedTag)) {
@@ -342,11 +351,25 @@ class PackMediaSequenceCalculator : public CalculatorBase {
              .Get<absl::flat_hash_map<std::string,
                                       std::vector<std::pair<float, float>>>>();
       for (const auto& pair : keypoints) {
-        mpms::AddBBoxTimestamp(mpms::merge_prefix(key, pair.first),
-                               cc->InputTimestamp().Value(), sequence_.get());
-        mpms::AddBBoxPoint(mpms::merge_prefix(key, pair.first), pair.second,
-                           sequence_.get());
+        std::string prefix = mpms::merge_prefix(key, pair.first);
+        if (replace_keypoints_) {
+          mpms::ClearBBoxPoint(prefix, sequence_.get());
+          mpms::ClearBBoxTimestamp(prefix, sequence_.get());
+          mpms::ClearBBoxIsAnnotated(prefix, sequence_.get());
+          mpms::ClearBBoxNumRegions(prefix, sequence_.get());
+          mpms::ClearBBoxLabelString(prefix, sequence_.get());
+          mpms::ClearBBoxLabelIndex(prefix, sequence_.get());
+          mpms::ClearBBoxClassString(prefix, sequence_.get());
+          mpms::ClearBBoxClassIndex(prefix, sequence_.get());
+          mpms::ClearBBoxTrackString(prefix, sequence_.get());
+          mpms::ClearBBoxTrackIndex(prefix, sequence_.get());
+          mpms::ClearUnmodifiedBBoxTimestamp(prefix, sequence_.get());
+        }
+        mpms::AddBBoxTimestamp(prefix, cc->InputTimestamp().Value(),
+                               sequence_.get());
+        mpms::AddBBoxPoint(prefix, pair.second, sequence_.get());
       }
+      replace_keypoints_ = false;
     }
     if (absl::StartsWith(tag, kFloatContextFeaturePrefixTag) &&
         !cc->Inputs().Tag(tag).IsEmpty()) {
@@ -475,6 +498,7 @@ class PackMediaSequenceCalculator : public CalculatorBase {
 
   std::unique_ptr<tf::SequenceExample> sequence_;
   std::map<std::string, bool> features_present_;
+  bool replace_keypoints_;
 };
 
 REGISTER_CALCULATOR(PackMediaSequenceCalculator);
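The net effect of the `replace_keypoints_` flag above: when `replace_data_instead_of_append` is set, the first incoming keypoint packet clears every previously stored value under its prefix before appending. A standalone sketch of that rule, using stand-in types rather than the real `mpms` API:

```cpp
#include <map>
#include <string>
#include <vector>

// Stand-in for a SequenceExample: prefix -> stored keypoint values.
struct FakeSequence {
  std::map<std::string, std::vector<float>> features;
};

void PackKeypoints(FakeSequence& seq,
                   const std::map<std::string, std::vector<float>>& keypoints,
                   bool& replace_keypoints) {
  for (const auto& pair : keypoints) {
    if (replace_keypoints) {
      seq.features[pair.first].clear();  // mirrors the mpms::Clear*() calls
    }
    std::vector<float>& list = seq.features[pair.first];
    list.insert(list.end(), pair.second.begin(), pair.second.end());
  }
  replace_keypoints = false;  // subsequent packets append, as in Process()
}
```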
diff --git a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc
index 15d92fe92..9d383cbd4 100644
--- a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc
+++ b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc
@@ -839,5 +839,59 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
   ASSERT_EQ(mpms::GetBBoxTimestampAt("PREFIX", output_sequence, 4), 50);
 }
 
+TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
+  SetUpCalculator({"IMAGE:images", "BBOX:bbox"}, {}, false, true);
+  auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
+  cv::Mat image(2, 3, CV_8UC3, cv::Scalar(0, 0, 255));
+  std::vector<uchar> bytes;
+  ASSERT_TRUE(cv::imencode(".jpg", image, bytes, {80}));
+  std::string test_image_string(bytes.begin(), bytes.end());
+  OpenCvImageEncoderCalculatorResults encoded_image;
+  encoded_image.set_encoded_image(test_image_string);
+  int height = 2;
+  int width = 2;
+  encoded_image.set_width(width);
+  encoded_image.set_height(height);
+
+  int num_images = 5;  // Timestamps: 0, 1, 2, 3, 4
+  for (int i = 0; i < num_images; ++i) {
+    auto image_ptr =
+        ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
+    runner_->MutableInputs()->Tag("IMAGE").packets.push_back(
+        Adopt(image_ptr.release()).At(Timestamp(i)));
+  }
+
+  for (int i = 0; i < num_images; ++i) {
+    auto detections = ::absl::make_unique<::std::vector<Detection>>();
+    Detection detection;
+    detection = Detection();
+    detection.add_label("relative bbox");
+    detection.add_label_id(1);
+    detection.add_score(0.75);
+    Location::CreateRelativeBBoxLocation(0, 0.5, 0.5, 0.5)
+        .ConvertToProto(detection.mutable_location_data());
+    detections->push_back(detection);
+    runner_->MutableInputs()->Tag("BBOX").packets.push_back(
+        Adopt(detections.release()).At(Timestamp(i)));
+  }
+
+  for (int i = 0; i < 10; ++i) {
+    mpms::AddBBoxTimestamp(-1, input_sequence.get());
+    mpms::AddBBoxIsAnnotated(-1, input_sequence.get());
+    mpms::AddBBoxNumRegions(-1, input_sequence.get());
+    mpms::AddBBoxLabelString({"anything"}, input_sequence.get());
+    mpms::AddBBoxLabelIndex({-1}, input_sequence.get());
+    mpms::AddBBoxClassString({"anything"}, input_sequence.get());
+    mpms::AddBBoxClassIndex({-1}, input_sequence.get());
+    mpms::AddBBoxTrackString({"anything"}, input_sequence.get());
+    mpms::AddBBoxTrackIndex({-1}, input_sequence.get());
+  }
+
+  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+      Adopt(input_sequence.release());
+  // If all the previous values aren't cleared, this assert will fail.
+  MP_ASSERT_OK(runner_->Run());
+}
+
 }  // namespace
 }  // namespace mediapipe
diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc
index 3dae64f9c..29bc4a59f 100644
--- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc
+++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc
@@ -281,6 +281,9 @@ class TfLiteInferenceCalculator : public CalculatorBase {
   bool use_quantized_tensors_ = false;
 
   bool use_advanced_gpu_api_ = false;
+  bool allow_precision_loss_ = false;
+  ::mediapipe::TfLiteInferenceCalculatorOptions_Delegate_Gpu_API
+      tflite_gpu_runner_api_;
 
   bool use_kernel_caching_ = false;
   std::string cached_kernel_filename_;
@@ -365,6 +368,8 @@ bool ShouldUseGpu(CC* cc) {
       options.has_delegate() && options.delegate().has_gpu() &&
       options.delegate().gpu().use_advanced_gpu_api();
+  allow_precision_loss_ = options.delegate().gpu().allow_precision_loss();
+  tflite_gpu_runner_api_ = options.delegate().gpu().api();
   use_kernel_caching_ =
       use_advanced_gpu_api_ && options.delegate().gpu().use_kernel_caching();
 
@@ -703,11 +708,23 @@ bool ShouldUseGpu(CC* cc) {
   // Create runner
   tflite::gpu::InferenceOptions options;
-  options.priority1 = tflite::gpu::InferencePriority::MIN_LATENCY;
+  options.priority1 = allow_precision_loss_
+                          ? tflite::gpu::InferencePriority::MIN_LATENCY
+                          : tflite::gpu::InferencePriority::MAX_PRECISION;
   options.priority2 = tflite::gpu::InferencePriority::AUTO;
   options.priority3 = tflite::gpu::InferencePriority::AUTO;
   options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
   tflite_gpu_runner_ =
       std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
+  if (tflite_gpu_runner_api_ ==
+      ::mediapipe::TfLiteInferenceCalculatorOptions_Delegate_Gpu_API::
+          TfLiteInferenceCalculatorOptions_Delegate_Gpu_API_OPENGL) {
+    tflite_gpu_runner_->ForceOpenGL();
+  }
+  if (tflite_gpu_runner_api_ ==
+      ::mediapipe::TfLiteInferenceCalculatorOptions_Delegate_Gpu_API::
+          TfLiteInferenceCalculatorOptions_Delegate_Gpu_API_OPENCL) {
+    tflite_gpu_runner_->ForceOpenCL();
+  }
 
   MP_RETURN_IF_ERROR(
       tflite_gpu_runner_->InitializeWithModel(model, op_resolver));
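A hedged sketch of how a graph might exercise the two new fields wired up above (`api` and `allow_precision_loss`); the stream names and model path here are made up for illustration:

```cpp
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Hypothetical node that opts into the API2 path, forces OpenCL, and keeps
// full 32-bit float precision.
mediapipe::CalculatorGraphConfig::Node MakeOpenClInferenceNode() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(
      R"pb(
        calculator: "TfLiteInferenceCalculator"
        input_stream: "TENSORS:input_tensors"
        output_stream: "TENSORS:output_tensors"
        options {
          [mediapipe.TfLiteInferenceCalculatorOptions.ext] {
            model_path: "model.tflite"
            delegate {
              gpu {
                use_advanced_gpu_api: true
                api: OPENCL
                allow_precision_loss: false
              }
            }
          }
        }
      )pb");
}
```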
diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.proto b/mediapipe/calculators/tflite/tflite_inference_calculator.proto
index bd83fea45..5b42e9512 100644
--- a/mediapipe/calculators/tflite/tflite_inference_calculator.proto
+++ b/mediapipe/calculators/tflite/tflite_inference_calculator.proto
@@ -49,6 +49,20 @@ message TfLiteInferenceCalculatorOptions {
     //   delegate: { gpu { use_advanced_gpu_api: true } }
     optional bool use_advanced_gpu_api = 1 [default = false];
 
+    // This option is valid for TFLite GPU delegate API2 only. Choose any of
+    // the available APIs to force inference to run with it.
+    enum API {
+      ANY = 0;
+      OPENGL = 1;
+      OPENCL = 2;
+    }
+    optional API api = 4 [default = ANY];
+
+    // This option is valid for TFLite GPU delegate API2 only. Set to true to
+    // use 16-bit float precision. If max precision is needed, set to false
+    // for 32-bit float calculations only.
+    optional bool allow_precision_loss = 3 [default = true];
+
     // Load pre-compiled serialized binary cache to accelerate init process.
     // Only available for OpenCL delegate on Android.
optional bool use_kernel_caching = 2 [default = false]; diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index 8ca63d7ee..b515c5729 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -18,8 +18,6 @@ licenses(["notice"]) package(default_visibility = ["//visibility:public"]) -exports_files(["LICENSE"]) - cc_library( name = "alignment_points_to_rects_calculator", srcs = ["alignment_points_to_rects_calculator.cc"], @@ -250,9 +248,11 @@ cc_library( "@com_google_absl//absl/strings", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:video_stream_header", "//mediapipe/framework/port:logging", "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", "//mediapipe/framework/port:status", "//mediapipe/framework/port:vector", "//mediapipe/util:annotation_renderer", @@ -276,6 +276,7 @@ cc_library( deps = [ ":detection_label_id_to_text_calculator_cc_proto", "//mediapipe/framework/formats:detection_cc_proto", + "@com_google_absl//absl/container:node_hash_map", "//mediapipe/framework/port:status", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:packet", diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.cc b/mediapipe/calculators/util/annotation_overlay_calculator.cc index 4936dbfb5..5da424a61 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.cc +++ b/mediapipe/calculators/util/annotation_overlay_calculator.cc @@ -20,9 +20,11 @@ #include "mediapipe/framework/calculator_options.pb.h" #include "mediapipe/framework/formats/image_format.pb.h" #include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" #include "mediapipe/framework/formats/video_stream_header.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/vector.h" #include "mediapipe/util/annotation_renderer.h" @@ -40,13 +42,9 @@ namespace mediapipe { namespace { -constexpr char kInputFrameTag[] = "IMAGE"; -constexpr char kOutputFrameTag[] = "IMAGE"; - -constexpr char kInputVectorTag[] = "VECTOR"; - -constexpr char kInputFrameTagGpu[] = "IMAGE_GPU"; -constexpr char kOutputFrameTagGpu[] = "IMAGE_GPU"; +constexpr char kVectorTag[] = "VECTOR"; +constexpr char kGpuBufferTag[] = "IMAGE_GPU"; +constexpr char kImageFrameTag[] = "IMAGE"; enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES }; @@ -57,12 +55,15 @@ size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT // merges the annotation overlay with the image frame. As a result, drawing in // this color is not supported and it should be set to something unlikely used. constexpr uchar kAnnotationBackgroundColor = 2; // Grayscale value. + +// Future Image type. +inline bool HasImageTag(mediapipe::CalculatorContext* cc) { return false; } } // namespace // A calculator for rendering data on images. // // Inputs: -// 1. IMAGE or IMAGE_GPU (optional): An ImageFrame (or GpuBuffer) +// 1. IMAGE or IMAGE_GPU (optional): An ImageFrame (or GpuBuffer), // containing the input image. // If output is CPU, and input isn't provided, the renderer creates a // blank canvas with the width, height and color provided in the options. 
@@ -74,7 +75,8 @@ constexpr uchar kAnnotationBackgroundColor = 2;  // Grayscale value.
 //    input vector items. These input streams are tagged with "VECTOR".
 //
 // Output:
-//  1. IMAGE or IMAGE_GPU: A rendered ImageFrame (or GpuBuffer).
+//  1. IMAGE or IMAGE_GPU: A rendered ImageFrame (or GpuBuffer).
+//     Note: Output types should match their corresponding input stream type.
 //
 // For CPU input frames, only SRGBA, SRGB and GRAY8 format are supported. The
 // output format is the same as input except for GRAY8 where the output is in
@@ -133,14 +135,17 @@ class AnnotationOverlayCalculator : public CalculatorBase {
   ::mediapipe::Status CreateRenderTargetCpu(CalculatorContext* cc,
                                             std::unique_ptr<cv::Mat>& image_mat,
                                             ImageFormat::Format* target_format);
+  template <typename Type, const char* Tag>
   ::mediapipe::Status CreateRenderTargetGpu(
       CalculatorContext* cc, std::unique_ptr<cv::Mat>& image_mat);
+  template <typename Type, const char* Tag>
   ::mediapipe::Status RenderToGpu(CalculatorContext* cc, uchar* overlay_image);
   ::mediapipe::Status RenderToCpu(CalculatorContext* cc,
                                   const ImageFormat::Format& target_format,
                                   uchar* data_image);
 
   ::mediapipe::Status GlRender(CalculatorContext* cc);
+  template <typename Type, const char* Tag>
   ::mediapipe::Status GlSetup(CalculatorContext* cc);
 
   // Options for the calculator.
@@ -172,24 +177,26 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
 
   bool use_gpu = false;
 
-  if (cc->Inputs().HasTag(kInputFrameTag) &&
-      cc->Inputs().HasTag(kInputFrameTagGpu)) {
+  if (cc->Inputs().HasTag(kImageFrameTag) &&
+      cc->Inputs().HasTag(kGpuBufferTag)) {
     return ::mediapipe::InternalError("Cannot have multiple input images.");
   }
-  if (cc->Inputs().HasTag(kInputFrameTagGpu) !=
-      cc->Outputs().HasTag(kOutputFrameTagGpu)) {
+  if (cc->Inputs().HasTag(kGpuBufferTag) !=
+      cc->Outputs().HasTag(kGpuBufferTag)) {
     return ::mediapipe::InternalError("GPU output must have GPU input.");
   }
 
-  // Input image to render onto copy of.
+  // Input image to render onto copy of. Should be same type as output.
 #if !defined(MEDIAPIPE_DISABLE_GPU)
-  if (cc->Inputs().HasTag(kInputFrameTagGpu)) {
-    cc->Inputs().Tag(kInputFrameTagGpu).Set<mediapipe::GpuBuffer>();
-    use_gpu |= true;
+  if (cc->Inputs().HasTag(kGpuBufferTag)) {
+    cc->Inputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
+    CHECK(cc->Outputs().HasTag(kGpuBufferTag));
+    use_gpu = true;
   }
 #endif  // !MEDIAPIPE_DISABLE_GPU
-  if (cc->Inputs().HasTag(kInputFrameTag)) {
-    cc->Inputs().Tag(kInputFrameTag).Set<ImageFrame>();
+  if (cc->Inputs().HasTag(kImageFrameTag)) {
+    cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+    CHECK(cc->Outputs().HasTag(kImageFrameTag));
   }
 
   // Data streams to render.
@@ -197,7 +204,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
        ++id) {
     auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
     std::string tag = tag_and_index.first;
-    if (tag == kInputVectorTag) {
+    if (tag == kVectorTag) {
       cc->Inputs().Get(id).Set<std::vector<RenderData>>();
     } else if (tag.empty()) {
       // Empty tag defaults to accepting a single object of RenderData type.
@@ -205,15 +212,14 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
     }
   }
 
-  // Rendered image.
+  // Rendered image. Should be same type as input.
 #if !defined(MEDIAPIPE_DISABLE_GPU)
-  if (cc->Outputs().HasTag(kOutputFrameTagGpu)) {
-    cc->Outputs().Tag(kOutputFrameTagGpu).Set<mediapipe::GpuBuffer>();
-    use_gpu |= true;
+  if (cc->Outputs().HasTag(kGpuBufferTag)) {
+    cc->Outputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
   }
 #endif  // !MEDIAPIPE_DISABLE_GPU
-  if (cc->Outputs().HasTag(kOutputFrameTag)) {
-    cc->Outputs().Tag(kOutputFrameTag).Set<ImageFrame>();
+  if (cc->Outputs().HasTag(kImageFrameTag)) {
+    cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
   }
 
   if (use_gpu) {
@@ -229,20 +235,16 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
   cc->SetOffset(TimestampDiff(0));
 
   options_ = cc->Options<AnnotationOverlayCalculatorOptions>();
-  if (cc->Inputs().HasTag(kInputFrameTagGpu) &&
-      cc->Outputs().HasTag(kOutputFrameTagGpu)) {
+  if (cc->Inputs().HasTag(kGpuBufferTag) || HasImageTag(cc)) {
 #if !defined(MEDIAPIPE_DISABLE_GPU)
     use_gpu_ = true;
-#else
-    RET_CHECK_FAIL() << "GPU processing not enabled.";
 #endif  // !MEDIAPIPE_DISABLE_GPU
   }
 
-  if (cc->Inputs().HasTag(kInputFrameTagGpu) ||
-      cc->Inputs().HasTag(kInputFrameTag)) {
+  if (cc->Inputs().HasTag(kGpuBufferTag) ||
+      cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc)) {
     image_frame_available_ = true;
   } else {
-    image_frame_available_ = false;
     RET_CHECK(options_.has_canvas_width_px());
     RET_CHECK(options_.has_canvas_height_px());
   }
@@ -253,14 +255,12 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
   if (use_gpu_) renderer_->SetScaleFactor(options_.gpu_scale_factor());
 
   // Set the output header based on the input header (if present).
-  const char* input_tag = use_gpu_ ? kInputFrameTagGpu : kInputFrameTag;
-  const char* output_tag = use_gpu_ ? kOutputFrameTagGpu : kOutputFrameTag;
-  if (image_frame_available_ &&
-      !cc->Inputs().Tag(input_tag).Header().IsEmpty()) {
+  const char* tag = use_gpu_ ? kGpuBufferTag : kImageFrameTag;
+  if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty()) {
     const auto& input_header =
-        cc->Inputs().Tag(input_tag).Header().Get<VideoHeader>();
+        cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
     auto* output_video_header = new VideoHeader(input_header);
-    cc->Outputs().Tag(output_tag).SetHeader(Adopt(output_video_header));
+    cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
   }
 
   if (use_gpu_) {
@@ -282,15 +282,20 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
     if (!gpu_initialized_) {
       MP_RETURN_IF_ERROR(
           gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status {
-            MP_RETURN_IF_ERROR(GlSetup(cc));
-            return ::mediapipe::OkStatus();
+            return GlSetup<mediapipe::GpuBuffer, kGpuBufferTag>(cc);
          }));
       gpu_initialized_ = true;
     }
+    if (cc->Inputs().HasTag(kGpuBufferTag)) {
+      MP_RETURN_IF_ERROR(
+          (CreateRenderTargetGpu<mediapipe::GpuBuffer, kGpuBufferTag>(
+              cc, image_mat)));
+    }
 #endif  // !MEDIAPIPE_DISABLE_GPU
-    MP_RETURN_IF_ERROR(CreateRenderTargetGpu(cc, image_mat));
   } else {
-    MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+    if (cc->Inputs().HasTag(kImageFrameTag)) {
+      MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+    }
   }
 
   // Reset the renderer with the image_mat. No copy here.
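The `Type`/`Tag` template parameters introduced in this change rely on a namespace-scope `constexpr char` array binding to a `const char*` non-type template parameter (valid since C++11), so one implementation can serve both stream tags. A self-contained sketch of the pattern, using stand-in types rather than MediaPipe's:

```cpp
#include <iostream>

// Tag constants mirror the ones in this file; types below are stand-ins.
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";

template <typename Type, const char* Tag>
void Render(const Type& frame) {
  // One templated body handles both tags; the tag is known at compile time.
  std::cout << "rendering a frame via tag " << Tag << '\n';
}

int main() {
  Render<int, kImageFrameTag>(1);     // stands in for the CPU ImageFrame path
  Render<float, kGpuBufferTag>(2.f);  // stands in for the GPU GpuBuffer path
}
```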
@@ -301,7 +306,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
        ++id) {
     auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
     std::string tag = tag_and_index.first;
-    if (!tag.empty() && tag != kInputVectorTag) {
+    if (!tag.empty() && tag != kVectorTag) {
       continue;
     }
     if (cc->Inputs().Get(id).IsEmpty()) {
@@ -312,7 +317,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
       const RenderData& render_data = cc->Inputs().Get(id).Get<RenderData>();
       renderer_->RenderDataOnImage(render_data);
     } else {
-      RET_CHECK_EQ(kInputVectorTag, tag);
+      RET_CHECK_EQ(kVectorTag, tag);
       const std::vector<RenderData>& render_data_vec =
           cc->Inputs().Get(id).Get<std::vector<RenderData>>();
       for (const RenderData& render_data : render_data_vec) {
@@ -327,8 +332,8 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
     uchar* image_mat_ptr = image_mat->data;
     MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
         [this, cc, image_mat_ptr]() -> ::mediapipe::Status {
-          MP_RETURN_IF_ERROR(RenderToGpu(cc, image_mat_ptr));
-          return ::mediapipe::OkStatus();
+          return RenderToGpu<mediapipe::GpuBuffer, kGpuBufferTag>(
+              cc, image_mat_ptr);
         }));
 #endif  // !MEDIAPIPE_DISABLE_GPU
   } else {
@@ -369,19 +374,21 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
                                        ImageFrame::kDefaultAlignmentBoundary);
 #endif  // !MEDIAPIPE_DISABLE_GPU
 
-  cc->Outputs()
-      .Tag(kOutputFrameTag)
-      .Add(output_frame.release(), cc->InputTimestamp());
+  if (cc->Outputs().HasTag(kImageFrameTag)) {
+    cc->Outputs()
+        .Tag(kImageFrameTag)
+        .Add(output_frame.release(), cc->InputTimestamp());
+  }
 
   return ::mediapipe::OkStatus();
 }
 
+template <typename Type, const char* Tag>
 ::mediapipe::Status AnnotationOverlayCalculator::RenderToGpu(
     CalculatorContext* cc, uchar* overlay_image) {
 #if !defined(MEDIAPIPE_DISABLE_GPU)
   // Source and destination textures.
-  const auto& input_frame =
-      cc->Inputs().Tag(kInputFrameTagGpu).Get<mediapipe::GpuBuffer>();
+  const auto& input_frame = cc->Inputs().Tag(Tag).Get<Type>();
 
   auto input_texture = gpu_helper_.CreateSourceTexture(input_frame);
 
   auto output_texture = gpu_helper_.CreateDestinationTexture(
@@ -414,10 +421,8 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
   }
 
   // Send out blended image as GPU packet.
-  auto output_frame = output_texture.GetFrame<mediapipe::GpuBuffer>();
-  cc->Outputs()
-      .Tag(kOutputFrameTagGpu)
-      .Add(output_frame.release(), cc->InputTimestamp());
+  auto output_frame = output_texture.GetFrame<Type>();
+  cc->Outputs().Tag(Tag).Add(output_frame.release(), cc->InputTimestamp());
 
   // Cleanup
   input_texture.Release();
@@ -432,7 +437,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
                                    ImageFormat::Format* target_format) {
   if (image_frame_available_) {
     const auto& input_frame =
-        cc->Inputs().Tag(kInputFrameTag).Get<ImageFrame>();
+        cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
 
     int target_mat_type;
     switch (input_frame.Format()) {
@@ -455,21 +460,14 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
 
     image_mat = absl::make_unique<cv::Mat>(
         input_frame.Height(), input_frame.Width(), target_mat_type);
+
+    auto input_mat = formats::MatView(&input_frame);
     if (input_frame.Format() == ImageFormat::GRAY8) {
-      const int target_num_channels =
-          ImageFrame::NumberOfChannelsForFormat(*target_format);
-      for (int i = 0; i < input_frame.PixelDataSize(); i++) {
-        const auto& pix = input_frame.PixelData()[i];
-        for (int c = 0; c < target_num_channels; c++) {
-          image_mat->data[i * target_num_channels + c] = pix;
-        }
-      }
+      cv::Mat rgb_mat;
+      cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+      rgb_mat.copyTo(*image_mat);
     } else {
-      // Make of a copy since the input frame may be consumed by other nodes.
-      const int buffer_size =
-          input_frame.Height() * input_frame.Width() *
-          ImageFrame::NumberOfChannelsForFormat(*target_format);
-      input_frame.CopyToBuffer(image_mat->data, buffer_size);
+      input_mat.copyTo(*image_mat);
     }
   } else {
     image_mat = absl::make_unique<cv::Mat>(
@@ -482,13 +480,12 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
   return ::mediapipe::OkStatus();
 }
 
+template <typename Type, const char* Tag>
 ::mediapipe::Status AnnotationOverlayCalculator::CreateRenderTargetGpu(
     CalculatorContext* cc, std::unique_ptr<cv::Mat>& image_mat) {
 #if !defined(MEDIAPIPE_DISABLE_GPU)
   if (image_frame_available_) {
-    const auto& input_frame =
-        cc->Inputs().Tag(kInputFrameTagGpu).Get<mediapipe::GpuBuffer>();
-
+    const auto& input_frame = cc->Inputs().Tag(Tag).Get<Type>();
     const mediapipe::ImageFormat::Format format =
         mediapipe::ImageFormatForGpuBufferFormat(input_frame.format());
     if (format != mediapipe::ImageFormat::SRGBA &&
@@ -564,6 +561,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
   return ::mediapipe::OkStatus();
 }
 
+template <typename Type, const char* Tag>
 ::mediapipe::Status AnnotationOverlayCalculator::GlSetup(
     CalculatorContext* cc) {
 #if !defined(MEDIAPIPE_DISABLE_GPU)
@@ -639,8 +637,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator);
   const float alignment = ImageFrame::kGlDefaultAlignmentBoundary;
   const float scale_factor = options_.gpu_scale_factor();
   if (image_frame_available_) {
-    const auto& input_frame =
-        cc->Inputs().Tag(kInputFrameTagGpu).Get<mediapipe::GpuBuffer>();
+    const auto& input_frame = cc->Inputs().Tag(Tag).Get<Type>();
     width_ = RoundUp(input_frame.width(), alignment);
     height_ = RoundUp(input_frame.height(), alignment);
   } else {
diff --git a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc
index 7d9cd5740..5e5564731 100644
--- a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc
+++ b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "absl/container/node_hash_map.h" #include "mediapipe/calculators/util/detection_label_id_to_text_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/detection.pb.h" @@ -52,7 +53,7 @@ class DetectionLabelIdToTextCalculator : public CalculatorBase { ::mediapipe::Status Process(CalculatorContext* cc) override; private: - std::unordered_map label_map_; + absl::node_hash_map label_map_; }; REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator); diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index 2930c488a..af526044a 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -317,6 +317,7 @@ cc_library( "//mediapipe/util/tracking:box_tracker", "//mediapipe/util/tracking:tracking_visualization_utilities", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/strings", ], diff --git a/mediapipe/calculators/video/box_tracker_calculator.cc b/mediapipe/calculators/video/box_tracker_calculator.cc index a56392ee3..a946eb3fa 100644 --- a/mediapipe/calculators/video/box_tracker_calculator.cc +++ b/mediapipe/calculators/video/box_tracker_calculator.cc @@ -19,6 +19,7 @@ #include #include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_map.h" #include "absl/container/node_hash_set.h" #include "absl/strings/numbers.h" #include "mediapipe/calculators/video/box_tracker_calculator.pb.h" @@ -207,7 +208,7 @@ class BoxTrackerCalculator : public CalculatorBase { // Boxes that are tracked in streaming mode. MotionBoxMap streaming_motion_boxes_; - std::unordered_map> last_tracked_boxes_; + absl::node_hash_map> last_tracked_boxes_; int frame_num_since_reset_ = 0; // Cache used during streaming mode for fast forward tracking. diff --git a/mediapipe/calculators/video/tool/BUILD b/mediapipe/calculators/video/tool/BUILD index 3d3ed2f86..408461d2f 100644 --- a/mediapipe/calculators/video/tool/BUILD +++ b/mediapipe/calculators/video/tool/BUILD @@ -19,8 +19,6 @@ licenses(["notice"]) package(default_visibility = ["//mediapipe/calculators/video:__subpackages__"]) -exports_files(["LICENSE"]) - proto_library( name = "flow_quantizer_model_proto", srcs = ["flow_quantizer_model.proto"], diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java index 8a4924756..4bf30c833 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java @@ -134,6 +134,9 @@ public class MainActivity extends AppCompatActivity { protected void onPause() { super.onPause(); converter.close(); + + // Hide preview display until we re-open the camera again. 
+    previewDisplayView.setVisibility(View.GONE);
   }
 
   @Override
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java
index fc4c67755..8079daa75 100644
--- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java
@@ -32,16 +32,22 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
   private static final String FOCAL_LENGTH_STREAM_NAME = "focal_length_pixel";
   private static final String OUTPUT_LANDMARKS_STREAM_NAME = "face_landmarks_with_iris";
 
+  private boolean haveAddedSidePackets = false;
+
   @Override
   protected void onCameraStarted(SurfaceTexture surfaceTexture) {
     super.onCameraStarted(surfaceTexture);
-    float focalLength = cameraHelper.getFocalLengthPixels();
-    if (focalLength != Float.MIN_VALUE) {
-      Packet focalLengthSidePacket = processor.getPacketCreator().createFloat32(focalLength);
-      Map<String, Packet> inputSidePackets = new HashMap<>();
-      inputSidePackets.put(FOCAL_LENGTH_STREAM_NAME, focalLengthSidePacket);
-      processor.setInputSidePackets(inputSidePackets);
+    // onCameraStarted gets called each time the activity resumes, but we only want to do this once.
+    if (!haveAddedSidePackets) {
+      float focalLength = cameraHelper.getFocalLengthPixels();
+      if (focalLength != Float.MIN_VALUE) {
+        Packet focalLengthSidePacket = processor.getPacketCreator().createFloat32(focalLength);
+        Map<String, Packet> inputSidePackets = new HashMap<>();
+        inputSidePackets.put(FOCAL_LENGTH_STREAM_NAME, focalLengthSidePacket);
+        processor.setInputSidePackets(inputSidePackets);
+      }
+      haveAddedSidePackets = true;
     }
   }
diff --git a/mediapipe/examples/coral/Dockerfile b/mediapipe/examples/coral/Dockerfile
index de5d3a909..bc655c580 100644
--- a/mediapipe/examples/coral/Dockerfile
+++ b/mediapipe/examples/coral/Dockerfile
@@ -62,7 +62,7 @@ COPY . /mediapipe/
 
 # Install bazel
 # Please match the current MediaPipe Bazel requirements according to docs.
-ARG BAZEL_VERSION=2.0.0
+ARG BAZEL_VERSION=3.4.1
 RUN mkdir /bazel && \
     wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
     wget --no-check-certificate -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD
index a6e5fb01a..7917d3cc5 100644
--- a/mediapipe/framework/BUILD
+++ b/mediapipe/framework/BUILD
@@ -1107,6 +1107,19 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "basic_types_registration",
+    srcs = ["basic_types_registration.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":type_map",
+        "//mediapipe/framework/port:integral_types",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+    ],
+    alwayslink = 1,
+)
+
 cc_library(
     name = "validated_graph_config",
     srcs = ["validated_graph_config.cc"],
diff --git a/mediapipe/framework/basic_types_registration.cc b/mediapipe/framework/basic_types_registration.cc
new file mode 100644
index 000000000..7e5478118
--- /dev/null
+++ b/mediapipe/framework/basic_types_registration.cc
@@ -0,0 +1,28 @@
+#include <string>
+#include <vector>
+
+#include "mediapipe/framework/port/integral_types.h"
+#include "mediapipe/framework/type_map.h"
+
+#define MEDIAPIPE_REGISTER_GENERIC_TYPE(type)                                 \
+  MEDIAPIPE_REGISTER_TYPE(                                                    \
+      ::mediapipe::type_map_internal::ReflectType<void(type*)>::Type, #type,  \
+      nullptr, nullptr)
+
+// Note: we cannot define a type whose type hash id is already in the map.
+// E.g. if tool::GetTypeHash<int>() == tool::GetTypeHash<int32>(), then only one
+// can be registered.
+
+MEDIAPIPE_REGISTER_GENERIC_TYPE(bool);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(double);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(float);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(int);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(int64);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(uint64);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<bool>);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<double>);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<float>);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<int>);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<int64>);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<uint64>);
+MEDIAPIPE_REGISTER_GENERIC_TYPE(::std::vector<::std::vector<float>>);
   EXPECT_EQ(&values[i], collection_ptr->GetPtr(id));
   EXPECT_EQ(values[i], *collection_ptr->GetPtr(id));
-  EXPECT_EQ(false, std::is_const<std::remove_reference<decltype(
-                       *collection.GetPtr(id))>::type>::value);
-  EXPECT_EQ(true, std::is_const<std::remove_reference<decltype(
-                      *collection_ptr->GetPtr(id))>::type>::value);
+  EXPECT_EQ(false, std::is_const<std::remove_reference<decltype(
+                       *collection.GetPtr(id))>::type>::value);
+  EXPECT_EQ(true, std::is_const<std::remove_reference<decltype(
+                      *collection_ptr->GetPtr(id))>::type>::value);
   ++i;
 }
@@ -386,10 +388,11 @@ template
   EXPECT_EQ(&values[5], collection_ptr->Get("TAG_C", 0));

   // Test const-ness.
-  EXPECT_EQ(false, std::is_const<std::remove_reference<decltype(
-                       collection.Get("TAG_A", 0))>::type>::value);
-  EXPECT_EQ(true, std::is_const<std::remove_reference<decltype(
-                      collection_ptr->Get("TAG_A", 0))>::type>::value);
+  EXPECT_EQ(false, std::is_const<std::remove_reference<decltype(
+                       collection.Get("TAG_A", 0))>::type>::value);
+  EXPECT_EQ(true,
+            std::is_const<std::remove_reference<decltype(
+                collection_ptr->Get("TAG_A", 0))>::type>::value);

   // Test access using a range based for.
   int i = 0;
@@ -473,8 +476,8 @@ TEST(CollectionTest, TestIteratorFunctions) {
   collection.GetPtr(collection.GetId("TAG_B", 1)) = &values[4];
   collection.GetPtr(collection.GetId("TAG_C", 0)) = &values[5];

-  EXPECT_EQ(false, std::is_const<std::remove_reference<decltype(
-                       *collection.begin())>::type>::value);
+  EXPECT_EQ(false, std::is_const<std::remove_reference<decltype(
+                       *collection.begin())>::type>::value);
   EXPECT_EQ(values[0], *collection.begin());
   EXPECT_EQ(false, collection.begin()->empty());
   EXPECT_EQ(false, (*collection.begin()).empty());
@@ -483,8 +486,8 @@ TEST(CollectionTest, TestIteratorFunctions) {
   const auto* collection_ptr = &collection;

-  EXPECT_EQ(true, std::is_const<std::remove_reference<decltype(
-                      *collection_ptr->begin())>::type>::value);
+  EXPECT_EQ(true, std::is_const<std::remove_reference<decltype(
+                      *collection_ptr->begin())>::type>::value);
   EXPECT_EQ(values[0], *collection_ptr->begin());
   EXPECT_EQ(false, collection_ptr->begin()->empty());
   EXPECT_EQ(false, (*collection_ptr->begin()).empty());
diff --git a/mediapipe/framework/deps/file_helpers.cc b/mediapipe/framework/deps/file_helpers.cc
index ae9048a7a..5a7e7e381 100644
--- a/mediapipe/framework/deps/file_helpers.cc
+++ b/mediapipe/framework/deps/file_helpers.cc
@@ -16,6 +16,7 @@
 #ifdef _WIN32
 #include <direct.h>
+#include <handleapi.h>
 #else
 #include <dirent.h>
 #endif  // _WIN32
diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD
index 266a05956..50774e6de 100644
--- a/mediapipe/framework/formats/BUILD
+++ b/mediapipe/framework/formats/BUILD
@@ -23,8 +23,6 @@ package(

 licenses(["notice"])

-exports_files(["LICENSE"])
-
 mediapipe_proto_library(
     name = "detection_proto",
     srcs = ["detection.proto"],
@@ -32,6 +30,18 @@ mediapipe_proto_library(
     deps = ["//mediapipe/framework/formats:location_data_proto"],
 )

+mediapipe_register_type(
+    base_name = "detection",
+    include_headers = ["mediapipe/framework/formats/detection.pb.h"],
+    types = [
+        "::mediapipe::Detection",
+        "::mediapipe::DetectionList",
+        "::std::vector<::mediapipe::Detection>",
+        "::std::vector<::mediapipe::DetectionList>",
+    ],
+    deps = ["//mediapipe/framework/formats:detection_cc_proto"],
+)
+
 mediapipe_proto_library(
     name = "classification_proto",
     srcs = ["classification.proto"],
@@ -214,6 +224,18 @@ mediapipe_proto_library(
     deps = ["//mediapipe/framework/formats:location_data_proto"],
 )

+mediapipe_register_type(
+    base_name = "rect",
+    include_headers = ["mediapipe/framework/formats/rect.pb.h"],
+    types = [
+        "::mediapipe::Rect",
+        "::mediapipe::NormalizedRect",
+        "::std::vector<::mediapipe::Rect>",
+        "::std::vector<::mediapipe::NormalizedRect>",
+    ],
+    deps = [":rect_cc_proto"],
+)
+
 mediapipe_proto_library(
     name = "landmark_proto",
     srcs = ["landmark.proto"],
@@ -231,6 +253,7 @@ mediapipe_register_type(
         "::mediapipe::NormalizedLandmarkList",
         "::std::vector<::mediapipe::Landmark>",
         "::std::vector<::mediapipe::NormalizedLandmark>",
+        "::std::vector<::mediapipe::NormalizedLandmarkList>",
     ],
     deps = [":landmark_cc_proto"],
 )
diff --git a/mediapipe/framework/formats/annotation/BUILD b/mediapipe/framework/formats/annotation/BUILD
index a74b488e8..2e33f7668 100644
--- a/mediapipe/framework/formats/annotation/BUILD
+++ b/mediapipe/framework/formats/annotation/BUILD
@@ -20,8 +20,6 @@ package(default_visibility = ["//visibility:private"])

 licenses(["notice"])

-exports_files(["LICENSE"])
-
 mediapipe_proto_library(
     name = "locus_proto",
     srcs = ["locus.proto"],
diff --git a/mediapipe/framework/formats/motion/BUILD b/mediapipe/framework/formats/motion/BUILD
index f91d2cade..a0422f555 100644
--- a/mediapipe/framework/formats/motion/BUILD
+++ b/mediapipe/framework/formats/motion/BUILD
@@ -18,8 +18,6 @@

 licenses(["notice"])

-exports_files(["LICENSE"])
-
 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

 package(default_visibility = ["//visibility:private"])
diff --git a/mediapipe/framework/input_stream_manager.cc b/mediapipe/framework/input_stream_manager.cc
index 38e6dca39..2f2f124d7 100644
--- a/mediapipe/framework/input_stream_manager.cc
+++ b/mediapipe/framework/input_stream_manager.cc
@@ -163,9 +163,10 @@ template
   }
   queue_became_full = (!was_queue_full && max_queue_size_ != -1 &&
                        queue_.size() >= max_queue_size_);
-  VLOG_IF(3, queue_.size() > 1)
-      << "Queue size greater than 1: stream name: " << name_
-      << " queue_size: " << queue_.size();
+  if (queue_.size() > 1) {
+    VLOG(3) << "Queue size greater than 1: stream name: " << name_
+            << " queue_size: " << queue_.size();
+  }
   VLOG(3) << "Input stream:" << name_
           << " becomes non-empty status:" << queue_became_non_empty
           << " Size: " << queue_.size();
diff --git a/mediapipe/framework/legacy_calculator_support.h b/mediapipe/framework/legacy_calculator_support.h
index 4cd15ce22..75ffa68e0 100644
--- a/mediapipe/framework/legacy_calculator_support.h
+++ b/mediapipe/framework/legacy_calculator_support.h
@@ -66,6 +66,7 @@ class LegacyCalculatorSupport {
   };
 };

+#if !defined(_MSC_VER)
 // We only declare this variable for two specializations of the template because
 // it is only meant to be used for these two types.
 // Note that, since these variables are members of specific template
@@ -79,6 +80,7 @@
 thread_local CalculatorContext*
     LegacyCalculatorSupport::Scoped<CalculatorContext>::current_;
 template <>
 thread_local CalculatorContract*
     LegacyCalculatorSupport::Scoped<CalculatorContract>::current_;
+#endif

 }  // namespace mediapipe
diff --git a/mediapipe/framework/packet.h b/mediapipe/framework/packet.h
index ee9a85aeb..5221f5553 100644
--- a/mediapipe/framework/packet.h
+++ b/mediapipe/framework/packet.h
@@ -439,13 +439,18 @@ struct is_concrete_proto_t
 template <typename T>
 struct MessageRegistrationImpl {
   static NoDestructor<mediapipe::RegistrationToken> registration;
+  // This could have been a lambda inside registration's initializer below, but
+  // MSVC has a bug with lambdas, so we put it here as a workaround.
+  static std::unique_ptr<Holder<T>> CreateMessageHolder() {
+    return absl::make_unique<Holder<T>>(new T);
+  }
 };

 // Static members of template classes can be defined in the header.
 template <typename T>
 NoDestructor<mediapipe::RegistrationToken>
     MessageRegistrationImpl<T>::registration(MessageHolderRegistry::Register(
-        T{}.GetTypeName(), [] { return absl::make_unique<Holder<T>>(new T); }));
+        T{}.GetTypeName(), MessageRegistrationImpl<T>::CreateMessageHolder));

 // For non-Message payloads, this does nothing.
 template <typename T>
diff --git a/mediapipe/framework/profiler/BUILD b/mediapipe/framework/profiler/BUILD
index fc3f53756..3ef48ed94 100644
--- a/mediapipe/framework/profiler/BUILD
+++ b/mediapipe/framework/profiler/BUILD
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 licenses(["notice"])
diff --git a/mediapipe/framework/profiler/graph_profiler.cc b/mediapipe/framework/profiler/graph_profiler.cc
index 45367efed..55a7b70b1 100644
--- a/mediapipe/framework/profiler/graph_profiler.cc
+++ b/mediapipe/framework/profiler/graph_profiler.cc
@@ -204,6 +204,10 @@ void GraphProfiler::Reset() {
   Resume();
   if (is_tracing_ && IsTraceIntervalEnabled(profiler_config_, tracer()) &&
       executor != nullptr) {
+    // Inform the user via logging the path to the trace logs.
+    ASSIGN_OR_RETURN(std::string trace_log_path, GetTraceLogPath());
+    LOG(INFO) << "trace_log_path: " << trace_log_path;
+
     is_running_ = true;
     executor->Schedule([this] {
       absl::Time deadline = clock_->TimeNow() + tracer()->GetTraceLogInterval();
@@ -583,8 +587,6 @@ void AssignNodeNames(GraphProfile* profile) {
     return ::mediapipe::OkStatus();
   }
   ASSIGN_OR_RETURN(std::string trace_log_path, GetTraceLogPath());
-  // Inform the user via logging the path to the trace logs.
-  LOG(INFO) << "trace_log_path: " << trace_log_path;

   int log_interval_count = GetLogIntervalCount(profiler_config_);
   int log_file_count = GetLogFileCount(profiler_config_);
diff --git a/mediapipe/framework/subgraph.h b/mediapipe/framework/subgraph.h
index 27bc32d43..1791e027f 100644
--- a/mediapipe/framework/subgraph.h
+++ b/mediapipe/framework/subgraph.h
@@ -49,6 +49,14 @@ class Subgraph {
   static T GetOptions(const Subgraph::SubgraphOptions& supgraph_options) {
     return tool::OptionsMap().Initialize(supgraph_options).Get<T>();
   }
+
+  // Returns the CalculatorGraphConfig::Node specifying the subgraph.
+  // This provides to Subgraphs the same graph information that GetContract
+  // provides to Calculators.
+  static CalculatorGraphConfig::Node GetNode(
+      const Subgraph::SubgraphOptions& supgraph_options) {
+    return supgraph_options;
+  }
 };

 using SubgraphRegistry = GlobalFactoryRegistry<std::unique_ptr<Subgraph>>;
diff --git a/mediapipe/framework/testdata/BUILD b/mediapipe/framework/testdata/BUILD
index 3863baebb..5c99ff71f 100644
--- a/mediapipe/framework/testdata/BUILD
+++ b/mediapipe/framework/testdata/BUILD
@@ -17,8 +17,6 @@ licenses(["notice"])

 package(default_visibility = ["//visibility:private"])

-exports_files(["LICENSE"])
-
 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

 proto_library(
diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD
index 24f083dc4..2341d0023 100644
--- a/mediapipe/framework/tool/BUILD
+++ b/mediapipe/framework/tool/BUILD
@@ -13,10 +13,7 @@
 # limitations under the License.
 #

-load(
-    "//mediapipe/framework/port:build_config.bzl",
-    "mediapipe_cc_proto_library",
-)
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
 load(
     "//mediapipe/framework/tool:mediapipe_graph.bzl",
     "data_as_c_string",
@@ -44,9 +41,10 @@ cc_library(
     ],
 )

-proto_library(
+mediapipe_proto_library(
     name = "calculator_graph_template_proto",
     srcs = ["calculator_graph_template.proto"],
+    def_py_proto = False,
     visibility = ["//visibility:public"],
     deps = [
         "//mediapipe/framework:calculator_options_proto",
@@ -55,43 +53,14 @@ proto_library(
     ],
 )

-java_proto_library(
-    name = "calculator_graph_template_java_proto",
-    visibility = ["//visibility:public"],
-    deps = [":calculator_graph_template_proto"],
-)
-
-java_lite_proto_library(
-    name = "calculator_graph_template_java_proto_lite",
-    strict_deps = 0,
-    visibility = ["//visibility:public"],
-    deps = [":calculator_graph_template_proto"],
-)
-
-proto_library(
+mediapipe_proto_library(
     name = "source_proto",
     srcs = ["source.proto"],
     visibility = ["//visibility:public"],
-    deps = ["//mediapipe/framework:calculator_proto"],
-)
-
-mediapipe_cc_proto_library(
-    name = "calculator_graph_template_cc_proto",
-    srcs = ["calculator_graph_template.proto"],
-    cc_deps = [
-        "//mediapipe/framework:calculator_cc_proto",
-        "//mediapipe/framework/deps:proto_descriptor_cc_proto",
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
     ],
-    visibility = ["//visibility:public"],
-    deps = [":calculator_graph_template_proto"],
-)
-
-mediapipe_cc_proto_library(
-    name = "source_cc_proto",
-    srcs = ["source.proto"],
-    cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
-    visibility = ["//visibility:public"],
-    deps = [":source_proto"],
 )

 cc_binary(
@@ -551,22 +520,16 @@ data_as_c_string(
     outs = ["test_binarypb.inc"],
 )

-proto_library(
+mediapipe_proto_library(
     name = "node_chain_subgraph_proto",
     srcs = ["node_chain_subgraph.proto"],
+    visibility = ["//mediapipe:__subpackages__"],
     deps = [
+        "//mediapipe/framework:calculator_options_proto",
         "//mediapipe/framework:calculator_proto",
     ],
 )

-mediapipe_cc_proto_library(
-    name = "node_chain_subgraph_cc_proto",
-    srcs = ["node_chain_subgraph.proto"],
-    cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
-    visibility = ["//mediapipe:__subpackages__"],
-    deps = [":node_chain_subgraph_proto"],
-)
-
 cc_test(
     name = "data_as_c_string_test",
     srcs = [
diff --git a/mediapipe/framework/tool/gate_subgraph.proto b/mediapipe/framework/tool/gate_subgraph.proto
new file mode 100644
index 000000000..8dd124270
--- /dev/null
+++ b/mediapipe/framework/tool/gate_subgraph.proto
@@ -0,0 +1,22 @@
+syntax = "proto2";
+
+package mediapipe;
+
+import "mediapipe/framework/calculator.proto";
+
+option java_package = "com.google.mediapipe.proto";
+option java_outer_classname = "GateSubgraphProto";
+
+// Options for a gate-subgraph directing traffic to one of several contained
+// CalculatorGraphConfig's.
+message GateSubgraphOptions {
+  extend mediapipe.CalculatorOptions {
+    optional GateSubgraphOptions ext = 297196839;
+  }
+
+  // The contained literal subgraph configuration(s).
+  repeated CalculatorGraphConfig contained_graph = 1;
+
+  // The contained registered subgraphs or calculators.
+  repeated CalculatorGraphConfig.Node contained_node = 2;
+}
diff --git a/mediapipe/framework/tool/subgraph_expansion.cc b/mediapipe/framework/tool/subgraph_expansion.cc
index 67487a582..efa0d8e32 100644
--- a/mediapipe/framework/tool/subgraph_expansion.cc
+++ b/mediapipe/framework/tool/subgraph_expansion.cc
@@ -316,5 +316,25 @@ static ::mediapipe::Status PrefixNames(std::string prefix,
   return ::mediapipe::OkStatus();
 }

+CalculatorGraphConfig MakeSingleNodeGraph(CalculatorGraphConfig::Node node) {
+  using RepeatedStringField = proto_ns::RepeatedPtrField<ProtoString>;
+  struct Connections {
+    const RepeatedStringField& node_conns;
+    RepeatedStringField* graph_conns;
+  };
+  CalculatorGraphConfig config;
+  for (const Connections& item : std::vector<Connections>{
+           {node.input_stream(), config.mutable_input_stream()},
+           {node.output_stream(), config.mutable_output_stream()},
+           {node.input_side_packet(), config.mutable_input_side_packet()},
+           {node.output_side_packet(), config.mutable_output_side_packet()}}) {
+    for (const auto& conn : item.node_conns) {
+      *item.graph_conns->Add() = conn;
+    }
+  }
+  *config.add_node() = std::move(node);
+  return config;
+}
+
 }  // namespace tool
 }  // namespace mediapipe
diff --git a/mediapipe/framework/tool/subgraph_expansion.h b/mediapipe/framework/tool/subgraph_expansion.h
index 553ed05d9..2fc994d15 100644
--- a/mediapipe/framework/tool/subgraph_expansion.h
+++ b/mediapipe/framework/tool/subgraph_expansion.h
@@ -72,6 +72,11 @@ namespace tool {
     CalculatorGraphConfig* config,
     const GraphRegistry* graph_registry = nullptr);

+// Creates a graph wrapping the provided node and exposing all of its
+// connections.
+CalculatorGraphConfig MakeSingleNodeGraph(
+    CalculatorGraphConfig::Node subgraph_node);
+
 }  // namespace tool
 }  // namespace mediapipe
diff --git a/mediapipe/gpu/BUILD b/mediapipe/gpu/BUILD
index df15e7fa8..1e879b41a 100644
--- a/mediapipe/gpu/BUILD
+++ b/mediapipe/gpu/BUILD
@@ -942,7 +942,7 @@ objc_library(
 ios_unit_test(
     name = "gl_ios_test",
     minimum_os_version = MIN_IOS_VERSION,
-    runner = "//googlemac/iPhone/Shared/Testing/EarlGrey/Runner:IOS_LATEST",
+    runner = "//testing/utp/ios:IOS_LATEST",
     tags = [
         "ios",
     ],
diff --git a/mediapipe/gpu/gl_simple_shaders.cc b/mediapipe/gpu/gl_simple_shaders.cc
index a3e8bc620..b9bef4c5a 100644
--- a/mediapipe/gpu/gl_simple_shaders.cc
+++ b/mediapipe/gpu/gl_simple_shaders.cc
@@ -62,6 +62,7 @@ namespace mediapipe {
   PRECISION_COMPAT                                              \
   "#if __VERSION__ < 130\n"                                     \
   "#define in varying\n"                                        \
+  "#define texture texture2D\n"                                 \
   "#if defined(GL_ES) && !defined(GL_FRAGMENT_PRECISION_HIGH)\n" \
   "#define highp mediump\n"                                     \
   "#endif  // GL_ES && !GL_FRAGMENT_PRECISION_HIGH\n"           \
diff --git a/mediapipe/graphs/hand_tracking/BUILD b/mediapipe/graphs/hand_tracking/BUILD
index 2b19d9a54..0705905a1 100644
--- a/mediapipe/graphs/hand_tracking/BUILD
+++ b/mediapipe/graphs/hand_tracking/BUILD
@@ -102,6 +102,13 @@ cc_library(
     ],
 )

+mediapipe_binary_graph(
+    name = "multi_hand_tracking_desktop_live_binary_graph",
+    graph = "multi_hand_tracking_desktop_live.pbtxt",
+    output_name = "multi_hand_tracking_desktop_live.binarypb",
+    deps = [":multi_hand_desktop_tflite_calculators"],
+)
+
 mediapipe_binary_graph(
     name = "multi_hand_tracking_mobile_gpu_binary_graph",
     graph = "multi_hand_tracking_mobile.pbtxt",
diff --git a/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt b/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt
index f009f49c3..4820f348e 100644
--- a/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt
+++ b/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt
@@ -6,6 +6,9 @@
 # Images coming into and out of the graph.
 input_stream: "input_video"
 output_stream: "output_video"
+# Palm detections and hand landmarks info.
+output_stream: "multi_palm_detections"
+output_stream: "multi_hand_landmarks"

 # Determines if an input vector of NormalizedRect has a size greater than or
 # equal to the provided min_size.
diff --git a/mediapipe/graphs/instant_motion_tracking/calculators/BUILD b/mediapipe/graphs/instant_motion_tracking/calculators/BUILD
index 844ed0332..311477959 100644
--- a/mediapipe/graphs/instant_motion_tracking/calculators/BUILD
+++ b/mediapipe/graphs/instant_motion_tracking/calculators/BUILD
@@ -18,8 +18,6 @@ licenses(["notice"])

 package(default_visibility = ["//visibility:public"])

-exports_files(["LICENSE"])
-
 proto_library(
     name = "sticker_buffer_proto",
     srcs = [
diff --git a/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc b/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc
index 067b8d004..943e038d0 100644
--- a/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc
+++ b/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc
@@ -77,10 +77,19 @@ static const float kModelMatrix[] = {0.83704215, -0.36174262, 0.41049102, 0.0,
 //  Texture to use with animation file. Texture is REQUIRED to be passed into
 //  the calculator, but can be passed in as a Side Packet OR Input Stream.
 // ANIMATION_ASSET (String, required):
-//  Path of animation file to load and render. Should be generated by
-//  //java/com/google/android/apps/motionstills/SimpleObjEncryptor with
-//  --compressed_mode=true. See comments and documentation there for more
-//  information on custom .obj.uuu file format.
+//  Path of animation file to load and render. The file format expects an
+//  arbitrary number of animation frames, concatenated directly together,
+//  with each animation frame looking like:
+//  HEADER
+//  VERTICES
+//  TEXTURE_COORDS
+//  INDICES
+//  The header consists of 3 int32 lengths, the sizes of the vertex data,
+//  the texcoord data, and the index data, respectively. Let us call those
+//  N1, N2, and N3. Then we expect N1 float32's for vertex information
+//  (x1,y1,z1,x2,y2,z2,etc.), followed by N2 float32's for texcoord
+//  information (u1,v1,u2,v2,u3,v3,etc.), followed by N3 shorts/int16's
+//  for triangle indices (a1,b1,c1,a2,b2,c2,etc.).
 // CAMERA_PARAMETERS_PROTO_STRING (String, optional):
 //  Serialized proto std::string of CameraParametersProto. We need this to
 //  get the right aspect ratio and field of view.
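The rewritten ANIMATION_ASSET comment above fully specifies the on-disk frame layout, so the format can be sanity-checked outside the calculator. Below is a minimal Python sketch (not part of the patch) that packs one frame in that layout; the little-endian byte order and the helper name `pack_frame` are assumptions for illustration:

```python
import struct

def pack_frame(vertices, texcoords, indices):
    # HEADER: three int32 lengths N1, N2, N3 (element counts), assumed little-endian.
    data = struct.pack("<3i", len(vertices), len(texcoords), len(indices))
    data += struct.pack("<%df" % len(vertices), *vertices)    # N1 float32: x1,y1,z1,...
    data += struct.pack("<%df" % len(texcoords), *texcoords)  # N2 float32: u1,v1,...
    data += struct.pack("<%dh" % len(indices), *indices)      # N3 int16: a1,b1,c1,...
    return data

# One triangle; a full animation file is simply frames like this concatenated.
frame = pack_frame(
    vertices=[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
    texcoords=[0.0, 0.0, 1.0, 0.0, 0.0, 1.0],
    indices=[0, 1, 2])
```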
diff --git a/mediapipe/java/com/google/mediapipe/components/BUILD b/mediapipe/java/com/google/mediapipe/components/BUILD
index dcbdd3b72..4471a0c56 100644
--- a/mediapipe/java/com/google/mediapipe/components/BUILD
+++ b/mediapipe/java/com/google/mediapipe/components/BUILD
@@ -47,14 +47,18 @@ android_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//mediapipe/java/com/google/mediapipe/glutil",
         "//third_party:androidx_appcompat",
+        "//third_party:androidx_core",
         "//third_party:androidx_legacy_support_v4",
-        "//third_party:camera2",
+        "//third_party:camerax_camera2",
         "//third_party:camerax_core",
+        "//third_party:camerax_lifecycle",
         "@maven//:androidx_concurrent_concurrent_futures",
         "@maven//:androidx_lifecycle_lifecycle_common",
         "@maven//:com_google_code_findbugs_jsr305",
         "@maven//:com_google_guava_guava",
+        "@maven//:com_google_guava_listenablefuture",
     ],
 )
diff --git a/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java b/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java
index 9ede4c6ca..d19311ab4 100644
--- a/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java
+++ b/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java
@@ -23,16 +23,28 @@ import android.hardware.camera2.CameraCharacteristics;
 import android.hardware.camera2.CameraManager;
 import android.hardware.camera2.CameraMetadata;
 import android.hardware.camera2.params.StreamConfigurationMap;
+import android.opengl.GLES20;
+import android.os.Handler;
+import android.os.HandlerThread;
+import android.os.Process;
 import android.os.SystemClock;
 import android.util.Log;
 import android.util.Size;
+import android.view.Surface;
+import androidx.camera.core.Camera;
+import androidx.camera.core.CameraSelector;
 import androidx.camera.core.CameraX;
-import androidx.camera.core.CameraX.LensFacing;
 import androidx.camera.core.Preview;
-import androidx.camera.core.PreviewConfig;
+import androidx.camera.lifecycle.ProcessCameraProvider;
+import androidx.core.content.ContextCompat;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.mediapipe.glutil.EglManager;
 import java.util.Arrays;
 import java.util.List;
+import java.util.concurrent.Executor;
+import java.util.concurrent.RejectedExecutionException;
 import javax.annotation.Nullable;
+import javax.microedition.khronos.egl.EGLSurface;

 /**
  * Uses CameraX APIs for camera setup and access.
@@ -40,6 +52,43 @@ import javax.annotation.Nullable;
  *
  * <p>{@link CameraX} connects to the camera and provides video frames.
  */
 public class CameraXPreviewHelper extends CameraHelper {
+  /**
+   * Provides an Executor that wraps a single-threaded Handler.
+   *
+   * <p>All operations involving the surface texture should happen in a single thread, and that
+   * thread should not be the main thread.
+   *
+   * <p>The surface provider callbacks require an Executor, and the onFrameAvailable callback
+   * requires a Handler. We want everything to run on the same thread, so we need an Executor that
+   * is also a Handler.
+   */
+  private static final class SingleThreadHandlerExecutor implements Executor {
+
+    private final HandlerThread handlerThread;
+    private final Handler handler;
+
+    SingleThreadHandlerExecutor(String threadName, int priority) {
+      handlerThread = new HandlerThread(threadName, priority);
+      handlerThread.start();
+      handler = new Handler(handlerThread.getLooper());
+    }
+
+    Handler getHandler() {
+      return handler;
+    }
+
+    @Override
+    public void execute(Runnable command) {
+      if (!handler.post(command)) {
+        throw new RejectedExecutionException(handlerThread.getName() + " is shutting down.");
+      }
+    }
+
+    boolean shutdown() {
+      return handlerThread.quitSafely();
+    }
+  }
+
   private static final String TAG = "CameraXPreviewHelper";

   // Target frame and view resolution size in landscape.
@@ -48,7 +97,12 @@ public class CameraXPreviewHelper extends CameraHelper {
   // Number of attempts for calculating the offset between the camera's clock and MONOTONIC clock.
   private static final int CLOCK_OFFSET_CALIBRATION_ATTEMPTS = 3;

+  private final SingleThreadHandlerExecutor renderExecutor =
+      new SingleThreadHandlerExecutor("RenderThread", Process.THREAD_PRIORITY_DEFAULT);
+
+  private ProcessCameraProvider cameraProvider;
   private Preview preview;
+  private Camera camera;

   // Size of the camera-preview frames from the camera.
   private Size frameSize;
@@ -73,51 +127,93 @@ public class CameraXPreviewHelper extends CameraHelper {
   }

   public void startCamera(
-      Activity context, CameraFacing cameraFacing, SurfaceTexture surfaceTexture, Size targetSize) {
-    if (targetSize == null) {
-      targetSize = TARGET_SIZE;
-    }
+      Activity context,
+      CameraFacing cameraFacing,
+      SurfaceTexture unusedSurfaceTexture,
+      Size targetSize) {
+    Executor mainThreadExecutor = ContextCompat.getMainExecutor(context);
+    ListenableFuture<ProcessCameraProvider> cameraProviderFuture =
+        ProcessCameraProvider.getInstance(context);

-    LensFacing cameraLensFacing =
-        cameraFacing == CameraHelper.CameraFacing.FRONT ? LensFacing.FRONT : LensFacing.BACK;
-    PreviewConfig previewConfig =
-        new PreviewConfig.Builder()
-            .setLensFacing(cameraLensFacing)
-            .setTargetResolution(targetSize)
-            .build();
-    preview = new Preview(previewConfig);
+    targetSize = (targetSize == null ? TARGET_SIZE : targetSize);
+    // According to CameraX documentation
+    // (https://developer.android.com/training/camerax/configuration#specify-resolution):
+    // "Express the resolution Size in the coordinate frame after rotating the supported sizes by
+    // the target rotation."
+    // Since we only support portrait orientation, we unconditionally transpose width and height.
+    Size rotatedSize =
+        new Size(/* width= */ targetSize.getHeight(), /* height= */ targetSize.getWidth());

-    preview.setOnPreviewOutputUpdateListener(
-        previewOutput -> {
-          if (!previewOutput.getTextureSize().equals(frameSize)) {
-            frameSize = previewOutput.getTextureSize();
-            frameRotation = previewOutput.getRotationDegrees();
-            if (frameSize.getWidth() == 0 || frameSize.getHeight() == 0) {
-              // Invalid frame size. Wait for valid input dimensions before updating display size.
-              Log.d(TAG, "Invalid frameSize.");
-              return;
+    cameraProviderFuture.addListener(
+        () -> {
+          try {
+            cameraProvider = cameraProviderFuture.get();
+          } catch (Exception e) {
+            if (e instanceof InterruptedException) {
+              Thread.currentThread().interrupt();
             }
+            Log.e(TAG, "Unable to get ProcessCameraProvider: ", e);
+            return;
           }

-          Integer selectedLensFacing =
+          preview = new Preview.Builder().setTargetResolution(rotatedSize).build();
+
+          CameraSelector cameraSelector =
               cameraFacing == CameraHelper.CameraFacing.FRONT
-                  ? CameraMetadata.LENS_FACING_FRONT
-                  : CameraMetadata.LENS_FACING_BACK;
-          cameraCharacteristics = getCameraCharacteristics(context, selectedLensFacing);
-          if (cameraCharacteristics != null) {
-            // Queries camera timestamp source. It should be one of REALTIME or UNKNOWN as
-            // documented in
-            // https://developer.android.com/reference/android/hardware/camera2/CameraCharacteristics.html#SENSOR_INFO_TIMESTAMP_SOURCE.
-            cameraTimestampSource =
-                cameraCharacteristics.get(CameraCharacteristics.SENSOR_INFO_TIMESTAMP_SOURCE);
-            focalLengthPixels = calculateFocalLengthInPixels();
-          }
+                  ? CameraSelector.DEFAULT_FRONT_CAMERA
+                  : CameraSelector.DEFAULT_BACK_CAMERA;

-          if (onCameraStartedListener != null) {
-            onCameraStartedListener.onCameraStarted(previewOutput.getSurfaceTexture());
-          }
-        });
-    CameraX.bindToLifecycle(/*lifecycleOwner=*/ (LifecycleOwner) context, preview);
+          // Provide surface texture.
+          preview.setSurfaceProvider(
+              renderExecutor,
+              request -> {
+                Size resolution = request.getResolution();
+                Log.d(
+                    TAG,
+                    String.format(
+                        "Received surface request for resolution %dx%d",
+                        resolution.getWidth(), resolution.getHeight()));
+
+                SurfaceTexture previewFrameTexture = createSurfaceTexture();
+                previewFrameTexture.setDefaultBufferSize(
+                    resolution.getWidth(), resolution.getHeight());
+                previewFrameTexture.setOnFrameAvailableListener(
+                    frameTexture -> {
+                      if (frameTexture != previewFrameTexture) {
+                        return;
+                      }
+                      onInitialFrameReceived(context, frameTexture);
+                    },
+                    renderExecutor.getHandler());
+                Surface surface = new Surface(previewFrameTexture);
+                Log.d(TAG, "Providing surface");
+                request.provideSurface(
+                    surface,
+                    renderExecutor,
+                    result -> {
+                      Log.d(TAG, "Surface request result: " + result);
+                      // Per
+                      // https://developer.android.com/reference/androidx/camera/core/SurfaceRequest.Result,
+                      // the surface was either never used (RESULT_INVALID_SURFACE,
+                      // RESULT_REQUEST_CANCELLED, RESULT_SURFACE_ALREADY_PROVIDED) or the surface
+                      // was used successfully and was eventually detached
+                      // (RESULT_SURFACE_USED_SUCCESSFULLY) so we can release it now to free up
+                      // resources.
+                      previewFrameTexture.release();
+                      surface.release();
+                    });
+              });
+
+          // If we pause/resume the activity, we need to unbind the earlier preview use case, given
+          // the way the activity is currently structured.
+          cameraProvider.unbindAll();
+
+          // Bind preview use case to camera.
+          camera =
+              cameraProvider.bindToLifecycle(
+                  /*lifecycleOwner=*/ (LifecycleOwner) context, cameraSelector, preview);
+        },
+        mainThreadExecutor);
   }

   @Override
@@ -127,17 +223,16 @@ public class CameraXPreviewHelper extends CameraHelper {

   @Override
   public Size computeDisplaySizeFromViewSize(Size viewSize) {
-    if (viewSize == null || frameSize == null) {
-      // Wait for all inputs before setting display size.
-      Log.d(TAG, "viewSize or frameSize is null.");
-      return null;
-    }
-
-    Size optimalSize = getOptimalViewSize(viewSize);
-    return optimalSize != null ? optimalSize : frameSize;
+    // Camera target size is computed already, so just return the capture frame size.
+    return frameSize;
   }

   @Nullable
+  // TODO: Compute optimal view size from available stream sizes.
+  // Currently, we create the preview stream before we know what size our preview SurfaceView is.
+  // Instead, we should determine our optimal stream size (based on resolution and aspect ratio
+  // difference with the preview SurfaceView) and open the preview stream then. Until we make that
+  // change, this method is unused.
   private Size getOptimalViewSize(Size targetSize) {
     if (cameraCharacteristics != null) {
       StreamConfigurationMap map =
@@ -221,6 +316,56 @@ public class CameraXPreviewHelper extends CameraHelper {
     return frameSize;
   }

+  private void onInitialFrameReceived(Activity context, SurfaceTexture previewFrameTexture) {
+    // This method is called by the onFrameAvailableListener we install when opening the camera
+    // session, the first time we receive a frame. In this method, we remove our callback,
+    // acknowledge the frame (via updateTexImage()), detach the texture from the GL context we
+    // created earlier (so that the MediaPipe pipeline can attach it), and perform some other
+    // one-time initialization based on the newly opened camera device. Finally, we indicate the
+    // camera session is ready via the onCameraStartedListener.
+
+    // Remove our callback.
+    previewFrameTexture.setOnFrameAvailableListener(null);
+
+    // Update texture image so we don't stall callbacks.
+    previewFrameTexture.updateTexImage();
+
+    // Detach the SurfaceTexture from the GL context we created earlier so that the MediaPipe
+    // pipeline can attach it.
+    previewFrameTexture.detachFromGLContext();
+
+    if (!preview.getAttachedSurfaceResolution().equals(frameSize)) {
+      frameSize = preview.getAttachedSurfaceResolution();
+      frameRotation = camera.getCameraInfo().getSensorRotationDegrees();
+      if (frameSize.getWidth() == 0 || frameSize.getHeight() == 0) {
+        // Invalid frame size. Wait for valid input dimensions before updating
+        // display size.
+        Log.d(TAG, "Invalid frameSize.");
+        return;
+      }
+    }
+
+    Integer selectedLensFacing =
+        cameraFacing == CameraHelper.CameraFacing.FRONT
+            ? CameraMetadata.LENS_FACING_FRONT
+            : CameraMetadata.LENS_FACING_BACK;
+    cameraCharacteristics = getCameraCharacteristics(context, selectedLensFacing);
+    if (cameraCharacteristics != null) {
+      // Queries camera timestamp source. It should be one of REALTIME or UNKNOWN
+      // as documented in
+      // https://developer.android.com/reference/android/hardware/camera2/CameraCharacteristics.html#SENSOR_INFO_TIMESTAMP_SOURCE.
+      cameraTimestampSource =
+          cameraCharacteristics.get(CameraCharacteristics.SENSOR_INFO_TIMESTAMP_SOURCE);
+      focalLengthPixels = calculateFocalLengthInPixels();
+    }
+
+    OnCameraStartedListener listener = onCameraStartedListener;
+    if (listener != null) {
+      ContextCompat.getMainExecutor(context)
+          .execute(() -> listener.onCameraStarted(previewFrameTexture));
+    }
+  }
+
   // Computes the focal length of the camera in pixels based on lens and sensor properties.
   private float calculateFocalLengthInPixels() {
     // Focal length of the camera in millimeters.
@@ -237,6 +382,17 @@ public class CameraXPreviewHelper extends CameraHelper {
     return frameSize.getWidth() * focalLengthMm / sensorWidthMm;
   }

+  private static SurfaceTexture createSurfaceTexture() {
+    // Create a temporary surface to make the context current.
+    EglManager eglManager = new EglManager(null);
+    EGLSurface tempEglSurface = eglManager.createOffscreenSurface(1, 1);
+    eglManager.makeCurrent(tempEglSurface, tempEglSurface);
+    int[] textures = new int[1];
+    GLES20.glGenTextures(1, textures, 0);
+    SurfaceTexture previewFrameTexture = new SurfaceTexture(textures[0]);
+    return previewFrameTexture;
+  }
+
   @Nullable
   private static CameraCharacteristics getCameraCharacteristics(
       Activity context, Integer lensFacing) {
diff --git a/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt b/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt
index 2df053820..9f634b0ff 100644
--- a/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt
+++ b/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt
@@ -31,7 +31,7 @@ node {
   options: {
     [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
       rotation_vector_start_keypoint_index: 33  # Left side of left eye.
-      rotation_vector_end_keypoint_index: 133  # Right side of right eye.
+      rotation_vector_end_keypoint_index: 263  # Right side of right eye.
       rotation_vector_target_angle_degrees: 0
     }
   }
diff --git a/mediapipe/objc/BUILD b/mediapipe/objc/BUILD
index e4835b583..cc90b8d3c 100644
--- a/mediapipe/objc/BUILD
+++ b/mediapipe/objc/BUILD
@@ -90,16 +90,27 @@ objc_library(
 objc_library(
     name = "mediapipe_input_sources_ios",
     srcs = [
-        "MPPCameraInputSource.m",
-        "MPPDisplayLinkWeakTarget.m",
         "MPPInputSource.m",
         "MPPPlayerInputSource.m",
-    ],
+    ] + select({
+        "//mediapipe:ios": [
+            "MPPCameraInputSource.m",
+            "MPPDisplayLinkWeakTarget.m",
+        ],
+        "//conditions:default": [],
+    }),
     hdrs = [
-        "MPPCameraInputSource.h",
-        "MPPDisplayLinkWeakTarget.h",
         "MPPInputSource.h",
         "MPPPlayerInputSource.h",
+    ] + select({
+        "//mediapipe:ios": [
+            "MPPCameraInputSource.h",
+            "MPPDisplayLinkWeakTarget.h",
+        ],
+        "//conditions:default": [],
+    }),
+    sdk_frameworks = [
+        "CoreVideo",
     ],
     visibility = ["//mediapipe/framework:mediapipe_internal"],
 )
diff --git a/mediapipe/objc/MPPPlayerInputSource.m b/mediapipe/objc/MPPPlayerInputSource.m
index c20bb1fcd..d9c78054a 100644
--- a/mediapipe/objc/MPPPlayerInputSource.m
+++ b/mediapipe/objc/MPPPlayerInputSource.m
@@ -12,16 +12,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#import <AVKit/AVKit.h>
+
 #import "MPPPlayerInputSource.h"
+#if !TARGET_OS_OSX
 #import "mediapipe/objc/MPPDisplayLinkWeakTarget.h"
+#endif

 @implementation MPPPlayerInputSource {
   AVAsset* _video;
   AVPlayerItem* _videoItem;
   AVPlayer* _videoPlayer;
   AVPlayerItemVideoOutput* _videoOutput;
+#if !TARGET_OS_OSX
   CADisplayLink* _videoDisplayLink;
   MPPDisplayLinkWeakTarget* _displayLinkWeakTarget;
+#else
+  CVDisplayLinkRef _videoDisplayLink;
+#endif  // TARGET_OS_OSX
   id _videoEndObserver;
 }

@@ -40,6 +48,7 @@
   _videoOutput.suppressesPlayerRendering = YES;
   [_videoItem addOutput:_videoOutput];

+#if !TARGET_OS_OSX
   _displayLinkWeakTarget =
       [[MPPDisplayLinkWeakTarget alloc] initWithTarget:self
                                               selector:@selector(videoUpdate:)];
@@ -47,7 +56,15 @@
                                       selector:@selector(displayLinkCallback:)];
   _videoDisplayLink.paused = YES;
   [_videoDisplayLink addToRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
-
+#else
+  CGDirectDisplayID displayID = CGMainDisplayID();
+  CVReturn error = CVDisplayLinkCreateWithCGDisplay(displayID, &_videoDisplayLink);
+  if (error) {
+    _videoDisplayLink = NULL;
+  }
+  CVDisplayLinkStop(_videoDisplayLink);
+  CVDisplayLinkSetOutputCallback(_videoDisplayLink, renderCallback, (__bridge void*)self);
+#endif  // TARGET_OS_OSX
   _videoPlayer = [AVPlayer playerWithPlayerItem:_videoItem];
   _videoPlayer.actionAtItemEnd = AVPlayerActionAtItemEndNone;
   NSNotificationCenter* center = [NSNotificationCenter defaultCenter];
@@ -65,11 +82,19 @@

 - (void)start {
   [_videoPlayer play];
+#if !TARGET_OS_OSX
   _videoDisplayLink.paused = NO;
+#else
+  CVDisplayLinkStart(_videoDisplayLink);
+#endif
 }

 - (void)stop {
+#if !TARGET_OS_OSX
   _videoDisplayLink.paused = YES;
+#else
+  CVDisplayLinkStop(_videoDisplayLink);
+#endif
   [_videoPlayer pause];
 }

@@ -77,7 +102,20 @@
   return _videoPlayer.rate != 0.0;
 }

+#if !TARGET_OS_OSX
 - (void)videoUpdate:(CADisplayLink*)sender {
+  [self videoUpdateIfNeeded];
+}
+#else
+static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp* inNow,
+                               const CVTimeStamp* inOutputTime, CVOptionFlags flagsIn,
+                               CVOptionFlags* flagsOut, void* displayLinkContext) {
+  [(__bridge MPPPlayerInputSource*)displayLinkContext videoUpdateIfNeeded];
+  return kCVReturnSuccess;
+}
+#endif  // TARGET_OS_OSX
+
+- (void)videoUpdateIfNeeded {
   CMTime timestamp = [_videoItem currentTime];
   if ([_videoOutput hasNewPixelBufferForItemTime:timestamp]) {
     CVPixelBufferRef pixelBuffer =
@@ -96,7 +134,11 @@

 - (void)dealloc {
   [[NSNotificationCenter defaultCenter] removeObserver:self];
+#if !TARGET_OS_OSX
   [_videoDisplayLink invalidate];
+#else
+  CVDisplayLinkRelease(_videoDisplayLink);
+#endif
   _videoPlayer = nil;
 }
diff --git a/mediapipe/python/pybind/BUILD b/mediapipe/python/pybind/BUILD
index 9da2ed8ee..9a0f83141 100644
--- a/mediapipe/python/pybind/BUILD
+++ b/mediapipe/python/pybind/BUILD
@@ -43,6 +43,7 @@ pybind_library(
     deps = [
         ":image_frame_util",
         ":util",
+        "//mediapipe/framework:type_map",
     ],
 )
diff --git a/mediapipe/python/pybind/calculator_graph.cc b/mediapipe/python/pybind/calculator_graph.cc
index d883025d3..c97795918 100644
--- a/mediapipe/python/pybind/calculator_graph.cc
+++ b/mediapipe/python/pybind/calculator_graph.cc
@@ -98,12 +98,14 @@ void CalculatorGraphSubmodule(pybind11::module* module) {
         if ((init_with_binary_graph ? 1 : 0) + (init_with_graph_proto ? 1 : 0) +
                 (init_with_validated_graph_config ? 1 : 0) != 1) {
-          throw RaisePyError(
-              PyExc_ValueError,
-              "Please provide \'binary_graph\' to initialize the graph with"
-              " binary graph or provide \'graph_config\' to initialize the "
-              " with graph config proto or provide \'validated_graph_config\' "
-              " to initialize the with ValidatedGraphConfig object.");
+          throw RaisePyError(PyExc_ValueError,
+                             "Please provide one of the following: "
+                             "\'binary_graph_path\' to initialize the graph "
+                             "with a binary graph file, or "
+                             "\'graph_config\' to initialize the graph with a "
+                             "graph config proto, or "
+                             "\'validated_graph_config\' to initialize the "
+                             "graph with a ValidatedGraphConfig object.");
         }
         auto calculator_graph = absl::make_unique<CalculatorGraph>();
         RaisePyErrorIfNotOk(calculator_graph->Initialize(graph_config_proto));
diff --git a/mediapipe/python/pybind/image_frame.cc b/mediapipe/python/pybind/image_frame.cc
index 49eef09a1..0747f08f1 100644
--- a/mediapipe/python/pybind/image_frame.cc
+++ b/mediapipe/python/pybind/image_frame.cc
@@ -365,3 +365,7 @@ void ImageFrameSubmodule(pybind11::module* module) {

 }  // namespace python
 }  // namespace mediapipe
+
+#include "mediapipe/framework/type_map.h"
+MEDIAPIPE_REGISTER_TYPE(::mediapipe::ImageFrame, "::mediapipe::ImageFrame",
+                        nullptr, nullptr);
diff --git a/mediapipe/python/pybind/validated_graph_config.cc b/mediapipe/python/pybind/validated_graph_config.cc
index 0bb2721c9..bf0f81d2b 100644
--- a/mediapipe/python/pybind/validated_graph_config.cc
+++ b/mediapipe/python/pybind/validated_graph_config.cc
@@ -65,6 +65,14 @@ void ValidatedGraphConfigSubmodule(pybind11::module* module) {
                 .c_str());
           }
         }
+        if (!(init_with_binary_graph ^ init_with_graph_proto)) {
+          throw RaisePyError(
+              PyExc_ValueError,
+              "Please either provide \'binary_graph_path\' to initialize "
+              "a ValidatedGraphConfig object with a binary graph file or "
+              "\'graph_config\' to initialize a ValidatedGraphConfig "
+              "object with a graph config proto.");
+        }
         RaisePyErrorIfNotOk(self->Initialize(graph_config_proto));
       },
       R"doc(Initialize ValidatedGraphConfig with a CalculatorGraphConfig.
diff --git a/mediapipe/util/annotation_renderer.h b/mediapipe/util/annotation_renderer.h
index 71ca516e6..380bc3614 100644
--- a/mediapipe/util/annotation_renderer.h
+++ b/mediapipe/util/annotation_renderer.h
@@ -76,6 +76,7 @@ class AnnotationRenderer {
   // Should be in the range (0-1].
   // See 'gpu_scale_factor' in annotation_overlay_calculator.proto
   void SetScaleFactor(float scale_factor);
+  float GetScaleFactor() { return scale_factor_; }

  private:
   // Draws a rectangle on the image as described in the annotation.
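The two pybind changes above apply the same argument-validation pattern: CalculatorGraph requires exactly one of its three initializers, while ValidatedGraphConfig XORs the presence of its two. A short Python sketch of that logic (illustrative only; these helper names are not a MediaPipe API):

```python
def check_exactly_one(**kwargs):
    # CalculatorGraph-style check: exactly one initializer must be provided.
    if sum(value is not None for value in kwargs.values()) != 1:
        raise ValueError("Please provide exactly one of: %s" % ", ".join(kwargs))

def check_xor(binary_graph_path=None, graph_config=None):
    # ValidatedGraphConfig-style check: XOR of the two initializers' presence.
    if (binary_graph_path is None) == (graph_config is None):
        raise ValueError(
            "Please provide either 'binary_graph_path' or 'graph_config', "
            "but not both.")
```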
diff --git a/mediapipe/util/sequence/media_sequence.py b/mediapipe/util/sequence/media_sequence.py
index da18bffd0..034fd8937 100644
--- a/mediapipe/util/sequence/media_sequence.py
+++ b/mediapipe/util/sequence/media_sequence.py
@@ -407,10 +407,17 @@ def _create_region_with_prefix(name, prefix):
         get_bbox_xmax_at(index, sequence_example, prefix=prefix)), 1)
   def add_prefixed_bbox(values, sequence_example, prefix):
-    add_bbox_ymin(values[:, 0], sequence_example, prefix=prefix)
-    add_bbox_xmin(values[:, 1], sequence_example, prefix=prefix)
-    add_bbox_ymax(values[:, 2], sequence_example, prefix=prefix)
-    add_bbox_xmax(values[:, 3], sequence_example, prefix=prefix)
+    values = np.array(values)
+    if values.size == 0:
+      add_bbox_ymin([], sequence_example, prefix=prefix)
+      add_bbox_xmin([], sequence_example, prefix=prefix)
+      add_bbox_ymax([], sequence_example, prefix=prefix)
+      add_bbox_xmax([], sequence_example, prefix=prefix)
+    else:
+      add_bbox_ymin(values[:, 0], sequence_example, prefix=prefix)
+      add_bbox_xmin(values[:, 1], sequence_example, prefix=prefix)
+      add_bbox_ymax(values[:, 2], sequence_example, prefix=prefix)
+      add_bbox_xmax(values[:, 3], sequence_example, prefix=prefix)
   def get_prefixed_bbox_size(sequence_example, prefix):
     return get_bbox_ymin_size(sequence_example, prefix=prefix)
   def has_prefixed_bbox(sequence_example, prefix):
diff --git a/mediapipe/util/sequence/media_sequence_test.py b/mediapipe/util/sequence/media_sequence_test.py
index 2f30c554e..0c6ff9be7 100644
--- a/mediapipe/util/sequence/media_sequence_test.py
+++ b/mediapipe/util/sequence/media_sequence_test.py
@@ -128,8 +128,9 @@ class MediaSequenceTest(tf.test.TestCase):
     example = tf.train.SequenceExample()
     boxes = np.array([[0.1, 0.2, 0.3, 0.4],
                       [0.5, 0.6, 0.7, 0.8]])
+    empty_boxes = np.array([])
     ms.add_bbox(boxes, example)
-    ms.add_bbox(boxes, example)
+    ms.add_bbox(empty_boxes, example)
     self.assertEqual(2, ms.get_bbox_size(example))
     self.assertAllClose(boxes, ms.get_bbox_at(0, example))
     self.assertTrue(ms.has_bbox(example))
diff --git a/mediapipe/util/tflite/BUILD b/mediapipe/util/tflite/BUILD
index 17f6a848a..2d2229388 100644
--- a/mediapipe/util/tflite/BUILD
+++ b/mediapipe/util/tflite/BUILD
@@ -32,13 +32,20 @@ cc_library(
     name = "cpu_op_resolver",
     srcs = ["cpu_op_resolver.cc"],
    hdrs = ["cpu_op_resolver.h"],
+    visibility = ["//visibility:public"],
     deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/port:logging",
         "//mediapipe/util/tflite/operations:max_pool_argmax",
         "//mediapipe/util/tflite/operations:max_unpooling",
         "//mediapipe/util/tflite/operations:transpose_conv_bias",
         "@org_tensorflow//tensorflow/lite:builtin_op_data",
+        "@org_tensorflow//tensorflow/lite:framework",
         "@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
     ],
+    # For using the symbol `MediaPipe_RegisterTfLiteOpResolver` in Python
+    # with `tensorflow.lite.python.interpreter.InterpreterWithCustomOps`.
+    alwayslink = 1,
 )

 cc_library(
diff --git a/mediapipe/util/tflite/cpu_op_resolver.cc b/mediapipe/util/tflite/cpu_op_resolver.cc
index ab06c94ae..935bed08b 100644
--- a/mediapipe/util/tflite/cpu_op_resolver.cc
+++ b/mediapipe/util/tflite/cpu_op_resolver.cc
@@ -14,19 +14,23 @@

 #include "mediapipe/util/tflite/cpu_op_resolver.h"

+#include "mediapipe/framework/port/logging.h"
 #include "mediapipe/util/tflite/operations/max_pool_argmax.h"
 #include "mediapipe/util/tflite/operations/max_unpooling.h"
 #include "mediapipe/util/tflite/operations/transpose_conv_bias.h"
 #include "tensorflow/lite/builtin_op_data.h"
+#include "tensorflow/lite/mutable_op_resolver.h"

 namespace mediapipe {

-CpuOpResolver::CpuOpResolver() {
-  AddCustom("MaxPoolingWithArgmax2D",
-            tflite_operations::RegisterMaxPoolingWithArgmax2D());
-  AddCustom("MaxUnpooling2D", tflite_operations::RegisterMaxUnpooling2D());
-  AddCustom("Convolution2DTransposeBias",
-            tflite_operations::RegisterConvolution2DTransposeBias());
+void MediaPipe_RegisterTfLiteOpResolver(tflite::MutableOpResolver *resolver) {
+  CHECK(resolver != nullptr);
+  resolver->AddCustom("MaxPoolingWithArgmax2D",
+                      tflite_operations::RegisterMaxPoolingWithArgmax2D());
+  resolver->AddCustom("MaxUnpooling2D",
+                      tflite_operations::RegisterMaxUnpooling2D());
+  resolver->AddCustom("Convolution2DTransposeBias",
+                      tflite_operations::RegisterConvolution2DTransposeBias());
 }

 }  // namespace mediapipe
diff --git a/mediapipe/util/tflite/cpu_op_resolver.h b/mediapipe/util/tflite/cpu_op_resolver.h
index 0ead1db3a..9754fbfc8 100644
--- a/mediapipe/util/tflite/cpu_op_resolver.h
+++ b/mediapipe/util/tflite/cpu_op_resolver.h
@@ -19,13 +19,17 @@

 namespace mediapipe {

-// This OpResolver is used for supporting the following ops on CPU.:
+// This function registers the CPU implementations for following custom ops:
 // "Convolution2DTransposeBias"
 // "MaxPoolArgmax"
 // "MaxUnpooling"
+extern "C" void MediaPipe_RegisterTfLiteOpResolver(tflite::MutableOpResolver*);
+
+// This resolver is used for the custom ops introduced by
+// `MediaPipe_RegisterTfLiteOpResolver` (see above).
 class CpuOpResolver : public tflite::ops::builtin::BuiltinOpResolver {
  public:
-  CpuOpResolver();
+  CpuOpResolver() { MediaPipe_RegisterTfLiteOpResolver(this); }
 };

 }  // namespace mediapipe
diff --git a/mediapipe/util/tflite/tflite_gpu_runner.cc b/mediapipe/util/tflite/tflite_gpu_runner.cc
index 3065cf8aa..2d72997e4 100644
--- a/mediapipe/util/tflite/tflite_gpu_runner.cc
+++ b/mediapipe/util/tflite/tflite_gpu_runner.cc
@@ -100,12 +100,19 @@ mediapipe::Status TFLiteGPURunner::Build() {
   // 1. Prepare inference builder.
   std::unique_ptr<InferenceBuilder> builder;
   // By default, we try CL first & fall back to GL if that fails.
-  absl::Status status = InitializeOpenCL(&builder);
-  if (status.ok()) {
-    LOG(INFO) << "OpenCL backend is used.";
-  } else {
-    LOG(ERROR) << "Falling back to OpenGL: " << status.message();
+  if (opencl_is_forced_) {
+    MP_RETURN_IF_ERROR(InitializeOpenCL(&builder));
+  } else if (opengl_is_forced_) {
     MP_RETURN_IF_ERROR(InitializeOpenGL(&builder));
+  } else {
+    // try to build OpenCL first. If something goes wrong, fall back to OpenGL.
+    absl::Status status = InitializeOpenCL(&builder);
+    if (status.ok()) {
+      LOG(INFO) << "OpenCL backend is used.";
+    } else {
+      LOG(ERROR) << "Falling back to OpenGL: " << status.message();
+      MP_RETURN_IF_ERROR(InitializeOpenGL(&builder));
+    }
   }

   // Both graphs are not needed anymore. Make sure they are deleted.
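With the registerer exported as an `extern "C"` symbol and the library marked `alwayslink = 1`, the custom CPU ops can be pulled into Python through the TensorFlow Lite interpreter wrapper named in the BUILD comment above. A hedged usage sketch ("model.tflite" is a placeholder, and the symbol must be linked into the running process):

```python
from tensorflow.lite.python import interpreter as interpreter_wrapper

# "model.tflite" stands in for a model that uses the MediaPipe custom ops.
interpreter = interpreter_wrapper.InterpreterWithCustomOps(
    model_path="model.tflite",
    custom_op_registerers=["MediaPipe_RegisterTfLiteOpResolver"])
interpreter.allocate_tensors()
```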
diff --git a/mediapipe/util/tflite/tflite_gpu_runner.h b/mediapipe/util/tflite/tflite_gpu_runner.h
index 7d12a56f5..cc20c8ebf 100644
--- a/mediapipe/util/tflite/tflite_gpu_runner.h
+++ b/mediapipe/util/tflite/tflite_gpu_runner.h
@@ -56,6 +56,10 @@ class TFLiteGPURunner {
   mediapipe::Status InitializeWithModel(
       const tflite::FlatBufferModel& flatbuffer,
       const tflite::OpResolver& op_resolver);
+
+  void ForceOpenGL() { opengl_is_forced_ = true; }
+  void ForceOpenCL() { opencl_is_forced_ = true; }
+
   mediapipe::Status BindSSBOToInputTensor(GLuint ssbo_id, int input_id);
   mediapipe::Status BindSSBOToOutputTensor(GLuint ssbo_id, int output_id);

@@ -105,6 +109,9 @@ class TFLiteGPURunner {
   // after graph_ becomes "converted" into runner_.
   std::vector<BHWC> input_shapes_;
   std::vector<BHWC> output_shapes_;
+
+  bool opencl_is_forced_ = false;
+  bool opengl_is_forced_ = false;
 };

 }  // namespace gpu
diff --git a/mediapipe/util/tracking/tracking.cc b/mediapipe/util/tracking/tracking.cc
index 21e06138a..72245c0df 100644
--- a/mediapipe/util/tracking/tracking.cc
+++ b/mediapipe/util/tracking/tracking.cc
@@ -1945,9 +1945,10 @@ void MotionBox::EstimateObjectMotion(
   // For any additional degrees of freedom, require a good set of inliers.
   if (num_continued_inliers < options_.object_similarity_min_contd_inliers()) {
-    VLOG_IF(2, options_.tracking_degrees() !=
-                   TrackStepOptions::TRACKING_DEGREE_TRANSLATION)
-        << "Falling back to translation!!!";
+    if (options_.tracking_degrees() !=
+        TrackStepOptions::TRACKING_DEGREE_TRANSLATION) {
+      VLOG(2) << "Falling back to translation!!!";
+    }
     VLOG(1) << "num_continued_inliers: " << num_continued_inliers << " < "
             << options_.object_similarity_min_contd_inliers()
             << ", fall back to translation";
diff --git a/setup.py b/setup.py
index c86ef4c39..1a67a31f7 100644
--- a/setup.py
+++ b/setup.py
@@ -82,7 +82,7 @@ def _check_bazel():
     sys.stderr.write('invalid bazel version number: %s\n' % version_segments)
     sys.exit(-1)
   bazel_version = int(''.join(['%03d' % int(seg) for seg in version_segments]))
-  if bazel_version < 2000000:
+  if bazel_version < 3004000:
     sys.stderr.write(
         'the current bazel version is older than the minimum version that MediaPipe can support. Please upgrade bazel.'
     )
diff --git a/third_party/BUILD b/third_party/BUILD
index 4d2676751..ef408e4a2 100644
--- a/third_party/BUILD
+++ b/third_party/BUILD
@@ -258,6 +258,13 @@ android_library(
     ],
 )

+android_library(
+    name = "camerax_camera2",
+    exports = [
+        "@maven//:androidx_camera_camera_camera2",
+    ],
+)
+
 android_library(
     name = "camerax_core",
     exports = [
@@ -266,8 +273,8 @@ android_library(
 )

 android_library(
-    name = "camera2",
+    name = "camerax_lifecycle",
     exports = [
-        "@maven//:androidx_camera_camera_camera2",
+        "@maven//:androidx_camera_camera_lifecycle",
     ],
 )
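For reference, the setup.py check above works because each dotted version segment is zero-padded to three digits before concatenation, so the Bazel 3.4.0 minimum encodes as 3004000. A small worked example of the same arithmetic (`encode_bazel_version` is an illustrative name, not part of setup.py):

```python
def encode_bazel_version(version):
    # Mirrors _check_bazel(): '3.4.0' -> '003' + '004' + '000' -> 3004000.
    return int(''.join('%03d' % int(seg) for seg in version.split('.')))

assert encode_bazel_version('2.0.0') == 2000000
assert encode_bazel_version('3.4.0') == 3004000
assert encode_bazel_version('3.4.1') == 3004001
```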