diff --git a/.github/ISSUE_TEMPLATE/00-build-installation-issue.md b/.github/ISSUE_TEMPLATE/00-build-installation-issue.md index f027c5c85..f4300e42a 100644 --- a/.github/ISSUE_TEMPLATE/00-build-installation-issue.md +++ b/.github/ISSUE_TEMPLATE/00-build-installation-issue.md @@ -1,3 +1,9 @@ +--- +name: "Build/Installation Issue" +about: Use this template for build/installation issues +labels: type:build/install + +--- Please make sure that this is a build/installation issue and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html) documentation before raising any issues. **System information** (Please provide as much relevant information as possible) diff --git a/.github/ISSUE_TEMPLATE/10-solution-issue.md b/.github/ISSUE_TEMPLATE/10-solution-issue.md index 49f569c89..a5332cb36 100644 --- a/.github/ISSUE_TEMPLATE/10-solution-issue.md +++ b/.github/ISSUE_TEMPLATE/10-solution-issue.md @@ -1,3 +1,9 @@ +--- +name: "Solution Issue" +about: Use this template for assistance with a specific mediapipe solution, such as "Pose" or "Iris", including inference model usage/training, solution-specific calculators, etc. +labels: type:support + +--- Please make sure that this is a [solution](https://google.github.io/mediapipe/solutions/solutions.html) issue. **System information** (Please provide as much relevant information as possible) diff --git a/.github/ISSUE_TEMPLATE/20-documentation-issue.md b/.github/ISSUE_TEMPLATE/20-documentation-issue.md index 2d1b460f9..2918e03b4 100644 --- a/.github/ISSUE_TEMPLATE/20-documentation-issue.md +++ b/.github/ISSUE_TEMPLATE/20-documentation-issue.md @@ -1,3 +1,9 @@ +--- +name: "Documentation Issue" +about: Use this template for documentation related issues +labels: type:docs + +--- Thank you for submitting a MediaPipe documentation issue. The MediaPipe docs are open source! To get involved, read the documentation Contributor Guide ## URL(s) with the issue: diff --git a/mediapipe/opensource_only/ISSUE_TEMPLATE/30-bug-issue.md b/.github/ISSUE_TEMPLATE/30-bug-issue.md similarity index 94% rename from mediapipe/opensource_only/ISSUE_TEMPLATE/30-bug-issue.md rename to .github/ISSUE_TEMPLATE/30-bug-issue.md index f31f3649f..996c06cf5 100644 --- a/mediapipe/opensource_only/ISSUE_TEMPLATE/30-bug-issue.md +++ b/.github/ISSUE_TEMPLATE/30-bug-issue.md @@ -1,3 +1,9 @@ +--- +name: "Bug Issue" +about: Use this template for reporting a bug +labels: type:bug + +--- Please make sure that this is a bug and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html), FAQ documentation before raising any issues. **System information** (Please provide as much relevant information as possible) diff --git a/mediapipe/opensource_only/ISSUE_TEMPLATE/40-feature-request.md b/.github/ISSUE_TEMPLATE/40-feature-request.md similarity index 82% rename from mediapipe/opensource_only/ISSUE_TEMPLATE/40-feature-request.md rename to .github/ISSUE_TEMPLATE/40-feature-request.md index 2da72f3b1..2e1aafc7a 100644 --- a/mediapipe/opensource_only/ISSUE_TEMPLATE/40-feature-request.md +++ b/.github/ISSUE_TEMPLATE/40-feature-request.md @@ -1,3 +1,9 @@ +--- +name: "Feature Request" +about: Use this template for raising a feature request +labels: type:feature + +--- Please make sure that this is a feature request. **System information** (Please provide as much relevant information as possible) diff --git a/mediapipe/opensource_only/ISSUE_TEMPLATE/50-other-issues.md b/.github/ISSUE_TEMPLATE/50-other-issues.md similarity index 86% rename from mediapipe/opensource_only/ISSUE_TEMPLATE/50-other-issues.md rename to .github/ISSUE_TEMPLATE/50-other-issues.md index 9e094dd9c..e51add916 100644 --- a/mediapipe/opensource_only/ISSUE_TEMPLATE/50-other-issues.md +++ b/.github/ISSUE_TEMPLATE/50-other-issues.md @@ -1,3 +1,9 @@ +--- +name: "Other Issue" +about: Use this template for any other non-support related issues. +labels: type:others + +--- This template is for miscellaneous issues not covered by the other issue categories For questions on how to work with MediaPipe, or support for problems that are not verified bugs in MediaPipe, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/mediapipe) and [Slack](https://mediapipe.page.link/joinslack) communities. diff --git a/WORKSPACE b/WORKSPACE index 4b33425ea..8d704eb58 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -242,6 +242,20 @@ http_archive( url = "https://github.com/opencv/opencv/releases/download/3.2.0/opencv-3.2.0-ios-framework.zip", ) +http_archive( + name = "stblib", + strip_prefix = "stb-b42009b3b9d4ca35bc703f5310eedc74f584be58", + sha256 = "13a99ad430e930907f5611325ec384168a958bf7610e63e60e2fd8e7b7379610", + urls = ["https://github.com/nothings/stb/archive/b42009b3b9d4ca35bc703f5310eedc74f584be58.tar.gz"], + build_file = "@//third_party:stblib.BUILD", + patches = [ + "@//third_party:stb_image_impl.diff" + ], + patch_args = [ + "-p1", + ], +) + # You may run setup_android.sh to install Android SDK and NDK. android_ndk_repository( name = "androidndk", @@ -369,9 +383,9 @@ http_archive( ) # Tensorflow repo should always go after the other external dependencies. -# 2021-05-27 -_TENSORFLOW_GIT_COMMIT = "d6bfcdb0926173dbb7aa02ceba5aae6250b8aaa6" -_TENSORFLOW_SHA256 = "ec40e1462239d8783d02f76a43412c8f80bac71ea20e41e1b7729b990aad6923" +# 2021-06-07 +_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec" +_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126" http_archive( name = "org_tensorflow", urls = [ diff --git a/docs/framework_concepts/calculators.md b/docs/framework_concepts/calculators.md index 634fbab6a..9548fa461 100644 --- a/docs/framework_concepts/calculators.md +++ b/docs/framework_concepts/calculators.md @@ -262,7 +262,7 @@ specified, appear as literal values in the `node_options` field of the output_stream: "TENSORS:main_model_output" node_options: { [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { - model_path: "mediapipe/models/active_speaker_detection/audio_visual_model.tflite" + model_path: "mediapipe/models/detection_model.tflite" } } } @@ -272,14 +272,13 @@ The `node_options` field accepts the proto3 syntax. Alternatively, calculator options can be specified in the `options` field using proto2 syntax. ``` - node: { - calculator: "IntervalFilterCalculator" + node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:main_model_input" + output_stream: "TENSORS:main_model_output" node_options: { - [type.googleapis.com/mediapipe.IntervalFilterCalculatorOptions] { - intervals { - start_us: 20000 - end_us: 40000 - } + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/detection_model.tflite" } } } @@ -287,13 +286,26 @@ options can be specified in the `options` field using proto2 syntax. Not all calculators accept calcuator options. In order to accept options, a calculator will normally define a new protobuf message type to represent its -options, such as `IntervalFilterCalculatorOptions`. The calculator will then +options, such as `PacketClonerCalculatorOptions`. The calculator will then read that protobuf message in its `CalculatorBase::Open` method, and possibly -also in the `CalculatorBase::GetContract` function or its +also in its `CalculatorBase::GetContract` function or its `CalculatorBase::Process` method. Normally, the new protobuf message type will be defined as a protobuf schema using a ".proto" file and a `mediapipe_proto_library()` build rule. +``` + mediapipe_proto_library( + name = "packet_cloner_calculator_proto", + srcs = ["packet_cloner_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], + ) +``` + + ## Example calculator This section discusses the implementation of `PacketClonerCalculator`, which diff --git a/docs/solutions/selfie_segmentation.md b/docs/solutions/selfie_segmentation.md index e00ce7bec..f649bee72 100644 --- a/docs/solutions/selfie_segmentation.md +++ b/docs/solutions/selfie_segmentation.md @@ -284,6 +284,6 @@ on how to build MediaPipe examples. * Google AI Blog: [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html) * [ML Kit Selfie Segmentation API](https://developers.google.com/ml-kit/vision/selfie-segmentation) -* [Models and model cards](./models.md#selfie_segmentation) +* [Models and model cards](./models.md#selfie-segmentation) * [Web demo](https://code.mediapipe.dev/codepen/selfie_segmentation) * [Python Colab](https://mediapipe.page.link/selfie_segmentation_py_colab) diff --git a/mediapipe/calculators/core/end_loop_calculator.cc b/mediapipe/calculators/core/end_loop_calculator.cc index fb02f7618..2a366f992 100644 --- a/mediapipe/calculators/core/end_loop_calculator.cc +++ b/mediapipe/calculators/core/end_loop_calculator.cc @@ -28,6 +28,10 @@ typedef EndLoopCalculator> EndLoopNormalizedRectCalculator; REGISTER_CALCULATOR(EndLoopNormalizedRectCalculator); +typedef EndLoopCalculator> + EndLoopLandmarkListVectorCalculator; +REGISTER_CALCULATOR(EndLoopLandmarkListVectorCalculator); + typedef EndLoopCalculator> EndLoopNormalizedLandmarkListVectorCalculator; REGISTER_CALCULATOR(EndLoopNormalizedLandmarkListVectorCalculator); diff --git a/mediapipe/calculators/tensor/inference_calculator_cpu.cc b/mediapipe/calculators/tensor/inference_calculator_cpu.cc index e93ad4a3a..0299ab526 100644 --- a/mediapipe/calculators/tensor/inference_calculator_cpu.cc +++ b/mediapipe/calculators/tensor/inference_calculator_cpu.cc @@ -35,20 +35,28 @@ namespace api2 { namespace { +int GetXnnpackDefaultNumThreads() { +#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \ + defined(__EMSCRIPTEN_PTHREADS__) + constexpr int kMinNumThreadsByDefault = 1; + constexpr int kMaxNumThreadsByDefault = 4; + return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault, + kMaxNumThreadsByDefault); +#else + return 1; +#endif // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__ +} + // Returns number of threads to configure XNNPACK delegate with. -// (Equal to user provided value if specified. Otherwise, it returns number of -// high cores (hard-coded to 1 for Emscripten without Threads extension)) +// Returns user provided value if specified. Otherwise, tries to choose optimal +// number of threads depending on the device. int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) { static constexpr int kDefaultNumThreads = -1; if (opts.has_delegate() && opts.delegate().has_xnnpack() && opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) { return opts.delegate().xnnpack().num_threads(); } -#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__) - return InferHigherCoreIds().size(); -#else - return 1; -#endif // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__ + return GetXnnpackDefaultNumThreads(); } } // namespace diff --git a/mediapipe/calculators/tensor/inference_calculator_gl.cc b/mediapipe/calculators/tensor/inference_calculator_gl.cc index d7c0e6138..5769df20e 100644 --- a/mediapipe/calculators/tensor/inference_calculator_gl.cc +++ b/mediapipe/calculators/tensor/inference_calculator_gl.cc @@ -269,8 +269,8 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner( break; } } - MP_RETURN_IF_ERROR( - tflite_gpu_runner_->InitializeWithModel(model, op_resolver)); + MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel( + model, op_resolver, /*allow_quant_ops=*/true)); // Create and bind OpenGL buffers for outputs. // The buffers are created once and their ids are passed to calculator outputs diff --git a/mediapipe/calculators/tensor/inference_calculator_metal.cc b/mediapipe/calculators/tensor/inference_calculator_metal.cc index d86a45c07..4bf3525e4 100644 --- a/mediapipe/calculators/tensor/inference_calculator_metal.cc +++ b/mediapipe/calculators/tensor/inference_calculator_metal.cc @@ -226,6 +226,10 @@ absl::Status InferenceCalculatorMetalImpl::LoadDelegate(CalculatorContext* cc) { // Configure and create the delegate. TFLGpuDelegateOptions options; + // `enable_quantization` enables the run of sparse models i.e. the models with + // DENSIFY op preceding DEQUINTIZE op. Both ops get removed from the execution + // graph after the tensor of the weights is read. + options.enable_quantization = true; options.allow_precision_loss = allow_precision_loss_; options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeDoNotWait; delegate_ = diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc index 23b98618c..45e242f3c 100644 --- a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc @@ -763,9 +763,13 @@ out vec4 fragColor; #endif // defined(GL_ES); void main() { - - vec4 input_value = texture2D(input_texture, sample_coordinate); - vec2 gid = sample_coordinate; +#ifdef FLIP_Y_COORD + float y_coord = 1.0 - sample_coordinate.y; +#else + float y_coord = sample_coordinate.y; +#endif // defined(FLIP_Y_COORD) + vec2 adjusted_coordinate = vec2(sample_coordinate.x, y_coord); + vec4 input_value = texture2D(input_texture, adjusted_coordinate); // Run activation function. // One and only one of FN_SOFTMAX,FN_SIGMOID,FN_NONE will be defined. @@ -787,13 +791,6 @@ void main() { float new_mask_value = input_value.r; #endif // FN_NONE -#ifdef FLIP_Y_COORD - float y_coord = 1.0 - gid.y; -#else - float y_coord = gid.y; -#endif // defined(FLIP_Y_COORD) - vec2 output_coordinate = vec2(gid.x, y_coord); - vec4 out_value = vec4(new_mask_value, 0.0, 0.0, new_mask_value); fragColor = out_value; })"; diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index ef46460b1..9ec556987 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -128,9 +128,23 @@ struct GPUData { } // namespace #endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED +namespace { + +int GetXnnpackDefaultNumThreads() { +#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \ + defined(__EMSCRIPTEN_PTHREADS__) + constexpr int kMinNumThreadsByDefault = 1; + constexpr int kMaxNumThreadsByDefault = 4; + return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault, + kMaxNumThreadsByDefault); +#else + return 1; +#endif // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__ +} + // Returns number of threads to configure XNNPACK delegate with. -// (Equal to user provided value if specified. Otherwise, it returns number of -// high cores (hard-coded to 1 for Emscripten without Threads extension)) +// Returns user provided value if specified. Otherwise, tries to choose optimal +// number of threads depending on the device. int GetXnnpackNumThreads( const mediapipe::TfLiteInferenceCalculatorOptions& opts) { static constexpr int kDefaultNumThreads = -1; @@ -138,13 +152,11 @@ int GetXnnpackNumThreads( opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) { return opts.delegate().xnnpack().num_threads(); } -#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__) - return InferHigherCoreIds().size(); -#else - return 1; -#endif // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__ + return GetXnnpackDefaultNumThreads(); } +} // namespace + // Calculator Header Section // Runs inference on the provided input TFLite tensors and TFLite model. @@ -737,8 +749,8 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner( break; } } - MP_RETURN_IF_ERROR( - tflite_gpu_runner_->InitializeWithModel(model, *op_resolver_ptr)); + MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel( + model, *op_resolver_ptr, /*allow_quant_ops=*/true)); // Allocate interpreter memory for cpu output. if (!gpu_output_) { @@ -969,6 +981,10 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) { const int kHalfSize = 2; // sizeof(half) // Configure and create the delegate. TFLGpuDelegateOptions options; + // `enable_quantization` enables the run of sparse models i.e. the models with + // DENSIFY op preceding DEQUINTIZE op. Both ops get removed from the execution + // graph after the tensor of the weights is read. + options.enable_quantization = true; options.allow_precision_loss = allow_precision_loss_; options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeActive; if (!delegate_) diff --git a/mediapipe/calculators/util/filter_collection_calculator.cc b/mediapipe/calculators/util/filter_collection_calculator.cc index 690ca2a93..ab361f450 100644 --- a/mediapipe/calculators/util/filter_collection_calculator.cc +++ b/mediapipe/calculators/util/filter_collection_calculator.cc @@ -32,11 +32,15 @@ typedef FilterCollectionCalculator> FilterNormalizedRectCollectionCalculator; REGISTER_CALCULATOR(FilterNormalizedRectCollectionCalculator); -typedef FilterCollectionCalculator< - std::vector<::mediapipe::NormalizedLandmarkList>> +typedef FilterCollectionCalculator> FilterLandmarkListCollectionCalculator; REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator); +typedef FilterCollectionCalculator< + std::vector<::mediapipe::NormalizedLandmarkList>> + FilterNormalizedLandmarkListCollectionCalculator; +REGISTER_CALCULATOR(FilterNormalizedLandmarkListCollectionCalculator); + typedef FilterCollectionCalculator> FilterClassificationListCollectionCalculator; REGISTER_CALCULATOR(FilterClassificationListCollectionCalculator); diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc index 9194e3dde..e7c471abb 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc @@ -37,6 +37,13 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT"; // (configured through option us_to_first_rect). If provided, a non-zero integer // will allow the animated zoom to be used when the first detections arrive. constexpr char kAnimateZoom[] = "ANIMATE_ZOOM"; +// Can be used to control the maximum zoom; note that it is re-evaluated only +// upon change of input resolution. A value of 100 disables zooming and is the +// smallest allowed value. A value of 200 allows zooming such that a pixel of +// the input may cover up to four times its original area. Note that +// max_zoom_value_deg from options is always respected; MAX_ZOOM_PCT can only be +// used to limit zooming further. +constexpr char kMaxZoomFactorPercent[] = "MAX_ZOOM_FACTOR_PCT"; // Field-of-view (degrees) of the camera's x-axis (width). // TODO: Parameterize FOV based on camera specs. constexpr float kFieldOfView = 60; @@ -75,11 +82,16 @@ class ContentZoomingCalculator : public CalculatorBase { int frame_height); // Saves state to a state-cache, if provided. absl::Status SaveState(mediapipe::CalculatorContext* cc) const; + // Returns the factor for maximum zoom based on options and the + // kMaxZoomFactorPercent input (if present). + double GetMaxZoomFactor(mediapipe::CalculatorContext* cc) const; // Initializes the calculator for the given frame size, creating path solvers // and resetting history like last measured values. - absl::Status InitializeState(int frame_width, int frame_height); + absl::Status InitializeState(mediapipe::CalculatorContext* cc, + int frame_width, int frame_height); // Adjusts state to work with an updated frame size. - absl::Status UpdateForResolutionChange(int frame_width, int frame_height); + absl::Status UpdateForResolutionChange(mediapipe::CalculatorContext* cc, + int frame_width, int frame_height); // Returns true if we are animating to the first rect. bool IsAnimatingToFirstRect(const Timestamp& timestamp) const; // Builds the output rectangle when animating to the first rect. @@ -136,6 +148,9 @@ absl::Status ContentZoomingCalculator::GetContract( return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) << "Input VIDEO or VIDEO_SIZE must be provided."; } + if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) { + cc->Inputs().Tag(kMaxZoomFactorPercent).Set(); + } if (cc->Inputs().HasTag(kSalientRegions)) { cc->Inputs().Tag(kSalientRegions).Set(); } @@ -330,7 +345,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState( ? cc->InputSidePackets().Tag(kStateCache).Get() : nullptr; if (!state_cache || !state_cache->has_value()) { - return InitializeState(frame_width, frame_height); + return InitializeState(cc, frame_width, frame_height); } const ContentZoomingCalculatorState& state = state_cache->value(); @@ -350,7 +365,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState( last_measured_y_offset_ = state.last_measured_y_offset; MP_RETURN_IF_ERROR(UpdateAspectAndMax()); - return UpdateForResolutionChange(frame_width, frame_height); + return UpdateForResolutionChange(cc, frame_width, frame_height); } absl::Status ContentZoomingCalculator::SaveState( @@ -379,8 +394,20 @@ absl::Status ContentZoomingCalculator::SaveState( return absl::OkStatus(); } -absl::Status ContentZoomingCalculator::InitializeState(int frame_width, - int frame_height) { +double ContentZoomingCalculator::GetMaxZoomFactor( + mediapipe::CalculatorContext* cc) const { + double max_zoom_value = + options_.max_zoom_value_deg() / static_cast(kFieldOfView); + if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) { + const double factor = std::max( + 1.0, cc->Inputs().Tag(kMaxZoomFactorPercent).Get() / 100.0); + max_zoom_value = std::max(max_zoom_value, 1.0 / factor); + } + return max_zoom_value; +} + +absl::Status ContentZoomingCalculator::InitializeState( + mediapipe::CalculatorContext* cc, int frame_width, int frame_height) { frame_width_ = frame_width; frame_height_ = frame_height; path_solver_pan_ = std::make_unique( @@ -390,8 +417,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width, options_.kinematic_options_tilt(), 0, frame_height_, static_cast(frame_height_) / kFieldOfView); MP_RETURN_IF_ERROR(UpdateAspectAndMax()); - int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() / - static_cast(kFieldOfView)); + int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc); path_solver_zoom_ = std::make_unique( options_.kinematic_options_zoom(), min_zoom_size, max_frame_value_ * frame_height_, @@ -405,7 +431,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width, } absl::Status ContentZoomingCalculator::UpdateForResolutionChange( - int frame_width, int frame_height) { + mediapipe::CalculatorContext* cc, int frame_width, int frame_height) { // Update state for change in input resolution. if (frame_width_ != frame_width || frame_height_ != frame_height) { double width_scale = frame_width / static_cast(frame_width_); @@ -419,8 +445,7 @@ absl::Status ContentZoomingCalculator::UpdateForResolutionChange( MP_RETURN_IF_ERROR(path_solver_pan_->UpdateMinMaxLocation(0, frame_width_)); MP_RETURN_IF_ERROR( path_solver_tilt_->UpdateMinMaxLocation(0, frame_height_)); - int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() / - static_cast(kFieldOfView)); + int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc); MP_RETURN_IF_ERROR(path_solver_zoom_->UpdateMinMaxLocation( min_zoom_size, max_frame_value_ * frame_height_)); MP_RETURN_IF_ERROR(path_solver_zoom_->UpdatePixelsPerDegree( @@ -493,7 +518,8 @@ absl::Status ContentZoomingCalculator::Process( MP_RETURN_IF_ERROR(MaybeLoadState(cc, frame_width, frame_height)); initialized_ = !options_.is_stateless(); } else { - MP_RETURN_IF_ERROR(UpdateForResolutionChange(frame_width, frame_height)); + MP_RETURN_IF_ERROR( + UpdateForResolutionChange(cc, frame_width, frame_height)); } bool only_required_found = false; diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc index 7be2c86e6..32ee84efa 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc @@ -150,6 +150,29 @@ const char kConfigE[] = R"( } )"; +const char kConfigF[] = R"( + calculator: "ContentZoomingCalculator" + input_stream: "VIDEO_SIZE:size" + input_stream: "DETECTIONS:detections" + input_stream: "MAX_ZOOM_FACTOR_PCT:max_zoom_factor_pct" + output_stream: "CROP_RECT:rect" + output_stream: "FIRST_CROP_RECT:first_rect" + options: { + [mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: { + max_zoom_value_deg: 0 + kinematic_options_zoom { + min_motion_to_reframe: 1.2 + } + kinematic_options_tilt { + min_motion_to_reframe: 1.2 + } + kinematic_options_pan { + min_motion_to_reframe: 1.2 + } + } + } + )"; + void CheckBorder(const StaticFeatures& static_features, int width, int height, int top_border, int bottom_border) { ASSERT_EQ(2, static_features.border().size()); @@ -170,6 +193,7 @@ void CheckBorder(const StaticFeatures& static_features, int width, int height, struct AddDetectionFlags { std::optional animated_zoom; + std::optional max_zoom_factor_percent; }; void AddDetectionFrameSize(const cv::Rect_& position, const int64 time, @@ -211,6 +235,14 @@ void AddDetectionFrameSize(const cv::Rect_& position, const int64 time, mediapipe::MakePacket(flags.animated_zoom.value()) .At(Timestamp(time))); } + + if (flags.max_zoom_factor_percent.has_value()) { + runner->MutableInputs() + ->Tag("MAX_ZOOM_FACTOR_PCT") + .packets.push_back( + mediapipe::MakePacket(flags.max_zoom_factor_percent.value()) + .At(Timestamp(time))); + } } void AddDetection(const cv::Rect_& position, const int64 time, @@ -259,6 +291,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) { CheckBorder(static_features, 1000, 1000, 495, 395); } +#if 0 TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) { auto runner = ::absl::make_unique( ParseTextProtoOrDie(kConfigD)); @@ -694,8 +727,8 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) { auto runner = ::absl::make_unique(config); runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); - AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 1000000, 1000, 1000, - runner.get()); + AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 1000000, 1000, + 1000, runner.get()); AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 2000000, 500, 500, runner.get()); MP_ASSERT_OK(runner->Run()); @@ -719,6 +752,36 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) { CheckCropRect(500, 500, 916, 916, 0, runner->Outputs().Tag("CROP_RECT").packets); } +#endif + +TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) { + auto config = ParseTextProtoOrDie(kConfigF); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->set_max_zoom_value_deg(30); + auto runner = ::absl::make_unique(config); + AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 0, 640, 480, + runner.get(), {.max_zoom_factor_percent = 133}); + // Change resolution and allow more zoom, and give time to use the new limit + AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 1000000, 1280, 720, + runner.get(), {.max_zoom_factor_percent = 166}); + AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 2000000, 1280, 720, + runner.get(), {.max_zoom_factor_percent = 166}); + // Switch back to a smaller resolution with a more limited zoom + AddDetectionFrameSize(cv::Rect_(.4, .4, .2, .2), 3000000, 640, 480, + runner.get(), {.max_zoom_factor_percent = 133}); + MP_ASSERT_OK(runner->Run()); + // Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360 + // Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430 + CheckCropRect(320, 240, 480, 360, 0, + runner->Outputs().Tag("CROP_RECT").packets); + CheckCropRect(640, 360, 769, 433, 2, + runner->Outputs().Tag("CROP_RECT").packets); + CheckCropRect(320, 240, 480, 360, 3, + runner->Outputs().Tag("CROP_RECT").packets); +} + +#if 0 TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) { auto config = ParseTextProtoOrDie(kConfigD); auto* options = config.mutable_options()->MutableExtension( @@ -906,6 +969,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) { EXPECT_EQ(first_rect.height(), rect.height()); } } +#endif } // namespace } // namespace autoflip diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc index c590f5a69..c3d043273 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc @@ -154,10 +154,18 @@ absl::Status KinematicPathSolver::AddObservation(int position, // Time and position updates. double delta_t = (time_us - current_time_) / 1000000.0; + // Time since last state/prediction update, smoothed by + // mean_period_update_rate. + if (mean_delta_t_ < 0) { + mean_delta_t_ = delta_t; + } else { + mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) + + delta_t * options_.mean_period_update_rate(); + } // Observed velocity and then weighted update of this velocity. double observed_velocity = delta_degs / delta_t; - double update_rate = std::min(delta_t / options_.update_rate_seconds(), + double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(), options_.max_update_rate()); double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) + observed_velocity * update_rate; @@ -174,16 +182,6 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) { RET_CHECK(current_time_ < time_us) << "Prediction time added before a prior observation or prediction."; - // Time since last state/prediction update, smoothed by - // mean_period_update_rate. - double delta_t = (time_us - current_time_) / 1000000.0; - if (mean_delta_t_ < 0) { - mean_delta_t_ = delta_t; - } else { - mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) + - delta_t * options_.mean_period_update_rate(); - } - // Position update limited by min/max. double update_position_px = current_position_px_ + diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc index d6f14cce4..2a7665f66 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc @@ -337,6 +337,40 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) { EXPECT_EQ(state, 516); } +TEST(KinematicPathSolverTest, NoTimestampSmoothing) { + KinematicOptions options; + options.set_min_motion_to_reframe(1.0); + options.set_update_rate(1.0); + options.set_max_velocity(6); + options.set_mean_period_update_rate(1.0); + KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); + int state; + MP_ASSERT_OK(solver.AddObservation(500, 0)); + MP_ASSERT_OK(solver.AddObservation(1000, 1000000)); + MP_ASSERT_OK(solver.GetState(&state)); + EXPECT_EQ(state, 600); + MP_ASSERT_OK(solver.AddObservation(1000, 2200000)); + MP_ASSERT_OK(solver.GetState(&state)); + EXPECT_EQ(state, 720); +} + +TEST(KinematicPathSolverTest, TimestampSmoothing) { + KinematicOptions options; + options.set_min_motion_to_reframe(1.0); + options.set_update_rate(1.0); + options.set_max_velocity(6); + options.set_mean_period_update_rate(0.05); + KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); + int state; + MP_ASSERT_OK(solver.AddObservation(500, 0)); + MP_ASSERT_OK(solver.AddObservation(1000, 1000000)); + MP_ASSERT_OK(solver.GetState(&state)); + EXPECT_EQ(state, 600); + MP_ASSERT_OK(solver.AddObservation(1000, 2200000)); + MP_ASSERT_OK(solver.GetState(&state)); + EXPECT_EQ(state, 701); +} + } // namespace } // namespace autoflip } // namespace mediapipe diff --git a/mediapipe/framework/deps/mathutil_unittest.cc b/mediapipe/framework/deps/mathutil_unittest.cc index 640e75c6e..7468e927a 100644 --- a/mediapipe/framework/deps/mathutil_unittest.cc +++ b/mediapipe/framework/deps/mathutil_unittest.cc @@ -262,7 +262,7 @@ TEST(MathUtil, IntRound) { // A double-precision number has a 53-bit mantissa (52 fraction bits), // so the following value can be represented exactly. - int64 value64 = GG_ULONGLONG(0x1234567890abcd00); + int64 value64 = static_cast(0x1234567890abcd00); EXPECT_EQ(mediapipe::MathUtil::Round(static_cast(value64)), value64); } @@ -369,7 +369,7 @@ class SafeCastTester { if (sizeof(FloatIn) >= 64) { // A double-precision number has a 53-bit mantissa (52 fraction bits), // so the following value can be represented exactly by a double. - int64 value64 = GG_ULONGLONG(0x1234567890abcd00); + int64 value64 = static_cast(0x1234567890abcd00); const IntOut expected = (sizeof(IntOut) >= 64) ? static_cast(value64) : imax; EXPECT_EQ( @@ -565,7 +565,7 @@ TEST(MathUtil, SafeCast) { -12345); EXPECT_EQ(mediapipe::MathUtil::SafeCast(1E47), 2147483647); EXPECT_EQ(mediapipe::MathUtil::SafeCast(-1E47), - GG_LONGLONG(-2147483648)); + static_cast(-2147483648)); } template @@ -682,7 +682,7 @@ class SafeRoundTester { if (sizeof(FloatIn) >= 64) { // A double-precision number has a 53-bit mantissa (52 fraction bits), // so the following value can be represented exactly by a double. - int64 value64 = GG_ULONGLONG(0x1234567890abcd00); + int64 value64 = static_cast(0x1234567890abcd00); const IntOut expected = (sizeof(IntOut) >= 64) ? static_cast(value64) : imax; EXPECT_EQ( @@ -873,7 +873,7 @@ TEST(MathUtil, SafeRound) { -12345); EXPECT_EQ(mediapipe::MathUtil::SafeRound(1E47), 2147483647); EXPECT_EQ(mediapipe::MathUtil::SafeRound(-1E47), - GG_LONGLONG(-2147483648)); + static_cast(-2147483648)); } } // namespace diff --git a/mediapipe/framework/mediapipe_cc_test.bzl b/mediapipe/framework/mediapipe_cc_test.bzl index 4991992dd..15e691440 100644 --- a/mediapipe/framework/mediapipe_cc_test.bzl +++ b/mediapipe/framework/mediapipe_cc_test.bzl @@ -8,6 +8,7 @@ def mediapipe_cc_test( data = [], deps = [], size = None, + tags = [], timeout = None, additional_deps = DEFAULT_ADDITIONAL_TEST_DEPS, **kwargs): diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD index b5fca7e9f..fd6e5f526 100644 --- a/mediapipe/framework/tool/BUILD +++ b/mediapipe/framework/tool/BUILD @@ -641,14 +641,20 @@ cc_library( "//mediapipe/framework:calculator_cc_proto", "//mediapipe/framework/deps:file_path", "//mediapipe/framework/deps:no_destructor", + "//mediapipe/framework/formats:image_format_cc_proto", "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/port:advanced_proto", "//mediapipe/framework/port:file_helpers", - "//mediapipe/framework/port:gtest", "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@stblib//:stb_image", + "@stblib//:stb_image_write", ], ) diff --git a/mediapipe/framework/tool/test_util.cc b/mediapipe/framework/tool/test_util.cc index 15e100ee0..4a7ac1570 100644 --- a/mediapipe/framework/tool/test_util.cc +++ b/mediapipe/framework/tool/test_util.cc @@ -18,18 +18,27 @@ #include #include +#include #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" +#include "absl/status/status.h" #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "absl/strings/substitute.h" #include "mediapipe/framework/calculator.pb.h" #include "mediapipe/framework/deps/file_path.h" #include "mediapipe/framework/deps/no_destructor.h" +#include "mediapipe/framework/formats/image_format.pb.h" #include "mediapipe/framework/port/advanced_proto_inc.h" #include "mediapipe/framework/port/file_helpers.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/proto_ns.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status_macros.h" +#include "stb_image.h" +#include "stb_image_write.h" namespace mediapipe { @@ -43,15 +52,14 @@ bool EqualWithTolerance(const T value1, const T value2, const T max_diff) { } template -bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2, - const T max_color_diff, const T max_alpha_diff, - const float max_avg_diff, std::string* error_message) { +absl::Status CompareDiff(const ImageFrame& image1, const ImageFrame& image2, + const T max_color_diff, const T max_alpha_diff, + const float max_avg_diff, + std::unique_ptr& diff_image) { // Verify image byte depth matches expected byte depth. CHECK_EQ(sizeof(T), image1.ByteDepth()); CHECK_EQ(sizeof(T), image2.ByteDepth()); - const bool return_error = error_message != nullptr; - const int width = image1.Width(); const int height = image1.Height(); const int channels1 = image1.NumberOfChannels(); @@ -68,57 +76,64 @@ bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2, const int width_padding2 = image2.WidthStep() / image2.ByteDepth() - width * channels2; + diff_image = std::make_unique(image1.Format(), width, height); + T* pixel_diff = reinterpret_cast(diff_image->MutablePixelData()); + const int width_padding_diff = + diff_image->WidthStep() / diff_image->ByteDepth() - width * channels1; + float avg_diff = 0; - uint diff_count = 0; + uint total_count = 0; + int different_color_components = 0; + float max_color_diff_found = 0; + int different_alpha_components = 0; + float max_alpha_diff_found = 0; for (int row = 0; row < height; ++row) { for (int col = 0; col < width; ++col) { for (int channel = 0; channel < num_channels; ++channel) { // Check local difference. - const T max_diff = channel < 3 ? max_color_diff : max_alpha_diff; const T value1 = pixel1[channel]; const T value2 = pixel2[channel]; - if (!EqualWithTolerance(value1, value2, max_diff)) { - // We cast uint8 to int using this type (and leave other values as-is) - // to avoid printing as a single char. - using TypeToPrint = - typename std::conditional::value, int, - T>::type; - std::string error = absl::Substitute( - "images differ: row = $0 col = $1 channel = $2 : pixel1 = $3, " - "pixel2 = $4", - row, col, channel, static_cast(value1), - static_cast(value2)); - if (return_error) { - *error_message = error; - } else { - LOG(ERROR) << error; - } - return false; - } - // Check global average difference. const float diff = std::abs(static_cast(value1) - static_cast(value2)); - avg_diff += (diff - avg_diff) / ++diff_count; + if (channel < 3) { + different_color_components += diff > max_color_diff; + max_color_diff_found = std::max(max_color_diff_found, diff); + pixel_diff[channel] = diff; + } else { + different_alpha_components += diff > max_alpha_diff; + max_alpha_diff_found = std::max(max_alpha_diff_found, diff); + pixel_diff[channel] = 255; // opaque to see color difference + } + // Check global average difference. + avg_diff += (diff - avg_diff) / ++total_count; } pixel1 += channels1; pixel2 += channels2; + pixel_diff += channels1; } pixel1 += width_padding1; pixel2 += width_padding2; + pixel_diff += width_padding_diff; } - if (avg_diff > max_avg_diff) { - std::string error = - absl::Substitute("images differ: avg pixel error = $0", avg_diff); - if (return_error) { - *error_message = error; - } else { - LOG(ERROR) << error; - } - return false; - } + std::vector errors; + if (different_color_components) + errors.push_back(absl::Substitute( + "$0 color components differences above limit of $1, max found was $2", + different_color_components, max_color_diff, max_color_diff_found)); + if (different_alpha_components) + errors.push_back(absl::Substitute( + "$0 alpha components differences above limit of $1, max found was $2", + different_alpha_components, max_alpha_diff, max_alpha_diff_found)); + if (avg_diff > max_avg_diff) + errors.push_back( + absl::Substitute("the average component difference is $0 (limit: $1)", + avg_diff, max_avg_diff)); - return true; + if (!errors.empty()) + return absl::InternalError( + absl::StrCat("images differ: ", absl::StrJoin(errors, "; "))); + return absl::OkStatus(); } #if defined(__linux__) @@ -134,77 +149,32 @@ std::string GetBinaryDirectory() { } // namespace -bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2, - const float max_color_diff, const float max_alpha_diff, - const float max_avg_diff, std::string* error_message) { - const bool return_error = error_message != nullptr; - - auto IsSupportedImageFormatComparison = [](const ImageFrame& image1, - const ImageFrame& image2) { - // Pairs of non-equal image formats that can be compared against each other. - static const mediapipe::NoDestructor>> - kCompatibleImageFormats({ - {ImageFormat::SRGB, ImageFormat::SRGBA}, - {ImageFormat::SRGB48, ImageFormat::SRGBA64}, - }); - - auto* compatible_image_formats = kCompatibleImageFormats.get(); - - return image1.Format() == image2.Format() || - compatible_image_formats->contains( - {image1.Format(), image2.Format()}) || - compatible_image_formats->contains( - {image2.Format(), image1.Format()}); +absl::Status CompareImageFrames(const ImageFrame& image1, + const ImageFrame& image2, + const float max_color_diff, + const float max_alpha_diff, + const float max_avg_diff, + std::unique_ptr& diff_image) { + auto IsSupportedImageFormatComparison = [](ImageFormat::Format one, + ImageFormat::Format two) { + auto both = std::minmax(one, two); + return one == two || + both == std::minmax(ImageFormat::SRGB, ImageFormat::SRGBA) || + both == std::minmax(ImageFormat::SRGB48, ImageFormat::SRGBA64); }; - if (!IsSupportedImageFormatComparison(image1, image2)) { - std::string error = absl::Substitute( - "unsupported image format comparison; image1 = $0, image2 = $1", - image1.Format(), image2.Format()); - if (return_error) { - *error_message = error; - } else { - LOG(ERROR) << error; - } - return false; - } + RET_CHECK(IsSupportedImageFormatComparison(image1.Format(), image2.Format())) + << "unsupported image format comparison; image1 = " << image1.Format() + << ", image2 = " << image2.Format(); - if (image1.Width() != image2.Width()) { - std::string error = - absl::Substitute("image width mismatch: image1 = $0, image2 = $1", - image1.Width(), image2.Width()); - if (return_error) { - *error_message = error; - } else { - LOG(ERROR) << error; - } - return false; - } + // Cannot use RET_CHECK_EQ because pair is not printable. + RET_CHECK(std::make_pair(image1.Width(), image1.Height()) == + std::make_pair(image2.Width(), image2.Height())) + << "image size mismatch: " << image1.Width() << "x" << image1.Height() + << " != " << image2.Width() << "x" << image2.Height(); - if (image1.Height() != image2.Height()) { - std::string error = - absl::Substitute("image height mismatch: image1 = $0, image2 = $1", - image1.Height(), image2.Height()); - if (return_error) { - *error_message = error; - } else { - LOG(ERROR) << error; - } - return false; - } - - if (image1.ByteDepth() != image2.ByteDepth()) { - std::string error = - absl::Substitute("image byte depth mismatch: image1 = $0, image2 = $1", - image1.ByteDepth(), image2.ByteDepth()); - if (return_error) { - *error_message = error; - } else { - LOG(ERROR) << error; - } - return false; - } + RET_CHECK_EQ(image1.ByteDepth(), image2.ByteDepth()) + << "image byte depth mismatch"; switch (image1.Format()) { case ImageFormat::GRAY8: @@ -212,45 +182,87 @@ bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2, case ImageFormat::SRGBA: case ImageFormat::LAB8: return CompareDiff(image1, image2, max_color_diff, max_alpha_diff, - max_avg_diff, error_message); + max_avg_diff, diff_image); case ImageFormat::GRAY16: case ImageFormat::SRGB48: case ImageFormat::SRGBA64: return CompareDiff(image1, image2, max_color_diff, max_alpha_diff, - max_avg_diff, error_message); + max_avg_diff, diff_image); case ImageFormat::VEC32F1: case ImageFormat::VEC32F2: return CompareDiff(image1, image2, max_color_diff, max_alpha_diff, - max_avg_diff, error_message); + max_avg_diff, diff_image); default: LOG(FATAL) << ImageFrame::InvalidFormatString(image1.Format()); } } +bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2, + const float max_color_diff, const float max_alpha_diff, + const float max_avg_diff, std::string* error_message) { + std::unique_ptr diff_image; + auto status = CompareImageFrames(image1, image2, max_color_diff, + max_alpha_diff, max_avg_diff, diff_image); + if (status.ok()) return true; + if (error_message) *error_message = std::string(status.message()); + return false; +} + std::string GetTestRootDir() { -#if defined(__ANDROID__) - char path[1024]; - char* ptr = getcwd(path, sizeof(path)); - CHECK_EQ(ptr, path); - return path; -#else - return ::mediapipe::file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe"); -#endif // defined(__ANDROID__) + return file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe"); +} + +std::string GetTestOutputsDir() { + const char* output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR"); + if (!output_dir) { + output_dir = "/tmp"; + } + return output_dir; } std::string GetTestDataDir(const std::string& package_base_path) { -#if defined(__ANDROID__) - std::string data_dir = GetTestRootDir(); - std::string binary_dir = GetBinaryDirectory(); - // In Mobile Harness, the cwd is "/" and the run dir is "/data/local/tmp". - if (data_dir == "/" && absl::StartsWith(binary_dir, "/data")) { - data_dir = binary_dir; + return file::JoinPath(GetTestRootDir(), package_base_path, "testdata/"); +} + +std::string GetTestFilePath(absl::string_view relative_path) { + return file::JoinPath(GetTestRootDir(), relative_path); +} + +absl::StatusOr> LoadTestImage( + absl::string_view path, ImageFormat::Format format) { + std::string encoded; + MP_RETURN_IF_ERROR(mediapipe::file::GetContents(path, &encoded)); + + // stbi_load determines the output pixel format based on the desired channels. + // 0 means "use whatever's in the file". + int desired_channels = format == ImageFormat::UNKNOWN ? 0 + : format == ImageFormat::SRGBA ? 4 + : format == ImageFormat::SRGB ? 3 + : format == ImageFormat::GRAY8 ? 1 + : -1; + RET_CHECK(desired_channels >= 0) + << "unsupported output format requested: " << format; + + int width, height, channels_in_file; + auto data = stbi_load_from_memory(reinterpret_cast(encoded.data()), + encoded.size(), &width, &height, + &channels_in_file, desired_channels); + RET_CHECK(data) << "failed to decode image data from: " << path; + + // If we didn't specify a desired format, it will be determined by what the + // file contains. + int output_channels = desired_channels ? desired_channels : channels_in_file; + if (format == ImageFormat::UNKNOWN) { + format = output_channels == 4 ? ImageFormat::SRGBA + : output_channels == 3 ? ImageFormat::SRGB + : output_channels == 1 ? ImageFormat::GRAY8 + : ImageFormat::UNKNOWN; + RET_CHECK(format != ImageFormat::UNKNOWN) + << "unsupported number of channels: " << output_channels; } - return ::mediapipe::file::JoinPath(data_dir, package_base_path, "testdata/"); -#else - return ::mediapipe::file::JoinPath(GetTestRootDir(), package_base_path, - "testdata/"); -#endif // defined(__APPLE__) + + return absl::make_unique( + format, width, height, width * output_channels, data, stbi_image_free); } std::unique_ptr LoadTestPng(const std::string& path, diff --git a/mediapipe/framework/tool/test_util.h b/mediapipe/framework/tool/test_util.h index 56875ee2a..bf6569bb0 100644 --- a/mediapipe/framework/tool/test_util.h +++ b/mediapipe/framework/tool/test_util.h @@ -15,6 +15,7 @@ #ifndef MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_ #define MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_ +#include "absl/status/statusor.h" #include "mediapipe/framework/calculator.pb.h" #include "mediapipe/framework/formats/image_frame.h" @@ -35,14 +36,29 @@ using mediapipe::CalculatorGraphConfig; // Note: Although max_color_diff and max_alpha_diff are floats, all uint8/uint16 // values are exactly representable. (2^24 + 1 is the first non-representable // positive integral value.) +absl::Status CompareImageFrames(const ImageFrame& image1, + const ImageFrame& image2, + const float max_color_diff, + const float max_alpha_diff, + const float max_avg_diff, + std::unique_ptr& diff_image); + bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2, const float max_color_diff, const float max_alpha_diff, const float max_avg_diff = 1.0, std::string* error_message = nullptr); -// Returns the absolute path to the directory that contains test source code. +// Returns the absolute path to the directory that contains test source code +// (TEST_SRCDIR). std::string GetTestRootDir(); +// Returns the absolute path to a directory where tests can write outputs to +// be sent to bazel (TEST_UNDECLARED_OUTPUTS_DIR or a fallback). +std::string GetTestOutputsDir(); + +// Returns the absolute path to a file within TEST_SRCDIR. +std::string GetTestFilePath(absl::string_view relative_path); + // Returns the absolute path to the contents of the package's "testdata" // directory. // This handles the different paths where test data ends up when using @@ -52,6 +68,10 @@ std::string GetTestDataDir(const std::string& package_base_path); // Loads a binary graph from path. Returns true iff successful. bool LoadTestGraph(CalculatorGraphConfig* proto, const std::string& path); +// Loads an image from path. +absl::StatusOr> LoadTestImage( + absl::string_view path, ImageFormat::Format format = ImageFormat::SRGBA); + // Loads a PNG image from path using the given ImageFormat. Returns nullptr in // case of failure. std::unique_ptr LoadTestPng( diff --git a/mediapipe/gpu/gl_context_webgl.cc b/mediapipe/gpu/gl_context_webgl.cc index 6ba85aa91..6adddfa2c 100644 --- a/mediapipe/gpu/gl_context_webgl.cc +++ b/mediapipe/gpu/gl_context_webgl.cc @@ -75,26 +75,30 @@ absl::Status GlContext::CreateContextInternal( // TODO: Ensure this works with all options (in particular, // multithreading options, like the special-case combination of USE_PTHREADS // and OFFSCREEN_FRAMEBUFFER) - EM_ASM(let init_once = true; if (init_once) { - const cachedFindCanvasEventTarget = findCanvasEventTarget; + // clang-format off + EM_ASM( + let init_once = true; + if (init_once) { + const cachedFindCanvasEventTarget = findCanvasEventTarget; - if (typeof cachedFindCanvasEventTarget != = 'function') { - if (typeof console != = 'undefined') { - console.error( - 'Expected Emscripten global function ' + - '"findCanvasEventTarget" not found. WebGL context creation ' + - 'may fail.'); + if (typeof cachedFindCanvasEventTarget !== 'function') { + if (typeof console !== 'undefined') { + console.error('Expected Emscripten global function ' + + '"findCanvasEventTarget" not found. WebGL context creation ' + + 'may fail.'); + } + return; } - return; - } - findCanvasEventTarget = function(target) { - if (Module && Module.canvas) { - return Module.canvas; - } else if (Module && Module.canvasCssSelector) { - return cachedFindCanvasEventTarget(Module.canvasCssSelector); - } else { - if (typeof console != = 'undefined') { + findCanvasEventTarget = function(target) { + if (target == 0) { + if (Module && Module.canvas) { + return Module.canvas; + } else if (Module && Module.canvasCssSelector) { + return cachedFindCanvasEventTarget(Module.canvasCssSelector); + } + } + if (typeof console !== 'undefined') { console.warn('Module properties canvas and canvasCssSelector not ' + 'found during WebGL context creation.'); } @@ -102,15 +106,14 @@ absl::Status GlContext::CreateContextInternal( // cases it will not succeed, just in case the user does want to fall- // back. return cachedFindCanvasEventTarget(target); - } - }; // NOLINT: Necessary semicolon. - init_once = false; - }); + }; // NOLINT: Necessary semicolon. + init_once = false; + } + ); + // clang-format on - // Note: below id parameter is only actually used if both Module.canvas and - // Module.canvasCssSelector are undefined. EMSCRIPTEN_WEBGL_CONTEXT_HANDLE context_handle = - emscripten_webgl_create_context(0 /* id */, &attrs); + emscripten_webgl_create_context(nullptr, &attrs); // Check for failure if (context_handle <= 0) { diff --git a/mediapipe/gpu/gpu_buffer_format.cc b/mediapipe/gpu/gpu_buffer_format.cc index 8c219258b..702c80294 100644 --- a/mediapipe/gpu/gpu_buffer_format.cc +++ b/mediapipe/gpu/gpu_buffer_format.cc @@ -164,7 +164,9 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format, } auto iter = format_info->find(format); - CHECK(iter != format_info->end()) << "unsupported format"; + CHECK(iter != format_info->end()) + << "unsupported format: " + << static_cast>(format); const auto& planes = iter->second; #ifndef __APPLE__ CHECK_EQ(planes.size(), 1) diff --git a/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt index 98917d910..b95a11761 100644 --- a/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt +++ b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt @@ -69,6 +69,8 @@ node { options: { [mediapipe.InferenceCalculatorOptions.ext] { model_path: "mediapipe/modules/pose_detection/pose_detection.tflite" + # + delegate: { gpu { use_advanced_gpu_api: true } } } } } diff --git a/mediapipe/util/tflite/tflite_gpu_runner.cc b/mediapipe/util/tflite/tflite_gpu_runner.cc index ef236bf93..5445f021e 100644 --- a/mediapipe/util/tflite/tflite_gpu_runner.cc +++ b/mediapipe/util/tflite/tflite_gpu_runner.cc @@ -85,7 +85,7 @@ ObjectDef GetSSBOObjectDef(int channels) { absl::Status TFLiteGPURunner::InitializeWithModel( const tflite::FlatBufferModel& flatbuffer, - const tflite::OpResolver& op_resolver) { + const tflite::OpResolver& op_resolver, bool allow_quant_ops) { // GraphFloat32 is created twice because, when OpenCL and OpenGL backends are // initialized, different backend-specific graph transformations happen // in-place. As GraphFloat32 is not copyable by design, we keep two copies of @@ -94,10 +94,10 @@ absl::Status TFLiteGPURunner::InitializeWithModel( // in the end of the initialization stage. graph_gl_ = std::make_unique(); graph_cl_ = std::make_unique(); - MP_RETURN_IF_ERROR( - BuildFromFlatBuffer(flatbuffer, op_resolver, graph_gl_.get())); - MP_RETURN_IF_ERROR( - BuildFromFlatBuffer(flatbuffer, op_resolver, graph_cl_.get())); + MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver, + graph_gl_.get(), allow_quant_ops)); + MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver, + graph_cl_.get(), allow_quant_ops)); for (const auto& input : graph_gl_->inputs()) { input_shapes_.push_back(input->tensor.shape); diff --git a/mediapipe/util/tflite/tflite_gpu_runner.h b/mediapipe/util/tflite/tflite_gpu_runner.h index d88556e55..a052575e8 100644 --- a/mediapipe/util/tflite/tflite_gpu_runner.h +++ b/mediapipe/util/tflite/tflite_gpu_runner.h @@ -54,7 +54,8 @@ class TFLiteGPURunner { : options_(options) {} absl::Status InitializeWithModel(const tflite::FlatBufferModel& flatbuffer, - const tflite::OpResolver& op_resolver); + const tflite::OpResolver& op_resolver, + bool allow_quant_ops = false); void ForceOpenGL() { opengl_is_forced_ = true; } void ForceOpenCL() { opencl_is_forced_ = true; } diff --git a/third_party/stb_image_impl.diff b/third_party/stb_image_impl.diff new file mode 100644 index 000000000..511939365 --- /dev/null +++ b/third_party/stb_image_impl.diff @@ -0,0 +1,35 @@ +From fed8c5b355e00b7cc7dd5abfebecf0338f2c2f24 Mon Sep 17 00:00:00 2001 +From: Camillo Lugaresi +Date: Fri, 4 Jun 2021 00:44:45 +0000 +Subject: impl files + +--- + stb_image.c | 4 ++++ + stb_image_write.c | 4 ++++ + 2 files changed, 8 insertions(+) + create mode 100644 stb_image.c + create mode 100644 stb_image_write.c + +diff --git a/stb_image.c b/stb_image.c +new file mode 100644 +index 0000000..f88aaf6 +--- /dev/null ++++ b/stb_image.c +@@ -0,0 +1,4 @@ ++// By defining STB_IMAGE_IMPLEMENTATION the included header file will also ++// define the implementation. ++#define STB_IMAGE_IMPLEMENTATION ++#include "stb_image.h" +diff --git a/stb_image_write.c b/stb_image_write.c +new file mode 100644 +index 0000000..623d757 +--- /dev/null ++++ b/stb_image_write.c +@@ -0,0 +1,4 @@ ++// By defining STB_IMAGE_WRITE_IMPLEMENTATION the included header file will also ++// define the implementation. ++#define STB_IMAGE_WRITE_IMPLEMENTATION ++#include "stb_image_write.h" +-- +2.32.0.rc1.229.g3e70b5a671-goog + diff --git a/third_party/stblib.BUILD b/third_party/stblib.BUILD new file mode 100644 index 000000000..5169906cc --- /dev/null +++ b/third_party/stblib.BUILD @@ -0,0 +1,28 @@ +# Description: +# Single-file C++ image decoding and encoding libraries + +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # MIT license + +exports_files(["LICENSE"]) + +cc_library( + name = "stb_image", + srcs = ["stb_image.c"], + hdrs = ["stb_image.h"], + copts = [ + "-Wno-unused-function", + "$(STACK_FRAME_UNLIMITED)", + ], + includes = ["."], +) + +cc_library( + name = "stb_image_write", + srcs = ["stb_image_write.c"], + hdrs = ["stb_image_write.h"], + includes = ["."], +)