Project import generated by Copybara.

GitOrigin-RevId: ec25bf2e416c3689477e82946fb69de2e53b9161
MediaPipe Team 2021-06-09 15:43:03 -07:00 committed by chuoling
parent b48d72e43f
commit b544a314b3
32 changed files with 561 additions and 234 deletions

View File

@@ -1,3 +1,9 @@
+---
+name: "Build/Installation Issue"
+about: Use this template for build/installation issues
+labels: type:build/install
+---
+
<em>Please make sure that this is a build/installation issue and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html) documentation before raising any issues.</em>
**System information** (Please provide as much relevant information as possible)

View File

@@ -1,3 +1,9 @@
+---
+name: "Solution Issue"
+about: Use this template for assistance with a specific mediapipe solution, such as "Pose" or "Iris", including inference model usage/training, solution-specific calculators, etc.
+labels: type:support
+---
+
<em>Please make sure that this is a [solution](https://google.github.io/mediapipe/solutions/solutions.html) issue.</em>
**System information** (Please provide as much relevant information as possible)

View File

@@ -1,3 +1,9 @@
+---
+name: "Documentation Issue"
+about: Use this template for documentation related issues
+labels: type:docs
+---
+
Thank you for submitting a MediaPipe documentation issue.
The MediaPipe docs are open source! To get involved, read the documentation Contributor Guide
## URL(s) with the issue:

View File

@@ -1,3 +1,9 @@
+---
+name: "Bug Issue"
+about: Use this template for reporting a bug
+labels: type:bug
+---
+
<em>Please make sure that this is a bug and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html) and FAQ documentation before raising any issues.</em>
**System information** (Please provide as much relevant information as possible)

View File

@@ -1,3 +1,9 @@
+---
+name: "Feature Request"
+about: Use this template for raising a feature request
+labels: type:feature
+---
+
<em>Please make sure that this is a feature request.</em>
**System information** (Please provide as much relevant information as possible)

View File

@@ -1,3 +1,9 @@
+---
+name: "Other Issue"
+about: Use this template for any other non-support related issues.
+labels: type:others
+---
+
This template is for miscellaneous issues not covered by the other issue categories.
For questions on how to work with MediaPipe, or support for problems that are not verified bugs in MediaPipe, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/mediapipe) and [Slack](https://mediapipe.page.link/joinslack) communities.

View File

@@ -242,6 +242,20 @@ http_archive(
    url = "https://github.com/opencv/opencv/releases/download/3.2.0/opencv-3.2.0-ios-framework.zip",
)

+http_archive(
+    name = "stblib",
+    strip_prefix = "stb-b42009b3b9d4ca35bc703f5310eedc74f584be58",
+    sha256 = "13a99ad430e930907f5611325ec384168a958bf7610e63e60e2fd8e7b7379610",
+    urls = ["https://github.com/nothings/stb/archive/b42009b3b9d4ca35bc703f5310eedc74f584be58.tar.gz"],
+    build_file = "@//third_party:stblib.BUILD",
+    patches = [
+        "@//third_party:stb_image_impl.diff"
+    ],
+    patch_args = [
+        "-p1",
+    ],
+)
+
# You may run setup_android.sh to install Android SDK and NDK.
android_ndk_repository(
    name = "androidndk",
@@ -369,9 +383,9 @@ http_archive(
)

# Tensorflow repo should always go after the other external dependencies.
-# 2021-05-27
-_TENSORFLOW_GIT_COMMIT = "d6bfcdb0926173dbb7aa02ceba5aae6250b8aaa6"
-_TENSORFLOW_SHA256 = "ec40e1462239d8783d02f76a43412c8f80bac71ea20e41e1b7729b990aad6923"
+# 2021-06-07
+_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec"
+_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126"
http_archive(
    name = "org_tensorflow",
    urls = [

View File

@@ -262,7 +262,7 @@ specified, appear as literal values in the `node_options` field of the
  output_stream: "TENSORS:main_model_output"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
-      model_path: "mediapipe/models/active_speaker_detection/audio_visual_model.tflite"
+      model_path: "mediapipe/models/detection_model.tflite"
    }
  }
}
@@ -272,14 +272,13 @@ The `node_options` field accepts the proto3 syntax. Alternatively, calculator
options can be specified in the `options` field using proto2 syntax.

```
-node: {
-  calculator: "IntervalFilterCalculator"
+node {
+  calculator: "TfLiteInferenceCalculator"
+  input_stream: "TENSORS:main_model_input"
+  output_stream: "TENSORS:main_model_output"
  node_options: {
-    [type.googleapis.com/mediapipe.IntervalFilterCalculatorOptions] {
-      intervals {
-        start_us: 20000
-        end_us: 40000
-      }
+    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
+      model_path: "mediapipe/models/detection_model.tflite"
    }
  }
}
@@ -287,13 +286,26 @@ options can be specified in the `options` field using proto2 syntax.
Not all calculators accept calculator options. In order to accept options, a
calculator will normally define a new protobuf message type to represent its
-options, such as `IntervalFilterCalculatorOptions`. The calculator will then
+options, such as `PacketClonerCalculatorOptions`. The calculator will then
read that protobuf message in its `CalculatorBase::Open` method, and possibly
-also in the `CalculatorBase::GetContract` function or its
+also in its `CalculatorBase::GetContract` function or its
`CalculatorBase::Process` method. Normally, the new protobuf message type will
be defined as a protobuf schema using a ".proto" file and a
`mediapipe_proto_library()` build rule.

+```
+mediapipe_proto_library(
+    name = "packet_cloner_calculator_proto",
+    srcs = ["packet_cloner_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+    ],
+)
+```
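
The options message can then be read inside the calculator. A minimal sketch of the pattern described above (the calculator name here is hypothetical; it assumes the build rule above generates `packet_cloner_calculator.pb.h`):

```
#include "mediapipe/calculators/core/packet_cloner_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

class OptionsReadingCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override {
    // Options<T>() returns the calculator's options message, whether it was
    // supplied through the `options` (proto2) or `node_options` (proto3) field.
    const auto& options = cc->Options<PacketClonerCalculatorOptions>();
    (void)options;  // Option fields would be read here.
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    return absl::OkStatus();
  }
};
REGISTER_CALCULATOR(OptionsReadingCalculator);

}  // namespace mediapipe
```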
## Example calculator

This section discusses the implementation of `PacketClonerCalculator`, which

View File

@@ -284,6 +284,6 @@ on how to build MediaPipe examples.
* Google AI Blog:
  [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
* [ML Kit Selfie Segmentation API](https://developers.google.com/ml-kit/vision/selfie-segmentation)
-* [Models and model cards](./models.md#selfie_segmentation)
+* [Models and model cards](./models.md#selfie-segmentation)
* [Web demo](https://code.mediapipe.dev/codepen/selfie_segmentation)
* [Python Colab](https://mediapipe.page.link/selfie_segmentation_py_colab)

View File

@@ -28,6 +28,10 @@ typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedRect>>
    EndLoopNormalizedRectCalculator;
REGISTER_CALCULATOR(EndLoopNormalizedRectCalculator);

+typedef EndLoopCalculator<std::vector<::mediapipe::LandmarkList>>
+    EndLoopLandmarkListVectorCalculator;
+REGISTER_CALCULATOR(EndLoopLandmarkListVectorCalculator);
+
typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedLandmarkList>>
    EndLoopNormalizedLandmarkListVectorCalculator;
REGISTER_CALCULATOR(EndLoopNormalizedLandmarkListVectorCalculator);

View File

@@ -35,20 +35,28 @@ namespace api2 {
namespace {

+int GetXnnpackDefaultNumThreads() {
+#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
+    defined(__EMSCRIPTEN_PTHREADS__)
+  constexpr int kMinNumThreadsByDefault = 1;
+  constexpr int kMaxNumThreadsByDefault = 4;
+  return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault,
+                    kMaxNumThreadsByDefault);
+#else
+  return 1;
+#endif  // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__
+}
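// Illustrative values (hypothetical core counts, not from this commit): an
// 8-core phone gets std::clamp(8 / 2, 1, 4) == 4 threads, while a dual-core
// device gets std::clamp(2 / 2, 1, 4) == 1.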
// Returns number of threads to configure XNNPACK delegate with.
-// (Equal to user provided value if specified. Otherwise, it returns number of
-// high cores (hard-coded to 1 for Emscripten without Threads extension))
+// Returns user provided value if specified. Otherwise, tries to choose optimal
+// number of threads depending on the device.
int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) {
  static constexpr int kDefaultNumThreads = -1;
  if (opts.has_delegate() && opts.delegate().has_xnnpack() &&
      opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
    return opts.delegate().xnnpack().num_threads();
  }
-#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
-  return InferHigherCoreIds().size();
-#else
-  return 1;
-#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
+  return GetXnnpackDefaultNumThreads();
}

}  // namespace

View File

@@ -269,8 +269,8 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
      break;
    }
  }
-  MP_RETURN_IF_ERROR(
-      tflite_gpu_runner_->InitializeWithModel(model, op_resolver));
+  MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
+      model, op_resolver, /*allow_quant_ops=*/true));

  // Create and bind OpenGL buffers for outputs.
  // The buffers are created once and their ids are passed to calculator outputs

View File

@@ -226,6 +226,10 @@ absl::Status InferenceCalculatorMetalImpl::LoadDelegate(CalculatorContext* cc) {
  // Configure and create the delegate.
  TFLGpuDelegateOptions options;
+  // `enable_quantization` enables running sparse models, i.e. models with a
+  // DENSIFY op preceding a DEQUANTIZE op. Both ops are removed from the
+  // execution graph after the weight tensor is read.
+  options.enable_quantization = true;
  options.allow_precision_loss = allow_precision_loss_;
  options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeDoNotWait;
  delegate_ =

View File

@@ -763,9 +763,13 @@ out vec4 fragColor;
#endif  // defined(GL_ES);

void main() {
-  vec4 input_value = texture2D(input_texture, sample_coordinate);
-  vec2 gid = sample_coordinate;
+#ifdef FLIP_Y_COORD
+  float y_coord = 1.0 - sample_coordinate.y;
+#else
+  float y_coord = sample_coordinate.y;
+#endif  // defined(FLIP_Y_COORD)
+  vec2 adjusted_coordinate = vec2(sample_coordinate.x, y_coord);
+  vec4 input_value = texture2D(input_texture, adjusted_coordinate);

  // Run activation function.
  // One and only one of FN_SOFTMAX,FN_SIGMOID,FN_NONE will be defined.
@@ -787,13 +791,6 @@ void main() {
  float new_mask_value = input_value.r;
#endif  // FN_NONE

-#ifdef FLIP_Y_COORD
-  float y_coord = 1.0 - gid.y;
-#else
-  float y_coord = gid.y;
-#endif  // defined(FLIP_Y_COORD)
-  vec2 output_coordinate = vec2(gid.x, y_coord);
-
  vec4 out_value = vec4(new_mask_value, 0.0, 0.0, new_mask_value);
  fragColor = out_value;
})";

View File

@@ -128,9 +128,23 @@ struct GPUData {
}  // namespace
#endif  // MEDIAPIPE_TFLITE_GPU_SUPPORTED

+namespace {
+
+int GetXnnpackDefaultNumThreads() {
+#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
+    defined(__EMSCRIPTEN_PTHREADS__)
+  constexpr int kMinNumThreadsByDefault = 1;
+  constexpr int kMaxNumThreadsByDefault = 4;
+  return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault,
+                    kMaxNumThreadsByDefault);
+#else
+  return 1;
+#endif  // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__
+}
+
// Returns number of threads to configure XNNPACK delegate with.
-// (Equal to user provided value if specified. Otherwise, it returns number of
-// high cores (hard-coded to 1 for Emscripten without Threads extension))
+// Returns user provided value if specified. Otherwise, tries to choose optimal
+// number of threads depending on the device.
int GetXnnpackNumThreads(
    const mediapipe::TfLiteInferenceCalculatorOptions& opts) {
  static constexpr int kDefaultNumThreads = -1;
@@ -138,13 +152,11 @@ int GetXnnpackNumThreads(
      opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
    return opts.delegate().xnnpack().num_threads();
  }
-#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
-  return InferHigherCoreIds().size();
-#else
-  return 1;
-#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
+  return GetXnnpackDefaultNumThreads();
}

+}  // namespace
+
// Calculator Header Section
// Runs inference on the provided input TFLite tensors and TFLite model.
@@ -737,8 +749,8 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
      break;
    }
  }
-  MP_RETURN_IF_ERROR(
-      tflite_gpu_runner_->InitializeWithModel(model, *op_resolver_ptr));
+  MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
+      model, *op_resolver_ptr, /*allow_quant_ops=*/true));

  // Allocate interpreter memory for cpu output.
  if (!gpu_output_) {
@@ -969,6 +981,10 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
    const int kHalfSize = 2;  // sizeof(half)
    // Configure and create the delegate.
    TFLGpuDelegateOptions options;
+    // `enable_quantization` enables running sparse models, i.e. models with a
+    // DENSIFY op preceding a DEQUANTIZE op. Both ops are removed from the
+    // execution graph after the weight tensor is read.
+    options.enable_quantization = true;
    options.allow_precision_loss = allow_precision_loss_;
    options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeActive;
    if (!delegate_)

View File

@@ -32,11 +32,15 @@ typedef FilterCollectionCalculator<std::vector<::mediapipe::NormalizedRect>>
    FilterNormalizedRectCollectionCalculator;
REGISTER_CALCULATOR(FilterNormalizedRectCollectionCalculator);

-typedef FilterCollectionCalculator<
-    std::vector<::mediapipe::NormalizedLandmarkList>>
+typedef FilterCollectionCalculator<std::vector<::mediapipe::LandmarkList>>
    FilterLandmarkListCollectionCalculator;
REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator);

+typedef FilterCollectionCalculator<
+    std::vector<::mediapipe::NormalizedLandmarkList>>
+    FilterNormalizedLandmarkListCollectionCalculator;
+REGISTER_CALCULATOR(FilterNormalizedLandmarkListCollectionCalculator);
+
typedef FilterCollectionCalculator<std::vector<::mediapipe::ClassificationList>>
    FilterClassificationListCollectionCalculator;
REGISTER_CALCULATOR(FilterClassificationListCollectionCalculator);

View File

@@ -37,6 +37,13 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
// (configured through option us_to_first_rect). If provided, a non-zero integer
// will allow the animated zoom to be used when the first detections arrive.
constexpr char kAnimateZoom[] = "ANIMATE_ZOOM";
+// Can be used to control the maximum zoom; note that it is re-evaluated only
+// upon change of input resolution. A value of 100 disables zooming and is the
+// smallest allowed value. A value of 200 allows zooming such that a pixel of
+// the input may cover up to four times its original area. Note that
+// max_zoom_value_deg from options is always respected; MAX_ZOOM_PCT can only be
+// used to limit zooming further.
+constexpr char kMaxZoomFactorPercent[] = "MAX_ZOOM_FACTOR_PCT";
// Field-of-view (degrees) of the camera's x-axis (width).
// TODO: Parameterize FOV based on camera specs.
constexpr float kFieldOfView = 60;
@@ -75,11 +82,16 @@ class ContentZoomingCalculator : public CalculatorBase {
                   int frame_height);
  // Saves state to a state-cache, if provided.
  absl::Status SaveState(mediapipe::CalculatorContext* cc) const;
+  // Returns the factor for maximum zoom based on options and the
+  // kMaxZoomFactorPercent input (if present).
+  double GetMaxZoomFactor(mediapipe::CalculatorContext* cc) const;
  // Initializes the calculator for the given frame size, creating path solvers
  // and resetting history like last measured values.
-  absl::Status InitializeState(int frame_width, int frame_height);
+  absl::Status InitializeState(mediapipe::CalculatorContext* cc,
+                               int frame_width, int frame_height);
  // Adjusts state to work with an updated frame size.
-  absl::Status UpdateForResolutionChange(int frame_width, int frame_height);
+  absl::Status UpdateForResolutionChange(mediapipe::CalculatorContext* cc,
+                                         int frame_width, int frame_height);
  // Returns true if we are animating to the first rect.
  bool IsAnimatingToFirstRect(const Timestamp& timestamp) const;
  // Builds the output rectangle when animating to the first rect.
@@ -136,6 +148,9 @@ absl::Status ContentZoomingCalculator::GetContract(
    return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
           << "Input VIDEO or VIDEO_SIZE must be provided.";
  }
+  if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) {
+    cc->Inputs().Tag(kMaxZoomFactorPercent).Set<int>();
+  }
  if (cc->Inputs().HasTag(kSalientRegions)) {
    cc->Inputs().Tag(kSalientRegions).Set<DetectionSet>();
  }
@@ -330,7 +345,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState(
          ? cc->InputSidePackets().Tag(kStateCache).Get<StateCacheType*>()
          : nullptr;
  if (!state_cache || !state_cache->has_value()) {
-    return InitializeState(frame_width, frame_height);
+    return InitializeState(cc, frame_width, frame_height);
  }
  const ContentZoomingCalculatorState& state = state_cache->value();
@@ -350,7 +365,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState(
  last_measured_y_offset_ = state.last_measured_y_offset;
  MP_RETURN_IF_ERROR(UpdateAspectAndMax());

-  return UpdateForResolutionChange(frame_width, frame_height);
+  return UpdateForResolutionChange(cc, frame_width, frame_height);
}

absl::Status ContentZoomingCalculator::SaveState(
@@ -379,8 +394,20 @@ absl::Status ContentZoomingCalculator::SaveState(
  return absl::OkStatus();
}

-absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
-                                                       int frame_height) {
+double ContentZoomingCalculator::GetMaxZoomFactor(
+    mediapipe::CalculatorContext* cc) const {
+  double max_zoom_value =
+      options_.max_zoom_value_deg() / static_cast<double>(kFieldOfView);
+  if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) {
+    const double factor = std::max(
+        1.0, cc->Inputs().Tag(kMaxZoomFactorPercent).Get<int>() / 100.0);
+    max_zoom_value = std::max(max_zoom_value, 1.0 / factor);
+  }
+  return max_zoom_value;
+}
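// Worked example (illustrative values, not from the code): a
// MAX_ZOOM_FACTOR_PCT packet of 133 gives factor = 1.33, so max_zoom_value is
// at least 1 / 1.33 ~= 0.75 and min_zoom_size below becomes roughly 75% of the
// frame height; cf. MaxZoomValueOverride in the calculator test.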
+
+absl::Status ContentZoomingCalculator::InitializeState(
+    mediapipe::CalculatorContext* cc, int frame_width, int frame_height) {
  frame_width_ = frame_width;
  frame_height_ = frame_height;
  path_solver_pan_ = std::make_unique<KinematicPathSolver>(
@@ -390,8 +417,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
      options_.kinematic_options_tilt(), 0, frame_height_,
      static_cast<float>(frame_height_) / kFieldOfView);
  MP_RETURN_IF_ERROR(UpdateAspectAndMax());
-  int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() /
-                                       static_cast<double>(kFieldOfView));
+  int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc);
  path_solver_zoom_ = std::make_unique<KinematicPathSolver>(
      options_.kinematic_options_zoom(), min_zoom_size,
      max_frame_value_ * frame_height_,
@@ -405,7 +431,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
}

absl::Status ContentZoomingCalculator::UpdateForResolutionChange(
-    int frame_width, int frame_height) {
+    mediapipe::CalculatorContext* cc, int frame_width, int frame_height) {
  // Update state for change in input resolution.
  if (frame_width_ != frame_width || frame_height_ != frame_height) {
    double width_scale = frame_width / static_cast<double>(frame_width_);
@@ -419,8 +445,7 @@ absl::Status ContentZoomingCalculator::UpdateForResolutionChange(
    MP_RETURN_IF_ERROR(path_solver_pan_->UpdateMinMaxLocation(0, frame_width_));
    MP_RETURN_IF_ERROR(
        path_solver_tilt_->UpdateMinMaxLocation(0, frame_height_));
-    int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() /
-                                         static_cast<double>(kFieldOfView));
+    int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc);
    MP_RETURN_IF_ERROR(path_solver_zoom_->UpdateMinMaxLocation(
        min_zoom_size, max_frame_value_ * frame_height_));
    MP_RETURN_IF_ERROR(path_solver_zoom_->UpdatePixelsPerDegree(
@@ -493,7 +518,8 @@ absl::Status ContentZoomingCalculator::Process(
    MP_RETURN_IF_ERROR(MaybeLoadState(cc, frame_width, frame_height));
    initialized_ = !options_.is_stateless();
  } else {
-    MP_RETURN_IF_ERROR(UpdateForResolutionChange(frame_width, frame_height));
+    MP_RETURN_IF_ERROR(
+        UpdateForResolutionChange(cc, frame_width, frame_height));
  }

  bool only_required_found = false;

View File

@@ -150,6 +150,29 @@ const char kConfigE[] = R"(
    }
  )";

+const char kConfigF[] = R"(
+    calculator: "ContentZoomingCalculator"
+    input_stream: "VIDEO_SIZE:size"
+    input_stream: "DETECTIONS:detections"
+    input_stream: "MAX_ZOOM_FACTOR_PCT:max_zoom_factor_pct"
+    output_stream: "CROP_RECT:rect"
+    output_stream: "FIRST_CROP_RECT:first_rect"
+    options: {
+      [mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: {
+        max_zoom_value_deg: 0
+        kinematic_options_zoom {
+          min_motion_to_reframe: 1.2
+        }
+        kinematic_options_tilt {
+          min_motion_to_reframe: 1.2
+        }
+        kinematic_options_pan {
+          min_motion_to_reframe: 1.2
+        }
+      }
+    }
+  )";
+
void CheckBorder(const StaticFeatures& static_features, int width, int height,
                 int top_border, int bottom_border) {
  ASSERT_EQ(2, static_features.border().size());
@@ -170,6 +193,7 @@ void CheckBorder(const StaticFeatures& static_features, int width, int height,

struct AddDetectionFlags {
  std::optional<bool> animated_zoom;
+  std::optional<int> max_zoom_factor_percent;
};

void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
@@ -211,6 +235,14 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
            mediapipe::MakePacket<bool>(flags.animated_zoom.value())
                .At(Timestamp(time)));
  }
+  if (flags.max_zoom_factor_percent.has_value()) {
+    runner->MutableInputs()
+        ->Tag("MAX_ZOOM_FACTOR_PCT")
+        .packets.push_back(
+            mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value())
+                .At(Timestamp(time)));
+  }
}

void AddDetection(const cv::Rect_<float>& position, const int64 time,
@@ -259,6 +291,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) {
  CheckBorder(static_features, 1000, 1000, 495, 395);
}

+#if 0
TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
  auto runner = ::absl::make_unique<CalculatorRunner>(
      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD));
@@ -694,8 +727,8 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
  auto runner = ::absl::make_unique<CalculatorRunner>(config);
  runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
      mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
-  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
-                        runner.get());
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000,
+                        1000, runner.get());
  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 2000000, 500, 500,
                        runner.get());
  MP_ASSERT_OK(runner->Run());
@@ -719,6 +752,36 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) {
  CheckCropRect(500, 500, 916, 916, 0,
                runner->Outputs().Tag("CROP_RECT").packets);
}
+#endif

+TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
+  auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigF);
+  auto* options = config.mutable_options()->MutableExtension(
+      ContentZoomingCalculatorOptions::ext);
+  options->set_max_zoom_value_deg(30);
+  auto runner = ::absl::make_unique<CalculatorRunner>(config);
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 640, 480,
+                        runner.get(), {.max_zoom_factor_percent = 133});
+  // Change resolution and allow more zoom, and give time to use the new limit
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1280, 720,
+                        runner.get(), {.max_zoom_factor_percent = 166});
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 2000000, 1280, 720,
+                        runner.get(), {.max_zoom_factor_percent = 166});
+  // Switch back to a smaller resolution with a more limited zoom
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 3000000, 640, 480,
+                        runner.get(), {.max_zoom_factor_percent = 133});
+  MP_ASSERT_OK(runner->Run());
+  // Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360
+  // Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430
+  CheckCropRect(320, 240, 480, 360, 0,
+                runner->Outputs().Tag("CROP_RECT").packets);
+  CheckCropRect(640, 360, 769, 433, 2,
+                runner->Outputs().Tag("CROP_RECT").packets);
+  CheckCropRect(320, 240, 480, 360, 3,
+                runner->Outputs().Tag("CROP_RECT").packets);
+}
+
+#if 0
TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
  auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
  auto* options = config.mutable_options()->MutableExtension(
@@ -906,6 +969,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) {
    EXPECT_EQ(first_rect.height(), rect.height());
  }
}
+#endif

}  // namespace
}  // namespace autoflip

View File

@@ -154,10 +154,18 @@ absl::Status KinematicPathSolver::AddObservation(int position,

  // Time and position updates.
  double delta_t = (time_us - current_time_) / 1000000.0;
+  // Time since last state/prediction update, smoothed by
+  // mean_period_update_rate.
+  if (mean_delta_t_ < 0) {
+    mean_delta_t_ = delta_t;
+  } else {
+    mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
+                    delta_t * options_.mean_period_update_rate();
+  }
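  // For example (illustrative numbers): with mean_period_update_rate = 0.05,
  // a prior mean of 1.0 s and a new delta_t of 1.2 s yield
  // 0.95 * 1.0 + 0.05 * 1.2 = 1.01 s, so a single irregular frame gap barely
  // changes the smoothed period used below.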
  // Observed velocity and then weighted update of this velocity.
  double observed_velocity = delta_degs / delta_t;
-  double update_rate = std::min(delta_t / options_.update_rate_seconds(),
+  double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(),
                                options_.max_update_rate());
  double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
                            observed_velocity * update_rate;
@@ -174,16 +182,6 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) {
  RET_CHECK(current_time_ < time_us)
      << "Prediction time added before a prior observation or prediction.";

-  // Time since last state/prediction update, smoothed by
-  // mean_period_update_rate.
-  double delta_t = (time_us - current_time_) / 1000000.0;
-  if (mean_delta_t_ < 0) {
-    mean_delta_t_ = delta_t;
-  } else {
-    mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
-                    delta_t * options_.mean_period_update_rate();
-  }
-
  // Position update limited by min/max.
  double update_position_px =
      current_position_px_ +

View File

@@ -337,6 +337,40 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) {
  EXPECT_EQ(state, 516);
}

+TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  options.set_mean_period_update_rate(1.0);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  int state;
+  MP_ASSERT_OK(solver.AddObservation(500, 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 600);
+  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 720);
+}
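// With mean_period_update_rate below 1.0, the smoothed period lags the raw
// delta_t, so the same irregular 1.2 s gap moves the solver less than in
// NoTimestampSmoothing above (701 vs. 720); note the ratio of the position
// deltas, 101/120, matches the smoothed-to-raw period ratio 1.01/1.2.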
+TEST(KinematicPathSolverTest, TimestampSmoothing) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  options.set_mean_period_update_rate(0.05);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  int state;
+  MP_ASSERT_OK(solver.AddObservation(500, 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 600);
+  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 701);
+}
+
}  // namespace
}  // namespace autoflip
}  // namespace mediapipe

View File

@@ -262,7 +262,7 @@ TEST(MathUtil, IntRound) {
  // A double-precision number has a 53-bit mantissa (52 fraction bits),
  // so the following value can be represented exactly.
-  int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+  int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
  EXPECT_EQ(mediapipe::MathUtil::Round<int64>(static_cast<double>(value64)),
            value64);
}
@@ -369,7 +369,7 @@ class SafeCastTester {
    if (sizeof(FloatIn) >= 64) {
      // A double-precision number has a 53-bit mantissa (52 fraction bits),
      // so the following value can be represented exactly by a double.
-      int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+      int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
      const IntOut expected =
          (sizeof(IntOut) >= 64) ? static_cast<IntOut>(value64) : imax;
      EXPECT_EQ(
@@ -565,7 +565,7 @@ TEST(MathUtil, SafeCast) {
            -12345);
  EXPECT_EQ(mediapipe::MathUtil::SafeCast<int>(1E47), 2147483647);
  EXPECT_EQ(mediapipe::MathUtil::SafeCast<int>(-1E47),
-            GG_LONGLONG(-2147483648));
+            static_cast<int64_t>(-2147483648));
}

template <class FloatIn, class IntOut>
@@ -682,7 +682,7 @@ class SafeRoundTester {
    if (sizeof(FloatIn) >= 64) {
      // A double-precision number has a 53-bit mantissa (52 fraction bits),
      // so the following value can be represented exactly by a double.
-      int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+      int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
      const IntOut expected =
          (sizeof(IntOut) >= 64) ? static_cast<IntOut>(value64) : imax;
      EXPECT_EQ(
@@ -873,7 +873,7 @@ TEST(MathUtil, SafeRound) {
            -12345);
  EXPECT_EQ(mediapipe::MathUtil::SafeRound<int>(1E47), 2147483647);
  EXPECT_EQ(mediapipe::MathUtil::SafeRound<int>(-1E47),
-            GG_LONGLONG(-2147483648));
+            static_cast<int64_t>(-2147483648));
}

}  // namespace

View File

@@ -8,6 +8,7 @@ def mediapipe_cc_test(
        data = [],
        deps = [],
        size = None,
+        tags = [],
        timeout = None,
        additional_deps = DEFAULT_ADDITIONAL_TEST_DEPS,
        **kwargs):

View File

@@ -641,14 +641,20 @@ cc_library(
        "//mediapipe/framework:calculator_cc_proto",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/deps:no_destructor",
+        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/port:advanced_proto",
        "//mediapipe/framework/port:file_helpers",
+        "//mediapipe/framework/port:gtest",
        "//mediapipe/framework/port:logging",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/framework/port:status",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
+        "@stblib//:stb_image",
+        "@stblib//:stb_image_write",
    ],
)

View File

@@ -18,18 +18,27 @@
#include <unistd.h>

#include <memory>
+#include <string>

#include "absl/container/flat_hash_set.h"
#include "absl/memory/memory.h"
+#include "absl/status/status.h"
#include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/deps/no_destructor.h"
+#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/port/advanced_proto_inc.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/proto_ns.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status_macros.h"
+#include "stb_image.h"
+#include "stb_image_write.h"

namespace mediapipe {
@@ -43,15 +52,14 @@ bool EqualWithTolerance(const T value1, const T value2, const T max_diff) {
}

template <typename T>
-bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
-                 const T max_color_diff, const T max_alpha_diff,
-                 const float max_avg_diff, std::string* error_message) {
+absl::Status CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
+                         const T max_color_diff, const T max_alpha_diff,
+                         const float max_avg_diff,
+                         std::unique_ptr<ImageFrame>& diff_image) {
  // Verify image byte depth matches expected byte depth.
  CHECK_EQ(sizeof(T), image1.ByteDepth());
  CHECK_EQ(sizeof(T), image2.ByteDepth());
-  const bool return_error = error_message != nullptr;

  const int width = image1.Width();
  const int height = image1.Height();
  const int channels1 = image1.NumberOfChannels();
@@ -68,57 +76,64 @@ bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
  const int width_padding2 =
      image2.WidthStep() / image2.ByteDepth() - width * channels2;

+  diff_image = std::make_unique<ImageFrame>(image1.Format(), width, height);
+  T* pixel_diff = reinterpret_cast<T*>(diff_image->MutablePixelData());
+  const int width_padding_diff =
+      diff_image->WidthStep() / diff_image->ByteDepth() - width * channels1;
+
  float avg_diff = 0;
-  uint diff_count = 0;
+  uint total_count = 0;
+  int different_color_components = 0;
+  float max_color_diff_found = 0;
+  int different_alpha_components = 0;
+  float max_alpha_diff_found = 0;
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      for (int channel = 0; channel < num_channels; ++channel) {
        // Check local difference.
-        const T max_diff = channel < 3 ? max_color_diff : max_alpha_diff;
        const T value1 = pixel1[channel];
        const T value2 = pixel2[channel];
-        if (!EqualWithTolerance<T>(value1, value2, max_diff)) {
-          // We cast uint8 to int using this type (and leave other values as-is)
-          // to avoid printing as a single char.
-          using TypeToPrint =
-              typename std::conditional<std::is_same<T, uint8>::value, int,
-                                        T>::type;
-          std::string error = absl::Substitute(
-              "images differ: row = $0 col = $1 channel = $2 : pixel1 = $3, "
-              "pixel2 = $4",
-              row, col, channel, static_cast<TypeToPrint>(value1),
-              static_cast<TypeToPrint>(value2));
-          if (return_error) {
-            *error_message = error;
-          } else {
-            LOG(ERROR) << error;
-          }
-          return false;
-        }
-        // Check global average difference.
        const float diff =
            std::abs(static_cast<float>(value1) - static_cast<float>(value2));
-        avg_diff += (diff - avg_diff) / ++diff_count;
+        if (channel < 3) {
+          different_color_components += diff > max_color_diff;
+          max_color_diff_found = std::max(max_color_diff_found, diff);
+          pixel_diff[channel] = diff;
+        } else {
+          different_alpha_components += diff > max_alpha_diff;
+          max_alpha_diff_found = std::max(max_alpha_diff_found, diff);
+          pixel_diff[channel] = 255;  // opaque to see color difference
+        }
+        // Check global average difference.
+        avg_diff += (diff - avg_diff) / ++total_count;
      }
      pixel1 += channels1;
      pixel2 += channels2;
+      pixel_diff += channels1;
    }
    pixel1 += width_padding1;
    pixel2 += width_padding2;
+    pixel_diff += width_padding_diff;
  }

-  if (avg_diff > max_avg_diff) {
-    std::string error =
-        absl::Substitute("images differ: avg pixel error = $0", avg_diff);
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  std::vector<std::string> errors;
+  if (different_color_components)
+    errors.push_back(absl::Substitute(
+        "$0 color components differences above limit of $1, max found was $2",
+        different_color_components, max_color_diff, max_color_diff_found));
+  if (different_alpha_components)
+    errors.push_back(absl::Substitute(
+        "$0 alpha components differences above limit of $1, max found was $2",
+        different_alpha_components, max_alpha_diff, max_alpha_diff_found));
+  if (avg_diff > max_avg_diff)
+    errors.push_back(
+        absl::Substitute("the average component difference is $0 (limit: $1)",
+                         avg_diff, max_avg_diff));

-  return true;
+  if (!errors.empty())
+    return absl::InternalError(
+        absl::StrCat("images differ: ", absl::StrJoin(errors, "; ")));
+  return absl::OkStatus();
}
#if defined(__linux__)
@@ -134,77 +149,32 @@ std::string GetBinaryDirectory() {
}  // namespace

-bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
-                        const float max_color_diff, const float max_alpha_diff,
-                        const float max_avg_diff, std::string* error_message) {
-  const bool return_error = error_message != nullptr;
-
-  auto IsSupportedImageFormatComparison = [](const ImageFrame& image1,
-                                             const ImageFrame& image2) {
-    // Pairs of non-equal image formats that can be compared against each other.
-    static const mediapipe::NoDestructor<absl::flat_hash_set<
-        std::pair<ImageFormat::Format, ImageFormat::Format>>>
-        kCompatibleImageFormats({
-            {ImageFormat::SRGB, ImageFormat::SRGBA},
-            {ImageFormat::SRGB48, ImageFormat::SRGBA64},
-        });
-    auto* compatible_image_formats = kCompatibleImageFormats.get();
-    return image1.Format() == image2.Format() ||
-           compatible_image_formats->contains(
-               {image1.Format(), image2.Format()}) ||
-           compatible_image_formats->contains(
-               {image2.Format(), image1.Format()});
-  };
-
-  if (!IsSupportedImageFormatComparison(image1, image2)) {
-    std::string error = absl::Substitute(
-        "unsupported image format comparison; image1 = $0, image2 = $1",
-        image1.Format(), image2.Format());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
-
-  if (image1.Width() != image2.Width()) {
-    std::string error =
-        absl::Substitute("image width mismatch: image1 = $0, image2 = $1",
-                         image1.Width(), image2.Width());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
-
-  if (image1.Height() != image2.Height()) {
-    std::string error =
-        absl::Substitute("image height mismatch: image1 = $0, image2 = $1",
-                         image1.Height(), image2.Height());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
-
-  if (image1.ByteDepth() != image2.ByteDepth()) {
-    std::string error =
-        absl::Substitute("image byte depth mismatch: image1 = $0, image2 = $1",
-                         image1.ByteDepth(), image2.ByteDepth());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+absl::Status CompareImageFrames(const ImageFrame& image1,
+                                const ImageFrame& image2,
+                                const float max_color_diff,
+                                const float max_alpha_diff,
+                                const float max_avg_diff,
+                                std::unique_ptr<ImageFrame>& diff_image) {
+  auto IsSupportedImageFormatComparison = [](ImageFormat::Format one,
+                                             ImageFormat::Format two) {
+    auto both = std::minmax(one, two);
+    return one == two ||
+           both == std::minmax(ImageFormat::SRGB, ImageFormat::SRGBA) ||
+           both == std::minmax(ImageFormat::SRGB48, ImageFormat::SRGBA64);
+  };
+
+  RET_CHECK(IsSupportedImageFormatComparison(image1.Format(), image2.Format()))
+      << "unsupported image format comparison; image1 = " << image1.Format()
+      << ", image2 = " << image2.Format();
+  // Cannot use RET_CHECK_EQ because pair is not printable.
+  RET_CHECK(std::make_pair(image1.Width(), image1.Height()) ==
+            std::make_pair(image2.Width(), image2.Height()))
+      << "image size mismatch: " << image1.Width() << "x" << image1.Height()
+      << " != " << image2.Width() << "x" << image2.Height();
+  RET_CHECK_EQ(image1.ByteDepth(), image2.ByteDepth())
+      << "image byte depth mismatch";

  switch (image1.Format()) {
    case ImageFormat::GRAY8:
@@ -212,45 +182,87 @@ bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
    case ImageFormat::SRGBA:
    case ImageFormat::LAB8:
      return CompareDiff<uint8>(image1, image2, max_color_diff, max_alpha_diff,
-                                max_avg_diff, error_message);
+                                max_avg_diff, diff_image);
    case ImageFormat::GRAY16:
    case ImageFormat::SRGB48:
    case ImageFormat::SRGBA64:
      return CompareDiff<uint16>(image1, image2, max_color_diff, max_alpha_diff,
-                                 max_avg_diff, error_message);
+                                 max_avg_diff, diff_image);
    case ImageFormat::VEC32F1:
    case ImageFormat::VEC32F2:
      return CompareDiff<float>(image1, image2, max_color_diff, max_alpha_diff,
-                                max_avg_diff, error_message);
+                                max_avg_diff, diff_image);
    default:
      LOG(FATAL) << ImageFrame::InvalidFormatString(image1.Format());
  }
}
+bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
+                        const float max_color_diff, const float max_alpha_diff,
+                        const float max_avg_diff, std::string* error_message) {
+  std::unique_ptr<ImageFrame> diff_image;
+  auto status = CompareImageFrames(image1, image2, max_color_diff,
+                                   max_alpha_diff, max_avg_diff, diff_image);
+  if (status.ok()) return true;
+  if (error_message) *error_message = std::string(status.message());
+  return false;
+}
std::string GetTestRootDir() {
-#if defined(__ANDROID__)
-  char path[1024];
-  char* ptr = getcwd(path, sizeof(path));
-  CHECK_EQ(ptr, path);
-  return path;
-#else
-  return ::mediapipe::file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe");
-#endif  // defined(__ANDROID__)
+  return file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe");
+}
+
+std::string GetTestOutputsDir() {
+  const char* output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
+  if (!output_dir) {
+    output_dir = "/tmp";
+  }
+  return output_dir;
}

std::string GetTestDataDir(const std::string& package_base_path) {
-#if defined(__ANDROID__)
-  std::string data_dir = GetTestRootDir();
-  std::string binary_dir = GetBinaryDirectory();
-  // In Mobile Harness, the cwd is "/" and the run dir is "/data/local/tmp".
-  if (data_dir == "/" && absl::StartsWith(binary_dir, "/data")) {
-    data_dir = binary_dir;
-  }
-  return ::mediapipe::file::JoinPath(data_dir, package_base_path, "testdata/");
-#else
-  return ::mediapipe::file::JoinPath(GetTestRootDir(), package_base_path,
-                                     "testdata/");
-#endif  // defined(__APPLE__)
+  return file::JoinPath(GetTestRootDir(), package_base_path, "testdata/");
+}
+
+std::string GetTestFilePath(absl::string_view relative_path) {
+  return file::JoinPath(GetTestRootDir(), relative_path);
+}
+
+absl::StatusOr<std::unique_ptr<ImageFrame>> LoadTestImage(
+    absl::string_view path, ImageFormat::Format format) {
+  std::string encoded;
+  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(path, &encoded));
+
+  // stbi_load determines the output pixel format based on the desired channels.
+  // 0 means "use whatever's in the file".
+  int desired_channels = format == ImageFormat::UNKNOWN ? 0
+                         : format == ImageFormat::SRGBA ? 4
+                         : format == ImageFormat::SRGB  ? 3
+                         : format == ImageFormat::GRAY8 ? 1
+                                                        : -1;
+  RET_CHECK(desired_channels >= 0)
+      << "unsupported output format requested: " << format;
+
+  int width, height, channels_in_file;
+  auto data = stbi_load_from_memory(reinterpret_cast<stbi_uc*>(encoded.data()),
+                                    encoded.size(), &width, &height,
+                                    &channels_in_file, desired_channels);
+  RET_CHECK(data) << "failed to decode image data from: " << path;
+
+  // If we didn't specify a desired format, it will be determined by what the
+  // file contains.
+  int output_channels = desired_channels ? desired_channels : channels_in_file;
+  if (format == ImageFormat::UNKNOWN) {
+    format = output_channels == 4   ? ImageFormat::SRGBA
+             : output_channels == 3 ? ImageFormat::SRGB
+             : output_channels == 1 ? ImageFormat::GRAY8
+                                    : ImageFormat::UNKNOWN;
+    RET_CHECK(format != ImageFormat::UNKNOWN)
+        << "unsupported number of channels: " << output_channels;
+  }
+
+  return absl::make_unique<ImageFrame>(
+      format, width, height, width * output_channels, data, stbi_image_free);
}
std::unique_ptr<ImageFrame> LoadTestPng(const std::string& path, std::unique_ptr<ImageFrame> LoadTestPng(const std::string& path,

View File

@@ -15,6 +15,7 @@
#ifndef MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_
#define MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_

+#include "absl/status/statusor.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
@@ -35,14 +36,29 @@ using mediapipe::CalculatorGraphConfig;
// Note: Although max_color_diff and max_alpha_diff are floats, all uint8/uint16
// values are exactly representable. (2^24 + 1 is the first non-representable
// positive integral value.)
+absl::Status CompareImageFrames(const ImageFrame& image1,
+                                const ImageFrame& image2,
+                                const float max_color_diff,
+                                const float max_alpha_diff,
+                                const float max_avg_diff,
+                                std::unique_ptr<ImageFrame>& diff_image);
+
bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
                        const float max_color_diff, const float max_alpha_diff,
                        const float max_avg_diff = 1.0,
                        std::string* error_message = nullptr);

-// Returns the absolute path to the directory that contains test source code.
+// Returns the absolute path to the directory that contains test source code
+// (TEST_SRCDIR).
std::string GetTestRootDir();

+// Returns the absolute path to a directory where tests can write outputs to
+// be sent to bazel (TEST_UNDECLARED_OUTPUTS_DIR or a fallback).
+std::string GetTestOutputsDir();
+
+// Returns the absolute path to a file within TEST_SRCDIR.
+std::string GetTestFilePath(absl::string_view relative_path);
+
// Returns the absolute path to the contents of the package's "testdata"
// directory.
// This handles the different paths where test data ends up when using
@@ -52,6 +68,10 @@ std::string GetTestDataDir(const std::string& package_base_path);
// Loads a binary graph from path. Returns true iff successful.
bool LoadTestGraph(CalculatorGraphConfig* proto, const std::string& path);

+// Loads an image from path.
+absl::StatusOr<std::unique_ptr<ImageFrame>> LoadTestImage(
+    absl::string_view path, ImageFormat::Format format = ImageFormat::SRGBA);
// Loads a PNG image from path using the given ImageFormat. Returns nullptr in // Loads a PNG image from path using the given ImageFormat. Returns nullptr in
// case of failure. // case of failure.
std::unique_ptr<ImageFrame> LoadTestPng( std::unique_ptr<ImageFrame> LoadTestPng(
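A sketch of how the new status-returning CompareImageFrames overload might be combined with GetTestOutputsDir; that diff_image is populated on mismatch is an assumption here, as is the helper name:

absl::Status CheckAgainstGolden(const ImageFrame& golden,
                                const ImageFrame& actual) {
  std::unique_ptr<ImageFrame> diff_image;
  absl::Status status = CompareImageFrames(
      golden, actual, /*max_color_diff=*/2.0f, /*max_alpha_diff=*/2.0f,
      /*max_avg_diff=*/1.0f, diff_image);
  if (!status.ok() && diff_image) {
    // A diff image, when produced, can be written under GetTestOutputsDir()
    // so bazel collects it next to the failing test's other outputs.
    std::string out_path = GetTestOutputsDir() + "/diff.png";
  }
  return status;
}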


@@ -75,26 +75,30 @@ absl::Status GlContext::CreateContextInternal(
   // TODO: Ensure this works with all options (in particular,
   // multithreading options, like the special-case combination of USE_PTHREADS
   // and OFFSCREEN_FRAMEBUFFER)
-  EM_ASM(let init_once = true; if (init_once) {
+  // clang-format off
+  EM_ASM(
+    let init_once = true;
+    if (init_once) {
       const cachedFindCanvasEventTarget = findCanvasEventTarget;
 
-      if (typeof cachedFindCanvasEventTarget != = 'function') {
-        if (typeof console != = 'undefined') {
-          console.error(
-              'Expected Emscripten global function ' +
-              '"findCanvasEventTarget" not found. WebGL context creation ' +
-              'may fail.');
+      if (typeof cachedFindCanvasEventTarget !== 'function') {
+        if (typeof console !== 'undefined') {
+          console.error('Expected Emscripten global function '
+              + '"findCanvasEventTarget" not found. WebGL context creation '
+              + 'may fail.');
         }
         return;
       }
 
       findCanvasEventTarget = function(target) {
+        if (target == 0) {
           if (Module && Module.canvas) {
             return Module.canvas;
           } else if (Module && Module.canvasCssSelector) {
             return cachedFindCanvasEventTarget(Module.canvasCssSelector);
-          } else {
-            if (typeof console != = 'undefined') {
+          }
+        }
+        if (typeof console !== 'undefined') {
           console.warn('Module properties canvas and canvasCssSelector not ' +
                        'found during WebGL context creation.');
         }
@@ -102,15 +106,14 @@ absl::Status GlContext::CreateContextInternal(
         // cases it will not succeed, just in case the user does want to fall-
         // back.
         return cachedFindCanvasEventTarget(target);
-      }
       };  // NOLINT: Necessary semicolon.
       init_once = false;
-  });
+    }
+  );
+  // clang-format on
 
+  // Note: below id parameter is only actually used if both Module.canvas and
+  // Module.canvasCssSelector are undefined.
   EMSCRIPTEN_WEBGL_CONTEXT_HANDLE context_handle =
-      emscripten_webgl_create_context(0 /* id */, &attrs);
+      emscripten_webgl_create_context(nullptr, &attrs);
 
   // Check for failure
   if (context_handle <= 0) {
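To see the patched lookup from the caller's side, here is a minimal sketch of WebGL context creation under Emscripten. The attribute choices and function name are illustrative; the canvas is assumed to be supplied on the JS side via Module.canvas or Module.canvasCssSelector:

#include <emscripten/html5.h>

EMSCRIPTEN_WEBGL_CONTEXT_HANDLE CreateContextSketch() {
  EmscriptenWebGLContextAttributes attrs;
  emscripten_webgl_init_context_attributes(&attrs);
  attrs.majorVersion = 2;  // request WebGL2; illustrative choice
  // With the patched findCanvasEventTarget above, a null target resolves to
  // Module.canvas, or falls back to Module.canvasCssSelector, when defined.
  return emscripten_webgl_create_context(nullptr, &attrs);
}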


@@ -164,7 +164,9 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format,
   }
 
   auto iter = format_info->find(format);
-  CHECK(iter != format_info->end()) << "unsupported format";
+  CHECK(iter != format_info->end())
+      << "unsupported format: "
+      << static_cast<std::underlying_type_t<decltype(format)>>(format);
   const auto& planes = iter->second;
 #ifndef __APPLE__
   CHECK_EQ(planes.size(), 1)
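The improved message works because a scoped enum cannot be streamed directly; casting through std::underlying_type_t makes the numeric value printable. A standalone illustration (the enum values below are made up, not MediaPipe's):

#include <iostream>
#include <type_traits>

enum class GpuBufferFormat : int { kUnknown = 0, kBGRA32 = 1 };  // illustrative

int main() {
  GpuBufferFormat format = GpuBufferFormat::kBGRA32;
  // Without the cast, operator<< has no overload for an enum class.
  std::cout << static_cast<std::underlying_type_t<decltype(format)>>(format)
            << "\n";  // prints 1
  return 0;
}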


@@ -69,6 +69,8 @@ node {
   options: {
     [mediapipe.InferenceCalculatorOptions.ext] {
       model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"
+      #
+      delegate: { gpu { use_advanced_gpu_api: true } }
     }
   }
 }
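The same delegate selection can be made in C++ when building options programmatically. This sketch assumes the usual generated proto setters and include path for InferenceCalculatorOptions:

#include "mediapipe/calculators/tensor/inference_calculator.pb.h"

void ConfigureAdvancedGpuSketch(
    mediapipe::InferenceCalculatorOptions* options) {
  options->set_model_path(
      "mediapipe/modules/pose_detection/pose_detection.tflite");
  // Mirrors the pbtxt above: delegate: { gpu { use_advanced_gpu_api: true } }
  options->mutable_delegate()->mutable_gpu()->set_use_advanced_gpu_api(true);
}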


@@ -85,7 +85,7 @@ ObjectDef GetSSBOObjectDef(int channels) {
 
 absl::Status TFLiteGPURunner::InitializeWithModel(
     const tflite::FlatBufferModel& flatbuffer,
-    const tflite::OpResolver& op_resolver) {
+    const tflite::OpResolver& op_resolver, bool allow_quant_ops) {
   // GraphFloat32 is created twice because, when OpenCL and OpenGL backends are
   // initialized, different backend-specific graph transformations happen
   // in-place. As GraphFloat32 is not copyable by design, we keep two copies of
@@ -94,10 +94,10 @@ absl::Status TFLiteGPURunner::InitializeWithModel(
   // in the end of the initialization stage.
   graph_gl_ = std::make_unique<GraphFloat32>();
   graph_cl_ = std::make_unique<GraphFloat32>();
-  MP_RETURN_IF_ERROR(
-      BuildFromFlatBuffer(flatbuffer, op_resolver, graph_gl_.get()));
-  MP_RETURN_IF_ERROR(
-      BuildFromFlatBuffer(flatbuffer, op_resolver, graph_cl_.get()));
+  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
+                                         graph_gl_.get(), allow_quant_ops));
+  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
+                                         graph_cl_.get(), allow_quant_ops));
 
   for (const auto& input : graph_gl_->inputs()) {
     input_shapes_.push_back(input->tensor.shape);


@@ -54,7 +54,8 @@ class TFLiteGPURunner {
       : options_(options) {}
 
   absl::Status InitializeWithModel(const tflite::FlatBufferModel& flatbuffer,
-                                   const tflite::OpResolver& op_resolver);
+                                   const tflite::OpResolver& op_resolver,
+                                   bool allow_quant_ops = false);
 
   void ForceOpenGL() { opengl_is_forced_ = true; }
   void ForceOpenCL() { opencl_is_forced_ = true; }
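A sketch of a call site passing the new flag end to end; the model path, resolver choice, options type, and namespace qualification are assumptions for illustration, not mandated by this change:

#include "mediapipe/util/tflite/tflite_gpu_runner.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

absl::Status RunQuantizedModelSketch() {
  auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
  RET_CHECK(model) << "failed to load flatbuffer";
  tflite::ops::builtin::BuiltinOpResolver op_resolver;
  tflite::gpu::InferenceOptions options;  // assumed options type
  tflite::gpu::TFLiteGPURunner runner(options);
  // Opt in to quantized ops; the default of false preserves old behavior.
  MP_RETURN_IF_ERROR(runner.InitializeWithModel(*model, op_resolver,
                                                /*allow_quant_ops=*/true));
  return absl::OkStatus();
}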

third_party/stb_image_impl.diff (new vendored file, 35 lines)

@@ -0,0 +1,35 @@
From fed8c5b355e00b7cc7dd5abfebecf0338f2c2f24 Mon Sep 17 00:00:00 2001
From: Camillo Lugaresi <camillol@google.com>
Date: Fri, 4 Jun 2021 00:44:45 +0000
Subject: impl files

---
 stb_image.c       | 4 ++++
 stb_image_write.c | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 stb_image.c
 create mode 100644 stb_image_write.c

diff --git a/stb_image.c b/stb_image.c
new file mode 100644
index 0000000..f88aaf6
--- /dev/null
+++ b/stb_image.c
@@ -0,0 +1,4 @@
+// By defining STB_IMAGE_IMPLEMENTATION the included header file will also
+// define the implementation.
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"
diff --git a/stb_image_write.c b/stb_image_write.c
new file mode 100644
index 0000000..623d757
--- /dev/null
+++ b/stb_image_write.c
@@ -0,0 +1,4 @@
+// By defining STB_IMAGE_WRITE_IMPLEMENTATION the included header file will also
+// define the implementation.
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "stb_image_write.h"
--
2.32.0.rc1.229.g3e70b5a671-goog

third_party/stblib.BUILD (new vendored file, 28 lines)

@@ -0,0 +1,28 @@
+# Description:
+#   Single-file C++ image decoding and encoding libraries
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # MIT license
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "stb_image",
+    srcs = ["stb_image.c"],
+    hdrs = ["stb_image.h"],
+    copts = [
+        "-Wno-unused-function",
+        "$(STACK_FRAME_UNLIMITED)",
+    ],
+    includes = ["."],
+)
+
+cc_library(
+    name = "stb_image_write",
+    srcs = ["stb_image_write.c"],
+    hdrs = ["stb_image_write.h"],
+    includes = ["."],
+)
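Finally, a sketch of consuming the vendored decoder directly from C++; the Bazel dependency label for these targets is an assumption based on the BUILD file above, but the stb_image calls themselves are the library's standard API:

#include <cstdio>

#include "stb_image.h"

int main(int argc, char** argv) {
  if (argc < 2) return 1;
  int width, height, channels_in_file;
  // desired_channels = 0 keeps whatever channel count the file contains.
  unsigned char* pixels = stbi_load(argv[1], &width, &height,
                                    &channels_in_file, /*desired_channels=*/0);
  if (!pixels) {
    std::fprintf(stderr, "decode failed: %s\n", stbi_failure_reason());
    return 1;
  }
  std::printf("%d x %d, %d channels\n", width, height, channels_in_file);
  stbi_image_free(pixels);
  return 0;
}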