Project import generated by Copybara.

GitOrigin-RevId: ec25bf2e416c3689477e82946fb69de2e53b9161
Parent: b48d72e43f
Commit: b544a314b3
@@ -1,3 +1,9 @@
+---
+name: "Build/Installation Issue"
+about: Use this template for build/installation issues
+labels: type:build/install
+
+---
 <em>Please make sure that this is a build/installation issue and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html) documentation before raising any issues.</em>
 
 **System information** (Please provide as much relevant information as possible)
.github/ISSUE_TEMPLATE/10-solution-issue.md (vendored, 6 changes)
@@ -1,3 +1,9 @@
+---
+name: "Solution Issue"
+about: Use this template for assistance with a specific mediapipe solution, such as "Pose" or "Iris", including inference model usage/training, solution-specific calculators, etc.
+labels: type:support
+
+---
 <em>Please make sure that this is a [solution](https://google.github.io/mediapipe/solutions/solutions.html) issue.</em>
 
 **System information** (Please provide as much relevant information as possible)
@@ -1,3 +1,9 @@
+---
+name: "Documentation Issue"
+about: Use this template for documentation related issues
+labels: type:docs
+
+---
 Thank you for submitting a MediaPipe documentation issue.
 The MediaPipe docs are open source! To get involved, read the documentation Contributor Guide
 ## URL(s) with the issue:
@@ -1,3 +1,9 @@
+---
+name: "Bug Issue"
+about: Use this template for reporting a bug
+labels: type:bug
+
+---
 <em>Please make sure that this is a bug and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html), FAQ documentation before raising any issues.</em>
 
 **System information** (Please provide as much relevant information as possible)
@@ -1,3 +1,9 @@
+---
+name: "Feature Request"
+about: Use this template for raising a feature request
+labels: type:feature
+
+---
 <em>Please make sure that this is a feature request.</em>
 
 **System information** (Please provide as much relevant information as possible)
@@ -1,3 +1,9 @@
+---
+name: "Other Issue"
+about: Use this template for any other non-support related issues.
+labels: type:others
+
+---
 This template is for miscellaneous issues not covered by the other issue categories
 
 For questions on how to work with MediaPipe, or support for problems that are not verified bugs in MediaPipe, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/mediapipe) and [Slack](https://mediapipe.page.link/joinslack) communities.
WORKSPACE (20 changes)
@@ -242,6 +242,20 @@ http_archive(
     url = "https://github.com/opencv/opencv/releases/download/3.2.0/opencv-3.2.0-ios-framework.zip",
 )
 
+http_archive(
+    name = "stblib",
+    strip_prefix = "stb-b42009b3b9d4ca35bc703f5310eedc74f584be58",
+    sha256 = "13a99ad430e930907f5611325ec384168a958bf7610e63e60e2fd8e7b7379610",
+    urls = ["https://github.com/nothings/stb/archive/b42009b3b9d4ca35bc703f5310eedc74f584be58.tar.gz"],
+    build_file = "@//third_party:stblib.BUILD",
+    patches = [
+        "@//third_party:stb_image_impl.diff"
+    ],
+    patch_args = [
+        "-p1",
+    ],
+)
+
 # You may run setup_android.sh to install Android SDK and NDK.
 android_ndk_repository(
     name = "androidndk",
@@ -369,9 +383,9 @@ http_archive(
 )
 
 # Tensorflow repo should always go after the other external dependencies.
-# 2021-05-27
-_TENSORFLOW_GIT_COMMIT = "d6bfcdb0926173dbb7aa02ceba5aae6250b8aaa6"
-_TENSORFLOW_SHA256 = "ec40e1462239d8783d02f76a43412c8f80bac71ea20e41e1b7729b990aad6923"
+# 2021-06-07
+_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec"
+_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126"
 http_archive(
     name = "org_tensorflow",
     urls = [
@@ -262,7 +262,7 @@ specified, appear as literal values in the `node_options` field of the
   output_stream: "TENSORS:main_model_output"
   node_options: {
     [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
-      model_path: "mediapipe/models/active_speaker_detection/audio_visual_model.tflite"
+      model_path: "mediapipe/models/detection_model.tflite"
     }
   }
 }
@@ -272,14 +272,13 @@ The `node_options` field accepts the proto3 syntax. Alternatively, calculator
 options can be specified in the `options` field using proto2 syntax.
 
 ```
-node: {
-  calculator: "IntervalFilterCalculator"
+node {
+  calculator: "TfLiteInferenceCalculator"
+  input_stream: "TENSORS:main_model_input"
+  output_stream: "TENSORS:main_model_output"
   node_options: {
-    [type.googleapis.com/mediapipe.IntervalFilterCalculatorOptions] {
-      intervals {
-        start_us: 20000
-        end_us: 40000
-      }
+    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
+      model_path: "mediapipe/models/detection_model.tflite"
     }
   }
 }
@@ -287,13 +286,26 @@ options can be specified in the `options` field using proto2 syntax.
 
 Not all calculators accept calculator options. In order to accept options, a
 calculator will normally define a new protobuf message type to represent its
-options, such as `IntervalFilterCalculatorOptions`. The calculator will then
+options, such as `PacketClonerCalculatorOptions`. The calculator will then
 read that protobuf message in its `CalculatorBase::Open` method, and possibly
-also in the `CalculatorBase::GetContract` function or its
+also in its `CalculatorBase::GetContract` function or its
 `CalculatorBase::Process` method. Normally, the new protobuf message type will
 be defined as a protobuf schema using a ".proto" file and a
 `mediapipe_proto_library()` build rule.
 
+```
+mediapipe_proto_library(
+    name = "packet_cloner_calculator_proto",
+    srcs = ["packet_cloner_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+    ],
+)
+```
+
 
 ## Example calculator
 
 This section discusses the implementation of `PacketClonerCalculator`, which
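For context, a minimal editorial sketch (not part of this commit) of how a calculator typically consumes such options in `Open()`. The generated options header name and the stub structure are assumptions for illustration.

```
// Editorial sketch: reading calculator options in Open(). The framework
// parses the graph's node_options/options field and exposes the message
// through the calculator context.
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

class MyOptionedCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override {
    // `PacketClonerCalculatorOptions` here stands in for whatever options
    // message the calculator's .proto defines (assumed generated header).
    const auto& options = cc->Options<PacketClonerCalculatorOptions>();
    (void)options;  // Configure calculator behavior from `options` fields.
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    return absl::OkStatus();
  }
};
REGISTER_CALCULATOR(MyOptionedCalculator);

}  // namespace mediapipe
```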
@@ -284,6 +284,6 @@ on how to build MediaPipe examples.
 *   Google AI Blog:
     [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
 *   [ML Kit Selfie Segmentation API](https://developers.google.com/ml-kit/vision/selfie-segmentation)
-*   [Models and model cards](./models.md#selfie_segmentation)
+*   [Models and model cards](./models.md#selfie-segmentation)
 *   [Web demo](https://code.mediapipe.dev/codepen/selfie_segmentation)
 *   [Python Colab](https://mediapipe.page.link/selfie_segmentation_py_colab)
@@ -28,6 +28,10 @@ typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedRect>>
     EndLoopNormalizedRectCalculator;
 REGISTER_CALCULATOR(EndLoopNormalizedRectCalculator);
 
+typedef EndLoopCalculator<std::vector<::mediapipe::LandmarkList>>
+    EndLoopLandmarkListVectorCalculator;
+REGISTER_CALCULATOR(EndLoopLandmarkListVectorCalculator);
+
 typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedLandmarkList>>
     EndLoopNormalizedLandmarkListVectorCalculator;
 REGISTER_CALCULATOR(EndLoopNormalizedLandmarkListVectorCalculator);
@@ -35,20 +35,28 @@ namespace api2 {
 
 namespace {
 
+int GetXnnpackDefaultNumThreads() {
+#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
+    defined(__EMSCRIPTEN_PTHREADS__)
+  constexpr int kMinNumThreadsByDefault = 1;
+  constexpr int kMaxNumThreadsByDefault = 4;
+  return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault,
+                    kMaxNumThreadsByDefault);
+#else
+  return 1;
+#endif  // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__
+}
+
 // Returns number of threads to configure XNNPACK delegate with.
-// (Equal to user provided value if specified. Otherwise, it returns number of
-// high cores (hard-coded to 1 for Emscripten without Threads extension))
+// Returns user provided value if specified. Otherwise, tries to choose optimal
+// number of threads depending on the device.
 int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) {
   static constexpr int kDefaultNumThreads = -1;
   if (opts.has_delegate() && opts.delegate().has_xnnpack() &&
       opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
     return opts.delegate().xnnpack().num_threads();
   }
-#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
-  return InferHigherCoreIds().size();
-#else
-  return 1;
-#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
+  return GetXnnpackDefaultNumThreads();
 }
 
 }  // namespace
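An editorial sketch (not part of this commit) of how such a thread count is typically handed to TFLite's XNNPACK delegate; it assumes the standard `tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h` C API, and `calculator_options` is a placeholder name.

```
// Hand the computed thread count to the XNNPACK delegate (illustrative).
TfLiteXNNPackDelegateOptions xnnpack_opts =
    TfLiteXNNPackDelegateOptionsDefault();
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_options);
TfLiteDelegate* xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_opts);
// ... apply via Interpreter::ModifyGraphWithDelegate(xnnpack_delegate), then
// release with TfLiteXNNPackDelegateDelete() after the interpreter is gone.
```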
@@ -269,8 +269,8 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
       break;
     }
   }
-  MP_RETURN_IF_ERROR(
-      tflite_gpu_runner_->InitializeWithModel(model, op_resolver));
+  MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
+      model, op_resolver, /*allow_quant_ops=*/true));
 
   // Create and bind OpenGL buffers for outputs.
   // The buffers are created once and their ids are passed to calculator outputs
@@ -226,6 +226,10 @@ absl::Status InferenceCalculatorMetalImpl::LoadDelegate(CalculatorContext* cc) {
 
   // Configure and create the delegate.
   TFLGpuDelegateOptions options;
+  // `enable_quantization` enables the run of sparse models, i.e. models with a
+  // DENSIFY op preceding a DEQUANTIZE op. Both ops get removed from the
+  // execution graph after the tensor of the weights is read.
+  options.enable_quantization = true;
   options.allow_precision_loss = allow_precision_loss_;
   options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeDoNotWait;
   delegate_ =
@@ -763,9 +763,13 @@ out vec4 fragColor;
 #endif  // defined(GL_ES);
 
 void main() {
-  vec4 input_value = texture2D(input_texture, sample_coordinate);
   vec2 gid = sample_coordinate;
+#ifdef FLIP_Y_COORD
+  float y_coord = 1.0 - sample_coordinate.y;
+#else
+  float y_coord = sample_coordinate.y;
+#endif  // defined(FLIP_Y_COORD)
+  vec2 adjusted_coordinate = vec2(sample_coordinate.x, y_coord);
+  vec4 input_value = texture2D(input_texture, adjusted_coordinate);
 
   // Run activation function.
   // One and only one of FN_SOFTMAX,FN_SIGMOID,FN_NONE will be defined.
@@ -787,13 +791,6 @@ void main() {
   float new_mask_value = input_value.r;
 #endif  // FN_NONE
 
-#ifdef FLIP_Y_COORD
-  float y_coord = 1.0 - gid.y;
-#else
-  float y_coord = gid.y;
-#endif  // defined(FLIP_Y_COORD)
-  vec2 output_coordinate = vec2(gid.x, y_coord);
-
   vec4 out_value = vec4(new_mask_value, 0.0, 0.0, new_mask_value);
   fragColor = out_value;
 })";
@@ -128,9 +128,23 @@ struct GPUData {
 }  // namespace
 #endif  // MEDIAPIPE_TFLITE_GPU_SUPPORTED
 
+namespace {
+
+int GetXnnpackDefaultNumThreads() {
+#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
+    defined(__EMSCRIPTEN_PTHREADS__)
+  constexpr int kMinNumThreadsByDefault = 1;
+  constexpr int kMaxNumThreadsByDefault = 4;
+  return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault,
+                    kMaxNumThreadsByDefault);
+#else
+  return 1;
+#endif  // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__
+}
+
 // Returns number of threads to configure XNNPACK delegate with.
-// (Equal to user provided value if specified. Otherwise, it returns number of
-// high cores (hard-coded to 1 for Emscripten without Threads extension))
+// Returns user provided value if specified. Otherwise, tries to choose optimal
+// number of threads depending on the device.
 int GetXnnpackNumThreads(
     const mediapipe::TfLiteInferenceCalculatorOptions& opts) {
   static constexpr int kDefaultNumThreads = -1;
@@ -138,13 +152,11 @@ int GetXnnpackNumThreads(
       opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
     return opts.delegate().xnnpack().num_threads();
   }
-#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
-  return InferHigherCoreIds().size();
-#else
-  return 1;
-#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
+  return GetXnnpackDefaultNumThreads();
 }
 
 }  // namespace
 
 // Calculator Header Section
 
 // Runs inference on the provided input TFLite tensors and TFLite model.
@@ -737,8 +749,8 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
       break;
     }
   }
-  MP_RETURN_IF_ERROR(
-      tflite_gpu_runner_->InitializeWithModel(model, *op_resolver_ptr));
+  MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
+      model, *op_resolver_ptr, /*allow_quant_ops=*/true));
 
   // Allocate interpreter memory for cpu output.
   if (!gpu_output_) {
@@ -969,6 +981,10 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
     const int kHalfSize = 2;  // sizeof(half)
     // Configure and create the delegate.
     TFLGpuDelegateOptions options;
+    // `enable_quantization` enables the run of sparse models, i.e. models with
+    // a DENSIFY op preceding a DEQUANTIZE op. Both ops get removed from the
+    // execution graph after the tensor of the weights is read.
+    options.enable_quantization = true;
    options.allow_precision_loss = allow_precision_loss_;
    options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeActive;
    if (!delegate_)
@@ -32,11 +32,15 @@ typedef FilterCollectionCalculator<std::vector<::mediapipe::NormalizedRect>>
     FilterNormalizedRectCollectionCalculator;
 REGISTER_CALCULATOR(FilterNormalizedRectCollectionCalculator);
 
-typedef FilterCollectionCalculator<
-    std::vector<::mediapipe::NormalizedLandmarkList>>
+typedef FilterCollectionCalculator<std::vector<::mediapipe::LandmarkList>>
     FilterLandmarkListCollectionCalculator;
 REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator);
 
+typedef FilterCollectionCalculator<
+    std::vector<::mediapipe::NormalizedLandmarkList>>
+    FilterNormalizedLandmarkListCollectionCalculator;
+REGISTER_CALCULATOR(FilterNormalizedLandmarkListCollectionCalculator);
+
 typedef FilterCollectionCalculator<std::vector<::mediapipe::ClassificationList>>
     FilterClassificationListCollectionCalculator;
 REGISTER_CALCULATOR(FilterClassificationListCollectionCalculator);
@@ -37,6 +37,13 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
 // (configured through option us_to_first_rect). If provided, a non-zero integer
 // will allow the animated zoom to be used when the first detections arrive.
 constexpr char kAnimateZoom[] = "ANIMATE_ZOOM";
+// Can be used to control the maximum zoom; note that it is re-evaluated only
+// upon change of input resolution. A value of 100 disables zooming and is the
+// smallest allowed value. A value of 200 allows zooming such that a pixel of
+// the input may cover up to four times its original area. Note that
+// max_zoom_value_deg from options is always respected; MAX_ZOOM_PCT can only be
+// used to limit zooming further.
+constexpr char kMaxZoomFactorPercent[] = "MAX_ZOOM_FACTOR_PCT";
 // Field-of-view (degrees) of the camera's x-axis (width).
 // TODO: Parameterize FOV based on camera specs.
 constexpr float kFieldOfView = 60;
@@ -75,11 +82,16 @@ class ContentZoomingCalculator : public CalculatorBase {
                    int frame_height);
   // Saves state to a state-cache, if provided.
   absl::Status SaveState(mediapipe::CalculatorContext* cc) const;
+  // Returns the factor for maximum zoom based on options and the
+  // kMaxZoomFactorPercent input (if present).
+  double GetMaxZoomFactor(mediapipe::CalculatorContext* cc) const;
   // Initializes the calculator for the given frame size, creating path solvers
   // and resetting history like last measured values.
-  absl::Status InitializeState(int frame_width, int frame_height);
+  absl::Status InitializeState(mediapipe::CalculatorContext* cc,
+                               int frame_width, int frame_height);
   // Adjusts state to work with an updated frame size.
-  absl::Status UpdateForResolutionChange(int frame_width, int frame_height);
+  absl::Status UpdateForResolutionChange(mediapipe::CalculatorContext* cc,
+                                         int frame_width, int frame_height);
   // Returns true if we are animating to the first rect.
   bool IsAnimatingToFirstRect(const Timestamp& timestamp) const;
   // Builds the output rectangle when animating to the first rect.
@@ -136,6 +148,9 @@ absl::Status ContentZoomingCalculator::GetContract(
     return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
            << "Input VIDEO or VIDEO_SIZE must be provided.";
   }
+  if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) {
+    cc->Inputs().Tag(kMaxZoomFactorPercent).Set<int>();
+  }
   if (cc->Inputs().HasTag(kSalientRegions)) {
     cc->Inputs().Tag(kSalientRegions).Set<DetectionSet>();
   }
@@ -330,7 +345,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState(
           ? cc->InputSidePackets().Tag(kStateCache).Get<StateCacheType*>()
           : nullptr;
   if (!state_cache || !state_cache->has_value()) {
-    return InitializeState(frame_width, frame_height);
+    return InitializeState(cc, frame_width, frame_height);
   }
 
   const ContentZoomingCalculatorState& state = state_cache->value();
@@ -350,7 +365,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState(
   last_measured_y_offset_ = state.last_measured_y_offset;
   MP_RETURN_IF_ERROR(UpdateAspectAndMax());
 
-  return UpdateForResolutionChange(frame_width, frame_height);
+  return UpdateForResolutionChange(cc, frame_width, frame_height);
 }
 
 absl::Status ContentZoomingCalculator::SaveState(
@@ -379,8 +394,20 @@ absl::Status ContentZoomingCalculator::SaveState(
   return absl::OkStatus();
 }
 
-absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
-                                                       int frame_height) {
+double ContentZoomingCalculator::GetMaxZoomFactor(
+    mediapipe::CalculatorContext* cc) const {
+  double max_zoom_value =
+      options_.max_zoom_value_deg() / static_cast<double>(kFieldOfView);
+  if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) {
+    const double factor = std::max(
+        1.0, cc->Inputs().Tag(kMaxZoomFactorPercent).Get<int>() / 100.0);
+    max_zoom_value = std::max(max_zoom_value, 1.0 / factor);
+  }
+  return max_zoom_value;
+}
+
+absl::Status ContentZoomingCalculator::InitializeState(
+    mediapipe::CalculatorContext* cc, int frame_width, int frame_height) {
   frame_width_ = frame_width;
   frame_height_ = frame_height;
   path_solver_pan_ = std::make_unique<KinematicPathSolver>(
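To make the limit concrete, a worked example (editorial illustration, using kFieldOfView = 60 from the header and the numbers of the MaxZoomValueOverride test later in this commit):

```
// With MAX_ZOOM_FACTOR_PCT = 133 and max_zoom_value_deg = 30:
//   factor         = max(1.0, 133 / 100.0)        = 1.33
//   max_zoom_value = max(30 / 60.0, 1.0 / 1.33)   ~= 0.75
// so min_zoom_size = frame_height * 0.75; for a 480-pixel-high frame the
// crop cannot shrink below ~360 px. With 166 percent on a 720 px frame,
// max(0.5, 1.0 / 1.66) ~= 0.60, giving a floor of ~433 px.
```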
@@ -390,8 +417,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
       options_.kinematic_options_tilt(), 0, frame_height_,
       static_cast<float>(frame_height_) / kFieldOfView);
   MP_RETURN_IF_ERROR(UpdateAspectAndMax());
-  int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() /
-                                       static_cast<double>(kFieldOfView));
+  int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc);
   path_solver_zoom_ = std::make_unique<KinematicPathSolver>(
       options_.kinematic_options_zoom(), min_zoom_size,
       max_frame_value_ * frame_height_,
@@ -405,7 +431,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
 }
 
 absl::Status ContentZoomingCalculator::UpdateForResolutionChange(
-    int frame_width, int frame_height) {
+    mediapipe::CalculatorContext* cc, int frame_width, int frame_height) {
   // Update state for change in input resolution.
   if (frame_width_ != frame_width || frame_height_ != frame_height) {
     double width_scale = frame_width / static_cast<double>(frame_width_);
@@ -419,8 +445,7 @@ absl::Status ContentZoomingCalculator::UpdateForResolutionChange(
     MP_RETURN_IF_ERROR(path_solver_pan_->UpdateMinMaxLocation(0, frame_width_));
     MP_RETURN_IF_ERROR(
        path_solver_tilt_->UpdateMinMaxLocation(0, frame_height_));
-    int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() /
-                                         static_cast<double>(kFieldOfView));
+    int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc);
     MP_RETURN_IF_ERROR(path_solver_zoom_->UpdateMinMaxLocation(
         min_zoom_size, max_frame_value_ * frame_height_));
     MP_RETURN_IF_ERROR(path_solver_zoom_->UpdatePixelsPerDegree(
@@ -493,7 +518,8 @@ absl::Status ContentZoomingCalculator::Process(
     MP_RETURN_IF_ERROR(MaybeLoadState(cc, frame_width, frame_height));
     initialized_ = !options_.is_stateless();
   } else {
-    MP_RETURN_IF_ERROR(UpdateForResolutionChange(frame_width, frame_height));
+    MP_RETURN_IF_ERROR(
+        UpdateForResolutionChange(cc, frame_width, frame_height));
   }
 
   bool only_required_found = false;
@@ -150,6 +150,29 @@ const char kConfigE[] = R"(
   }
 )";
 
+const char kConfigF[] = R"(
+    calculator: "ContentZoomingCalculator"
+    input_stream: "VIDEO_SIZE:size"
+    input_stream: "DETECTIONS:detections"
+    input_stream: "MAX_ZOOM_FACTOR_PCT:max_zoom_factor_pct"
+    output_stream: "CROP_RECT:rect"
+    output_stream: "FIRST_CROP_RECT:first_rect"
+    options: {
+      [mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: {
+        max_zoom_value_deg: 0
+        kinematic_options_zoom {
+          min_motion_to_reframe: 1.2
+        }
+        kinematic_options_tilt {
+          min_motion_to_reframe: 1.2
+        }
+        kinematic_options_pan {
+          min_motion_to_reframe: 1.2
+        }
+      }
+    }
+  )";
+
 void CheckBorder(const StaticFeatures& static_features, int width, int height,
                  int top_border, int bottom_border) {
   ASSERT_EQ(2, static_features.border().size());
@@ -170,6 +193,7 @@ void CheckBorder(const StaticFeatures& static_features, int width, int height,
 
 struct AddDetectionFlags {
   std::optional<bool> animated_zoom;
+  std::optional<int> max_zoom_factor_percent;
 };
 
 void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
@@ -211,6 +235,14 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
             mediapipe::MakePacket<bool>(flags.animated_zoom.value())
                 .At(Timestamp(time)));
   }
+
+  if (flags.max_zoom_factor_percent.has_value()) {
+    runner->MutableInputs()
+        ->Tag("MAX_ZOOM_FACTOR_PCT")
+        .packets.push_back(
+            mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value())
+                .At(Timestamp(time)));
+  }
 }
 
 void AddDetection(const cv::Rect_<float>& position, const int64 time,
@@ -259,6 +291,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) {
   CheckBorder(static_features, 1000, 1000, 495, 395);
 }
 
+#if 0
 TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
   auto runner = ::absl::make_unique<CalculatorRunner>(
       ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD));
@@ -694,8 +727,8 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
   auto runner = ::absl::make_unique<CalculatorRunner>(config);
   runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
       mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
-  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
-                        runner.get());
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000,
+                        1000, runner.get());
   AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 2000000, 500, 500,
                         runner.get());
   MP_ASSERT_OK(runner->Run());
@@ -719,6 +752,36 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) {
   CheckCropRect(500, 500, 916, 916, 0,
                 runner->Outputs().Tag("CROP_RECT").packets);
 }
+#endif
+
+TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
+  auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigF);
+  auto* options = config.mutable_options()->MutableExtension(
+      ContentZoomingCalculatorOptions::ext);
+  options->set_max_zoom_value_deg(30);
+  auto runner = ::absl::make_unique<CalculatorRunner>(config);
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 640, 480,
+                        runner.get(), {.max_zoom_factor_percent = 133});
+  // Change resolution and allow more zoom, and give time to use the new limit.
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1280, 720,
+                        runner.get(), {.max_zoom_factor_percent = 166});
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 2000000, 1280, 720,
+                        runner.get(), {.max_zoom_factor_percent = 166});
+  // Switch back to a smaller resolution with a more limited zoom.
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 3000000, 640, 480,
+                        runner.get(), {.max_zoom_factor_percent = 133});
+  MP_ASSERT_OK(runner->Run());
+  // Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360.
+  // Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430.
+  CheckCropRect(320, 240, 480, 360, 0,
+                runner->Outputs().Tag("CROP_RECT").packets);
+  CheckCropRect(640, 360, 769, 433, 2,
+                runner->Outputs().Tag("CROP_RECT").packets);
+  CheckCropRect(320, 240, 480, 360, 3,
+                runner->Outputs().Tag("CROP_RECT").packets);
+}
+
+#if 0
 TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
   auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
   auto* options = config.mutable_options()->MutableExtension(
@@ -906,6 +969,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) {
     EXPECT_EQ(first_rect.height(), rect.height());
   }
 }
+#endif
 
 }  // namespace
 }  // namespace autoflip
@@ -154,10 +154,18 @@ absl::Status KinematicPathSolver::AddObservation(int position,
 
   // Time and position updates.
   double delta_t = (time_us - current_time_) / 1000000.0;
+  // Time since last state/prediction update, smoothed by
+  // mean_period_update_rate.
+  if (mean_delta_t_ < 0) {
+    mean_delta_t_ = delta_t;
+  } else {
+    mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
+                    delta_t * options_.mean_period_update_rate();
+  }
 
   // Observed velocity and then weighted update of this velocity.
   double observed_velocity = delta_degs / delta_t;
-  double update_rate = std::min(delta_t / options_.update_rate_seconds(),
+  double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(),
                                 options_.max_update_rate());
   double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
                             observed_velocity * update_rate;
@@ -174,16 +182,6 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) {
   RET_CHECK(current_time_ < time_us)
       << "Prediction time added before a prior observation or prediction.";
 
-  // Time since last state/prediction update, smoothed by
-  // mean_period_update_rate.
-  double delta_t = (time_us - current_time_) / 1000000.0;
-  if (mean_delta_t_ < 0) {
-    mean_delta_t_ = delta_t;
-  } else {
-    mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
-                    delta_t * options_.mean_period_update_rate();
-  }
-
   // Position update limited by min/max.
   double update_position_px =
       current_position_px_ +
@@ -337,6 +337,40 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) {
   EXPECT_EQ(state, 516);
 }
 
+TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  options.set_mean_period_update_rate(1.0);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  int state;
+  MP_ASSERT_OK(solver.AddObservation(500, 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 600);
+  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 720);
+}
+
+TEST(KinematicPathSolverTest, TimestampSmoothing) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  options.set_mean_period_update_rate(0.05);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  int state;
+  MP_ASSERT_OK(solver.AddObservation(500, 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 600);
+  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 701);
+}
+
 }  // namespace
 }  // namespace autoflip
 }  // namespace mediapipe
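Worked check of these expectations (editorial; assumes kWidthFieldOfView is 60, so the scale is 1000 px / 60 deg ~= 16.7 px/deg, with motion capped at max_velocity = 6 deg/s):

```
// t = 1.0 s: 6 deg/s * 1.0 s * 16.7 px/deg ~= 100 px  ->  500 + 100 = 600.
// Without smoothing (mean_period_update_rate = 1.0) the 1.2 s gap is taken
// at face value: 6 * 1.2 * 16.7 ~= 120 px  ->  600 + 120 = 720.
// With mean_period_update_rate = 0.05 the period is smoothed first:
// mean_dt = 0.95 * 1.0 + 0.05 * 1.2 = 1.01 s  ->  6 * 1.01 * 16.7 ~= 101 px
// ->  600 + 101 = 701.
```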
@@ -262,7 +262,7 @@ TEST(MathUtil, IntRound) {
 
   // A double-precision number has a 53-bit mantissa (52 fraction bits),
   // so the following value can be represented exactly.
-  int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+  int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
   EXPECT_EQ(mediapipe::MathUtil::Round<int64>(static_cast<double>(value64)),
             value64);
 }
@@ -369,7 +369,7 @@ class SafeCastTester {
     if (sizeof(FloatIn) >= 64) {
       // A double-precision number has a 53-bit mantissa (52 fraction bits),
       // so the following value can be represented exactly by a double.
-      int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+      int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
       const IntOut expected =
           (sizeof(IntOut) >= 64) ? static_cast<IntOut>(value64) : imax;
       EXPECT_EQ(
@@ -565,7 +565,7 @@ TEST(MathUtil, SafeCast) {
             -12345);
   EXPECT_EQ(mediapipe::MathUtil::SafeCast<int>(1E47), 2147483647);
   EXPECT_EQ(mediapipe::MathUtil::SafeCast<int>(-1E47),
-            GG_LONGLONG(-2147483648));
+            static_cast<int64_t>(-2147483648));
 }
 
 template <class FloatIn, class IntOut>
@@ -682,7 +682,7 @@ class SafeRoundTester {
     if (sizeof(FloatIn) >= 64) {
      // A double-precision number has a 53-bit mantissa (52 fraction bits),
      // so the following value can be represented exactly by a double.
-      int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+      int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
       const IntOut expected =
           (sizeof(IntOut) >= 64) ? static_cast<IntOut>(value64) : imax;
       EXPECT_EQ(
@@ -873,7 +873,7 @@ TEST(MathUtil, SafeRound) {
             -12345);
   EXPECT_EQ(mediapipe::MathUtil::SafeRound<int>(1E47), 2147483647);
   EXPECT_EQ(mediapipe::MathUtil::SafeRound<int>(-1E47),
-            GG_LONGLONG(-2147483648));
+            static_cast<int64_t>(-2147483648));
 }
 
 }  // namespace
@@ -8,6 +8,7 @@ def mediapipe_cc_test(
         data = [],
         deps = [],
         size = None,
+        tags = [],
        timeout = None,
        additional_deps = DEFAULT_ADDITIONAL_TEST_DEPS,
        **kwargs):
@@ -641,14 +641,20 @@ cc_library(
 (resulting dependency list; the page does not preserve which six entries were added)
         "//mediapipe/framework:calculator_cc_proto",
         "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/deps:no_destructor",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/port:advanced_proto",
        "//mediapipe/framework/port:file_helpers",
        "//mediapipe/framework/port:gtest",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@stblib//:stb_image",
        "@stblib//:stb_image_write",
    ],
)
@@ -18,18 +18,27 @@
 (resulting include list; the page does not preserve which nine entries were added)
 #include <unistd.h>
 
 #include <memory>
 #include <string>
 
 #include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/status/status.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/substitute.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/deps/file_path.h"
 #include "mediapipe/framework/deps/no_destructor.h"
 #include "mediapipe/framework/formats/image_format.pb.h"
 #include "mediapipe/framework/port/advanced_proto_inc.h"
 #include "mediapipe/framework/port/file_helpers.h"
 #include "mediapipe/framework/port/logging.h"
 #include "mediapipe/framework/port/proto_ns.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status_macros.h"
 #include "stb_image.h"
 #include "stb_image_write.h"
 
 namespace mediapipe {
@@ -43,15 +52,14 @@ bool EqualWithTolerance(const T value1, const T value2, const T max_diff) {
 }
 
 template <typename T>
-bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
-                 const T max_color_diff, const T max_alpha_diff,
-                 const float max_avg_diff, std::string* error_message) {
+absl::Status CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
+                         const T max_color_diff, const T max_alpha_diff,
+                         const float max_avg_diff,
+                         std::unique_ptr<ImageFrame>& diff_image) {
   // Verify image byte depth matches expected byte depth.
   CHECK_EQ(sizeof(T), image1.ByteDepth());
   CHECK_EQ(sizeof(T), image2.ByteDepth());
 
-  const bool return_error = error_message != nullptr;
-
   const int width = image1.Width();
   const int height = image1.Height();
   const int channels1 = image1.NumberOfChannels();
@@ -68,57 +76,64 @@ bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
   const int width_padding2 =
       image2.WidthStep() / image2.ByteDepth() - width * channels2;
 
+  diff_image = std::make_unique<ImageFrame>(image1.Format(), width, height);
+  T* pixel_diff = reinterpret_cast<T*>(diff_image->MutablePixelData());
+  const int width_padding_diff =
+      diff_image->WidthStep() / diff_image->ByteDepth() - width * channels1;
+
   float avg_diff = 0;
-  uint diff_count = 0;
+  uint total_count = 0;
+  int different_color_components = 0;
+  float max_color_diff_found = 0;
+  int different_alpha_components = 0;
+  float max_alpha_diff_found = 0;
   for (int row = 0; row < height; ++row) {
     for (int col = 0; col < width; ++col) {
       for (int channel = 0; channel < num_channels; ++channel) {
-        // Check local difference.
         const T max_diff = channel < 3 ? max_color_diff : max_alpha_diff;
         const T value1 = pixel1[channel];
         const T value2 = pixel2[channel];
-        if (!EqualWithTolerance<T>(value1, value2, max_diff)) {
-          // We cast uint8 to int using this type (and leave other values as-is)
-          // to avoid printing as a single char.
-          using TypeToPrint =
-              typename std::conditional<std::is_same<T, uint8>::value, int,
-                                        T>::type;
-          std::string error = absl::Substitute(
-              "images differ: row = $0 col = $1 channel = $2 : pixel1 = $3, "
-              "pixel2 = $4",
-              row, col, channel, static_cast<TypeToPrint>(value1),
-              static_cast<TypeToPrint>(value2));
-          if (return_error) {
-            *error_message = error;
-          } else {
-            LOG(ERROR) << error;
-          }
-          return false;
-        }
-        // Check global average difference.
         const float diff =
             std::abs(static_cast<float>(value1) - static_cast<float>(value2));
-        avg_diff += (diff - avg_diff) / ++diff_count;
+        if (channel < 3) {
+          different_color_components += diff > max_color_diff;
+          max_color_diff_found = std::max(max_color_diff_found, diff);
+          pixel_diff[channel] = diff;
+        } else {
+          different_alpha_components += diff > max_alpha_diff;
+          max_alpha_diff_found = std::max(max_alpha_diff_found, diff);
+          pixel_diff[channel] = 255;  // opaque to see color difference
+        }
+        // Check global average difference.
+        avg_diff += (diff - avg_diff) / ++total_count;
       }
       pixel1 += channels1;
       pixel2 += channels2;
+      pixel_diff += channels1;
     }
     pixel1 += width_padding1;
     pixel2 += width_padding2;
+    pixel_diff += width_padding_diff;
   }
 
-  if (avg_diff > max_avg_diff) {
-    std::string error =
-        absl::Substitute("images differ: avg pixel error = $0", avg_diff);
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  std::vector<std::string> errors;
+  if (different_color_components)
+    errors.push_back(absl::Substitute(
+        "$0 color components differences above limit of $1, max found was $2",
+        different_color_components, max_color_diff, max_color_diff_found));
+  if (different_alpha_components)
+    errors.push_back(absl::Substitute(
+        "$0 alpha components differences above limit of $1, max found was $2",
+        different_alpha_components, max_alpha_diff, max_alpha_diff_found));
+  if (avg_diff > max_avg_diff)
+    errors.push_back(
+        absl::Substitute("the average component difference is $0 (limit: $1)",
+                         avg_diff, max_avg_diff));
 
-  return true;
+  if (!errors.empty())
+    return absl::InternalError(
+        absl::StrCat("images differ: ", absl::StrJoin(errors, "; ")));
+  return absl::OkStatus();
 }
 
 #if defined(__linux__)
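A note on the running mean used above (editorial): the incremental update avoids accumulating a large sum and is algebraically identical to the plain arithmetic mean.

```
// avg_n = avg_{n-1} + (x_n - avg_{n-1}) / n  equals  (x_1 + ... + x_n) / n.
// Quick check with x = {1, 2, 3}:
//   n=1: 0 + (1 - 0)/1 = 1;  n=2: 1 + (2 - 1)/2 = 1.5;
//   n=3: 1.5 + (3 - 1.5)/3 = 2, the mean of 1, 2, 3.
```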
@@ -134,77 +149,32 @@ std::string GetBinaryDirectory() {
 
 }  // namespace
 
-bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
-                        const float max_color_diff, const float max_alpha_diff,
-                        const float max_avg_diff, std::string* error_message) {
-  const bool return_error = error_message != nullptr;
-
-  auto IsSupportedImageFormatComparison = [](const ImageFrame& image1,
-                                             const ImageFrame& image2) {
-    // Pairs of non-equal image formats that can be compared against each other.
-    static const mediapipe::NoDestructor<absl::flat_hash_set<
-        std::pair<ImageFormat::Format, ImageFormat::Format>>>
-        kCompatibleImageFormats({
-            {ImageFormat::SRGB, ImageFormat::SRGBA},
-            {ImageFormat::SRGB48, ImageFormat::SRGBA64},
-        });
-
-    auto* compatible_image_formats = kCompatibleImageFormats.get();
-
-    return image1.Format() == image2.Format() ||
-           compatible_image_formats->contains(
-               {image1.Format(), image2.Format()}) ||
-           compatible_image_formats->contains(
-               {image2.Format(), image1.Format()});
+absl::Status CompareImageFrames(const ImageFrame& image1,
+                                const ImageFrame& image2,
+                                const float max_color_diff,
+                                const float max_alpha_diff,
+                                const float max_avg_diff,
+                                std::unique_ptr<ImageFrame>& diff_image) {
+  auto IsSupportedImageFormatComparison = [](ImageFormat::Format one,
+                                             ImageFormat::Format two) {
+    auto both = std::minmax(one, two);
+    return one == two ||
+           both == std::minmax(ImageFormat::SRGB, ImageFormat::SRGBA) ||
+           both == std::minmax(ImageFormat::SRGB48, ImageFormat::SRGBA64);
   };
 
-  if (!IsSupportedImageFormatComparison(image1, image2)) {
-    std::string error = absl::Substitute(
-        "unsupported image format comparison; image1 = $0, image2 = $1",
-        image1.Format(), image2.Format());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  RET_CHECK(IsSupportedImageFormatComparison(image1.Format(), image2.Format()))
+      << "unsupported image format comparison; image1 = " << image1.Format()
+      << ", image2 = " << image2.Format();
 
-  if (image1.Width() != image2.Width()) {
-    std::string error =
-        absl::Substitute("image width mismatch: image1 = $0, image2 = $1",
-                         image1.Width(), image2.Width());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
-
-  if (image1.Height() != image2.Height()) {
-    std::string error =
-        absl::Substitute("image height mismatch: image1 = $0, image2 = $1",
-                         image1.Height(), image2.Height());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  // Cannot use RET_CHECK_EQ because pair is not printable.
+  RET_CHECK(std::make_pair(image1.Width(), image1.Height()) ==
+            std::make_pair(image2.Width(), image2.Height()))
+      << "image size mismatch: " << image1.Width() << "x" << image1.Height()
+      << " != " << image2.Width() << "x" << image2.Height();
 
-  if (image1.ByteDepth() != image2.ByteDepth()) {
-    std::string error =
-        absl::Substitute("image byte depth mismatch: image1 = $0, image2 = $1",
-                         image1.ByteDepth(), image2.ByteDepth());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  RET_CHECK_EQ(image1.ByteDepth(), image2.ByteDepth())
+      << "image byte depth mismatch";
 
   switch (image1.Format()) {
     case ImageFormat::GRAY8:
@@ -212,45 +182,87 @@ bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
     case ImageFormat::SRGBA:
     case ImageFormat::LAB8:
       return CompareDiff<uint8>(image1, image2, max_color_diff, max_alpha_diff,
-                                max_avg_diff, error_message);
+                                max_avg_diff, diff_image);
     case ImageFormat::GRAY16:
     case ImageFormat::SRGB48:
     case ImageFormat::SRGBA64:
       return CompareDiff<uint16>(image1, image2, max_color_diff, max_alpha_diff,
-                                 max_avg_diff, error_message);
+                                 max_avg_diff, diff_image);
     case ImageFormat::VEC32F1:
     case ImageFormat::VEC32F2:
       return CompareDiff<float>(image1, image2, max_color_diff, max_alpha_diff,
-                                max_avg_diff, error_message);
+                                max_avg_diff, diff_image);
     default:
       LOG(FATAL) << ImageFrame::InvalidFormatString(image1.Format());
   }
 }
 
+bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
+                        const float max_color_diff, const float max_alpha_diff,
+                        const float max_avg_diff, std::string* error_message) {
+  std::unique_ptr<ImageFrame> diff_image;
+  auto status = CompareImageFrames(image1, image2, max_color_diff,
+                                   max_alpha_diff, max_avg_diff, diff_image);
+  if (status.ok()) return true;
+  if (error_message) *error_message = std::string(status.message());
+  return false;
+}
+
 std::string GetTestRootDir() {
 #if defined(__ANDROID__)
   char path[1024];
   char* ptr = getcwd(path, sizeof(path));
   CHECK_EQ(ptr, path);
   return path;
 #else
-  return ::mediapipe::file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe");
+  return file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe");
 #endif  // defined(__ANDROID__)
 }
 
+std::string GetTestOutputsDir() {
+  const char* output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
+  if (!output_dir) {
+    output_dir = "/tmp";
+  }
+  return output_dir;
+}
+
 std::string GetTestDataDir(const std::string& package_base_path) {
-#if defined(__ANDROID__)
-  std::string data_dir = GetTestRootDir();
-  std::string binary_dir = GetBinaryDirectory();
-  // In Mobile Harness, the cwd is "/" and the run dir is "/data/local/tmp".
-  if (data_dir == "/" && absl::StartsWith(binary_dir, "/data")) {
-    data_dir = binary_dir;
-  }
-  return ::mediapipe::file::JoinPath(data_dir, package_base_path, "testdata/");
-#else
-  return ::mediapipe::file::JoinPath(GetTestRootDir(), package_base_path,
-                                     "testdata/");
-#endif  // defined(__APPLE__)
+  return file::JoinPath(GetTestRootDir(), package_base_path, "testdata/");
 }
 
+std::string GetTestFilePath(absl::string_view relative_path) {
+  return file::JoinPath(GetTestRootDir(), relative_path);
+}
+
+absl::StatusOr<std::unique_ptr<ImageFrame>> LoadTestImage(
+    absl::string_view path, ImageFormat::Format format) {
+  std::string encoded;
+  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(path, &encoded));
+
+  // stbi_load determines the output pixel format based on the desired channels.
+  // 0 means "use whatever's in the file".
+  int desired_channels = format == ImageFormat::UNKNOWN ? 0
+                         : format == ImageFormat::SRGBA ? 4
+                         : format == ImageFormat::SRGB  ? 3
+                         : format == ImageFormat::GRAY8 ? 1
+                                                        : -1;
+  RET_CHECK(desired_channels >= 0)
+      << "unsupported output format requested: " << format;
+
+  int width, height, channels_in_file;
+  auto data = stbi_load_from_memory(reinterpret_cast<stbi_uc*>(encoded.data()),
+                                    encoded.size(), &width, &height,
+                                    &channels_in_file, desired_channels);
+  RET_CHECK(data) << "failed to decode image data from: " << path;
+
+  // If we didn't specify a desired format, it will be determined by what the
+  // file contains.
+  int output_channels = desired_channels ? desired_channels : channels_in_file;
+  if (format == ImageFormat::UNKNOWN) {
+    format = output_channels == 4   ? ImageFormat::SRGBA
+             : output_channels == 3 ? ImageFormat::SRGB
+             : output_channels == 1 ? ImageFormat::GRAY8
+                                    : ImageFormat::UNKNOWN;
+    RET_CHECK(format != ImageFormat::UNKNOWN)
+        << "unsupported number of channels: " << output_channels;
+  }
+
+  return absl::make_unique<ImageFrame>(
+      format, width, height, width * output_channels, data, stbi_image_free);
+}
+
 std::unique_ptr<ImageFrame> LoadTestPng(const std::string& path,
@@ -15,6 +15,7 @@
 #ifndef MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_
 #define MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_
 
+#include "absl/status/statusor.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/formats/image_frame.h"
@@ -35,14 +36,29 @@ using mediapipe::CalculatorGraphConfig;
 // Note: Although max_color_diff and max_alpha_diff are floats, all uint8/uint16
 // values are exactly representable. (2^24 + 1 is the first non-representable
 // positive integral value.)
+absl::Status CompareImageFrames(const ImageFrame& image1,
+                                const ImageFrame& image2,
+                                const float max_color_diff,
+                                const float max_alpha_diff,
+                                const float max_avg_diff,
+                                std::unique_ptr<ImageFrame>& diff_image);
+
 bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
                         const float max_color_diff, const float max_alpha_diff,
                         const float max_avg_diff = 1.0,
                         std::string* error_message = nullptr);
 
-// Returns the absolute path to the directory that contains test source code.
+// Returns the absolute path to the directory that contains test source code
+// (TEST_SRCDIR).
 std::string GetTestRootDir();
 
+// Returns the absolute path to a directory where tests can write outputs to
+// be sent to bazel (TEST_UNDECLARED_OUTPUTS_DIR or a fallback).
+std::string GetTestOutputsDir();
+
+// Returns the absolute path to a file within TEST_SRCDIR.
+std::string GetTestFilePath(absl::string_view relative_path);
+
 // Returns the absolute path to the contents of the package's "testdata"
 // directory.
 // This handles the different paths where test data ends up when using
@@ -52,6 +68,10 @@ std::string GetTestDataDir(const std::string& package_base_path);
 // Loads a binary graph from path. Returns true iff successful.
 bool LoadTestGraph(CalculatorGraphConfig* proto, const std::string& path);
 
+// Loads an image from path.
+absl::StatusOr<std::unique_ptr<ImageFrame>> LoadTestImage(
+    absl::string_view path, ImageFormat::Format format = ImageFormat::SRGBA);
+
 // Loads a PNG image from path using the given ImageFormat. Returns nullptr in
 // case of failure.
 std::unique_ptr<ImageFrame> LoadTestPng(
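An editorial sketch (not part of this commit) of combining these helpers in a test; the testdata file names are hypothetical, and `expected` would be loaded the same way as `actual`.

```
auto actual_or = LoadTestImage(
    GetTestFilePath("mediapipe/framework/testdata/actual.png"));
MP_ASSERT_OK(actual_or.status());
std::unique_ptr<ImageFrame> actual = std::move(actual_or).value();

std::unique_ptr<ImageFrame> diff_image;
absl::Status status = CompareImageFrames(*actual, *expected,
                                         /*max_color_diff=*/2.0f,
                                         /*max_alpha_diff=*/2.0f,
                                         /*max_avg_diff=*/1.0f, diff_image);
// On mismatch, diff_image can be written under GetTestOutputsDir() so bazel
// collects it as an undeclared test output for inspection.
MP_EXPECT_OK(status);
```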
@@ -75,26 +75,30 @@ absl::Status GlContext::CreateContextInternal(
   // TODO: Ensure this works with all options (in particular,
   // multithreading options, like the special-case combination of USE_PTHREADS
   // and OFFSCREEN_FRAMEBUFFER)
-  EM_ASM(let init_once = true; if (init_once) {
+  // clang-format off
+  EM_ASM(
+    let init_once = true;
+    if (init_once) {
     const cachedFindCanvasEventTarget = findCanvasEventTarget;
 
-    if (typeof cachedFindCanvasEventTarget != = 'function') {
-      if (typeof console != = 'undefined') {
-        console.error(
-            'Expected Emscripten global function ' +
-            '"findCanvasEventTarget" not found. WebGL context creation ' +
-            'may fail.');
+      if (typeof cachedFindCanvasEventTarget !== 'function') {
+        if (typeof console !== 'undefined') {
+          console.error('Expected Emscripten global function '
+              + '"findCanvasEventTarget" not found. WebGL context creation '
+              + 'may fail.');
       }
       return;
     }
 
     findCanvasEventTarget = function(target) {
       if (target == 0) {
         if (Module && Module.canvas) {
           return Module.canvas;
         } else if (Module && Module.canvasCssSelector) {
           return cachedFindCanvasEventTarget(Module.canvasCssSelector);
-        } else {
-          if (typeof console != = 'undefined') {
+        }
+      }
+      if (typeof console !== 'undefined') {
         console.warn('Module properties canvas and canvasCssSelector not ' +
                      'found during WebGL context creation.');
       }
@@ -102,15 +106,14 @@ absl::Status GlContext::CreateContextInternal(
       // cases it will not succeed, just in case the user does want to fall-
       // back.
       return cachedFindCanvasEventTarget(target);
+      }
     };  // NOLINT: Necessary semicolon.
     init_once = false;
-  });
+    }
+  );
+  // clang-format on
 
   // Note: below id parameter is only actually used if both Module.canvas and
   // Module.canvasCssSelector are undefined.
   EMSCRIPTEN_WEBGL_CONTEXT_HANDLE context_handle =
-      emscripten_webgl_create_context(0 /* id */, &attrs);
+      emscripten_webgl_create_context(nullptr, &attrs);
 
   // Check for failure
   if (context_handle <= 0) {
@@ -164,7 +164,9 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format,
 }
 
 auto iter = format_info->find(format);
-  CHECK(iter != format_info->end()) << "unsupported format";
+  CHECK(iter != format_info->end())
+      << "unsupported format: "
+      << static_cast<std::underlying_type_t<decltype(format)>>(format);
   const auto& planes = iter->second;
 #ifndef __APPLE__
   CHECK_EQ(planes.size(), 1)
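Why the cast (editorial illustration, not from the patch): a scoped enum such as GpuBufferFormat has no implicit conversion to int and no operator<<, so it must be cast to its underlying type before it can appear in a CHECK or LOG message.

```
// Minimal illustration with a hypothetical scoped enum.
enum class Example : uint32_t { kFoo = 7 };
Example e = Example::kFoo;
// LOG(INFO) << e;                                             // no overload
LOG(INFO) << static_cast<std::underlying_type_t<Example>>(e);  // prints 7
```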
@@ -69,6 +69,8 @@ node {
   options: {
     [mediapipe.InferenceCalculatorOptions.ext] {
       model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"
+      #
+      delegate: { gpu { use_advanced_gpu_api: true } }
     }
   }
 }
@@ -85,7 +85,7 @@ ObjectDef GetSSBOObjectDef(int channels) {
 
 absl::Status TFLiteGPURunner::InitializeWithModel(
     const tflite::FlatBufferModel& flatbuffer,
-    const tflite::OpResolver& op_resolver) {
+    const tflite::OpResolver& op_resolver, bool allow_quant_ops) {
   // GraphFloat32 is created twice because, when OpenCL and OpenGL backends are
   // initialized, different backend-specific graph transformations happen
   // in-place. As GraphFloat32 is not copyable by design, we keep two copies of
@@ -94,10 +94,10 @@ absl::Status TFLiteGPURunner::InitializeWithModel(
   // in the end of the initialization stage.
   graph_gl_ = std::make_unique<GraphFloat32>();
   graph_cl_ = std::make_unique<GraphFloat32>();
-  MP_RETURN_IF_ERROR(
-      BuildFromFlatBuffer(flatbuffer, op_resolver, graph_gl_.get()));
-  MP_RETURN_IF_ERROR(
-      BuildFromFlatBuffer(flatbuffer, op_resolver, graph_cl_.get()));
+  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
+                                         graph_gl_.get(), allow_quant_ops));
+  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
+                                         graph_cl_.get(), allow_quant_ops));
 
   for (const auto& input : graph_gl_->inputs()) {
     input_shapes_.push_back(input->tensor.shape);
@@ -54,7 +54,8 @@ class TFLiteGPURunner {
       : options_(options) {}
 
   absl::Status InitializeWithModel(const tflite::FlatBufferModel& flatbuffer,
-                                   const tflite::OpResolver& op_resolver);
+                                   const tflite::OpResolver& op_resolver,
+                                   bool allow_quant_ops = false);
 
   void ForceOpenGL() { opengl_is_forced_ = true; }
   void ForceOpenCL() { opencl_is_forced_ = true; }
third_party/stb_image_impl.diff (new file, vendored, 35 lines)
@@ -0,0 +1,35 @@
+From fed8c5b355e00b7cc7dd5abfebecf0338f2c2f24 Mon Sep 17 00:00:00 2001
+From: Camillo Lugaresi <camillol@google.com>
+Date: Fri, 4 Jun 2021 00:44:45 +0000
+Subject: impl files
+
+---
+ stb_image.c       | 4 ++++
+ stb_image_write.c | 4 ++++
+ 2 files changed, 8 insertions(+)
+ create mode 100644 stb_image.c
+ create mode 100644 stb_image_write.c
+
+diff --git a/stb_image.c b/stb_image.c
+new file mode 100644
+index 0000000..f88aaf6
+--- /dev/null
++++ b/stb_image.c
+@@ -0,0 +1,4 @@
++// By defining STB_IMAGE_IMPLEMENTATION the included header file will also
++// define the implementation.
++#define STB_IMAGE_IMPLEMENTATION
++#include "stb_image.h"
+diff --git a/stb_image_write.c b/stb_image_write.c
+new file mode 100644
+index 0000000..623d757
+--- /dev/null
++++ b/stb_image_write.c
+@@ -0,0 +1,4 @@
++// By defining STB_IMAGE_WRITE_IMPLEMENTATION the included header file will also
++// define the implementation.
++#define STB_IMAGE_WRITE_IMPLEMENTATION
++#include "stb_image_write.h"
+--
+2.32.0.rc1.229.g3e70b5a671-goog
third_party/stblib.BUILD (new file, vendored, 28 lines)
@@ -0,0 +1,28 @@
+# Description:
+#   Single-file C++ image decoding and encoding libraries
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # MIT license
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "stb_image",
+    srcs = ["stb_image.c"],
+    hdrs = ["stb_image.h"],
+    copts = [
+        "-Wno-unused-function",
+        "$(STACK_FRAME_UNLIMITED)",
+    ],
+    includes = ["."],
+)
+
+cc_library(
+    name = "stb_image_write",
+    srcs = ["stb_image_write.c"],
+    hdrs = ["stb_image_write.h"],
+    includes = ["."],
+)