Project import generated by Copybara.
GitOrigin-RevId: b695dda274aa3ac3c7d054e150bd9eb5c1285b19
This commit is contained in:
parent
66b377c825
commit
dd02df1dbe
|
@ -38,6 +38,8 @@ namespace mediapipe {
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
// Optionally, you can pass in a side packet that will override `max_vec_size`
|
||||
// that is specified in the options.
|
||||
template <typename T>
|
||||
class ClipVectorSizeCalculator : public CalculatorBase {
|
||||
public:
|
||||
|
@ -53,6 +55,10 @@ class ClipVectorSizeCalculator : public CalculatorBase {
|
|||
|
||||
cc->Inputs().Index(0).Set<std::vector<T>>();
|
||||
cc->Outputs().Index(0).Set<std::vector<T>>();
|
||||
// Optional input side packet that determines `max_vec_size`.
|
||||
if (cc->InputSidePackets().NumEntries() > 0) {
|
||||
cc->InputSidePackets().Index(0).Set<int>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
@ -61,6 +67,11 @@ class ClipVectorSizeCalculator : public CalculatorBase {
|
|||
cc->SetOffset(TimestampDiff(0));
|
||||
max_vec_size_ = cc->Options<::mediapipe::ClipVectorSizeCalculatorOptions>()
|
||||
.max_vec_size();
|
||||
// Override `max_vec_size` if passed as side packet.
|
||||
if (cc->InputSidePackets().NumEntries() > 0 &&
|
||||
!cc->InputSidePackets().Index(0).IsEmpty()) {
|
||||
max_vec_size_ = cc->InputSidePackets().Index(0).Get<int>();
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
|
|
@ -176,4 +176,31 @@ TEST(TestClipUniqueIntPtrVectorSizeCalculatorTest, ConsumeOneTimestamp) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(TestClipIntVectorSizeCalculatorTest, SidePacket) {
|
||||
CalculatorGraphConfig::Node node_config =
|
||||
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
|
||||
calculator: "TestClipIntVectorSizeCalculator"
|
||||
input_stream: "input_vector"
|
||||
input_side_packet: "max_vec_size"
|
||||
output_stream: "output_vector"
|
||||
options {
|
||||
[mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 1 }
|
||||
}
|
||||
)");
|
||||
CalculatorRunner runner(node_config);
|
||||
// This should override the default of 1 set in the options.
|
||||
runner.MutableSidePackets()->Index(0) = Adopt(new int(2));
|
||||
std::vector<int> input = {0, 1, 2, 3};
|
||||
AddInputVector(input, /*timestamp=*/1, &runner);
|
||||
MP_ASSERT_OK(runner.Run());
|
||||
|
||||
const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
|
||||
EXPECT_EQ(1, outputs.size());
|
||||
EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
|
||||
const std::vector<int>& output = outputs[0].Get<std::vector<int>>();
|
||||
EXPECT_EQ(2, output.size());
|
||||
std::vector<int> expected_vector = {0, 1};
|
||||
EXPECT_EQ(expected_vector, output);
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -35,6 +35,16 @@ namespace mediapipe {
|
|||
typedef ConcatenateVectorCalculator<float> ConcatenateFloatVectorCalculator;
|
||||
REGISTER_CALCULATOR(ConcatenateFloatVectorCalculator);
|
||||
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "ConcatenateInt32VectorCalculator"
|
||||
// input_stream: "int32_vector_1"
|
||||
// input_stream: "int32_vector_2"
|
||||
// output_stream: "concatenated_int32_vector"
|
||||
// }
|
||||
typedef ConcatenateVectorCalculator<int32> ConcatenateInt32VectorCalculator;
|
||||
REGISTER_CALCULATOR(ConcatenateInt32VectorCalculator);
|
||||
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "ConcatenateTfLiteTensorVectorCalculator"
|
||||
|
|
|
@ -138,8 +138,7 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
|
|||
// Note: To enable horizontal or vertical flipping, specify them in the
|
||||
// calculator options. Flipping is applied after rotation.
|
||||
//
|
||||
// Note: Only scale mode STRETCH is currently supported on CPU,
|
||||
// and flipping is not yet supported either.
|
||||
// Note: Only scale mode STRETCH is currently supported on CPU.
|
||||
//
|
||||
class ImageTransformationCalculator : public CalculatorBase {
|
||||
public:
|
||||
|
@ -316,6 +315,11 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
|
|||
cv::Mat input_mat = formats::MatView(&input_img);
|
||||
cv::Mat scaled_mat;
|
||||
|
||||
if (!output_height_ || !output_width_) {
|
||||
output_height_ = input_height;
|
||||
output_width_ = input_width;
|
||||
}
|
||||
|
||||
if (scale_mode_ == mediapipe::ScaleMode_Mode_STRETCH) {
|
||||
cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_));
|
||||
} else {
|
||||
|
@ -367,10 +371,21 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
|
|||
cv::Mat rotation_mat = cv::getRotationMatrix2D(src_center, angle, 1.0);
|
||||
cv::warpAffine(scaled_mat, rotated_mat, rotation_mat, scaled_mat.size());
|
||||
|
||||
cv::Mat flipped_mat;
|
||||
if (options_.flip_horizontally() || options_.flip_vertically()) {
|
||||
const int flip_code =
|
||||
options_.flip_horizontally() && options_.flip_vertically()
|
||||
? -1
|
||||
: options_.flip_horizontally();
|
||||
cv::flip(rotated_mat, flipped_mat, flip_code);
|
||||
} else {
|
||||
flipped_mat = rotated_mat;
|
||||
}
|
||||
|
||||
std::unique_ptr<ImageFrame> output_frame(
|
||||
new ImageFrame(input_img.Format(), output_width, output_height));
|
||||
cv::Mat output_mat = formats::MatView(output_frame.get());
|
||||
rotated_mat.copyTo(output_mat);
|
||||
flipped_mat.copyTo(output_mat);
|
||||
cc->Outputs().Tag("IMAGE").Add(output_frame.release(), cc->InputTimestamp());
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
|
@ -440,9 +455,8 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
|
|||
cc->InputSidePackets().Tag("ROTATION_DEGREES").Get<int>());
|
||||
}
|
||||
|
||||
static mediapipe::FrameScaleMode scale_mode =
|
||||
mediapipe::FrameScaleModeFromProto(scale_mode_,
|
||||
mediapipe::FrameScaleMode::kStretch);
|
||||
mediapipe::FrameScaleMode scale_mode = mediapipe::FrameScaleModeFromProto(
|
||||
scale_mode_, mediapipe::FrameScaleMode::kStretch);
|
||||
mediapipe::FrameRotation rotation =
|
||||
mediapipe::FrameRotationFromDegrees(RotationModeToDegrees(rotation_));
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ namespace mediapipe {
|
|||
|
||||
const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
|
||||
const char kImageTag[] = "IMAGE";
|
||||
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
|
||||
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
|
||||
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
||||
const char kBBoxTag[] = "BBOX";
|
||||
|
@ -145,6 +146,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
}
|
||||
cc->Inputs().Tag(tag).Set<std::vector<Detection>>();
|
||||
}
|
||||
if (absl::StartsWith(tag, kFloatContextFeaturePrefixTag)) {
|
||||
cc->Inputs().Tag(tag).Set<std::vector<float>>();
|
||||
}
|
||||
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
|
||||
cc->Inputs().Tag(tag).Set<std::vector<float>>();
|
||||
}
|
||||
|
@ -344,6 +348,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
sequence_.get());
|
||||
}
|
||||
}
|
||||
if (absl::StartsWith(tag, kFloatContextFeaturePrefixTag) &&
|
||||
!cc->Inputs().Tag(tag).IsEmpty()) {
|
||||
std::string key =
|
||||
tag.substr(sizeof(kFloatContextFeaturePrefixTag) /
|
||||
sizeof(*kFloatContextFeaturePrefixTag) -
|
||||
1);
|
||||
RET_CHECK_EQ(cc->InputTimestamp(), Timestamp::PostStream());
|
||||
mpms::SetContextFeatureFloats(
|
||||
key, cc->Inputs().Tag(tag).Get<std::vector<float>>(),
|
||||
sequence_.get());
|
||||
}
|
||||
if (absl::StartsWith(tag, kFloatFeaturePrefixTag) &&
|
||||
!cc->Inputs().Tag(tag).IsEmpty()) {
|
||||
std::string key = tag.substr(sizeof(kFloatFeaturePrefixTag) /
|
||||
|
|
|
@ -194,6 +194,38 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
|
||||
SetUpCalculator(
|
||||
{"FLOAT_CONTEXT_FEATURE_TEST:test", "FLOAT_CONTEXT_FEATURE_OTHER:test2"},
|
||||
{}, false, true);
|
||||
auto input_sequence = absl::make_unique<tf::SequenceExample>();
|
||||
|
||||
auto vf_ptr = absl::make_unique<std::vector<float>>(2, 3);
|
||||
runner_->MutableInputs()
|
||||
->Tag("FLOAT_CONTEXT_FEATURE_TEST")
|
||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
|
||||
vf_ptr = absl::make_unique<std::vector<float>>(2, 4);
|
||||
runner_->MutableInputs()
|
||||
->Tag("FLOAT_CONTEXT_FEATURE_OTHER")
|
||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
|
||||
|
||||
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
|
||||
Adopt(input_sequence.release());
|
||||
|
||||
MP_ASSERT_OK(runner_->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const tf::SequenceExample& output_sequence =
|
||||
output_packets[0].Get<tf::SequenceExample>();
|
||||
|
||||
ASSERT_THAT(mpms::GetContextFeatureFloats("TEST", output_sequence),
|
||||
testing::ElementsAre(3, 3));
|
||||
ASSERT_THAT(mpms::GetContextFeatureFloats("OTHER", output_sequence),
|
||||
testing::ElementsAre(4, 4));
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, PacksAdditionalContext) {
|
||||
tf::Features context;
|
||||
(*context.mutable_feature())["TEST"].mutable_bytes_list()->add_value("YES");
|
||||
|
|
|
@ -508,6 +508,17 @@ proto_library(
|
|||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "timed_box_list_to_render_data_calculator_proto",
|
||||
srcs = ["timed_box_list_to_render_data_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/util:color_proto",
|
||||
"//mediapipe/util:render_data_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "labels_to_render_data_calculator_proto",
|
||||
srcs = ["labels_to_render_data_calculator.proto"],
|
||||
|
@ -651,6 +662,37 @@ cc_library(
|
|||
alwayslink = 1,
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "timed_box_list_to_render_data_calculator_cc_proto",
|
||||
srcs = ["timed_box_list_to_render_data_calculator.proto"],
|
||||
cc_deps = [
|
||||
"//mediapipe/framework:calculator_cc_proto",
|
||||
"//mediapipe/util:color_cc_proto",
|
||||
"//mediapipe/util:render_data_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":timed_box_list_to_render_data_calculator_proto"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "timed_box_list_to_render_data_calculator",
|
||||
srcs = ["timed_box_list_to_render_data_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":timed_box_list_to_render_data_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/util:color_cc_proto",
|
||||
"//mediapipe/util:render_data_cc_proto",
|
||||
"//mediapipe/util/tracking:box_tracker_cc_proto",
|
||||
"//mediapipe/util/tracking:tracking_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "labels_to_render_data_calculator",
|
||||
srcs = ["labels_to_render_data_calculator.cc"],
|
||||
|
|
|
@ -37,6 +37,8 @@ namespace mediapipe {
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
// Optionally, uses a side packet to override `min_size` specified in the
|
||||
// calculator options.
|
||||
template <typename IterableT>
|
||||
class CollectionHasMinSizeCalculator : public CalculatorBase {
|
||||
public:
|
||||
|
@ -54,6 +56,10 @@ class CollectionHasMinSizeCalculator : public CalculatorBase {
|
|||
cc->Inputs().Tag("ITERABLE").Set<IterableT>();
|
||||
cc->Outputs().Index(0).Set<bool>();
|
||||
|
||||
// Optional input side packet that determines `min_size_`.
|
||||
if (cc->InputSidePackets().NumEntries() > 0) {
|
||||
cc->InputSidePackets().Index(0).Set<int>();
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -62,6 +68,11 @@ class CollectionHasMinSizeCalculator : public CalculatorBase {
|
|||
min_size_ =
|
||||
cc->Options<::mediapipe::CollectionHasMinSizeCalculatorOptions>()
|
||||
.min_size();
|
||||
// Override `min_size` if passed as side packet.
|
||||
if (cc->InputSidePackets().NumEntries() > 0 &&
|
||||
!cc->InputSidePackets().Index(0).IsEmpty()) {
|
||||
min_size_ = cc->InputSidePackets().Index(0).Get<int>();
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "mediapipe/calculators/util/timed_box_list_to_render_data_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/util/color.pb.h"
|
||||
#include "mediapipe/util/render_data.pb.h"
|
||||
#include "mediapipe/util/tracking/box_tracker.pb.h"
|
||||
#include "mediapipe/util/tracking/tracking.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kTimedBoxListTag[] = "BOX_LIST";
|
||||
constexpr char kRenderDataTag[] = "RENDER_DATA";
|
||||
|
||||
void AddTimedBoxProtoToRenderData(
|
||||
const TimedBoxProto& box_proto,
|
||||
const TimedBoxListToRenderDataCalculatorOptions& options,
|
||||
RenderData* render_data) {
|
||||
if (box_proto.has_quad() && box_proto.quad().vertices_size() > 0 &&
|
||||
box_proto.quad().vertices_size() % 2 == 0) {
|
||||
const int num_corners = box_proto.quad().vertices_size() / 2;
|
||||
for (int i = 0; i < num_corners; ++i) {
|
||||
const int next_corner = (i + 1) % num_corners;
|
||||
auto* line_annotation = render_data->add_render_annotations();
|
||||
line_annotation->mutable_color()->set_r(options.box_color().r());
|
||||
line_annotation->mutable_color()->set_g(options.box_color().g());
|
||||
line_annotation->mutable_color()->set_b(options.box_color().b());
|
||||
line_annotation->set_thickness(options.thickness());
|
||||
RenderAnnotation::Line* line = line_annotation->mutable_line();
|
||||
line->set_x_start(box_proto.quad().vertices(i * 2));
|
||||
line->set_y_start(box_proto.quad().vertices(i * 2 + 1));
|
||||
line->set_x_end(box_proto.quad().vertices(next_corner * 2));
|
||||
line->set_y_end(box_proto.quad().vertices(next_corner * 2 + 1));
|
||||
}
|
||||
} else {
|
||||
auto* rect_annotation = render_data->add_render_annotations();
|
||||
rect_annotation->mutable_color()->set_r(options.box_color().r());
|
||||
rect_annotation->mutable_color()->set_g(options.box_color().g());
|
||||
rect_annotation->mutable_color()->set_b(options.box_color().b());
|
||||
rect_annotation->set_thickness(options.thickness());
|
||||
RenderAnnotation::Rectangle* rect = rect_annotation->mutable_rectangle();
|
||||
rect->set_normalized(true);
|
||||
rect->set_left(box_proto.left());
|
||||
rect->set_right(box_proto.right());
|
||||
rect->set_top(box_proto.top());
|
||||
rect->set_bottom(box_proto.bottom());
|
||||
rect->set_rotation(box_proto.rotation());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// A calculator that converts TimedBoxProtoList proto to RenderData proto for
|
||||
// visualization. If the input TimedBoxProto contains `quad` field, this
|
||||
// calculator will draw a quadrilateral based on it. Otherwise this calculator
|
||||
// will draw a rotated rectangle based on `top`, `bottom`, `left`, `right` and
|
||||
// `rotation` fields
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "TimedBoxListToRenderDataCalculator"
|
||||
// input_stream: "BOX_LIST:landmarks"
|
||||
// output_stream: "RENDER_DATA:render_data"
|
||||
// options {
|
||||
// [TimedBoxListToRenderDataCalculatorOptions.ext] {
|
||||
// box_color { r: 0 g: 255 b: 0 }
|
||||
// thickness: 4.0
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class TimedBoxListToRenderDataCalculator : public CalculatorBase {
|
||||
public:
|
||||
TimedBoxListToRenderDataCalculator() {}
|
||||
~TimedBoxListToRenderDataCalculator() override {}
|
||||
TimedBoxListToRenderDataCalculator(
|
||||
const TimedBoxListToRenderDataCalculator&) = delete;
|
||||
TimedBoxListToRenderDataCalculator& operator=(
|
||||
const TimedBoxListToRenderDataCalculator&) = delete;
|
||||
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override;
|
||||
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
TimedBoxListToRenderDataCalculatorOptions options_;
|
||||
};
|
||||
REGISTER_CALCULATOR(TimedBoxListToRenderDataCalculator);
|
||||
|
||||
::mediapipe::Status TimedBoxListToRenderDataCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
if (cc->Inputs().HasTag(kTimedBoxListTag)) {
|
||||
cc->Inputs().Tag(kTimedBoxListTag).Set<TimedBoxProtoList>();
|
||||
}
|
||||
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TimedBoxListToRenderDataCalculator::Open(
|
||||
CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
options_ = cc->Options<TimedBoxListToRenderDataCalculatorOptions>();
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TimedBoxListToRenderDataCalculator::Process(
|
||||
CalculatorContext* cc) {
|
||||
auto render_data = absl::make_unique<RenderData>();
|
||||
|
||||
if (cc->Inputs().HasTag(kTimedBoxListTag)) {
|
||||
const auto& box_list =
|
||||
cc->Inputs().Tag(kTimedBoxListTag).Get<TimedBoxProtoList>();
|
||||
|
||||
for (const auto& box : box_list.box()) {
|
||||
AddTimedBoxProtoToRenderData(box, options_, render_data.get());
|
||||
}
|
||||
}
|
||||
|
||||
cc->Outputs()
|
||||
.Tag(kRenderDataTag)
|
||||
.Add(render_data.release(), cc->InputTimestamp());
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/util/color.proto";
|
||||
|
||||
message TimedBoxListToRenderDataCalculatorOptions {
|
||||
extend CalculatorOptions {
|
||||
optional TimedBoxListToRenderDataCalculatorOptions ext = 289899854;
|
||||
}
|
||||
|
||||
// Color of boxes.
|
||||
optional Color box_color = 1;
|
||||
|
||||
// Thickness of the drawing of boxes.
|
||||
optional double thickness = 2 [default = 1.0];
|
||||
}
|
|
@ -66,6 +66,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_detection/face_de
|
|||
--calculator_graph_config_file=mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt
|
||||
```
|
||||
|
||||
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
|
||||
|
||||
#### Graph
|
||||
|
||||
![graph visualization](images/face_detection_desktop.png)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
- [Overview](#overview)
|
||||
- [OpenGL Support](#opengl-support)
|
||||
- [Desktop GPUs](#desktop-gpu-linux)
|
||||
- [Life of a GPU calculator](#life-of-a-gpu-calculator)
|
||||
- [GpuBuffer to ImageFrame converters](#gpubuffer-to-imageframe-converters)
|
||||
- [Disable GPU support](#disable-gpu-support)
|
||||
|
@ -22,13 +23,60 @@ Below are the design principles for GPU support in MediaPipe
|
|||
* Because different platforms may require different techniques for best performance, the API should allow flexibility in the way things are implemented behind the scenes.
|
||||
* A calculator should be allowed maximum flexibility in using the GPU for all or part of its operation, combining it with the CPU if necessary.
|
||||
|
||||
### OpenGL support
|
||||
### OpenGL Support
|
||||
|
||||
MediaPipe supports OpenGL ES up to version 3.2 on Android and up to ES 3.0 on iOS. In addition, MediaPipe also supports Metal on iOS.
|
||||
MediaPipe supports OpenGL ES up to version 3.2 on Android/Linux and up to ES 3.0
|
||||
on iOS. In addition, MediaPipe also supports Metal on iOS.
|
||||
|
||||
* MediaPipe allows graphs to run OpenGL in multiple GL contexts. For example, this can be very useful in graphs that combine a slower GPU inference path (eg, at 10 FPS) with a faster GPU rendering path (eg, at 30 FPS): since one GL context corresponds to one sequential command queue, using the same context for both tasks would reduce the rendering frame rate. One challenge MediaPipe's use of multiple contexts solves is the ability to communicate across them. An example scenario is one with an input video that is sent to both the rendering and inferences paths, and rendering needs to have access to the latest output from inference.
|
||||
OpenGL ES 3.1 or greater is required (on Android/Linux systems) for running
|
||||
machine learning inference calculators and graphs.
|
||||
|
||||
* An OpenGL context cannot be accessed by multiple threads at the same time. Furthermore, switching the active GL context on the same thread can be slow on some Android devices. Therefore, our approach is to have one dedicated thread per context. Each thread issues GL commands, building up a serial command queue on its context, which is then executed by the GPU asynchronously.
|
||||
MediaPipe allows graphs to run OpenGL in multiple GL contexts. For example, this
|
||||
can be very useful in graphs that combine a slower GPU inference path (eg, at 10
|
||||
FPS) with a faster GPU rendering path (eg, at 30 FPS): since one GL context
|
||||
corresponds to one sequential command queue, using the same context for both
|
||||
tasks would reduce the rendering frame rate.
|
||||
|
||||
One challenge MediaPipe's use of multiple contexts solves is the ability to
|
||||
communicate across them. An example scenario is one with an input video that is
|
||||
sent to both the rendering and inferences paths, and rendering needs to have
|
||||
access to the latest output from inference.
|
||||
|
||||
An OpenGL context cannot be accessed by multiple threads at the same time.
|
||||
Furthermore, switching the active GL context on the same thread can be slow on
|
||||
some Android devices. Therefore, our approach is to have one dedicated thread
|
||||
per context. Each thread issues GL commands, building up a serial command queue
|
||||
on its context, which is then executed by the GPU asynchronously.
|
||||
|
||||
#### Desktop GPU (Linux)
|
||||
|
||||
MediaPipe GPU can run on linux systems with video cards that support OpenGL ES
|
||||
3.1 and up.
|
||||
|
||||
To check if your linux desktop GPU can run mediapipe:
|
||||
|
||||
```bash
|
||||
$ sudo apt-get install mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev
|
||||
$ sudo apt-get install mesa-utils
|
||||
$ glxinfo | grep -i opengl
|
||||
```
|
||||
|
||||
My linux box prints:
|
||||
|
||||
```bash
|
||||
$ glxinfo | grep -i opengl
|
||||
...
|
||||
OpenGL ES profile version string: OpenGL ES 3.2 NVIDIA 430.50
|
||||
OpenGL ES profile shading language version string: OpenGL ES GLSL ES 3.20
|
||||
OpenGL ES profile extensions:
|
||||
```
|
||||
|
||||
*^notice the OpenGL ES 3.2 text^*
|
||||
|
||||
To run MediaPipe GPU on desktop, you need to see ES 3.1 or greater printed.
|
||||
|
||||
If OpenGL ES is not printed, or is below 3.1, then the GPU inference will not
|
||||
run.
|
||||
|
||||
### Life of a GPU calculator
|
||||
|
||||
|
|
|
@ -40,6 +40,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair
|
|||
--calculator_graph_config_file=mediapipe/graphs/hair_segmentation/hair_segmentation_mobile_gpu.pbtxt
|
||||
```
|
||||
|
||||
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
|
||||
|
||||
#### Graph
|
||||
|
||||
![hair_segmentation_mobile_gpu_graph](images/mobile/hair_segmentation_mobile_gpu.png)
|
||||
|
|
|
@ -61,6 +61,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tra
|
|||
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
|
||||
```
|
||||
|
||||
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
|
||||
|
||||
#### Graph
|
||||
|
||||
![graph visualization](images/hand_tracking_desktop.png)
|
||||
|
|
|
@ -231,7 +231,7 @@ To build and run iOS apps:
|
|||
|
||||
* Install [Homebrew](https://brew.sh).
|
||||
* Install [Xcode](https://developer.apple.com/xcode/) and its Command Line
|
||||
Tools by `xcode-select install`.
|
||||
Tools by `xcode-select --install`.
|
||||
|
||||
2. Checkout MediaPipe repository.
|
||||
|
||||
|
@ -331,7 +331,7 @@ To build and run iOS apps:
|
|||
|
||||
```
|
||||
|
||||
5. Make sure that Python 3 and Python "six" library is installed.
|
||||
5. Make sure that Python 3 and the Python "six" library are installed.
|
||||
|
||||
```
|
||||
$ brew install python
|
||||
|
@ -574,6 +574,7 @@ This will use a Docker image that will isolate mediapipe's installation from the
|
|||
|
||||
Requirements:
|
||||
|
||||
* Java Runtime.
|
||||
* Android SDK release 28.0.3 and above.
|
||||
* Android NDK r17c and above.
|
||||
|
||||
|
|
|
@ -61,6 +61,8 @@ $ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/mu
|
|||
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt
|
||||
```
|
||||
|
||||
Issues running? Please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
|
||||
|
||||
#### Graph
|
||||
|
||||
![graph visualization](images/multi_hand_tracking_desktop.png)
|
||||
|
|
|
@ -214,7 +214,6 @@ To build and run the TensorFlow Lite example on desktop (CPU) with Webcam, run:
|
|||
# Video from webcam running on desktop CPU
|
||||
$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
mediapipe/examples/desktop/object_detection:object_detection_cpu
|
||||
|
||||
# It should print:
|
||||
#Target //mediapipe/examples/desktop/object_detection:object_detection_cpu up-to-date:
|
||||
# bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu
|
||||
|
|
|
@ -149,12 +149,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -180,12 +180,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -158,12 +158,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -151,12 +151,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -138,12 +138,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -52,15 +52,24 @@ struct PolynomialResidual {
|
|||
const double out_;
|
||||
};
|
||||
|
||||
// Computes the amount of delta position change along the fitted polynomial
|
||||
// curve, translates the delta from being relative to the origin of the original
|
||||
// dimension to being relative to the center of the original dimension, then
|
||||
// regulates the delta to avoid moving camera off the frame boundaries.
|
||||
float ComputeDelta(const float in, const int original_dimension,
|
||||
const int output_dimension, const double a, const double b,
|
||||
const double c, const double d, const double k) {
|
||||
// The value `out` here represents a normalized distance between the center of
|
||||
// the output window and the origin of the original window.
|
||||
float out =
|
||||
a * in + b * in * in + c * in * in * in + d * in * in * in * in + k;
|
||||
float delta = (out - 0.5) * 2 * output_dimension;
|
||||
const float max_delta = (original_dimension - output_dimension) / 2.0f;
|
||||
// Translate `out` to a pixel distance between the center of the output window
|
||||
// and the center of the original window. This value can be negative, 0, or
|
||||
// positive.
|
||||
float delta = (out - 0.5) * original_dimension;
|
||||
|
||||
// Make sure delta doesn't move the camera off the frame boundary.
|
||||
const float max_delta = (original_dimension - output_dimension) / 2.0f;
|
||||
if (delta > max_delta) {
|
||||
delta = max_delta;
|
||||
} else if (delta < -max_delta) {
|
||||
|
|
|
@ -140,7 +140,7 @@ constexpr double prediction[] = {
|
|||
-24.550379, -24.06503,
|
||||
};
|
||||
|
||||
void GenerateDataPoints(
|
||||
void GenerateDataPointsFromRealVideo(
|
||||
const int focus_point_frames_length,
|
||||
const int prior_focus_point_frames_length,
|
||||
std::vector<FocusPointFrame>* focus_point_frames,
|
||||
|
@ -163,14 +163,15 @@ void GenerateDataPoints(
|
|||
}
|
||||
}
|
||||
|
||||
TEST(PolynomialRegressionPathSolverTest, Success) {
|
||||
TEST(PolynomialRegressionPathSolverTest, SuccessInTrackingCameraMode) {
|
||||
PolynomialRegressionPathSolver solver;
|
||||
std::vector<FocusPointFrame> focus_point_frames;
|
||||
std::vector<FocusPointFrame> prior_focus_point_frames;
|
||||
std::vector<cv::Mat> all_xforms;
|
||||
GenerateDataPoints(/* focus_point_frames_length = */ 100,
|
||||
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 100,
|
||||
/* prior_focus_point_frames_length = */ 100,
|
||||
&focus_point_frames, &prior_focus_point_frames);
|
||||
&focus_point_frames,
|
||||
&prior_focus_point_frames);
|
||||
constexpr int kFrameWidth = 200;
|
||||
constexpr int kFrameHeight = 300;
|
||||
constexpr int kCropWidth = 100;
|
||||
|
@ -185,14 +186,86 @@ TEST(PolynomialRegressionPathSolverTest, Success) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(PolynomialRegressionPathSolverTest, SuccessInStationaryCameraMode) {
|
||||
PolynomialRegressionPathSolver solver;
|
||||
std::vector<FocusPointFrame> focus_point_frames;
|
||||
std::vector<FocusPointFrame> prior_focus_point_frames;
|
||||
std::vector<cv::Mat> all_xforms;
|
||||
|
||||
constexpr int kFocusPointFramesLength = 100;
|
||||
constexpr float kNormStationaryFocusPointX = 0.34f;
|
||||
|
||||
for (int i = 0; i < kFocusPointFramesLength; i++) {
|
||||
FocusPoint sp;
|
||||
// Add a fixed normalized focus point location.
|
||||
sp.set_norm_point_x(kNormStationaryFocusPointX);
|
||||
FocusPointFrame spf;
|
||||
*spf.add_point() = sp;
|
||||
focus_point_frames.push_back(spf);
|
||||
}
|
||||
constexpr int kFrameWidth = 300;
|
||||
constexpr int kFrameHeight = 300;
|
||||
constexpr int kCropWidth = 100;
|
||||
constexpr int kCropHeight = 300;
|
||||
MP_ASSERT_OK(solver.ComputeCameraPath(
|
||||
focus_point_frames, prior_focus_point_frames, kFrameWidth, kFrameHeight,
|
||||
kCropWidth, kCropHeight, &all_xforms));
|
||||
ASSERT_EQ(all_xforms.size(), kFocusPointFramesLength);
|
||||
|
||||
constexpr int kExpectedShift = -48;
|
||||
for (int i = 0; i < all_xforms.size(); i++) {
|
||||
cv::Mat mat = all_xforms[i];
|
||||
EXPECT_FLOAT_EQ(mat.at<float>(0, 2), kExpectedShift);
|
||||
}
|
||||
}
|
||||
|
||||
// Test the case where focus points are so close to boundaries that the amount
|
||||
// of shifts would have moved the camera to go outside frame boundaries. In this
|
||||
// case, the solver should regulate the camera position shift to keep it stay
|
||||
// inside the viewport.
|
||||
TEST(PolynomialRegressionPathSolverTest, SuccessWhenFocusPointCloseToBoundary) {
|
||||
PolynomialRegressionPathSolver solver;
|
||||
std::vector<FocusPointFrame> focus_point_frames;
|
||||
std::vector<FocusPointFrame> prior_focus_point_frames;
|
||||
std::vector<cv::Mat> all_xforms;
|
||||
|
||||
constexpr int kFocusPointFramesLength = 100;
|
||||
constexpr float kNormStationaryFocusPointX = 0.99f;
|
||||
|
||||
for (int i = 0; i < kFocusPointFramesLength; i++) {
|
||||
FocusPoint sp;
|
||||
// Add a fixed normalized focus point location.
|
||||
sp.set_norm_point_x(kNormStationaryFocusPointX);
|
||||
FocusPointFrame spf;
|
||||
*spf.add_point() = sp;
|
||||
focus_point_frames.push_back(spf);
|
||||
}
|
||||
constexpr int kFrameWidth = 500;
|
||||
constexpr int kFrameHeight = 300;
|
||||
constexpr int kCropWidth = 100;
|
||||
constexpr int kCropHeight = 300;
|
||||
MP_ASSERT_OK(solver.ComputeCameraPath(
|
||||
focus_point_frames, prior_focus_point_frames, kFrameWidth, kFrameHeight,
|
||||
kCropWidth, kCropHeight, &all_xforms));
|
||||
ASSERT_EQ(all_xforms.size(), kFocusPointFramesLength);
|
||||
|
||||
// Regulate max delta change = (500 - 100) / 2.
|
||||
constexpr int kExpectedShift = 200;
|
||||
for (int i = 0; i < all_xforms.size(); i++) {
|
||||
cv::Mat mat = all_xforms[i];
|
||||
EXPECT_FLOAT_EQ(mat.at<float>(0, 2), kExpectedShift);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(PolynomialRegressionPathSolverTest, FewFramesShouldWork) {
|
||||
PolynomialRegressionPathSolver solver;
|
||||
std::vector<FocusPointFrame> focus_point_frames;
|
||||
std::vector<FocusPointFrame> prior_focus_point_frames;
|
||||
std::vector<cv::Mat> all_xforms;
|
||||
GenerateDataPoints(/* focus_point_frames_length = */ 1,
|
||||
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 1,
|
||||
/* prior_focus_point_frames_length = */ 1,
|
||||
&focus_point_frames, &prior_focus_point_frames);
|
||||
&focus_point_frames,
|
||||
&prior_focus_point_frames);
|
||||
constexpr int kFrameWidth = 200;
|
||||
constexpr int kFrameHeight = 300;
|
||||
constexpr int kCropWidth = 100;
|
||||
|
@ -208,9 +281,10 @@ TEST(PolynomialRegressionPathSolverTest, OneCurrentFrameShouldWork) {
|
|||
std::vector<FocusPointFrame> focus_point_frames;
|
||||
std::vector<FocusPointFrame> prior_focus_point_frames;
|
||||
std::vector<cv::Mat> all_xforms;
|
||||
GenerateDataPoints(/* focus_point_frames_length = */ 1,
|
||||
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 1,
|
||||
/* prior_focus_point_frames_length = */ 0,
|
||||
&focus_point_frames, &prior_focus_point_frames);
|
||||
&focus_point_frames,
|
||||
&prior_focus_point_frames);
|
||||
constexpr int kFrameWidth = 200;
|
||||
constexpr int kFrameHeight = 300;
|
||||
constexpr int kCropWidth = 100;
|
||||
|
@ -226,9 +300,10 @@ TEST(PolynomialRegressionPathSolverTest, ZeroFrameShouldFail) {
|
|||
std::vector<FocusPointFrame> focus_point_frames;
|
||||
std::vector<FocusPointFrame> prior_focus_point_frames;
|
||||
std::vector<cv::Mat> all_xforms;
|
||||
GenerateDataPoints(/* focus_point_frames_length = */ 0,
|
||||
GenerateDataPointsFromRealVideo(/* focus_point_frames_length = */ 0,
|
||||
/* prior_focus_point_frames_length = */ 0,
|
||||
&focus_point_frames, &prior_focus_point_frames);
|
||||
&focus_point_frames,
|
||||
&prior_focus_point_frames);
|
||||
constexpr int kFrameWidth = 200;
|
||||
constexpr int kFrameHeight = 300;
|
||||
constexpr int kCropWidth = 100;
|
||||
|
|
|
@ -371,11 +371,12 @@ void ImageFrame::CopyPixelData(ImageFormat::Format format, int width,
|
|||
void ImageFrame::CopyToBuffer(uint8* buffer, int buffer_size) const {
|
||||
CHECK(buffer);
|
||||
CHECK_EQ(1, ByteDepth());
|
||||
int data_size = width_ * height_ * NumberOfChannels() * ByteDepth();
|
||||
const int data_size = width_ * height_ * NumberOfChannels();
|
||||
CHECK_LE(data_size, buffer_size);
|
||||
if (IsContiguous()) {
|
||||
// The data is stored contiguously, we can just copy.
|
||||
std::copy_n(pixel_data_.get(), data_size, buffer);
|
||||
const uint8* src = reinterpret_cast<const uint8*>(pixel_data_.get());
|
||||
std::copy_n(src, data_size, buffer);
|
||||
} else {
|
||||
InternalCopyToBuffer(0 /* contiguous storage */,
|
||||
reinterpret_cast<char*>(buffer));
|
||||
|
|
|
@ -29,6 +29,9 @@ message Rect {
|
|||
|
||||
// Rotation angle is counter-clockwise in radian.
|
||||
optional float rotation = 5 [default = 0.0];
|
||||
|
||||
// Optional unique id to help associate different Rects to each other.
|
||||
optional int64 rect_id = 6;
|
||||
}
|
||||
|
||||
// A rectangle with rotation in normalized coordinates. The values of box center
|
||||
|
@ -45,4 +48,8 @@ message NormalizedRect {
|
|||
|
||||
// Rotation angle is counter-clockwise in radian.
|
||||
optional float rotation = 5 [default = 0.0];
|
||||
|
||||
// Optional unique id to help associate different NormalizedRects to each
|
||||
// other.
|
||||
optional int64 rect_id = 6;
|
||||
}
|
||||
|
|
|
@ -57,6 +57,9 @@ public abstract class CameraHelper {
|
|||
*/
|
||||
public abstract Size computeDisplaySizeFromViewSize(Size viewSize);
|
||||
|
||||
/** Returns a boolean which is true if the camera is in Portrait mode, false in Landscape mode. */
|
||||
public abstract boolean isCameraRotated();
|
||||
|
||||
public void setOnCameraStartedListener(@Nullable OnCameraStartedListener listener) {
|
||||
onCameraStartedListener = listener;
|
||||
}
|
||||
|
|
|
@ -16,13 +16,21 @@ package com.google.mediapipe.components;
|
|||
|
||||
import android.app.Activity;
|
||||
import androidx.lifecycle.LifecycleOwner;
|
||||
import android.content.Context;
|
||||
import android.graphics.SurfaceTexture;
|
||||
import android.hardware.camera2.CameraAccessException;
|
||||
import android.hardware.camera2.CameraCharacteristics;
|
||||
import android.hardware.camera2.CameraManager;
|
||||
import android.hardware.camera2.CameraMetadata;
|
||||
import android.hardware.camera2.params.StreamConfigurationMap;
|
||||
import android.util.Log;
|
||||
import android.util.Size;
|
||||
import androidx.camera.core.CameraX;
|
||||
import androidx.camera.core.CameraX.LensFacing;
|
||||
import androidx.camera.core.Preview;
|
||||
import androidx.camera.core.PreviewConfig;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Uses CameraX APIs for camera setup and access.
|
||||
|
@ -32,6 +40,9 @@ import androidx.camera.core.PreviewConfig;
|
|||
public class CameraXPreviewHelper extends CameraHelper {
|
||||
private static final String TAG = "CameraXPreviewHelper";
|
||||
|
||||
// Target frame and view resolution size in landscape.
|
||||
private static final Size TARGET_SIZE = new Size(1280, 720);
|
||||
|
||||
private Preview preview;
|
||||
|
||||
// Size of the camera-preview frames from the camera.
|
||||
|
@ -39,6 +50,10 @@ public class CameraXPreviewHelper extends CameraHelper {
|
|||
// Rotation of the camera-preview frames in degrees.
|
||||
private int frameRotation;
|
||||
|
||||
// Focal length resolved in pixels on the frame texture.
|
||||
private float focalLengthPixels;
|
||||
private CameraCharacteristics cameraCharacteristics = null;
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("RestrictTo") // See b/132705545.
|
||||
public void startCamera(
|
||||
|
@ -46,7 +61,10 @@ public class CameraXPreviewHelper extends CameraHelper {
|
|||
LensFacing cameraLensFacing =
|
||||
cameraFacing == CameraHelper.CameraFacing.FRONT ? LensFacing.FRONT : LensFacing.BACK;
|
||||
PreviewConfig previewConfig =
|
||||
new PreviewConfig.Builder().setLensFacing(cameraLensFacing).build();
|
||||
new PreviewConfig.Builder()
|
||||
.setLensFacing(cameraLensFacing)
|
||||
.setTargetResolution(TARGET_SIZE)
|
||||
.build();
|
||||
preview = new Preview(previewConfig);
|
||||
|
||||
preview.setOnPreviewOutputUpdateListener(
|
||||
|
@ -60,11 +78,22 @@ public class CameraXPreviewHelper extends CameraHelper {
|
|||
return;
|
||||
}
|
||||
}
|
||||
Integer selectedLensFacing =
|
||||
cameraFacing == CameraHelper.CameraFacing.FRONT
|
||||
? CameraMetadata.LENS_FACING_FRONT
|
||||
: CameraMetadata.LENS_FACING_BACK;
|
||||
calculateFocalLength(context, selectedLensFacing);
|
||||
if (onCameraStartedListener != null) {
|
||||
onCameraStartedListener.onCameraStarted(previewOutput.getSurfaceTexture());
|
||||
}
|
||||
});
|
||||
CameraX.bindToLifecycle(/*lifecycleOwner=*/ (LifecycleOwner) context, preview);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCameraRotated() {
|
||||
return frameRotation % 180 == 90;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -75,28 +104,79 @@ public class CameraXPreviewHelper extends CameraHelper {
|
|||
return null;
|
||||
}
|
||||
|
||||
// Valid rotation values are 0, 90, 180 and 270.
|
||||
// Frames are rotated relative to the device's "natural" landscape orientation. When in portrait
|
||||
// mode, valid rotation values are 90 or 270, and the width/height should be swapped to
|
||||
// calculate aspect ratio.
|
||||
float frameAspectRatio =
|
||||
frameRotation == 90 || frameRotation == 270
|
||||
? frameSize.getHeight() / (float) frameSize.getWidth()
|
||||
: frameSize.getWidth() / (float) frameSize.getHeight();
|
||||
|
||||
float viewAspectRatio = viewSize.getWidth() / (float) viewSize.getHeight();
|
||||
|
||||
// Match shortest sides together.
|
||||
int scaledWidth;
|
||||
int scaledHeight;
|
||||
if (frameAspectRatio < viewAspectRatio) {
|
||||
scaledWidth = viewSize.getWidth();
|
||||
scaledHeight = Math.round(viewSize.getWidth() / frameAspectRatio);
|
||||
} else {
|
||||
scaledHeight = viewSize.getHeight();
|
||||
scaledWidth = Math.round(viewSize.getHeight() * frameAspectRatio);
|
||||
Size optimalSize = getOptimalViewSize(viewSize);
|
||||
return optimalSize != null ? optimalSize : frameSize;
|
||||
}
|
||||
|
||||
return new Size(scaledWidth, scaledHeight);
|
||||
private Size getOptimalViewSize(Size targetSize) {
|
||||
if (cameraCharacteristics != null) {
|
||||
StreamConfigurationMap map =
|
||||
cameraCharacteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP);
|
||||
Size[] outputSizes = map.getOutputSizes(SurfaceTexture.class);
|
||||
|
||||
int selectedWidth = -1;
|
||||
int selectedHeight = -1;
|
||||
float selectedAspectRatioDifference = 1e3f;
|
||||
float targetAspectRatio = targetSize.getWidth() / (float) targetSize.getHeight();
|
||||
|
||||
// Find the smallest size >= target size with the closest aspect ratio.
|
||||
for (Size size : outputSizes) {
|
||||
float aspectRatio = (float) size.getWidth() / size.getHeight();
|
||||
float aspectRatioDifference = Math.abs(aspectRatio - targetAspectRatio);
|
||||
if (aspectRatioDifference <= selectedAspectRatioDifference) {
|
||||
if ((selectedWidth == -1 && selectedHeight == -1)
|
||||
|| (size.getWidth() <= selectedWidth
|
||||
&& size.getWidth() >= frameSize.getWidth()
|
||||
&& size.getHeight() <= selectedHeight
|
||||
&& size.getHeight() >= frameSize.getHeight())) {
|
||||
selectedWidth = size.getWidth();
|
||||
selectedHeight = size.getHeight();
|
||||
selectedAspectRatioDifference = aspectRatioDifference;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (selectedWidth != -1 && selectedHeight != -1) {
|
||||
return new Size(selectedWidth, selectedHeight);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public float getFocalLengthPixels() {
|
||||
return focalLengthPixels;
|
||||
}
|
||||
|
||||
private void calculateFocalLength(Activity context, Integer lensFacing) {
|
||||
CameraManager cameraManager = (CameraManager) context.getSystemService(Context.CAMERA_SERVICE);
|
||||
try {
|
||||
List<String> cameraList = Arrays.asList(cameraManager.getCameraIdList());
|
||||
for (String availableCameraId : cameraList) {
|
||||
CameraCharacteristics availableCameraCharacteristics =
|
||||
cameraManager.getCameraCharacteristics(availableCameraId);
|
||||
Integer availableLensFacing =
|
||||
availableCameraCharacteristics.get(CameraCharacteristics.LENS_FACING);
|
||||
if (availableLensFacing == null) {
|
||||
continue;
|
||||
}
|
||||
if (availableLensFacing.equals(lensFacing)) {
|
||||
cameraCharacteristics = availableCameraCharacteristics;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Focal length of the camera in millimeters.
|
||||
// Note that CameraCharacteristics returns a list of focal lengths and there could be more
|
||||
// than one focal length available if optical zoom is enabled or there are multiple physical
|
||||
// cameras in the logical camera referenced here. A theoretically correct of doing this would
|
||||
// be to use the focal length set explicitly via Camera2 API, as documented in
|
||||
// https://developer.android.com/reference/android/hardware/camera2/CaptureRequest#LENS_FOCAL_LENGTH.
|
||||
float focalLengthMm =
|
||||
cameraCharacteristics.get(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)[0];
|
||||
// Sensor Width of the camera in millimeters.
|
||||
float sensorWidthMm =
|
||||
cameraCharacteristics.get(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE).getWidth();
|
||||
focalLengthPixels = frameSize.getWidth() * focalLengthMm / sensorWidthMm;
|
||||
} catch (CameraAccessException e) {
|
||||
Log.e(TAG, "Accessing camera ID info got error: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -332,17 +332,10 @@ public class ExternalTextureConverter implements TextureFrameProducer {
|
|||
bindFramebuffer(outputFrame.getTextureName(), destinationWidth, destinationHeight);
|
||||
renderer.render(surfaceTexture);
|
||||
|
||||
// Populate frame timestamp with the System.nanoTime() timestamp after render() as renderer
|
||||
// Populate frame timestamp with surface texture timestamp after render() as renderer
|
||||
// ensures that surface texture has the up-to-date timestamp. (Also adjust |timestampOffset|
|
||||
// to ensure that timestamps increase monotonically.)
|
||||
// We assume that the camera timestamp is generated at the same time as this method is called
|
||||
// and get the time via System.nanoTime(). This timestamp is aligned with the clock used by
|
||||
// the microphone which returns timestamps aligned to the same time base as System.nanoTime().
|
||||
// Data sent from camera and microphone should have timestamps aligned on the same clock and
|
||||
// timebase so that the data can be processed by a MediaPipe graph simultaneously.
|
||||
// Android's SurfaceTexture.getTimestamp() method is not aligned to the System.nanoTime()
|
||||
// clock, so it cannot be used for texture timestamps in this method.
|
||||
long textureTimestamp = System.nanoTime() / NANOS_PER_MICRO;
|
||||
long textureTimestamp = surfaceTexture.getTimestamp() / NANOS_PER_MICRO;
|
||||
if (previousTimestampValid && textureTimestamp + timestampOffset <= previousTimestamp) {
|
||||
timestampOffset = previousTimestamp + 1 - textureTimestamp;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@ package com.google.mediapipe.components;
|
|||
|
||||
import android.content.Context;
|
||||
import android.graphics.Bitmap;
|
||||
import android.media.AudioFormat;
|
||||
import android.util.Log;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.mediapipe.framework.AndroidAssetUtil;
|
||||
|
@@ -39,15 +38,12 @@ import javax.annotation.Nullable;

/**
 * A {@link com.google.mediapipe.components.TextureFrameProcessor} that sends video frames through a
 * MediaPipe graph and a {@link com.google.mediapipe.components.AudioDataProcessor} that sends audio
 * data samples through a MediaPipe graph.
 * MediaPipe graph.
 */
public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor {
public class FrameProcessor implements TextureFrameProcessor {
  private static final String TAG = "FrameProcessor";
  private static final int BYTES_PER_MONO_SAMPLE = 2; // 16 bit PCM encoding.

  private List<TextureFrameConsumer> videoConsumers = new ArrayList<>();
  private List<AudioDataConsumer> audioConsumers = new ArrayList<>();
  private List<TextureFrameConsumer> consumers = new ArrayList<>();
  private Graph mediapipeGraph;
  private AndroidPacketCreator packetCreator;
  private OnWillAddFrameListener addFrameListener;
@@ -57,15 +53,6 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
  private SurfaceOutput videoSurfaceOutput;
  private final AtomicBoolean started = new AtomicBoolean(false);
  private boolean hybridPath = false;
  // Input stream of audio data. Can be null.
  private String audioInputStream;
  // Output stream of audio data. Can be null.
  private String audioOutputStream;
  // Number of channels of audio data read in the input stream. This can be only 1 or 2, as
  // AudioRecord supports only AudioFormat.CHANNEL_IN_MONO and AudioFormat.CHANNEL_IN_STEREO.
  private int numAudioChannels = 1;
  // Sample rate of audio data sent to the MediaPipe graph.
  private double audioSampleRate;

  /**
   * Constructor.
@@ -104,7 +91,7 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
  public void process(Packet packet) {
    List<TextureFrameConsumer> currentConsumers;
    synchronized (this) {
      currentConsumers = videoConsumers;
      currentConsumers = consumers;
    }
    for (TextureFrameConsumer consumer : currentConsumers) {
      TextureFrame frame = PacketGetter.getTextureFrame(packet);
@@ -128,54 +115,6 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
    videoSurfaceOutput = mediapipeGraph.addSurfaceOutput(videoOutputStream);
  }

  /**
   * Adds input streams to process audio data and output streams that output processed audio data.
   *
   * @param inputStream the graph input stream that will receive input audio samples.
   * @param outputStream the output stream from which output audio samples will be produced.
   * @param numChannels the number of audio channels in the input audio stream.
   * @param audioSampleRateInHz the sample rate for audio samples in hertz (Hz).
   */
  public void addAudioStreams(
      @Nullable String inputStream,
      @Nullable String outputStream,
      int numChannels,
      double audioSampleRateInHz) {
    audioInputStream = inputStream;
    audioOutputStream = outputStream;
    numAudioChannels = numChannels;
    audioSampleRate = audioSampleRateInHz;

    if (audioInputStream != null) {
      Packet audioHeader =
          packetCreator.createTimeSeriesHeader(numAudioChannels, audioSampleRateInHz);
      mediapipeGraph.setStreamHeader(audioInputStream, audioHeader);
    }

    if (audioOutputStream != null) {
      AudioFormat audioFormat =
          new AudioFormat.Builder()
              .setSampleRate((int) audioSampleRateInHz)
              .setChannelMask(numAudioChannels)
              .build();
      mediapipeGraph.addPacketCallback(
          audioOutputStream,
          new PacketCallback() {
            @Override
            public void process(Packet packet) {
              List<AudioDataConsumer> currentAudioConsumers;
              synchronized (this) {
                currentAudioConsumers = audioConsumers;
              }
              for (AudioDataConsumer consumer : currentAudioConsumers) {
                byte[] audioData = PacketGetter.getAudioByteData(packet);
                consumer.onNewAudioData(audioData, packet.getTimestamp(), audioFormat);
              }
            }
          });
    }
  }

  /**
   * Interface to be used so that this class can receive a callback when onNewFrame has determined
   * it will process an input frame. Can be used to feed packets to accessory streams.
@@ -195,16 +134,9 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
  }

  @Override
  public void setConsumer(TextureFrameConsumer consumer) {
  public void setConsumer(TextureFrameConsumer listener) {
    synchronized (this) {
      videoConsumers = Arrays.asList(consumer);
    }
  }

  @Override
  public void setAudioConsumer(AudioDataConsumer consumer) {
    synchronized (this) {
      audioConsumers = Arrays.asList(consumer);
      consumers = Arrays.asList(listener);
    }
  }

@@ -221,20 +153,20 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
    mediapipeGraph.addPacketCallback(outputStream, callback);
  }

  public void addConsumer(TextureFrameConsumer consumer) {
  public void addConsumer(TextureFrameConsumer listener) {
    synchronized (this) {
      List<TextureFrameConsumer> newConsumers = new ArrayList<>(videoConsumers);
      newConsumers.add(consumer);
      videoConsumers = newConsumers;
      List<TextureFrameConsumer> newConsumers = new ArrayList<>(consumers);
      newConsumers.add(listener);
      consumers = newConsumers;
    }
  }

  public boolean removeConsumer(TextureFrameConsumer listener) {
    boolean existed;
    synchronized (this) {
      List<TextureFrameConsumer> newConsumers = new ArrayList<>(videoConsumers);
      List<TextureFrameConsumer> newConsumers = new ArrayList<>(consumers);
      existed = newConsumers.remove(listener);
      videoConsumers = newConsumers;
      consumers = newConsumers;
    }
    return existed;
  }

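addConsumer() and removeConsumer() above use a copy-on-write list: writers build and publish a fresh list under the lock, so process() can grab the current reference and iterate without holding the lock. A minimal generic sketch of that pattern (hypothetical class, not part of this change):

import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

final class ConsumerRegistry<T> {
  // Readers copy the current reference; writers replace the list wholesale.
  private List<T> consumers = new ArrayList<>();

  synchronized void add(T consumer) {
    List<T> updated = new ArrayList<>(consumers);
    updated.add(consumer);
    consumers = updated;  // publish the new snapshot
  }

  synchronized boolean remove(T consumer) {
    List<T> updated = new ArrayList<>(consumers);
    boolean existed = updated.remove(consumer);
    consumers = updated;
    return existed;
  }

  void forEach(Consumer<T> action) {
    List<T> snapshot;
    synchronized (this) {
      snapshot = consumers;  // take the current snapshot, then iterate lock-free
    }
    for (T item : snapshot) {
      action.accept(item);
    }
  }
}
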
@@ -373,45 +305,4 @@ public class FrameProcessor implements TextureFrameProcessor, AudioDataProcessor
  private void startGraph() {
    mediapipeGraph.startRunningGraph();
  }

  @Override
  public void onNewAudioData(byte[] audioData, long timestampMicros, AudioFormat audioFormat) {
    if (audioFormat.getChannelCount() != numAudioChannels
        || audioFormat.getSampleRate() != audioSampleRate) {
      Log.e(TAG, "Producer's AudioFormat doesn't match FrameProcessor's AudioFormat");
      return;
    }
    Preconditions.checkNotNull(audioInputStream);

    int numSamples = audioData.length / BYTES_PER_MONO_SAMPLE / numAudioChannels;
    byte[] data = audioData;
    Packet audioPacket = packetCreator.createAudioPacket(data, numAudioChannels, numSamples);
    try {
      // addConsumablePacketToInputStream allows the graph to take exclusive ownership of the
      // packet, which may allow for more memory optimizations.
      mediapipeGraph.addConsumablePacketToInputStream(
          audioInputStream, audioPacket, timestampMicros);
    } catch (MediaPipeException e) {
      Log.e(TAG, "Mediapipe error: ", e);
    }
    audioPacket.release();
  }

  public void addAudioConsumer(AudioDataConsumer consumer) {
    synchronized (this) {
      List<AudioDataConsumer> newConsumers = new ArrayList<>(audioConsumers);
      newConsumers.add(consumer);
      audioConsumers = newConsumers;
    }
  }

  public boolean removeAudioConsumer(AudioDataConsumer consumer) {
    boolean existed;
    synchronized (this) {
      List<AudioDataConsumer> newConsumers = new ArrayList<>(audioConsumers);
      existed = newConsumers.remove(consumer);
      audioConsumers = newConsumers;
    }
    return existed;
  }
}

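The sample-count arithmetic in the removed onNewAudioData() assumes 16-bit PCM: two bytes per mono sample, so the number of multi-channel sample frames is bytes / 2 / channels. A minimal sketch with hypothetical values (not part of this change):

public final class PcmSampleCount {
  private static final int BYTES_PER_MONO_SAMPLE = 2;  // 16-bit PCM

  public static void main(String[] args) {
    int numAudioChannels = 2;           // hypothetical stereo stream
    byte[] audioData = new byte[1024];  // hypothetical recorded buffer
    // Bytes -> mono samples -> per-channel sample frames.
    int numSamples = audioData.length / BYTES_PER_MONO_SAMPLE / numAudioChannels;
    System.out.println("sample frames: " + numSamples);  // prints 256
  }
}
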
@@ -411,7 +411,7 @@ void Graph::SetPacketJavaClass(JNIEnv* env) {

  // Set the timestamp of the packet in-place by calling the rvalue-reference
  // version of At here.
  packet = std::move(packet).At(Timestamp(timestamp));
  packet = std::move(packet).At(Timestamp::CreateNoErrorChecking(timestamp));

  // Then std::move it into the input stream.
  return AddPacketToInputStream(stream_name, std::move(packet));
@@ -240,11 +240,11 @@ JNIEXPORT void JNICALL GRAPH_METHOD(nativeAddPacketToInputStream)(
  mediapipe::android::Graph* mediapipe_graph =
      reinterpret_cast<mediapipe::android::Graph*>(context);
  // We push in a copy of the current packet at the given timestamp.
  ThrowIfError(env,
               mediapipe_graph->AddPacketToInputStream(
  ThrowIfError(
      env, mediapipe_graph->AddPacketToInputStream(
               JStringToStdString(env, stream_name),
               mediapipe::android::Graph::GetPacketFromHandle(packet).At(
                   mediapipe::Timestamp(timestamp))));
                   mediapipe::Timestamp::CreateNoErrorChecking(timestamp))));
}

JNIEXPORT void JNICALL GRAPH_METHOD(nativeMovePacketToInputStream)(
@@ -203,14 +203,14 @@ SetClipStartTimestamp(1000000, &sequence);
SetClipEndTimestamp(6000000, &sequence);

// For an object tracking task with action labels:
std::vector<mediapipe::Locations> locations_on_frame_1;
std::vector<mediapipe::Location> locations_on_frame_1;
AddBBox(locations_on_frame_1, &sequence);
AddBBoxTimestamp(3000000, &sequence);
AddBBoxLabelIndex({4, 3}, &sequence);
AddBBoxLabelString({"run", "jump"}, &sequence);
AddBBoxTrackString({"id_0", "id_1"}, &sequence);
// AddBBoxClassString({"cls_0", "cls_0"}, &sequence); // if required
std::vector<mediapipe::Locations> locations_on_frame_2;
std::vector<mediapipe::Location> locations_on_frame_2;
AddBBox(locations_on_frame_2, &sequence);
AddBBoxTimestamp(5000000, &sequence);
AddBBoxLabelIndex({3}, &sequence);
@@ -470,8 +470,8 @@ tasks and tracking (or class) fields for tracking information.
|`CLASS_SEGMENTATION/image/format`|context bytes|`set_class_segmentation_format` / `SetClassSegmentationFormat`|The encoding format of the class label images.|
|`CLASS_SEGMENTATION/image/height`|context int|`set_class_segmentation_height` / `SetClassSegmentationHeight`|The height of the image in pixels.|
|`CLASS_SEGMENTATION/image/width`|context int|`set_class_segmentation_width` / `SetClassSegmentationWidth`|The width of the image in pixels.|
|`CLASS_SEGMENTATION/image/class/ label/index`|context int list|`set_class_segmentation_label_index` / `SetClassSegmentationLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`CLASS_SEGMENTATION/image/class/ label/string`|context bytes list|`set_class_segmentation_label_string` / `SetClassSegmentationLabelString`|A mapping from values in the image to class labels.|
|`CLASS_SEGMENTATION/image/class/ label/index`|context int list|`set_class_segmentation_class_label_index` / `SetClassSegmentationClassLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`CLASS_SEGMENTATION/image/class/ label/string`|context bytes list|`set_class_segmentation_class_label_string` / `SetClassSegmentationClassLabelString`|A mapping from values in the image to class labels.|

### Keys related to image instance segmentation
| key | type | python call / c++ call | description |
@@ -482,8 +482,8 @@ tasks and tracking (or class) fields for tracking information.
|`INSTANCE_SEGMENTATION/image/ format`|context bytes|`set_instance_segmentation_format` / `SetInstanceSegmentationFormat`|The encoding format of the object instance labels.|
|`INSTANCE_SEGMENTATION/image/ height`|context int|`set_instance_segmentation_height` / `SetInstanceSegmentationHeight`|The height of the image in pixels.|
|`INSTANCE_SEGMENTATION/image/ width`|context int|`set_instance_segmentation_width` / `SetInstanceSegmentationWidth`|The width of the image in pixels.|
|`INSTANCE_SEGMENTATION/image/ class/label/index`|context int list|`set_instance_segmentation_label_index` / `SetInstanceSegmentationLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ class/label/string`|context bytes list|`set_instance_segmentation_label_string` / `SetInstanceSegmentationLabelString`|A mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ class/label/index`|context int list|`set_instance_segmentation_class_label_index` / `SetInstanceSegmentationClassLabelIndex`|If necessary a mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ class/label/string`|context bytes list|`set_instance_segmentation_class_label_string` / `SetInstanceSegmentationClassLabelString`|A mapping from values in the image to class labels.|
|`INSTANCE_SEGMENTATION/image/ object/class/index`|context int|`set_instance_segmentation_object_class_index` / `SetInstanceSegmentationObjectClassIndex`|If necessary a mapping from values in the image to class indices.|

### Keys related to optical flow
@@ -563,6 +563,8 @@ const char kFeatureNumSamplesKey[] = "feature/num_samples";
const char kFeaturePacketRateKey[] = "feature/packet_rate";
// For audio, the original audio sampling rate the feature is derived from.
const char kFeatureAudioSampleRateKey[] = "feature/audio_sample_rate";
// The feature as a list of floats.
const char kContextFeatureFloatsKey[] = "context_feature/floats";

// Feature list keys:
// The feature as a list of floats.
@@ -593,6 +595,8 @@ void AddAudioAsFeature(const std::string& prefix,

PREFIXED_VECTOR_INT64_CONTEXT_FEATURE(FeatureDimensions, kFeatureDimensionsKey);
PREFIXED_FLOAT_CONTEXT_FEATURE(FeatureRate, kFeatureRateKey);
PREFIXED_VECTOR_FLOAT_CONTEXT_FEATURE(ContextFeatureFloats,
                                      kContextFeatureFloatsKey);
PREFIXED_BYTES_CONTEXT_FEATURE(FeatureBytesFormat, kFeatureBytesFormatKey);
PREFIXED_VECTOR_FLOAT_FEATURE_LIST(FeatureFloats, kFeatureFloatsKey);
PREFIXED_VECTOR_BYTES_FEATURE_LIST(FeatureBytes, kFeatureBytesKey);