Update PackMediaSequenceCalculator to support adding clip/media/id to the MediaSequence.
Because the media ID is usually a video ID that is provided to the graph as a side packet, the calculator expects it to be provided as an input side packet instead of an input stream. PiperOrigin-RevId: 558266967
This commit is contained in:
parent
fda0d19337
commit
a44c810921
|
@ -929,6 +929,7 @@ cc_test(
|
||||||
"//mediapipe/calculators/image:opencv_image_encoder_calculator_cc_proto",
|
"//mediapipe/calculators/image:opencv_image_encoder_calculator_cc_proto",
|
||||||
"//mediapipe/framework:calculator_framework",
|
"//mediapipe/framework:calculator_framework",
|
||||||
"//mediapipe/framework:calculator_runner",
|
"//mediapipe/framework:calculator_runner",
|
||||||
|
"//mediapipe/framework:packet",
|
||||||
"//mediapipe/framework:timestamp",
|
"//mediapipe/framework:timestamp",
|
||||||
"//mediapipe/framework/formats:classification_cc_proto",
|
"//mediapipe/framework/formats:classification_cc_proto",
|
||||||
"//mediapipe/framework/formats:detection_cc_proto",
|
"//mediapipe/framework/formats:detection_cc_proto",
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -45,6 +46,7 @@ const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
||||||
const char kBBoxTag[] = "BBOX";
|
const char kBBoxTag[] = "BBOX";
|
||||||
const char kKeypointsTag[] = "KEYPOINTS";
|
const char kKeypointsTag[] = "KEYPOINTS";
|
||||||
const char kSegmentationMaskTag[] = "CLASS_SEGMENTATION";
|
const char kSegmentationMaskTag[] = "CLASS_SEGMENTATION";
|
||||||
|
const char kClipMediaIdTag[] = "CLIP_MEDIA_ID";
|
||||||
|
|
||||||
namespace tf = ::tensorflow;
|
namespace tf = ::tensorflow;
|
||||||
namespace mpms = mediapipe::mediasequence;
|
namespace mpms = mediapipe::mediasequence;
|
||||||
|
@ -56,17 +58,21 @@ namespace mpms = mediapipe::mediasequence;
|
||||||
// context features can be supplied verbatim in the calculator's options. The
|
// context features can be supplied verbatim in the calculator's options. The
|
||||||
// SequenceExample will conform to the description in media_sequence.h.
|
// SequenceExample will conform to the description in media_sequence.h.
|
||||||
//
|
//
|
||||||
// The supported input stream tags are "IMAGE", which stores the encoded
|
// The supported input stream tags are:
|
||||||
// images from the OpenCVImageEncoderCalculator, "IMAGE_LABEL", which stores
|
// * "IMAGE", which stores the encoded images from the
|
||||||
// image labels from vector<Classification>, "FORWARD_FLOW_ENCODED", which
|
// OpenCVImageEncoderCalculator,
|
||||||
// stores the encoded optical flow from the same calculator, "BBOX" which stores
|
// * "IMAGE_LABEL", which stores image labels from vector<Classification>,
|
||||||
// bounding boxes from vector<Detections>, and streams with the
|
// * "FORWARD_FLOW_ENCODED", which stores the encoded optical flow from the same
|
||||||
// "FLOAT_FEATURE_${NAME}" pattern, which stores the values from vector<float>'s
|
// calculator,
|
||||||
// associated with the name ${NAME}. "KEYPOINTS" stores a map of 2D keypoints
|
// * "BBOX" which stores bounding boxes from vector<Detections>,
|
||||||
// from flat_hash_map<string, vector<pair<float, float>>>. "IMAGE_${NAME}",
|
// * streams with the "FLOAT_FEATURE_${NAME}" pattern, which stores the values
|
||||||
// "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store prefixed versions of
|
// from vector<float>'s associated with the name ${NAME},
|
||||||
// each stream, which allows for multiple image streams to be included. However,
|
// * "KEYPOINTS" stores a map of 2D keypoints from flat_hash_map<string,
|
||||||
// the default names are suppored by more tools.
|
// vector<pair<float, float>>>,
|
||||||
|
// * "CLIP_MEDIA_ID", which stores the clip's media ID as a string (supplied as an input side packet rather than an input stream).
|
||||||
|
// "IMAGE_${NAME}", "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store
|
||||||
|
// prefixed versions of each stream, which allows for multiple image streams to
|
||||||
|
// be included. However, the default names are supported by more tools.
|
||||||
//
|
//
|
||||||
// Example config:
|
// Example config:
|
||||||
// node {
|
// node {
|
||||||
|
@ -102,6 +108,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
||||||
static absl::Status GetContract(CalculatorContract* cc) {
|
static absl::Status GetContract(CalculatorContract* cc) {
|
||||||
RET_CHECK(cc->InputSidePackets().HasTag(kSequenceExampleTag));
|
RET_CHECK(cc->InputSidePackets().HasTag(kSequenceExampleTag));
|
||||||
cc->InputSidePackets().Tag(kSequenceExampleTag).Set<tf::SequenceExample>();
|
cc->InputSidePackets().Tag(kSequenceExampleTag).Set<tf::SequenceExample>();
|
||||||
|
if (cc->InputSidePackets().HasTag(kClipMediaIdTag)) {
|
||||||
|
cc->InputSidePackets().Tag(kClipMediaIdTag).Set<std::string>();
|
||||||
|
}
|
||||||
|
|
||||||
if (cc->Inputs().HasTag(kForwardFlowEncodedTag)) {
|
if (cc->Inputs().HasTag(kForwardFlowEncodedTag)) {
|
||||||
cc->Inputs()
|
cc->Inputs()
|
||||||
|
@ -190,6 +199,11 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
||||||
cc->InputSidePackets()
|
cc->InputSidePackets()
|
||||||
.Tag(kSequenceExampleTag)
|
.Tag(kSequenceExampleTag)
|
||||||
.Get<tf::SequenceExample>());
|
.Get<tf::SequenceExample>());
|
||||||
|
if (cc->InputSidePackets().HasTag(kClipMediaIdTag) &&
|
||||||
|
!cc->InputSidePackets().Tag(kClipMediaIdTag).IsEmpty()) {
|
||||||
|
clip_media_id_ =
|
||||||
|
cc->InputSidePackets().Tag(kClipMediaIdTag).Get<std::string>();
|
||||||
|
}
|
||||||
|
|
||||||
const auto& context_features =
|
const auto& context_features =
|
||||||
cc->Options<PackMediaSequenceCalculatorOptions>().context_feature_map();
|
cc->Options<PackMediaSequenceCalculatorOptions>().context_feature_map();
|
||||||
|
@ -592,10 +606,14 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (clip_media_id_.has_value()) {
|
||||||
|
mpms::SetClipMediaId(*clip_media_id_, sequence_.get());
|
||||||
|
}
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<tf::SequenceExample> sequence_;
|
std::unique_ptr<tf::SequenceExample> sequence_;
|
||||||
|
std::optional<std::string> clip_media_id_ = std::nullopt;
|
||||||
std::map<std::string, bool> features_present_;
|
std::map<std::string, bool> features_present_;
|
||||||
bool replace_keypoints_;
|
bool replace_keypoints_;
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "mediapipe/framework/formats/detection.pb.h"
|
#include "mediapipe/framework/formats/detection.pb.h"
|
||||||
#include "mediapipe/framework/formats/location.h"
|
#include "mediapipe/framework/formats/location.h"
|
||||||
#include "mediapipe/framework/formats/location_opencv.h"
|
#include "mediapipe/framework/formats/location_opencv.h"
|
||||||
|
#include "mediapipe/framework/packet.h"
|
||||||
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
||||||
#include "mediapipe/framework/port/status_matchers.h"
|
#include "mediapipe/framework/port/status_matchers.h"
|
||||||
#include "mediapipe/framework/timestamp.h"
|
#include "mediapipe/framework/timestamp.h"
|
||||||
|
@ -63,6 +64,7 @@ constexpr char kImageLabelOtherTag[] = "IMAGE_LABEL_OTHER";
|
||||||
constexpr char kImagePrefixTag[] = "IMAGE_PREFIX";
|
constexpr char kImagePrefixTag[] = "IMAGE_PREFIX";
|
||||||
constexpr char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
|
constexpr char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
|
||||||
constexpr char kImageTag[] = "IMAGE";
|
constexpr char kImageTag[] = "IMAGE";
|
||||||
|
constexpr char kClipMediaIdTag[] = "CLIP_MEDIA_ID";
|
||||||
|
|
||||||
class PackMediaSequenceCalculatorTest : public ::testing::Test {
|
class PackMediaSequenceCalculatorTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
|
@ -70,10 +72,14 @@ class PackMediaSequenceCalculatorTest : public ::testing::Test {
|
||||||
const tf::Features& features,
|
const tf::Features& features,
|
||||||
const bool output_only_if_all_present,
|
const bool output_only_if_all_present,
|
||||||
const bool replace_instead_of_append,
|
const bool replace_instead_of_append,
|
||||||
const bool output_as_zero_timestamp = false) {
|
const bool output_as_zero_timestamp = false,
|
||||||
|
const std::vector<std::string>& input_side_packets = {
|
||||||
|
"SEQUENCE_EXAMPLE:input_sequence"}) {
|
||||||
CalculatorGraphConfig::Node config;
|
CalculatorGraphConfig::Node config;
|
||||||
config.set_calculator("PackMediaSequenceCalculator");
|
config.set_calculator("PackMediaSequenceCalculator");
|
||||||
config.add_input_side_packet("SEQUENCE_EXAMPLE:input_sequence");
|
for (const std::string& side_packet : input_side_packets) {
|
||||||
|
config.add_input_side_packet(side_packet);
|
||||||
|
}
|
||||||
config.add_output_stream("SEQUENCE_EXAMPLE:output_sequence");
|
config.add_output_stream("SEQUENCE_EXAMPLE:output_sequence");
|
||||||
for (const std::string& stream : input_streams) {
|
for (const std::string& stream : input_streams) {
|
||||||
config.add_input_stream(stream);
|
config.add_input_stream(stream);
|
||||||
|
@ -833,6 +839,88 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoMaskDetections) {
|
||||||
testing::ElementsAreArray(::std::vector<std::string>({"mask"})));
|
testing::ElementsAreArray(::std::vector<std::string>({"mask"})));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that a media ID supplied through the CLIP_MEDIA_ID input side
// packet is written into the output SequenceExample when the input sequence
// starts out without one.
TEST_F(PackMediaSequenceCalculatorTest, AddClipMediaId) {
|
||||||
|
// Configure the node with both the sequence and the media-ID side packets.
SetUpCalculator(
|
||||||
|
/*input_streams=*/{"FLOAT_FEATURE_TEST:test",
|
||||||
|
"FLOAT_FEATURE_OTHER:test2"},
|
||||||
|
/*features=*/{},
|
||||||
|
/*output_only_if_all_present=*/false,
|
||||||
|
/*replace_instead_of_append=*/true,
|
||||||
|
/*output_as_zero_timestamp=*/false, /*input_side_packets=*/
|
||||||
|
{"SEQUENCE_EXAMPLE:input_sequence", "CLIP_MEDIA_ID:video_id"});
|
||||||
|
// Freshly constructed input sequence: no media ID is set on it here.
auto input_sequence = absl::make_unique<tf::SequenceExample>();
|
||||||
|
const std::string test_video_id = "test_video_id";
|
||||||
|

||||||
|
int num_timesteps = 2;
|
||||||
|
// Feed one packet per timestep into each of the two float feature streams.
for (int i = 0; i < num_timesteps; ++i) {
|
||||||
|
// Two-element vector with every element equal to 2 << i.
auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||||
|
runner_->MutableInputs()
|
||||||
|
->Tag(kFloatFeatureTestTag)
|
||||||
|
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||||
|
vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||||
|
runner_->MutableInputs()
|
||||||
|
->Tag(kFloatFeatureOtherTag)
|
||||||
|
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||||
|
}
|
||||||
|

||||||
|
// Provide the media ID and the input sequence as side packets.
runner_->MutableSidePackets()->Tag(kClipMediaIdTag) =
|
||||||
|
MakePacket<std::string>(test_video_id);
|
||||||
|
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||||
|
Adopt(input_sequence.release());
|
||||||
|

||||||
|
MP_ASSERT_OK(runner_->Run());
|
||||||
|

||||||
|
const std::vector<Packet>& output_packets =
|
||||||
|
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||||
|
// Exactly one packed SequenceExample is expected on the output stream.
ASSERT_EQ(1, output_packets.size());
|
||||||
|
const tf::SequenceExample& output_sequence =
|
||||||
|
output_packets[0].Get<tf::SequenceExample>();
|
||||||
|

||||||
|
// The output sequence must carry the ID passed in via the side packet.
ASSERT_EQ(test_video_id, mpms::GetClipMediaId(output_sequence));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verifies that a media ID supplied through the CLIP_MEDIA_ID input side
// packet replaces a media ID that is already set on the input
// SequenceExample.
TEST_F(PackMediaSequenceCalculatorTest, ReplaceClipMediaId) {
|
||||||
|
// Configure the node with both the sequence and the media-ID side packets.
SetUpCalculator(
|
||||||
|
/*input_streams=*/{"FLOAT_FEATURE_TEST:test",
|
||||||
|
"FLOAT_FEATURE_OTHER:test2"},
|
||||||
|
/*features=*/{},
|
||||||
|
/*output_only_if_all_present=*/false,
|
||||||
|
/*replace_instead_of_append=*/true,
|
||||||
|
/*output_as_zero_timestamp=*/false, /*input_side_packets=*/
|
||||||
|
{"SEQUENCE_EXAMPLE:input_sequence", "CLIP_MEDIA_ID:video_id"});
|
||||||
|
auto input_sequence = absl::make_unique<tf::SequenceExample>();
|
||||||
|
// Pre-populate the input sequence with a different ID so the test can tell
// whether the side packet value overrides it.
const std::string existing_video_id = "existing_video_id";
|
||||||
|
mpms::SetClipMediaId(existing_video_id, input_sequence.get());
|
||||||
|
const std::string test_video_id = "test_video_id";
|
||||||
|

||||||
|
int num_timesteps = 2;
|
||||||
|
// Feed one packet per timestep into each of the two float feature streams.
for (int i = 0; i < num_timesteps; ++i) {
|
||||||
|
// Two-element vector with every element equal to 2 << i.
auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||||
|
runner_->MutableInputs()
|
||||||
|
->Tag(kFloatFeatureTestTag)
|
||||||
|
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||||
|
vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||||
|
runner_->MutableInputs()
|
||||||
|
->Tag(kFloatFeatureOtherTag)
|
||||||
|
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||||
|
}
|
||||||
|

||||||
|
// NOTE(review): unlike AddClipMediaId, this side packet is stamped with
// Timestamp(0); presumably the timestamp is irrelevant for a side packet —
// confirm and consider dropping it for consistency.
runner_->MutableSidePackets()->Tag(kClipMediaIdTag) =
|
||||||
|
MakePacket<std::string>(test_video_id).At(Timestamp(0));
|
||||||
|
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||||
|
Adopt(input_sequence.release());
|
||||||
|

||||||
|
MP_ASSERT_OK(runner_->Run());
|
||||||
|

||||||
|
const std::vector<Packet>& output_packets =
|
||||||
|
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||||
|
// Exactly one packed SequenceExample is expected on the output stream.
ASSERT_EQ(1, output_packets.size());
|
||||||
|
const tf::SequenceExample& output_sequence =
|
||||||
|
output_packets[0].Get<tf::SequenceExample>();
|
||||||
|

||||||
|
// The side packet value, not the pre-existing ID, must be in the output.
ASSERT_EQ(test_video_id, mpms::GetClipMediaId(output_sequence));
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(PackMediaSequenceCalculatorTest, MissingStreamOK) {
|
TEST_F(PackMediaSequenceCalculatorTest, MissingStreamOK) {
|
||||||
SetUpCalculator(
|
SetUpCalculator(
|
||||||
{"FORWARD_FLOW_ENCODED:flow", "FLOAT_FEATURE_I3D_FLOW:feature"}, {},
|
{"FORWARD_FLOW_ENCODED:flow", "FLOAT_FEATURE_I3D_FLOW:feature"}, {},
|
||||||
|
|
Loading…
Reference in New Issue
Block a user