Update PackMediaSequenceCalculator to support adding clip/media/id to the MediaSequence.
As the media ID is usually a video ID that is provided to the graph as a side packet, the calculator expects it to be provided as an input side packet instead of an input stream. PiperOrigin-RevId: 558266967
This commit is contained in:
parent
fda0d19337
commit
a44c810921
|
@ -929,6 +929,7 @@ cc_test(
|
|||
"//mediapipe/calculators/image:opencv_image_encoder_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_runner",
|
||||
"//mediapipe/framework:packet",
|
||||
"//mediapipe/framework:timestamp",
|
||||
"//mediapipe/framework/formats:classification_cc_proto",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -45,6 +46,7 @@ const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
|||
const char kBBoxTag[] = "BBOX";
|
||||
const char kKeypointsTag[] = "KEYPOINTS";
|
||||
const char kSegmentationMaskTag[] = "CLASS_SEGMENTATION";
|
||||
const char kClipMediaIdTag[] = "CLIP_MEDIA_ID";
|
||||
|
||||
namespace tf = ::tensorflow;
|
||||
namespace mpms = mediapipe::mediasequence;
|
||||
|
@ -56,17 +58,21 @@ namespace mpms = mediapipe::mediasequence;
|
|||
// context features can be supplied verbatim in the calculator's options. The
|
||||
// SequenceExample will conform to the description in media_sequence.h.
|
||||
//
|
||||
// The supported input stream tags are "IMAGE", which stores the encoded
|
||||
// images from the OpenCVImageEncoderCalculator, "IMAGE_LABEL", which stores
|
||||
// image labels from vector<Classification>, "FORWARD_FLOW_ENCODED", which
|
||||
// stores the encoded optical flow from the same calculator, "BBOX" which stores
|
||||
// bounding boxes from vector<Detections>, and streams with the
|
||||
// "FLOAT_FEATURE_${NAME}" pattern, which stores the values from vector<float>'s
|
||||
// associated with the name ${NAME}. "KEYPOINTS" stores a map of 2D keypoints
|
||||
// from flat_hash_map<string, vector<pair<float, float>>>. "IMAGE_${NAME}",
|
||||
// "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store prefixed versions of
|
||||
// each stream, which allows for multiple image streams to be included. However,
|
||||
// the default names are supported by more tools.
|
||||
// The supported input stream tags are:
|
||||
// * "IMAGE", which stores the encoded images from the
|
||||
// OpenCVImageEncoderCalculator,
|
||||
// * "IMAGE_LABEL", which stores image labels from vector<Classification>,
|
||||
// * "FORWARD_FLOW_ENCODED", which stores the encoded optical flow from the same
|
||||
// calculator,
|
||||
// * "BBOX" which stores bounding boxes from vector<Detections>,
|
||||
// * streams with the "FLOAT_FEATURE_${NAME}" pattern, which stores the values
|
||||
// from vector<float>'s associated with the name ${NAME},
|
||||
// * "KEYPOINTS" stores a map of 2D keypoints from flat_hash_map<string,
|
||||
// vector<pair<float, float>>>,
|
||||
// * "CLIP_MEDIA_ID", which stores the clip's media ID as a string.
|
||||
// "IMAGE_${NAME}", "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store
|
||||
// prefixed versions of each stream, which allows for multiple image streams to
|
||||
// be included. However, the default names are supported by more tools.
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
|
@ -102,6 +108,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
static absl::Status GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->InputSidePackets().HasTag(kSequenceExampleTag));
|
||||
cc->InputSidePackets().Tag(kSequenceExampleTag).Set<tf::SequenceExample>();
|
||||
if (cc->InputSidePackets().HasTag(kClipMediaIdTag)) {
|
||||
cc->InputSidePackets().Tag(kClipMediaIdTag).Set<std::string>();
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag(kForwardFlowEncodedTag)) {
|
||||
cc->Inputs()
|
||||
|
@ -190,6 +199,11 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
cc->InputSidePackets()
|
||||
.Tag(kSequenceExampleTag)
|
||||
.Get<tf::SequenceExample>());
|
||||
if (cc->InputSidePackets().HasTag(kClipMediaIdTag) &&
|
||||
!cc->InputSidePackets().Tag(kClipMediaIdTag).IsEmpty()) {
|
||||
clip_media_id_ =
|
||||
cc->InputSidePackets().Tag(kClipMediaIdTag).Get<std::string>();
|
||||
}
|
||||
|
||||
const auto& context_features =
|
||||
cc->Options<PackMediaSequenceCalculatorOptions>().context_feature_map();
|
||||
|
@ -592,10 +606,14 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (clip_media_id_.has_value()) {
|
||||
mpms::SetClipMediaId(*clip_media_id_, sequence_.get());
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
std::unique_ptr<tf::SequenceExample> sequence_;
|
||||
std::optional<std::string> clip_media_id_ = std::nullopt;
|
||||
std::map<std::string, bool> features_present_;
|
||||
bool replace_keypoints_;
|
||||
};
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/location.h"
|
||||
#include "mediapipe/framework/formats/location_opencv.h"
|
||||
#include "mediapipe/framework/packet.h"
|
||||
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
#include "mediapipe/framework/timestamp.h"
|
||||
|
@ -63,6 +64,7 @@ constexpr char kImageLabelOtherTag[] = "IMAGE_LABEL_OTHER";
|
|||
constexpr char kImagePrefixTag[] = "IMAGE_PREFIX";
|
||||
constexpr char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kClipMediaIdTag[] = "CLIP_MEDIA_ID";
|
||||
|
||||
class PackMediaSequenceCalculatorTest : public ::testing::Test {
|
||||
protected:
|
||||
|
@ -70,10 +72,14 @@ class PackMediaSequenceCalculatorTest : public ::testing::Test {
|
|||
const tf::Features& features,
|
||||
const bool output_only_if_all_present,
|
||||
const bool replace_instead_of_append,
|
||||
const bool output_as_zero_timestamp = false) {
|
||||
const bool output_as_zero_timestamp = false,
|
||||
const std::vector<std::string>& input_side_packets = {
|
||||
"SEQUENCE_EXAMPLE:input_sequence"}) {
|
||||
CalculatorGraphConfig::Node config;
|
||||
config.set_calculator("PackMediaSequenceCalculator");
|
||||
config.add_input_side_packet("SEQUENCE_EXAMPLE:input_sequence");
|
||||
for (const std::string& side_packet : input_side_packets) {
|
||||
config.add_input_side_packet(side_packet);
|
||||
}
|
||||
config.add_output_stream("SEQUENCE_EXAMPLE:output_sequence");
|
||||
for (const std::string& stream : input_streams) {
|
||||
config.add_input_stream(stream);
|
||||
|
@ -833,6 +839,88 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoMaskDetections) {
|
|||
testing::ElementsAreArray(::std::vector<std::string>({"mask"})));
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, AddClipMediaId) {
|
||||
SetUpCalculator(
|
||||
/*input_streams=*/{"FLOAT_FEATURE_TEST:test",
|
||||
"FLOAT_FEATURE_OTHER:test2"},
|
||||
/*features=*/{},
|
||||
/*output_only_if_all_present=*/false,
|
||||
/*replace_instead_of_append=*/true,
|
||||
/*output_as_zero_timestamp=*/false, /*input_side_packets=*/
|
||||
{"SEQUENCE_EXAMPLE:input_sequence", "CLIP_MEDIA_ID:video_id"});
|
||||
auto input_sequence = absl::make_unique<tf::SequenceExample>();
|
||||
const std::string test_video_id = "test_video_id";
|
||||
|
||||
int num_timesteps = 2;
|
||||
for (int i = 0; i < num_timesteps; ++i) {
|
||||
auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||
runner_->MutableInputs()
|
||||
->Tag(kFloatFeatureTestTag)
|
||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||
vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||
runner_->MutableInputs()
|
||||
->Tag(kFloatFeatureOtherTag)
|
||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||
}
|
||||
|
||||
runner_->MutableSidePackets()->Tag(kClipMediaIdTag) =
|
||||
MakePacket<std::string>(test_video_id);
|
||||
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||
Adopt(input_sequence.release());
|
||||
|
||||
MP_ASSERT_OK(runner_->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const tf::SequenceExample& output_sequence =
|
||||
output_packets[0].Get<tf::SequenceExample>();
|
||||
|
||||
ASSERT_EQ(test_video_id, mpms::GetClipMediaId(output_sequence));
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, ReplaceClipMediaId) {
|
||||
SetUpCalculator(
|
||||
/*input_streams=*/{"FLOAT_FEATURE_TEST:test",
|
||||
"FLOAT_FEATURE_OTHER:test2"},
|
||||
/*features=*/{},
|
||||
/*output_only_if_all_present=*/false,
|
||||
/*replace_instead_of_append=*/true,
|
||||
/*output_as_zero_timestamp=*/false, /*input_side_packets=*/
|
||||
{"SEQUENCE_EXAMPLE:input_sequence", "CLIP_MEDIA_ID:video_id"});
|
||||
auto input_sequence = absl::make_unique<tf::SequenceExample>();
|
||||
const std::string existing_video_id = "existing_video_id";
|
||||
mpms::SetClipMediaId(existing_video_id, input_sequence.get());
|
||||
const std::string test_video_id = "test_video_id";
|
||||
|
||||
int num_timesteps = 2;
|
||||
for (int i = 0; i < num_timesteps; ++i) {
|
||||
auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||
runner_->MutableInputs()
|
||||
->Tag(kFloatFeatureTestTag)
|
||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||
vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
|
||||
runner_->MutableInputs()
|
||||
->Tag(kFloatFeatureOtherTag)
|
||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||
}
|
||||
|
||||
runner_->MutableSidePackets()->Tag(kClipMediaIdTag) =
|
||||
MakePacket<std::string>(test_video_id).At(Timestamp(0));
|
||||
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||
Adopt(input_sequence.release());
|
||||
|
||||
MP_ASSERT_OK(runner_->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const tf::SequenceExample& output_sequence =
|
||||
output_packets[0].Get<tf::SequenceExample>();
|
||||
|
||||
ASSERT_EQ(test_video_id, mpms::GetClipMediaId(output_sequence));
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, MissingStreamOK) {
|
||||
SetUpCalculator(
|
||||
{"FORWARD_FLOW_ENCODED:flow", "FLOAT_FEATURE_I3D_FLOW:feature"}, {},
|
||||
|
|
Loading…
Reference in New Issue
Block a user