code fill

parent d109328198
commit 5578aa50e8
.gitignore (vendored): 1 line added

@@ -8,3 +8,4 @@ Cargo.lock
 # These are backup files generated by rustfmt
 **/*.rs.bk
+/refs/
Cargo.toml (new file, 15 lines)

@@ -0,0 +1,15 @@
[package]
name = "ux-mediapipe"
version = "0.1.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
name = "mediapipe"

[dependencies]
cgmath = "0.18.0"
libc = "0.2.0"
opencv = {version = "0.63.0", default-features = false, features = ["videoio", "highgui", "imgproc"]}
protobuf = "2.23.0"
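The `videoio`, `highgui`, and `imgproc` features enabled above are the only parts of the OpenCV bindings the bundled examples use. A minimal capture-and-display sketch of just that dependency surface, condensed from examples/hello.rs further down (a sketch for orientation, not itself part of this commit):

// Open the default camera, show frames, exit on any key press.
use opencv::prelude::*;
use opencv::{highgui, videoio, Result};

fn main() -> Result<()> {
    let window = "video capture";
    highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;

    let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
    if !cap.is_opened()? {
        panic!("Unable to open default cam")
    }

    let mut frame = Mat::default();
    loop {
        cap.read(&mut frame)?;
        if frame.size()?.width > 0 {
            highgui::imshow(window, &mut frame)?;
        }
        let key = highgui::wait_key(10)?;
        if key > 0 && key != 255 {
            break;
        }
    }
    Ok(())
}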
@@ -1,2 +1,4 @@
-# ux-media
+# ux-mediapipe
 Rust and mediapipe
+
+bazel build --define MEDIAPIPE_DISABLE_GPU=1 mediapipe:mediagraph
examples/hand_tracking_desktop_live_gpu.txt (new file, 48 lines)

@@ -0,0 +1,48 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:palm_detections"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
  input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
  output_stream: "IMAGE:output_video"
}
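On the Rust side this graph is consumed through `hands::HandDetector`. The call pattern below is condensed from the `hand_tracking` example in examples/hello.rs (next file) and assumes, as that example does, that the detector fills one `Hand` struct per tracked hand from the graph's "hand_landmarks" output:

// Condensed from examples/hello.rs: feed an RGB frame to the hand detector
// and read back the first landmark of the left hand.
use mediapipe::*;
use opencv::prelude::*;

fn track_one_frame(rgb_frame: &Mat) {
    let mut left = Hand::default();
    let mut right = Hand::default();
    let mut detector = hands::HandDetector::default();

    // Writes landmark coordinates into the two Hand structs.
    detector.process(rgb_frame, &mut left, &mut right);
    println!(
        "LANDMARK: {} {} {}",
        left.data[0].x, left.data[0].y, left.data[0].z
    );
}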
examples/hello.rs (new file, 212 lines)

@@ -0,0 +1,212 @@
#![allow(unused_variables)]
#![allow(dead_code)]

use mediapipe::*;

mod examples {
    use super::*;
    use opencv::prelude::*;
    use opencv::{highgui, imgproc, videoio, Result};

    pub fn corner_rectangle() -> Result<()> {
        let window = "video capture";

        highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;

        let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
        if !cap.is_opened()? {
            panic!("Unable to open default cam")
        }

        let detector = hands::HandDetector::default();

        loop {
            let mut frame = Mat::default();
            cap.read(&mut frame)?;
            let size = frame.size()?;
            if size.width > 0 {
                highgui::imshow(window, &mut frame)?
            }
            let key = highgui::wait_key(10)?;
            if key > 0 && key != 255 {
                break;
            }
        }
        Ok(())
    }

    // pub fn face_detection() -> Result<()> {
    //     let window = "video capture";
    //
    //     highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
    //
    //     let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
    //     if !cap.is_opened()? {
    //         panic!("Unable to open default cam")
    //     }
    //
    //     let detector = mediapipe::face_detection::FaceDetector::default();
    //
    //     loop {
    //         let mut frame = Mat::default();
    //         cap.read(&mut frame)?;
    //         let size = frame.size()?;
    //         if size.width > 0 {
    //             highgui::imshow(window, &mut frame)?
    //         }
    //         let key = highgui::wait_key(10)?;
    //         if key > 0 && key != 255 {
    //             break;
    //         }
    //     }
    //     Ok(())
    // }

    pub fn face_mesh() -> Result<()> {
        let window = "video capture";

        highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;

        let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
        if !cap.is_opened()? {
            panic!("Unable to open default cam")
        }

        cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
        cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
        cap.set(videoio::CAP_PROP_FPS, 30.0)?;

        let mut mesh = FaceMesh::default();
        let mut detector = face_mesh::FaceMeshDetector::default();

        let mut raw_frame = Mat::default();
        let mut rgb_frame = Mat::default();
        let mut flip_frame = Mat::default();
        loop {
            cap.read(&mut raw_frame)?;

            let size = raw_frame.size()?;
            if size.width > 0 && !raw_frame.empty() {
                imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
                opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal

                detector.process(&flip_frame, &mut mesh);

                highgui::imshow(window, &mut flip_frame)?;
                println!(
                    "LANDMARK: {} {} {}",
                    mesh.data[0].x, mesh.data[0].y, mesh.data[0].z
                );
            } else {
                println!("WARN: Skip empty frame");
            }

            let key = highgui::wait_key(10)?;
            if key > 0 && key != 255 {
                break;
            }
        }
        Ok(())
    }

    pub fn hand_tracking() -> Result<()> {
        let window = "video capture";

        highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;

        let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
        if !cap.is_opened()? {
            panic!("Unable to open default cam")
        }

        cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
        cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
        cap.set(videoio::CAP_PROP_FPS, 30.0)?;

        let mut left = Hand::default();
        let mut right = Hand::default();
        let mut detector = hands::HandDetector::default();

        let mut raw_frame = Mat::default();
        let mut rgb_frame = Mat::default();
        let mut flip_frame = Mat::default();
        loop {
            cap.read(&mut raw_frame)?;

            let size = raw_frame.size()?;
            if size.width > 0 && !raw_frame.empty() {
                imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
                opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal

                detector.process(&flip_frame, &mut left, &mut right);

                highgui::imshow(window, &mut flip_frame)?;
                println!(
                    "LANDMARK: {} {} {}",
                    left.data[0].x, left.data[0].y, left.data[0].z
                );
            } else {
                println!("WARN: Skip empty frame");
            }

            let key = highgui::wait_key(10)?;
            if key > 0 && key != 255 {
                break;
            }
        }
        Ok(())
    }

    pub fn pose_estimation() -> Result<()> {
        let window = "video capture";

        highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;

        let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
        if !cap.is_opened()? {
            panic!("Unable to open default cam")
        }

        cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
        cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
        cap.set(videoio::CAP_PROP_FPS, 30.0)?;

        let mut pose = Pose::default();
        let mut detector = pose::PoseDetector::default();

        let mut raw_frame = Mat::default();
        let mut rgb_frame = Mat::default();
        let mut flip_frame = Mat::default();
        loop {
            cap.read(&mut raw_frame)?;

            let size = raw_frame.size()?;
            if size.width > 0 && !raw_frame.empty() {
                imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
                opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal

                detector.process(&rgb_frame, &mut pose);

                highgui::imshow(window, &mut rgb_frame)?;
                println!(
                    "LANDMARK: {} {} {}",
                    pose.data[0].x, pose.data[0].y, pose.data[0].z
                );
            } else {
                println!("WARN: Skip empty frame");
            }

            let key = highgui::wait_key(10)?;
            if key > 0 && key != 255 {
                break;
            }
        }
        Ok(())
    }
}

fn main() {
    // examples::pose_estimation().unwrap()
    // examples::hand_tracking().unwrap()
    examples::face_mesh().unwrap()
}
mediapipe/graphs/edge_detection/BUILD (new file, 36 lines)

@@ -0,0 +1,36 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "mobile_calculators",
    deps = [
        "//mediapipe/calculators/image:luminance_calculator",
        "//mediapipe/calculators/image:sobel_edges_calculator",
    ],
)

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

mediapipe_binary_graph(
    name = "mobile_gpu_binary_graph",
    graph = "edge_detection_mobile_gpu.pbtxt",
    output_name = "mobile_gpu.binarypb",
)
@@ -0,0 +1,22 @@
# MediaPipe graph that performs GPU Sobel edge detection on a live video stream.
# Used in the examples in
# mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:helloworld
# and mediapipe/examples/ios/helloworld.

# Images coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Converts RGB images into luminance images, still stored in RGB format.
node: {
  calculator: "LuminanceCalculator"
  input_stream: "input_video"
  output_stream: "luma_video"
}

# Applies the Sobel filter to luminance images stored in RGB format.
node: {
  calculator: "SobelEdgesCalculator"
  input_stream: "luma_video"
  output_stream: "output_video"
}
mediapipe/graphs/face_detection/BUILD (new file, 95 lines)

@@ -0,0 +1,95 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "mobile_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_cpu",
        "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
    ],
)

cc_library(
    name = "desktop_live_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_cpu",
    ],
)

cc_library(
    name = "desktop_live_gpu_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
    ],
)

mediapipe_binary_graph(
    name = "face_detection_mobile_cpu_binary_graph",
    graph = "face_detection_mobile_cpu.pbtxt",
    output_name = "face_detection_mobile_cpu.binarypb",
    deps = [":mobile_calculators"],
)

mediapipe_binary_graph(
    name = "face_detection_mobile_gpu_binary_graph",
    graph = "face_detection_mobile_gpu.pbtxt",
    output_name = "face_detection_mobile_gpu.binarypb",
    deps = [":mobile_calculators"],
)

cc_library(
    name = "face_detection_full_range_mobile_gpu_deps",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_full_range_gpu",
    ],
)

mediapipe_binary_graph(
    name = "face_detection_full_range_mobile_gpu_binary_graph",
    graph = "face_detection_full_range_mobile_gpu.pbtxt",
    output_name = "face_detection_full_range_mobile_gpu.binarypb",
    deps = [":face_detection_full_range_mobile_gpu_deps"],
)

cc_library(
    name = "face_detection_full_range_desktop_live_deps",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_full_range_cpu",
    ],
)
@@ -0,0 +1,58 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.

# CPU buffer. (ImageFrame)
input_stream: "input_video"

# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects faces.
node {
  calculator: "FaceDetectionShortRangeCpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:face_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:face_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,60 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/desktop/face_detection:face_detection_cpu.

# Images coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:detections"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects faces.
node {
  calculator: "FaceDetectionFullRangeCpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,60 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
# mediapipe/examples/ios/facedetectiongpu.

# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects faces.
node {
  calculator: "FaceDetectionFullRangeGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE_GPU:output_video"
}
@@ -0,0 +1,76 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.

# GPU buffer. (GpuBuffer)
input_stream: "input_video"

# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Transfers the input image from GPU to CPU memory for the purpose of
# demonstrating a CPU-based pipeline. Note that the input image on GPU has the
# origin defined at the bottom-left corner (OpenGL convention). As a result,
# the transferred image on CPU also shares the same representation.
node: {
  calculator: "GpuBufferToImageFrameCalculator"
  input_stream: "throttled_input_video"
  output_stream: "input_video_cpu"
}

# Subgraph that detects faces.
node {
  calculator: "FaceDetectionShortRangeCpu"
  input_stream: "IMAGE:input_video_cpu"
  output_stream: "DETECTIONS:face_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:face_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_video_cpu"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video_cpu"
}

# Transfers the annotated image from CPU back to GPU memory, to be sent out of
# the graph.
node: {
  calculator: "ImageFrameToGpuBufferCalculator"
  input_stream: "output_video_cpu"
  output_stream: "output_video"
}
@@ -0,0 +1,58 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.

# GPU buffer. (GpuBuffer)
input_stream: "input_video"

# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects faces.
node {
  calculator: "FaceDetectionShortRangeGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:face_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:face_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE_GPU:output_video"
}
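examples/hello.rs keeps its face_detection example only in commented-out form, so the Rust-side driver for these detection graphs is not exercised by this commit. A hypothetical sketch, assuming a `face_detection::FaceDetector` built like the other detectors (the type name comes from the commented-out code; its `process` signature is not shown anywhere in this diff, so that call is left as a comment):

// Hypothetical: mirrors the commented-out face_detection() in examples/hello.rs.
use opencv::prelude::*;
use opencv::{imgproc, videoio, Result};

fn detect_faces_once() -> Result<()> {
    let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
    let _detector = mediapipe::face_detection::FaceDetector::default();

    let mut raw = Mat::default();
    let mut rgb = Mat::default();
    cap.read(&mut raw)?;
    // MediaPipe graphs expect RGB frames; OpenCV captures BGR.
    imgproc::cvt_color(&raw, &mut rgb, imgproc::COLOR_BGR2RGB, 0)?;

    // The detection graphs above would fill their "face_detections" /
    // "detections" output stream behind a call such as:
    // _detector.process(&rgb, ...);  // exact signature not part of this diff
    Ok(())
}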
mediapipe/graphs/face_effect/BUILD (new file, 44 lines)

@@ -0,0 +1,44 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "face_effect_gpu_deps",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:immediate_mux_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/framework/tool:switch_container",
        "//mediapipe/graphs/face_effect/subgraphs:single_face_geometry_from_detection_gpu",
        "//mediapipe/graphs/face_effect/subgraphs:single_face_geometry_from_landmarks_gpu",
        "//mediapipe/modules/face_geometry:effect_renderer_calculator",
        "//mediapipe/modules/face_geometry:env_generator_calculator",
    ],
)

mediapipe_binary_graph(
    name = "face_effect_gpu_binary_graph",
    graph = "face_effect_gpu.pbtxt",
    output_name = "face_effect_gpu.binarypb",
    deps = [":face_effect_gpu_deps"],
)
mediapipe/graphs/face_effect/data/BUILD (new file, 47 lines)

@@ -0,0 +1,47 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

encode_binary_proto(
    name = "axis",
    input = "axis.pbtxt",
    message_type = "mediapipe.face_geometry.Mesh3d",
    output = "axis.binarypb",
    deps = [
        "//mediapipe/modules/face_geometry/protos:mesh_3d_proto",
    ],
)

encode_binary_proto(
    name = "glasses",
    input = "glasses.pbtxt",
    message_type = "mediapipe.face_geometry.Mesh3d",
    output = "glasses.binarypb",
    deps = [
        "//mediapipe/modules/face_geometry/protos:mesh_3d_proto",
    ],
)

# `.pngblob` is used instead of `.png` to prevent iOS build from preprocessing the image.
# OpenCV is unable to read a PNG file preprocessed by the iOS build.
exports_files([
    "axis.pngblob",
    "facepaint.pngblob",
    "glasses.pngblob",
])
mediapipe/graphs/face_effect/data/axis.pbtxt (new file, 320 lines)

@@ -0,0 +1,320 @@
vertex_type: VERTEX_PT
primitive_type: TRIANGLE
vertex_buffer: -0.100000
vertex_buffer: -0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.873006
vertex_buffer: 1.000000
(The remaining vertex_buffer entries, five floats per vertex for the position/texture mesh, and the index_buffer triangle indices are omitted here; the commit view truncates the listing partway through the index_buffer data.)
mediapipe/graphs/face_effect/data/axis.pngblob (new binary file, 492 B; not shown)
mediapipe/graphs/face_effect/data/facepaint.pngblob (new binary file, 593 KiB; not shown)
mediapipe/graphs/face_effect/data/glasses.pbtxt (new file, 27815 lines; diff suppressed because it is too large)
mediapipe/graphs/face_effect/data/glasses.pngblob (new binary file, 293 KiB; not shown)
mediapipe/graphs/face_effect/face_effect_gpu.pbtxt (new file, 130 lines)

@@ -0,0 +1,130 @@
# MediaPipe graph that applies a face effect to the input video stream.

# GPU buffer. (GpuBuffer)
input_stream: "input_video"

# An integer, which indicates which effect is selected. (int)
#
# If `selected_effect_id` is `0`, the Axis effect is selected.
# If `selected_effect_id` is `1`, the Facepaint effect is selected.
# If `selected_effect_id` is `2`, the Glasses effect is selected.
#
# No other values are allowed for `selected_effect_id`.
input_stream: "selected_effect_id"

# Indicates whether to use the face detection as the input source. (bool)
#
# If `true`, the face detection pipeline will be used to produce landmarks.
# If `false`, the face landmark pipeline will be used to produce landmarks.
input_side_packet: "use_face_detection_input_source"

# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"

# A list of geometry data for a single detected face.
#
# NOTE: there will not be an output packet in this stream for this particular
# timestamp if no faces are detected.
#
# (std::vector<face_geometry::FaceGeometry>)
output_stream: "multi_face_geometry"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Generates an environment that describes the current virtual scene.
node {
  calculator: "FaceGeometryEnvGeneratorCalculator"
  output_side_packet: "ENVIRONMENT:environment"
  node_options: {
    [type.googleapis.com/mediapipe.FaceGeometryEnvGeneratorCalculatorOptions] {
      environment: {
        origin_point_location: TOP_LEFT_CORNER
        perspective_camera: {
          vertical_fov_degrees: 63.0 # 63 degrees
          near: 1.0 # 1cm
          far: 10000.0 # 100m
        }
      }
    }
  }
}

# Computes the face geometry for a single face. The input source is defined
# through `use_face_detection_input_source`.
node {
  calculator: "SwitchContainer"
  input_stream: "IMAGE:throttled_input_video"
  input_side_packet: "ENABLE:use_face_detection_input_source"
  input_side_packet: "ENVIRONMENT:environment"
  output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
  node_options: {
    [type.googleapis.com/mediapipe.SwitchContainerOptions] {
      contained_node: {
        calculator: "SingleFaceGeometryFromLandmarksGpu"
      }
      contained_node: {
        calculator: "SingleFaceGeometryFromDetectionGpu"
      }
    }
  }
}

# Renders the selected effect based on `selected_effect_id`.
node {
  calculator: "SwitchContainer"
  input_stream: "SELECT:selected_effect_id"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
  input_side_packet: "ENVIRONMENT:environment"
  output_stream: "IMAGE_GPU:output_video"
  node_options: {
    [type.googleapis.com/mediapipe.SwitchContainerOptions] {
      contained_node: {
        calculator: "FaceGeometryEffectRendererCalculator"
        node_options: {
          [type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
            effect_texture_path: "mediapipe/graphs/face_effect/data/axis.pngblob"
            effect_mesh_3d_path: "mediapipe/graphs/face_effect/data/axis.binarypb"
          }
        }
      }
      contained_node: {
        calculator: "FaceGeometryEffectRendererCalculator"
        node_options: {
          [type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
            effect_texture_path: "mediapipe/graphs/face_effect/data/facepaint.pngblob"
          }
        }
      }
      contained_node: {
        calculator: "FaceGeometryEffectRendererCalculator"
        node_options: {
          [type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
            effect_texture_path: "mediapipe/graphs/face_effect/data/glasses.pngblob"
            effect_mesh_3d_path: "mediapipe/graphs/face_effect/data/glasses.binarypb"
          }
        }
      }
    }
  }
}
mediapipe/graphs/face_effect/subgraphs/BUILD (new file, 61 lines)

@@ -0,0 +1,61 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_simple_subgraph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_simple_subgraph(
    name = "face_landmarks_smoothing",
    graph = "face_landmarks_smoothing.pbtxt",
    register_as = "FaceLandmarksSmoothing",
    deps = [
        "//mediapipe/calculators/util:landmarks_smoothing_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "single_face_geometry_from_detection_gpu",
    graph = "single_face_geometry_from_detection_gpu.pbtxt",
    register_as = "SingleFaceGeometryFromDetectionGpu",
    deps = [
        ":face_landmarks_smoothing",
        "//mediapipe/calculators/core:concatenate_detection_vector_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
        "//mediapipe/modules/face_geometry:face_geometry_from_detection",
    ],
)

mediapipe_simple_subgraph(
    name = "single_face_geometry_from_landmarks_gpu",
    graph = "single_face_geometry_from_landmarks_gpu.pbtxt",
    register_as = "SingleFaceGeometryFromLandmarksGpu",
    deps = [
        ":face_landmarks_smoothing",
        "//mediapipe/calculators/core:concatenate_vector_calculator",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:landmarks_smoothing_calculator",
        "//mediapipe/modules/face_geometry:face_geometry_from_landmarks",
        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
    ],
)
@ -0,0 +1,24 @@
|
||||||
|
# MediaPipe subgraph that smoothes face landmarks.
|
||||||
|
|
||||||
|
type: "FaceLandmarksSmoothing"
|
||||||
|
|
||||||
|
input_stream: "NORM_LANDMARKS:landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:input_image_size"
|
||||||
|
output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks"
|
||||||
|
|
||||||
|
# Applies smoothing to a face landmark list. The filter options were handpicked
|
||||||
|
# to achieve better visual results.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksSmoothingCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:input_image_size"
|
||||||
|
output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksSmoothingCalculatorOptions] {
|
||||||
|
velocity_filter: {
|
||||||
|
window_size: 5
|
||||||
|
velocity_scale: 20.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,91 @@
# MediaPipe subgraph that extracts geometry from a single face using the face
# landmark pipeline on an input GPU image. The face landmarks are also
# "smoothed" to achieve better visual results.

type: "SingleFaceGeometryFromDetectionGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"

# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"

# A list of geometry data for a single detected face. The size of this
# collection is at most 1 because of the single-face use in this graph.
# (std::vector<face_geometry::FaceGeometry>)
#
# NOTE: if no face is detected at a particular timestamp, there will not be an
# output packet in the `MULTI_FACE_GEOMETRY` stream for this timestamp. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"

# Subgraph that detects faces and corresponding landmarks using the face
# detection pipeline.
node {
  calculator: "FaceDetectionShortRangeGpu"
  input_stream: "IMAGE:input_image"
  output_stream: "DETECTIONS:multi_face_detection"
}

# Extracts the first face detection associated with the most prominent face
# from a collection.
node {
  calculator: "SplitDetectionVectorCalculator"
  input_stream: "multi_face_detection"
  output_stream: "face_detection"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 0 end: 1 }
      element_only: true
    }
  }
}

# Extracts face detection keypoints as normalized landmarks.
node {
  calculator: "DetectionToLandmarksCalculator"
  input_stream: "DETECTION:face_detection"
  output_stream: "LANDMARKS:face_landmarks"
}

# Extracts the input image frame dimensions as a separate packet.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:input_image"
  output_stream: "SIZE:input_image_size"
}

# Applies smoothing to the face landmarks previously extracted from the face
# detection keypoints.
node {
  calculator: "FaceLandmarksSmoothing"
  input_stream: "NORM_LANDMARKS:face_landmarks"
  input_stream: "IMAGE_SIZE:input_image_size"
  output_stream: "NORM_FILTERED_LANDMARKS:smoothed_face_landmarks"
}

# Converts smoothed face landmarks back into the detection format.
node {
  calculator: "LandmarksToDetectionCalculator"
  input_stream: "NORM_LANDMARKS:smoothed_face_landmarks"
  output_stream: "DETECTION:smoothed_face_detection"
}

# Puts the smoothed single face detection back into a collection to simplify
# passing the result into the `FaceGeometryFromDetection` subgraph.
node {
  calculator: "ConcatenateDetectionVectorCalculator"
  input_stream: "smoothed_face_detection"
  output_stream: "multi_smoothed_face_detection"
}

# Computes face geometry from the single face detection.
node {
  calculator: "FaceGeometryFromDetection"
  input_stream: "MULTI_FACE_DETECTION:multi_smoothed_face_detection"
  input_stream: "IMAGE_SIZE:input_image_size"
  input_side_packet: "ENVIRONMENT:environment"
  output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
}
@ -0,0 +1,89 @@
# MediaPipe subgraph that extracts geometry from a single face using the face
# landmark pipeline on an input GPU image. The face landmarks are also
# "smoothed" to achieve better visual results.

type: "SingleFaceGeometryFromLandmarksGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"

# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"

# A list of geometry data for a single detected face. The size of this
# collection is at most 1 because of the single-face use in this graph.
# (std::vector<face_geometry::FaceGeometry>)
#
# NOTE: if no face is detected at a particular timestamp, there will not be an
# output packet in the `MULTI_FACE_GEOMETRY` stream for this timestamp. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"

# Creates a packet to inform the `FaceLandmarkFrontGpu` subgraph to detect at
# most 1 face.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_faces"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 1 }
    }
  }
}

# Subgraph that detects faces and corresponding landmarks using the face
# landmark pipeline.
node {
  calculator: "FaceLandmarkFrontGpu"
  input_stream: "IMAGE:input_image"
  input_side_packet: "NUM_FACES:num_faces"
  output_stream: "LANDMARKS:multi_face_landmarks"
}

# Extracts a single set of face landmarks associated with the most prominent
# face detected from a collection.
node {
  calculator: "SplitNormalizedLandmarkListVectorCalculator"
  input_stream: "multi_face_landmarks"
  output_stream: "face_landmarks"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 0 end: 1 }
      element_only: true
    }
  }
}

# Extracts the input image frame dimensions as a separate packet.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:input_image"
  output_stream: "SIZE:input_image_size"
}

# Applies smoothing to the single set of face landmarks.
node {
  calculator: "FaceLandmarksSmoothing"
  input_stream: "NORM_LANDMARKS:face_landmarks"
  input_stream: "IMAGE_SIZE:input_image_size"
  output_stream: "NORM_FILTERED_LANDMARKS:smoothed_face_landmarks"
}

# Puts the single set of smoothed landmarks back into a collection to simplify
# passing the result into the `FaceGeometryFromLandmarks` subgraph.
node {
  calculator: "ConcatenateLandmarListVectorCalculator"
  input_stream: "smoothed_face_landmarks"
  output_stream: "multi_smoothed_face_landmarks"
}

# Computes face geometry from face landmarks for a single face.
node {
  calculator: "FaceGeometryFromLandmarks"
  input_stream: "MULTI_FACE_LANDMARKS:multi_smoothed_face_landmarks"
  input_stream: "IMAGE_SIZE:input_image_size"
  input_side_packet: "ENVIRONMENT:environment"
  output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
}
69
mediapipe/graphs/face_mesh/BUILD
Normal file
@ -0,0 +1,69 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "desktop_calculators",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
        "//mediapipe/calculators/video:opencv_video_encoder_calculator",
        "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
    ],
)

cc_library(
    name = "desktop_live_calculators",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
    ],
)

cc_library(
    name = "desktop_live_gpu_calculators",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
    ],
)

cc_library(
    name = "mobile_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
    ],
)

mediapipe_binary_graph(
    name = "face_mesh_mobile_gpu_binary_graph",
    graph = "face_mesh_mobile.pbtxt",
    output_name = "face_mesh_mobile_gpu.binarypb",
    deps = [":mobile_calculators"],
)
37
mediapipe/graphs/face_mesh/calculators/BUILD
Normal file
@ -0,0 +1,37 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "face_landmarks_to_render_data_calculator",
    srcs = ["face_landmarks_to_render_data_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_options_cc_proto",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:location_data_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/util:color_cc_proto",
        "//mediapipe/util:render_data_cc_proto",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
    ],
    alwayslink = 1,
)
@ -0,0 +1,104 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h"
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"

namespace mediapipe {

namespace {

constexpr int kNumFaceLandmarkConnections = 132;
// Pairs of landmark indices to be rendered with connections.
constexpr int kFaceLandmarkConnections[] = {
    // Lips.
    61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321,
    321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267,
    269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14,
    14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81,
    81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308,
    // Left eye.
    33, 7, 7, 163, 163, 144, 144, 145, 145, 153, 153, 154, 154, 155, 155, 133,
    33, 246, 246, 161, 161, 160, 160, 159, 159, 158, 158, 157, 157, 173, 173,
    133,
    // Left eyebrow.
    46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107,
    // Left iris.
    474, 475, 475, 476, 476, 477, 477, 474,
    // Right eye.
    263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382,
    362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398,
    398, 362,
    // Right eyebrow.
    276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296,
    336,
    // Right iris.
    469, 470, 470, 471, 471, 472, 472, 469,
    // Face oval.
    10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
    454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
    378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
    136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
    21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};

}  // namespace

// A calculator that converts face landmarks to RenderData proto for
// visualization. Ignores landmark_connections specified in
// LandmarksToRenderDataCalculatorOptions, if any, and always uses a fixed set
// of landmark connections specific to face landmark (defined in
// kFaceLandmarkConnections[] above).
//
// Example config:
// node {
//   calculator: "FaceLandmarksToRenderDataCalculator"
//   input_stream: "NORM_LANDMARKS:landmarks"
//   output_stream: "RENDER_DATA:render_data"
//   options {
//     [LandmarksToRenderDataCalculatorOptions.ext] {
//       landmark_color { r: 0 g: 255 b: 0 }
//       connection_color { r: 0 g: 255 b: 0 }
//       thickness: 4.0
//     }
//   }
// }
class FaceLandmarksToRenderDataCalculator
    : public LandmarksToRenderDataCalculator {
 public:
  absl::Status Open(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(FaceLandmarksToRenderDataCalculator);

absl::Status FaceLandmarksToRenderDataCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));
  options_ = cc->Options<mediapipe::LandmarksToRenderDataCalculatorOptions>();

  for (int i = 0; i < kNumFaceLandmarkConnections; ++i) {
    landmark_connections_.push_back(kFaceLandmarkConnections[i * 2]);
    landmark_connections_.push_back(kFaceLandmarkConnections[i * 2 + 1]);
  }

  return absl::OkStatus();
}

}  // namespace mediapipe
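The calculator above can be exercised outside a full face-mesh pipeline by wrapping it in a one-node graph. The following is a hedged sketch: the single-node config, stream names, and the RenderOneFace helper are illustrative and not part of this commit, and it assumes the calculators/BUILD target above is linked into the binary.

#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/render_data.pb.h"

absl::Status RenderOneFace(const mediapipe::NormalizedLandmarkList& landmarks) {
  auto config = mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "landmarks"
    output_stream: "render_data"
    node {
      calculator: "FaceLandmarksToRenderDataCalculator"
      input_stream: "NORM_LANDMARKS:landmarks"
      output_stream: "RENDER_DATA:render_data"
    }
  )pb");
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  ASSIGN_OR_RETURN(auto poller, graph.AddOutputStreamPoller("render_data"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "landmarks",
      mediapipe::MakePacket<mediapipe::NormalizedLandmarkList>(landmarks)
          .At(mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.CloseInputStream("landmarks"));
  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    // Holds the landmark points plus the fixed face connections from the array above.
    const auto& render_data = packet.Get<mediapipe::RenderData>();
    (void)render_data;
  }
  return graph.WaitUntilDone();
}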
70
mediapipe/graphs/face_mesh/face_mesh_desktop.pbtxt
Normal file
@ -0,0 +1,70 @@
|
||||||
|
# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
|
||||||
|
# on CPU.
|
||||||
|
|
||||||
|
# Path to the input video file. (string)
|
||||||
|
input_side_packet: "input_video_path"
|
||||||
|
# Path to the output video file. (string)
|
||||||
|
input_side_packet: "output_video_path"
|
||||||
|
|
||||||
|
# max_queue_size limits the number of packets enqueued on any input stream
|
||||||
|
# by throttling inputs to the graph. This makes the graph only process one
|
||||||
|
# frame per time.
|
||||||
|
max_queue_size: 1
|
||||||
|
|
||||||
|
# Decodes an input video file into images and a video header.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoDecoderCalculator"
|
||||||
|
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||||
|
output_stream: "VIDEO:input_video"
|
||||||
|
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines side packets for further use in the graph.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:0:num_faces"
|
||||||
|
output_side_packet: "PACKET:1:with_attention"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
packet { bool_value: true }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that renders face-landmark annotation onto the input video.
|
||||||
|
node {
|
||||||
|
calculator: "FaceRendererCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Encodes the annotated images into a video file, adopting properties specified
|
||||||
|
# in the input video header, e.g., video framerate.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoEncoderCalculator"
|
||||||
|
input_stream: "VIDEO:output_video"
|
||||||
|
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||||
|
codec: "avc1"
|
||||||
|
video_format: "mp4"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
66
mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt
Normal file
@ -0,0 +1,66 @@
|
||||||
|
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
|
||||||
|
|
||||||
|
# Input image. (ImageFrame)
|
||||||
|
input_stream: "input_video"
|
||||||
|
|
||||||
|
# Output image with rendered results. (ImageFrame)
|
||||||
|
output_stream: "output_video"
|
||||||
|
# Collection of detected/processed faces, each represented as a list of
|
||||||
|
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||||
|
output_stream: "multi_face_landmarks"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for downstream nodes
|
||||||
|
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||||
|
# passes through another image. All images that come in while waiting are
|
||||||
|
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||||
|
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||||
|
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||||
|
# real-time mobile applications. It also eliminates unnecessarily computation,
|
||||||
|
# e.g., the output produced by a node may get dropped downstream if the
|
||||||
|
# subsequent nodes are still busy processing previous inputs.
|
||||||
|
node {
|
||||||
|
calculator: "FlowLimiterCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
input_stream: "FINISHED:output_video"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "FINISHED"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "throttled_input_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines side packets for further use in the graph.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:0:num_faces"
|
||||||
|
output_side_packet: "PACKET:1:with_attention"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
packet { bool_value: true }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontCpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that renders face-landmark annotation onto the input image.
|
||||||
|
node {
|
||||||
|
calculator: "FaceRendererCpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
66
mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt
Normal file
@ -0,0 +1,66 @@
|
||||||
|
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
|
||||||
|
|
||||||
|
# Input image. (GpuBuffer)
|
||||||
|
input_stream: "input_video"
|
||||||
|
|
||||||
|
# Output image with rendered results. (GpuBuffer)
|
||||||
|
output_stream: "output_video"
|
||||||
|
# Collection of detected/processed faces, each represented as a list of
|
||||||
|
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||||
|
output_stream: "multi_face_landmarks"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for downstream nodes
|
||||||
|
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||||
|
# passes through another image. All images that come in while waiting are
|
||||||
|
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||||
|
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||||
|
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||||
|
# real-time mobile applications. It also eliminates unnecessarily computation,
|
||||||
|
# e.g., the output produced by a node may get dropped downstream if the
|
||||||
|
# subsequent nodes are still busy processing previous inputs.
|
||||||
|
node {
|
||||||
|
calculator: "FlowLimiterCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
input_stream: "FINISHED:output_video"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "FINISHED"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "throttled_input_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines side packets for further use in the graph.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:0:num_faces"
|
||||||
|
output_side_packet: "PACKET:1:with_attention"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
packet { bool_value: true }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that renders face-landmark annotation onto the input image.
|
||||||
|
node {
|
||||||
|
calculator: "FaceRendererGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
67
mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt
Normal file
@ -0,0 +1,67 @@
|
||||||
|
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
|
||||||
|
|
||||||
|
# GPU buffer. (GpuBuffer)
|
||||||
|
input_stream: "input_video"
|
||||||
|
|
||||||
|
# Max number of faces to detect/process. (int)
|
||||||
|
input_side_packet: "num_faces"
|
||||||
|
|
||||||
|
# Output image with rendered results. (GpuBuffer)
|
||||||
|
output_stream: "output_video"
|
||||||
|
# Collection of detected/processed faces, each represented as a list of
|
||||||
|
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||||
|
output_stream: "multi_face_landmarks"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for downstream nodes
|
||||||
|
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||||
|
# passes through another image. All images that come in while waiting are
|
||||||
|
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||||
|
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||||
|
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||||
|
# real-time mobile applications. It also eliminates unnecessarily computation,
|
||||||
|
# e.g., the output produced by a node may get dropped downstream if the
|
||||||
|
# subsequent nodes are still busy processing previous inputs.
|
||||||
|
node {
|
||||||
|
calculator: "FlowLimiterCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
input_stream: "FINISHED:output_video"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "FINISHED"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "throttled_input_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines side packets for further use in the graph.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:with_attention"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { bool_value: true }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Subgraph that renders face-landmark annotation onto the input image.
|
||||||
|
node {
|
||||||
|
calculator: "FaceRendererGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
52
mediapipe/graphs/face_mesh/subgraphs/BUILD
Normal file
@ -0,0 +1,52 @@
|
||||||
|
# Copyright 2019 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load(
|
||||||
|
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||||
|
"mediapipe_simple_subgraph",
|
||||||
|
)
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "renderer_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||||
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||||
|
"//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_simple_subgraph(
|
||||||
|
name = "face_renderer_gpu",
|
||||||
|
graph = "face_renderer_gpu.pbtxt",
|
||||||
|
register_as = "FaceRendererGpu",
|
||||||
|
deps = [
|
||||||
|
":renderer_calculators",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_simple_subgraph(
|
||||||
|
name = "face_renderer_cpu",
|
||||||
|
graph = "face_renderer_cpu.pbtxt",
|
||||||
|
register_as = "FaceRendererCpu",
|
||||||
|
deps = [
|
||||||
|
":renderer_calculators",
|
||||||
|
],
|
||||||
|
)
|
96
mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt
Normal file
@ -0,0 +1,96 @@
|
||||||
|
# MediaPipe face mesh rendering subgraph.
|
||||||
|
|
||||||
|
type: "FaceRendererCpu"
|
||||||
|
|
||||||
|
# CPU image. (ImageFrame)
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
# Collection of detected/predicted faces, each represented as a list of
|
||||||
|
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||||
|
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
# Regions of interest calculated based on palm detections.
|
||||||
|
# (std::vector<NormalizedRect>)
|
||||||
|
input_stream: "NORM_RECTS:rects"
|
||||||
|
# Detected palms. (std::vector<Detection>)
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
|
||||||
|
# CPU image with rendered data. (ImageFrame)
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ImagePropertiesCalculator"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
output_stream: "SIZE:image_size"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
output_stream: "RENDER_DATA:detections_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 0 g: 255 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
|
||||||
|
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||||
|
# timestamp for downstream calculators to inform them that all elements in the
|
||||||
|
# vector have been processed.
|
||||||
|
node {
|
||||||
|
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||||
|
input_stream: "ITERABLE:multi_face_landmarks"
|
||||||
|
output_stream: "ITEM:face_landmarks"
|
||||||
|
output_stream: "BATCH_END:landmark_timestamp"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 0 g: 255 b: 0 }
|
||||||
|
thickness: 2
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Collects a RenderData object for each hand into a vector. Upon receiving the
|
||||||
|
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
|
||||||
|
# timestamp.
|
||||||
|
node {
|
||||||
|
calculator: "EndLoopRenderDataCalculator"
|
||||||
|
input_stream: "ITEM:landmarks_render_data"
|
||||||
|
input_stream: "BATCH_END:landmark_timestamp"
|
||||||
|
output_stream: "ITERABLE:multi_face_landmarks_render_data"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECTS:rects"
|
||||||
|
output_stream: "RENDER_DATA:rects_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "detections_render_data"
|
||||||
|
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
|
||||||
|
input_stream: "rects_render_data"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
}
|
96
mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt
Normal file
@ -0,0 +1,96 @@
|
||||||
|
# MediaPipe face mesh rendering subgraph.
|
||||||
|
|
||||||
|
type: "FaceRendererGpu"
|
||||||
|
|
||||||
|
# GPU image. (GpuBuffer)
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
# Collection of detected/predicted faces, each represented as a list of
|
||||||
|
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||||
|
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
# Regions of interest calculated based on palm detections.
|
||||||
|
# (std::vector<NormalizedRect>)
|
||||||
|
input_stream: "NORM_RECTS:rects"
|
||||||
|
# Detected palms. (std::vector<Detection>)
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
|
||||||
|
# GPU image with rendered data. (GpuBuffer)
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ImagePropertiesCalculator"
|
||||||
|
input_stream: "IMAGE_GPU:input_image"
|
||||||
|
output_stream: "SIZE:image_size"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
output_stream: "RENDER_DATA:detections_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 0 g: 255 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
|
||||||
|
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||||
|
# timestamp for downstream calculators to inform them that all elements in the
|
||||||
|
# vector have been processed.
|
||||||
|
node {
|
||||||
|
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||||
|
input_stream: "ITERABLE:multi_face_landmarks"
|
||||||
|
output_stream: "ITEM:face_landmarks"
|
||||||
|
output_stream: "BATCH_END:end_timestamp"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 0 g: 255 b: 0 }
|
||||||
|
thickness: 2
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Collects a RenderData object for each hand into a vector. Upon receiving the
|
||||||
|
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
|
||||||
|
# timestamp.
|
||||||
|
node {
|
||||||
|
calculator: "EndLoopRenderDataCalculator"
|
||||||
|
input_stream: "ITEM:landmarks_render_data"
|
||||||
|
input_stream: "BATCH_END:end_timestamp"
|
||||||
|
output_stream: "ITERABLE:multi_face_landmarks_render_data"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECTS:rects"
|
||||||
|
output_stream: "RENDER_DATA:rects_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE_GPU:input_image"
|
||||||
|
input_stream: "detections_render_data"
|
||||||
|
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
|
||||||
|
input_stream: "rects_render_data"
|
||||||
|
output_stream: "IMAGE_GPU:output_image"
|
||||||
|
}
|
61
mediapipe/graphs/hair_segmentation/BUILD
Normal file
@ -0,0 +1,61 @@
|
||||||
|
# Copyright 2019 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load(
|
||||||
|
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||||
|
"mediapipe_binary_graph",
|
||||||
|
)
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "mobile_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||||
|
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||||
|
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||||
|
"//mediapipe/calculators/image:recolor_calculator",
|
||||||
|
"//mediapipe/calculators/image:set_alpha_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
|
||||||
|
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
|
||||||
|
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "desktop_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||||
|
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||||
|
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||||
|
"//mediapipe/calculators/image:recolor_calculator",
|
||||||
|
"//mediapipe/calculators/image:set_alpha_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_binary_graph(
|
||||||
|
name = "mobile_gpu_binary_graph",
|
||||||
|
graph = "hair_segmentation_mobile_gpu.pbtxt",
|
||||||
|
output_name = "mobile_gpu.binarypb",
|
||||||
|
deps = [":mobile_calculators"],
|
||||||
|
)
|
|
@ -0,0 +1,152 @@
|
||||||
|
# MediaPipe graph that performs hair segmentation with TensorFlow Lite on CPU.
|
||||||
|
# Used in the example in
|
||||||
|
# mediapipie/examples/desktop/hair_segmentation:hair_segmentation_cpu
|
||||||
|
|
||||||
|
# Images on CPU coming into and out of the graph.
|
||||||
|
input_stream: "input_video"
|
||||||
|
output_stream: "output_video"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for
|
||||||
|
# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish
|
||||||
|
# generating the corresponding hair mask before it passes through another
|
||||||
|
# image. All images that come in while waiting are dropped, limiting the number
|
||||||
|
# of in-flight images between this calculator and
|
||||||
|
# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between
|
||||||
|
# from queuing up incoming images and data excessively, which leads to increased
|
||||||
|
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||||
|
# eliminates unnecessarily computation, e.g., a transformed image produced by
|
||||||
|
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||||
|
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||||
|
# processing previous inputs.
|
||||||
|
node {
|
||||||
|
calculator: "FlowLimiterCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
input_stream: "FINISHED:hair_mask"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "FINISHED"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "throttled_input_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Transforms the input image on CPU to a 512x512 image. To scale the image, by
|
||||||
|
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||||
|
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||||
|
# objects in the image may be deformed (stretched or squeezed), but the hair
|
||||||
|
# segmentation model used in this graph is agnostic to that deformation.
|
||||||
|
node: {
|
||||||
|
calculator: "ImageTransformationCalculator"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
output_stream: "IMAGE:transformed_input_video"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||||
|
output_width: 512
|
||||||
|
output_height: 512
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Caches a mask fed back from the previous round of hair segmentation, and upon
|
||||||
|
# the arrival of the next input image sends out the cached mask with the
|
||||||
|
# timestamp replaced by that of the input image, essentially generating a packet
|
||||||
|
# that carries the previous mask. Note that upon the arrival of the very first
|
||||||
|
# input image, an empty packet is sent out to jump start the feedback loop.
|
||||||
|
node {
|
||||||
|
calculator: "PreviousLoopbackCalculator"
|
||||||
|
input_stream: "MAIN:throttled_input_video"
|
||||||
|
input_stream: "LOOP:hair_mask"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "LOOP"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "PREV_LOOP:previous_hair_mask"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Embeds the hair mask generated from the previous round of hair segmentation
|
||||||
|
# as the alpha channel of the current input image.
|
||||||
|
node {
|
||||||
|
calculator: "SetAlphaCalculator"
|
||||||
|
input_stream: "IMAGE:transformed_input_video"
|
||||||
|
input_stream: "ALPHA:previous_hair_mask"
|
||||||
|
output_stream: "IMAGE:mask_embedded_input_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the transformed input image on CPU into an image tensor stored in
|
||||||
|
# TfLiteTensor. The zero_center option is set to false to normalize the
|
||||||
|
# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the
|
||||||
|
# max_num_channels option set to 4, all 4 RGBA channels are contained in the
|
||||||
|
# image tensor.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteConverterCalculator"
|
||||||
|
input_stream: "IMAGE:mask_embedded_input_video"
|
||||||
|
output_stream: "TENSORS:image_tensor"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
|
||||||
|
zero_center: false
|
||||||
|
max_num_channels: 4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generates a single side packet containing a TensorFlow Lite op resolver that
|
||||||
|
# supports custom ops needed by the model used in this graph.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteCustomOpResolverCalculator"
|
||||||
|
output_side_packet: "op_resolver"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
|
||||||
|
use_gpu: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||||
|
# tensor representing the hair segmentation, which has the same width and height
|
||||||
|
# as the input image tensor.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteInferenceCalculator"
|
||||||
|
input_stream: "TENSORS:image_tensor"
|
||||||
|
output_stream: "TENSORS:segmentation_tensor"
|
||||||
|
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||||
|
model_path: "mediapipe/models/hair_segmentation.tflite"
|
||||||
|
use_gpu: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Decodes the segmentation tensor generated by the TensorFlow Lite model into a
|
||||||
|
# mask of values in [0, 255], stored in a CPU buffer. It also
|
||||||
|
# takes the mask generated previously as another input to improve the temporal
|
||||||
|
# consistency.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteTensorsToSegmentationCalculator"
|
||||||
|
input_stream: "TENSORS:segmentation_tensor"
|
||||||
|
input_stream: "PREV_MASK:previous_hair_mask"
|
||||||
|
output_stream: "MASK:hair_mask"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
|
||||||
|
tensor_width: 512
|
||||||
|
tensor_height: 512
|
||||||
|
tensor_channels: 2
|
||||||
|
combine_with_previous_ratio: 0.9
|
||||||
|
output_layer_index: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Colors the hair segmentation with the color specified in the option.
|
||||||
|
node {
|
||||||
|
calculator: "RecolorCalculator"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "MASK:hair_mask"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RecolorCalculatorOptions] {
|
||||||
|
color { r: 0 g: 0 b: 255 }
|
||||||
|
mask_channel: RED
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,152 @@
|
||||||
|
# MediaPipe graph that performs hair segmentation with TensorFlow Lite on GPU.
|
||||||
|
# Used in the example in
|
||||||
|
# mediapipie/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu.
|
||||||
|
|
||||||
|
# Images on GPU coming into and out of the graph.
|
||||||
|
input_stream: "input_video"
|
||||||
|
output_stream: "output_video"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish
# generating the corresponding hair mask before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:hair_mask"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Transforms the input image on GPU to a 512x512 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the hair
# segmentation model used in this graph is agnostic to that deformation.
node: {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  output_stream: "IMAGE_GPU:transformed_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 512
      output_height: 512
    }
  }
}

# Caches a mask fed back from the previous round of hair segmentation, and upon
# the arrival of the next input image sends out the cached mask with the
# timestamp replaced by that of the input image, essentially generating a packet
# that carries the previous mask. Note that upon the arrival of the very first
# input image, an empty packet is sent out to jump start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:throttled_input_video"
  input_stream: "LOOP:hair_mask"
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:previous_hair_mask"
}

# Embeds the hair mask generated from the previous round of hair segmentation
# as the alpha channel of the current input image.
node {
  calculator: "SetAlphaCalculator"
  input_stream: "IMAGE_GPU:transformed_input_video"
  input_stream: "ALPHA_GPU:previous_hair_mask"
  output_stream: "IMAGE_GPU:mask_embedded_input_video"
}

# Converts the transformed input image on GPU into an image tensor stored in
# tflite::gpu::GlBuffer. The zero_center option is set to false to normalize the
# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the
# max_num_channels option set to 4, all 4 RGBA channels are contained in the
# image tensor.
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE_GPU:mask_embedded_input_video"
  output_stream: "TENSORS_GPU:image_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
      zero_center: false
      max_num_channels: 4
    }
  }
}

# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
  calculator: "TfLiteCustomOpResolverCalculator"
  output_side_packet: "op_resolver"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
      use_gpu: true
    }
  }
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# tensor representing the hair segmentation, which has the same width and height
# as the input image tensor.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS_GPU:image_tensor"
  output_stream: "TENSORS_GPU:segmentation_tensor"
  input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/hair_segmentation.tflite"
      use_gpu: true
    }
  }
}

# Decodes the segmentation tensor generated by the TensorFlow Lite model into a
# mask of values in [0.f, 1.f], stored in the R channel of a GPU buffer. It also
# takes the mask generated previously as another input to improve the temporal
# consistency.
node {
  calculator: "TfLiteTensorsToSegmentationCalculator"
  input_stream: "TENSORS_GPU:segmentation_tensor"
  input_stream: "PREV_MASK_GPU:previous_hair_mask"
  output_stream: "MASK_GPU:hair_mask"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
      tensor_width: 512
      tensor_height: 512
      tensor_channels: 2
      combine_with_previous_ratio: 0.9
      output_layer_index: 1
    }
  }
}

# Colors the hair segmentation with the color specified in the option.
node {
  calculator: "RecolorCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "MASK_GPU:hair_mask"
  output_stream: "IMAGE_GPU:output_video"
  node_options: {
    [type.googleapis.com/mediapipe.RecolorCalculatorOptions] {
      color { r: 0 g: 0 b: 255 }
      mask_channel: RED
    }
  }
}
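The FlowLimiterCalculator pattern used above (and in several graphs below) is plain back-pressure: admit one frame, drop everything that arrives until the FINISHED back edge fires. Below is a minimal, self-contained Rust sketch of the same idea using only the standard library; the names are illustrative and do not correspond to any MediaPipe or ux-mediapipe API.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{mpsc, Arc};
use std::thread;
use std::time::Duration;

fn main() {
    // One-slot back edge: set when a frame enters the "graph", cleared when the
    // FINISHED result comes back. Frames arriving in between are dropped.
    let in_flight = Arc::new(AtomicBool::new(false));
    let worker_flag = Arc::clone(&in_flight);

    let (tx, rx) = mpsc::channel::<u32>();
    let worker = thread::spawn(move || {
        for frame in rx {
            thread::sleep(Duration::from_millis(30)); // stand-in for inference
            println!("processed frame {frame}");
            worker_flag.store(false, Ordering::Release); // FINISHED back edge
        }
    });

    for frame in 0..10u32 {
        if in_flight
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .is_ok()
        {
            tx.send(frame).unwrap(); // passes through
        } else {
            println!("dropped frame {frame}"); // downstream still busy
        }
        thread::sleep(Duration::from_millis(10)); // camera pacing
    }
    drop(tx);
    worker.join().unwrap();
}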
91
mediapipe/graphs/hand_tracking/BUILD
Normal file
@@ -0,0 +1,91 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

exports_files(glob([
    "*.pbtxt",
]))

cc_library(
    name = "desktop_offline_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:immediate_mux_calculator",
        "//mediapipe/calculators/core:packet_inner_join_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
        "//mediapipe/calculators/video:opencv_video_encoder_calculator",
    ],
)

cc_library(
    name = "desktop_tflite_calculators",
    deps = [
        ":desktop_offline_calculators",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:merge_calculator",
        "//mediapipe/graphs/hand_tracking/subgraphs:hand_renderer_cpu",
        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu",
    ],
)

mediapipe_binary_graph(
    name = "hand_tracking_desktop_live_binary_graph",
    graph = "hand_tracking_desktop_live.pbtxt",
    output_name = "hand_tracking_desktop_live.binarypb",
    deps = [":desktop_tflite_calculators"],
)

cc_library(
    name = "mobile_calculators",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/hand_tracking/subgraphs:hand_renderer_gpu",
        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu",
    ],
)

mediapipe_binary_graph(
    name = "hand_tracking_mobile_gpu_binary_graph",
    graph = "hand_tracking_mobile.pbtxt",
    output_name = "hand_tracking_mobile_gpu.binarypb",
    deps = [":mobile_calculators"],
)

cc_library(
    name = "detection_mobile_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/palm_detection:palm_detection_gpu",
    ],
)

mediapipe_binary_graph(
    name = "hand_detection_mobile_gpu_binary_graph",
    graph = "hand_detection_mobile.pbtxt",
    output_name = "hand_detection_mobile_gpu.binarypb",
    deps = [":detection_mobile_calculators"],
)
17
mediapipe/graphs/hand_tracking/calculators/BUILD
Normal file
@@ -0,0 +1,17 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:public"])
61
mediapipe/graphs/hand_tracking/hand_detection_desktop.pbtxt
Normal file
@@ -0,0 +1,61 @@
# MediaPipe graph that performs hand detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_detection_tflite.

# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1

# Decodes an input video file into images and a video header.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:input_video"
  output_stream: "VIDEO_PRESTREAM:input_video_header"
}

# Detects palms.
node {
  calculator: "PalmDetectionCpu"
  input_stream: "IMAGE:input_video"
  output_stream: "DETECTIONS:output_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:output_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the original image coming into
# the graph.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}

# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:output_video"
  input_stream: "VIDEO_PRESTREAM:input_video_header"
  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
  node_options: {
    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}
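On the Rust side, the decode -> detect -> render -> encode flow above could be mirrored with the opencv crate's VideoCapture/VideoWriter. The sketch below assumes that crate; run_graph_on_frame is a hypothetical placeholder for whatever ends up executing the MediaPipe graph and is not a real binding.

use opencv::{core, prelude::*, videoio};

// Placeholder for graph execution (detection + annotation); hypothetical.
fn run_graph_on_frame(frame: &core::Mat) -> opencv::Result<core::Mat> {
    frame.try_clone()
}

fn main() -> opencv::Result<()> {
    let mut cap = videoio::VideoCapture::from_file("input.mp4", videoio::CAP_ANY)?;
    let fps = cap.get(videoio::CAP_PROP_FPS)?;

    let mut writer: Option<videoio::VideoWriter> = None;
    let mut frame = core::Mat::default();
    // read() returns false once the input file is exhausted.
    while cap.read(&mut frame)? {
        let annotated = run_graph_on_frame(&frame)?;
        if writer.is_none() {
            // "avc1" fourcc packed as a little-endian i32, matching the encoder node.
            let fourcc = i32::from_le_bytes(*b"avc1");
            writer = Some(videoio::VideoWriter::new(
                "output.mp4",
                fourcc,
                fps,
                annotated.size()?,
                true,
            )?);
        }
        writer.as_mut().unwrap().write(&annotated)?;
    }
    Ok(())
}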
@@ -0,0 +1,39 @@
# MediaPipe graph that performs hand detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_detection_cpu.

# CPU image. (ImageFrame)
input_stream: "input_video"

# CPU image. (ImageFrame)
output_stream: "output_video"

# Detects palms.
node {
  calculator: "PalmDetectionCpu"
  input_stream: "IMAGE:input_video"
  output_stream: "DETECTIONS:output_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:output_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the original image coming into
# the graph.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
59
mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt
Normal file
@@ -0,0 +1,59 @@
# MediaPipe graph that performs hand detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handdetectiongpu and
# mediapipe/examples/ios/handdetectiongpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# GPU image. (GpuBuffer)
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for PalmDetectionGpu
# downstream in the graph to finish its tasks before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images in PalmDetectionGpu to 1. This prevents the nodes in
# PalmDetectionGpu from queuing up incoming images and data excessively, which
# leads to increased latency and memory usage, unwanted in real-time mobile
# applications. It also eliminates unnecessary computation, e.g., the output
# produced by a node in the subgraph may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects palms.
node {
  calculator: "PalmDetectionGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:palm_detections"
}

# Converts detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:palm_detections"
  output_stream: "RENDER_DATA:detection_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "detection_render_data"
  output_stream: "IMAGE_GPU:output_video"
}
68
mediapipe/graphs/hand_tracking/hand_tracking_desktop.pbtxt
Normal file
@@ -0,0 +1,68 @@
# MediaPipe graph that performs hands tracking on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_tracking_tflite.

# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1

# Decodes an input video file into images and a video header.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:input_video"
  output_stream: "VIDEO_PRESTREAM:input_video_header"
}

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingCpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:multi_palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:multi_hand_rects"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:multi_palm_rects"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_cpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:multi_palm_detections"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:multi_palm_rects"
  input_stream: "NORM_RECTS:1:multi_hand_rects"
  output_stream: "IMAGE:output_video"
}

# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:output_video"
  input_stream: "VIDEO_PRESTREAM:input_video_header"
  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
  node_options: {
    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}
@@ -0,0 +1,46 @@
# MediaPipe graph that performs hands tracking on desktop with TensorFlow
# Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu.

# CPU image. (ImageFrame)
input_stream: "input_video"

# CPU image. (ImageFrame)
output_stream: "output_video"

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingCpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:multi_palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:multi_hand_rects"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:multi_palm_rects"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_cpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:multi_palm_detections"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:multi_palm_rects"
  input_stream: "NORM_RECTS:1:multi_hand_rects"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,48 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:palm_detections"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
  input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
  output_stream: "IMAGE:output_video"
}
65
mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
Normal file
@@ -0,0 +1,65 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# Max number of hands to detect/process. (int)
input_side_packet: "num_hands"

# Model complexity (0 or 1). (int)
input_side_packet: "model_complexity"

# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
  input_stream: "IMAGE:throttled_input_video"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "DETECTIONS:palm_detections"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
  input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
  output_stream: "IMAGE:output_video"
}
58
mediapipe/graphs/hand_tracking/subgraphs/BUILD
Normal file
@@ -0,0 +1,58 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_simple_subgraph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_simple_subgraph(
    name = "hand_renderer_cpu",
    graph = "hand_renderer_cpu.pbtxt",
    register_as = "HandRendererSubgraph",
    deps = [
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:collection_has_min_size_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:labels_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
        "//mediapipe/calculators/util:rect_to_render_data_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_renderer_gpu",
    graph = "hand_renderer_gpu.pbtxt",
    register_as = "HandRendererSubgraph",
    deps = [
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:collection_has_min_size_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:labels_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
        "//mediapipe/calculators/util:rect_to_render_data_calculator",
    ],
)
209
mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_cpu.pbtxt
Normal file
@@ -0,0 +1,209 @@
# MediaPipe graph to render hand landmarks and some related debug information.

type: "HandRendererSubgraph"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_hand_landmarks"
# Handedness of the detected hand (i.e. is hand left or right).
# (std::vector<ClassificationList>)
input_stream: "HANDEDNESS:multi_handedness"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:0:multi_palm_rects"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:1:multi_hand_rects"
# Detected palms. (std::vector<Detection>)
input_stream: "DETECTIONS:palm_detections"

# Updated CPU image. (ImageFrame)
output_stream: "IMAGE:output_image"

# Converts detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:palm_detections"
  output_stream: "RENDER_DATA:detection_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Converts normalized rects to drawing primitives for annotation overlay.
node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECTS:multi_hand_rects"
  output_stream: "RENDER_DATA:multi_hand_rects_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
      filled: false
      color { r: 255 g: 0 b: 0 }
      thickness: 4.0
    }
  }
}

# Converts normalized rects to drawing primitives for annotation overlay.
node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECTS:multi_palm_rects"
  output_stream: "RENDER_DATA:multi_palm_rects_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
      filled: false
      color { r: 125 g: 0 b: 122 }
      thickness: 4.0
    }
  }
}

# Outputs each element of multi_palm_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITERABLE:multi_hand_landmarks"
  output_stream: "ITEM:single_hand_landmarks"
  output_stream: "BATCH_END:landmark_timestamp"
}

# Converts landmarks to drawing primitives for annotation overlay.
node {
  calculator: "LandmarksToRenderDataCalculator"
  input_stream: "NORM_LANDMARKS:single_hand_landmarks"
  output_stream: "RENDER_DATA:single_hand_landmark_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
      landmark_connections: 0
      landmark_connections: 1
      landmark_connections: 1
      landmark_connections: 2
      landmark_connections: 2
      landmark_connections: 3
      landmark_connections: 3
      landmark_connections: 4
      landmark_connections: 0
      landmark_connections: 5
      landmark_connections: 5
      landmark_connections: 6
      landmark_connections: 6
      landmark_connections: 7
      landmark_connections: 7
      landmark_connections: 8
      landmark_connections: 5
      landmark_connections: 9
      landmark_connections: 9
      landmark_connections: 10
      landmark_connections: 10
      landmark_connections: 11
      landmark_connections: 11
      landmark_connections: 12
      landmark_connections: 9
      landmark_connections: 13
      landmark_connections: 13
      landmark_connections: 14
      landmark_connections: 14
      landmark_connections: 15
      landmark_connections: 15
      landmark_connections: 16
      landmark_connections: 13
      landmark_connections: 17
      landmark_connections: 0
      landmark_connections: 17
      landmark_connections: 17
      landmark_connections: 18
      landmark_connections: 18
      landmark_connections: 19
      landmark_connections: 19
      landmark_connections: 20
      landmark_color { r: 255 g: 0 b: 0 }
      connection_color { r: 0 g: 255 b: 0 }
      thickness: 4.0
    }
  }
}

# Collects a RenderData object for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopRenderDataCalculator"
  input_stream: "ITEM:single_hand_landmark_render_data"
  input_stream: "BATCH_END:landmark_timestamp"
  output_stream: "ITERABLE:multi_hand_landmarks_render_data"
}

# Don't render handedness if more than one handedness is reported.
node {
  calculator: "ClassificationListVectorHasMinSizeCalculator"
  input_stream: "ITERABLE:multi_handedness"
  output_stream: "disallow_handedness_rendering"
  node_options: {
    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
      min_size: 2
    }
  }
}

node {
  calculator: "GateCalculator"
  input_stream: "multi_handedness"
  input_stream: "DISALLOW:disallow_handedness_rendering"
  output_stream: "allowed_multi_handedness"
  node_options: {
    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
      empty_packets_as_allow: false
    }
  }
}

node {
  calculator: "SplitClassificationListVectorCalculator"
  input_stream: "allowed_multi_handedness"
  output_stream: "handedness"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 0 end: 1 }
      element_only: true
    }
  }
}

# Converts classification to drawing primitives for annotation overlay.
node {
  calculator: "LabelsToRenderDataCalculator"
  input_stream: "CLASSIFICATIONS:handedness"
  output_stream: "RENDER_DATA:handedness_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: {
      color { r: 255 g: 0 b: 0 }
      thickness: 10.0
      font_height_px: 50
      horizontal_offset_px: 30
      vertical_offset_px: 50

      max_num_labels: 1
      location: TOP_LEFT
    }
  }
}

# Draws annotations and overlays them on top of the input images. Consumes
# a vector of RenderData objects and draws each of them on the input frame.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_image"
  input_stream: "detection_render_data"
  input_stream: "multi_hand_rects_render_data"
  input_stream: "multi_palm_rects_render_data"
  input_stream: "handedness_render_data"
  input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
  output_stream: "IMAGE:output_image"
}
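The landmark_connections entries above are a flat list of index pairs describing the 21 edges of the hand skeleton. If the same skeleton ever needs to be drawn from Rust, the list can simply be regrouped into pairs; the constant below just restates the values above and is not part of any MediaPipe API.

/// The 21 hand-skeleton edges listed in the renderer subgraph above,
/// regrouped as (start, end) landmark index pairs.
pub const HAND_CONNECTIONS: [(u8, u8); 21] = [
    (0, 1), (1, 2), (2, 3), (3, 4),                  // thumb
    (0, 5), (5, 6), (6, 7), (7, 8),                  // index finger
    (5, 9), (9, 10), (10, 11), (11, 12),             // middle finger
    (9, 13), (13, 14), (14, 15), (15, 16),           // ring finger
    (13, 17), (0, 17), (17, 18), (18, 19), (19, 20), // pinky and palm edge
];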
209
mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_gpu.pbtxt
Normal file
@@ -0,0 +1,209 @@
# MediaPipe graph to render hand landmarks and some related debug information.

type: "HandRendererSubgraph"

# GPU buffer. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_hand_landmarks"
# Handedness of the detected hand (i.e. is hand left or right).
# (std::vector<ClassificationList>)
input_stream: "HANDEDNESS:multi_handedness"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:0:multi_palm_rects"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:1:multi_hand_rects"
# Detected palms. (std::vector<Detection>)
input_stream: "DETECTIONS:palm_detections"

# Updated GPU buffer. (GpuBuffer)
output_stream: "IMAGE:output_image"

# Converts detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:palm_detections"
  output_stream: "RENDER_DATA:detection_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Converts normalized rects to drawing primitives for annotation overlay.
node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECTS:multi_hand_rects"
  output_stream: "RENDER_DATA:multi_hand_rects_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
      filled: false
      color { r: 255 g: 0 b: 0 }
      thickness: 4.0
    }
  }
}

# Converts normalized rects to drawing primitives for annotation overlay.
node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECTS:multi_palm_rects"
  output_stream: "RENDER_DATA:multi_palm_rects_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
      filled: false
      color { r: 125 g: 0 b: 122 }
      thickness: 4.0
    }
  }
}

# Outputs each element of multi_palm_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITERABLE:multi_hand_landmarks"
  output_stream: "ITEM:single_hand_landmarks"
  output_stream: "BATCH_END:landmark_timestamp"
}

# Converts landmarks to drawing primitives for annotation overlay.
node {
  calculator: "LandmarksToRenderDataCalculator"
  input_stream: "NORM_LANDMARKS:single_hand_landmarks"
  output_stream: "RENDER_DATA:single_hand_landmark_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
      landmark_connections: 0
      landmark_connections: 1
      landmark_connections: 1
      landmark_connections: 2
      landmark_connections: 2
      landmark_connections: 3
      landmark_connections: 3
      landmark_connections: 4
      landmark_connections: 0
      landmark_connections: 5
      landmark_connections: 5
      landmark_connections: 6
      landmark_connections: 6
      landmark_connections: 7
      landmark_connections: 7
      landmark_connections: 8
      landmark_connections: 5
      landmark_connections: 9
      landmark_connections: 9
      landmark_connections: 10
      landmark_connections: 10
      landmark_connections: 11
      landmark_connections: 11
      landmark_connections: 12
      landmark_connections: 9
      landmark_connections: 13
      landmark_connections: 13
      landmark_connections: 14
      landmark_connections: 14
      landmark_connections: 15
      landmark_connections: 15
      landmark_connections: 16
      landmark_connections: 13
      landmark_connections: 17
      landmark_connections: 0
      landmark_connections: 17
      landmark_connections: 17
      landmark_connections: 18
      landmark_connections: 18
      landmark_connections: 19
      landmark_connections: 19
      landmark_connections: 20
      landmark_color { r: 255 g: 0 b: 0 }
      connection_color { r: 0 g: 255 b: 0 }
      thickness: 4.0
    }
  }
}

# Collects a RenderData object for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopRenderDataCalculator"
  input_stream: "ITEM:single_hand_landmark_render_data"
  input_stream: "BATCH_END:landmark_timestamp"
  output_stream: "ITERABLE:multi_hand_landmarks_render_data"
}

# Don't render handedness if more than one handedness is reported.
node {
  calculator: "ClassificationListVectorHasMinSizeCalculator"
  input_stream: "ITERABLE:multi_handedness"
  output_stream: "disallow_handedness_rendering"
  node_options: {
    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
      min_size: 2
    }
  }
}

node {
  calculator: "GateCalculator"
  input_stream: "multi_handedness"
  input_stream: "DISALLOW:disallow_handedness_rendering"
  output_stream: "allowed_multi_handedness"
  node_options: {
    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
      empty_packets_as_allow: false
    }
  }
}

node {
  calculator: "SplitClassificationListVectorCalculator"
  input_stream: "allowed_multi_handedness"
  output_stream: "handedness"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 0 end: 1 }
      element_only: true
    }
  }
}

# Converts classification to drawing primitives for annotation overlay.
node {
  calculator: "LabelsToRenderDataCalculator"
  input_stream: "CLASSIFICATIONS:handedness"
  output_stream: "RENDER_DATA:handedness_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: {
      color { r: 255 g: 0 b: 0 }
      thickness: 10.0
      font_height_px: 50
      horizontal_offset_px: 30
      vertical_offset_px: 50

      max_num_labels: 1
      location: TOP_LEFT
    }
  }
}

# Draws annotations and overlays them on top of the input images. Consumes
# a vector of RenderData objects and draws each of them on the input frame.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:input_image"
  input_stream: "detection_render_data"
  input_stream: "multi_hand_rects_render_data"
  input_stream: "multi_palm_rects_render_data"
  input_stream: "handedness_render_data"
  input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
  output_stream: "IMAGE_GPU:output_image"
}
70
mediapipe/graphs/holistic_tracking/BUILD
Normal file
@@ -0,0 +1,70 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
    "mediapipe_simple_subgraph",
)

package(default_visibility = ["//visibility:public"])

licenses(["notice"])

mediapipe_simple_subgraph(
    name = "holistic_tracking_to_render_data",
    graph = "holistic_tracking_to_render_data.pbtxt",
    register_as = "HolisticTrackingToRenderData",
    deps = [
        "//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
        "//mediapipe/calculators/core:concatenate_vector_calculator",
        "//mediapipe/calculators/core:merge_calculator",
        "//mediapipe/calculators/core:split_landmarks_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
        "//mediapipe/calculators/util:rect_to_render_data_calculator",
        "//mediapipe/calculators/util:rect_to_render_scale_calculator",
        "//mediapipe/modules/holistic_landmark:hand_wrist_for_pose",
    ],
)

cc_library(
    name = "holistic_tracking_gpu_deps",
    deps = [
        ":holistic_tracking_to_render_data",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/modules/holistic_landmark:holistic_landmark_gpu",
    ],
)

mediapipe_binary_graph(
    name = "holistic_tracking_gpu",
    graph = "holistic_tracking_gpu.pbtxt",
    output_name = "holistic_tracking_gpu.binarypb",
    deps = [":holistic_tracking_gpu_deps"],
)

cc_library(
    name = "holistic_tracking_cpu_graph_deps",
    deps = [
        ":holistic_tracking_to_render_data",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
    ],
)
@@ -0,0 +1,75 @@
# Tracks and renders pose + hands + face landmarks.

# CPU image. (ImageFrame)
input_stream: "input_video"

# CPU image with rendered results. (ImageFrame)
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
      max_in_flight: 1
      max_in_queue: 1
      # Timeout is disabled (set to 0) as first frame processing can take more
      # than 1 second.
      in_flight_timeout: 0
    }
  }
}

node {
  calculator: "HolisticLandmarkCpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "POSE_ROI:pose_roi"
  output_stream: "POSE_DETECTION:pose_detection"
  output_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Gets image size.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "SIZE:image_size"
}

# Converts pose, hands and face landmarks to a render data vector.
node {
  calculator: "HolisticTrackingToRenderData"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  input_stream: "POSE_ROI:pose_roi"
  input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
  input_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "RENDER_DATA_VECTOR:render_data_vector"
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "VECTOR:render_data_vector"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,75 @@
# Tracks and renders pose + hands + face landmarks.

# GPU buffer. (GpuBuffer)
input_stream: "input_video"

# GPU image with rendered results. (GpuBuffer)
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
      max_in_flight: 1
      max_in_queue: 1
      # Timeout is disabled (set to 0) as first frame processing can take more
      # than 1 second.
      in_flight_timeout: 0
    }
  }
}

node {
  calculator: "HolisticLandmarkGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "POSE_ROI:pose_roi"
  output_stream: "POSE_DETECTION:pose_detection"
  output_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Gets image size.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  output_stream: "SIZE:image_size"
}

# Converts pose, hands and face landmarks to a render data vector.
node {
  calculator: "HolisticTrackingToRenderData"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  input_stream: "POSE_ROI:pose_roi"
  input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
  input_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "RENDER_DATA_VECTOR:render_data_vector"
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "VECTOR:render_data_vector"
  output_stream: "IMAGE_GPU:output_video"
}
|
@ -0,0 +1,757 @@
# Converts pose + hands + face landmarks to a render data vector.

type: "HolisticTrackingToRenderData"

# Image size. (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Pose landmarks. (NormalizedLandmarkList)
input_stream: "POSE_LANDMARKS:landmarks"
# Region of interest calculated based on pose landmarks. (NormalizedRect)
input_stream: "POSE_ROI:roi"
# Left hand landmarks. (NormalizedLandmarkList)
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# Right hand landmarks. (NormalizedLandmarkList)
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# Face landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS:face_landmarks"

# Render data vector. (std::vector<RenderData>)
output_stream: "RENDER_DATA_VECTOR:render_data_vector"

# --------------------------------------------------------------------------- #
# ------------------ Calculates scale for render objects -------------------- #
# --------------------------------------------------------------------------- #

# Calculates rendering scale based on the pose bounding box.
node {
  calculator: "RectToRenderScaleCalculator"
  input_stream: "NORM_RECT:roi"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "RENDER_SCALE:render_scale"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderScaleCalculatorOptions] {
      multiplier: 0.0008
    }
  }
}
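# Note: the render_scale produced here is consumed by the
# LandmarksToRenderDataCalculator nodes below to scale landmark and connection
# thickness with the size of the pose ROI, so the drawn skeleton stays visually
# proportional as the subject moves nearer to or farther from the camera.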

# --------------------------------------------------------------------------- #
# --------------- Combines pose and hands into pose skeleton ---------------- #
# --------------------------------------------------------------------------- #

# Gets pose landmarks before wrists.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "landmarks"
  output_stream: "landmarks_before_wrist"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 11 end: 15 }
    }
  }
}

# Gets pose left wrist landmark.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "landmarks"
  output_stream: "landmarks_left_wrist"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 15 end: 16 }
    }
  }
}

# Gets pose right wrist landmark.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "landmarks"
  output_stream: "landmarks_right_wrist"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 16 end: 17 }
    }
  }
}

# Gets pose landmarks after wrists.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "landmarks"
  output_stream: "landmarks_after_wrist"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 23 end: 33 }
    }
  }
}
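# The index ranges above follow the standard 33-point pose topology: indices
# 11-14 are the shoulders and elbows, 15 and 16 the left and right wrists, and
# 23-32 the lower-body landmarks (hips, knees, ankles, heels and foot tips).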

# Gets left hand wrist landmark.
node {
  calculator: "HandWristForPose"
  input_stream: "HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "WRIST_LANDMARK:left_hand_wrist_landmark"
}

# Gets left hand wrist landmark, or keeps pose wrist landmark if hand was not
# predicted.
node {
  calculator: "MergeCalculator"
  input_stream: "left_hand_wrist_landmark"
  input_stream: "landmarks_left_wrist"
  output_stream: "merged_left_hand_wrist_landmark"
}

# Gets right hand wrist landmark.
node {
  calculator: "HandWristForPose"
  input_stream: "HAND_LANDMARKS:right_hand_landmarks"
  output_stream: "WRIST_LANDMARK:right_hand_wrist_landmark"
}

# Gets right hand wrist landmark, or keeps pose wrist landmark if hand was not
# predicted.
node {
  calculator: "MergeCalculator"
  input_stream: "right_hand_wrist_landmark"
  input_stream: "landmarks_right_wrist"
  output_stream: "merged_right_hand_wrist_landmark"
}

# Combines pose landmarks all together.
node {
  calculator: "ConcatenateNormalizedLandmarkListCalculator"
  input_stream: "landmarks_before_wrist"
  input_stream: "merged_left_hand_wrist_landmark"
  input_stream: "merged_right_hand_wrist_landmark"
  input_stream: "landmarks_after_wrist"
  output_stream: "landmarks_merged"
  node_options: {
    [type.googleapis.com/mediapipe.ConcatenateVectorCalculatorOptions] {
      only_emit_if_all_present: true
    }
  }
}
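# The merged list therefore holds 16 landmarks in a fixed order: the four
# shoulder/elbow points, the two (hand-refined) wrists, and the ten lower-body
# points. Because left and right landmarks alternate in that order, the two
# split nodes below can select the left side with even indices and the right
# side with odd indices.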

# Takes left pose landmarks.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "landmarks_merged"
  output_stream: "landmarks_left_side"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 2 end: 3 }
      ranges: { begin: 4 end: 5 }
      ranges: { begin: 6 end: 7 }
      ranges: { begin: 8 end: 9 }
      ranges: { begin: 10 end: 11 }
      ranges: { begin: 12 end: 13 }
      ranges: { begin: 14 end: 15 }
      combine_outputs: true
    }
  }
}

# Takes right pose landmarks.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "landmarks_merged"
  output_stream: "landmarks_right_side"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 3 end: 4 }
      ranges: { begin: 5 end: 6 }
      ranges: { begin: 7 end: 8 }
      ranges: { begin: 9 end: 10 }
      ranges: { begin: 11 end: 12 }
      ranges: { begin: 13 end: 14 }
      ranges: { begin: 15 end: 16 }
      combine_outputs: true
    }
  }
}

# --------------------------------------------------------------------------- #
# ---------------------------------- Pose ----------------------------------- #
# --------------------------------------------------------------------------- #

# Converts pose connections to white lines.
node {
  calculator: "LandmarksToRenderDataCalculator"
  input_stream: "NORM_LANDMARKS:landmarks_merged"
  input_stream: "RENDER_SCALE:render_scale"
  output_stream: "RENDER_DATA:landmarks_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
      landmark_connections: 0 landmark_connections: 1
      landmark_connections: 0 landmark_connections: 2
      landmark_connections: 2 landmark_connections: 4
      landmark_connections: 1 landmark_connections: 3
      landmark_connections: 3 landmark_connections: 5
      landmark_connections: 0 landmark_connections: 6
      landmark_connections: 1 landmark_connections: 7
      landmark_connections: 6 landmark_connections: 7
      landmark_connections: 6 landmark_connections: 8
      landmark_connections: 7 landmark_connections: 9
      landmark_connections: 8 landmark_connections: 10
      landmark_connections: 9 landmark_connections: 11
      landmark_connections: 10 landmark_connections: 12
      landmark_connections: 11 landmark_connections: 13
      landmark_connections: 12 landmark_connections: 14
      landmark_connections: 13 landmark_connections: 15
      landmark_connections: 10 landmark_connections: 14
      landmark_connections: 11 landmark_connections: 15

      landmark_color { r: 255 g: 255 b: 255 }
      connection_color { r: 255 g: 255 b: 255 }
      thickness: 3.0
      visualize_landmark_depth: false
      utilize_visibility: true
      visibility_threshold: 0.1
    }
  }
}
|
||||||
|
# Converts pose joints to big white circles.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:landmarks_merged"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:landmarks_background_joints_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 255 g: 255 b: 255 }
|
||||||
|
connection_color { r: 255 g: 255 b: 255 }
|
||||||
|
thickness: 5.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
utilize_visibility: true
|
||||||
|
visibility_threshold: 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts pose left side joints to orange circles (inside white ones).
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:landmarks_left_side"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:landmarks_left_joints_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 255 g: 138 b: 0 }
|
||||||
|
connection_color { r: 255 g: 138 b: 0 }
|
||||||
|
thickness: 3.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
utilize_visibility: true
|
||||||
|
visibility_threshold: 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts pose right side joints to cyan circles (inside white ones).
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:landmarks_right_side"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:landmarks_right_joints_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 0 g: 217 b: 231 }
|
||||||
|
connection_color { r: 0 g: 217 b: 231 }
|
||||||
|
thickness: 3.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
utilize_visibility: true
|
||||||
|
visibility_threshold: 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# ------------------------------- Left hand --------------------------------- #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
# Converts left hand connections to white lines.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:left_hand_landmarks"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:left_hand_landmarks_connections_rd"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 15
|
||||||
|
landmark_connections: 15
|
||||||
|
landmark_connections: 16
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 18
|
||||||
|
landmark_connections: 18
|
||||||
|
landmark_connections: 19
|
||||||
|
landmark_connections: 19
|
||||||
|
landmark_connections: 20
|
||||||
|
landmark_color { r: 255 g: 255 b: 255 }
|
||||||
|
connection_color { r: 255 g: 255 b: 255 }
|
||||||
|
thickness: 4.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts left hand color joints.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:left_hand_landmarks"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:left_hand_landmarks_joints_rd"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 255 g: 138 b: 0 }
|
||||||
|
connection_color { r: 255 g: 138 b: 0 }
|
||||||
|
thickness: 3.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# -------------------------------- Right hand ------------------------------- #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
# Converts right hand connections to white lines.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:right_hand_landmarks"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:right_hand_landmarks_connections_rd"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 15
|
||||||
|
landmark_connections: 15
|
||||||
|
landmark_connections: 16
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 18
|
||||||
|
landmark_connections: 18
|
||||||
|
landmark_connections: 19
|
||||||
|
landmark_connections: 19
|
||||||
|
landmark_connections: 20
|
||||||
|
landmark_color { r: 255 g: 255 b: 255 }
|
||||||
|
connection_color { r: 255 g: 255 b: 255 }
|
||||||
|
thickness: 4.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts right hand color joints.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:right_hand_landmarks"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:right_hand_landmarks_joints_rd"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 0 g: 217 b: 231 }
|
||||||
|
connection_color { r: 0 g: 217 b: 231 }
|
||||||
|
thickness: 3.0
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# ---------------------------------- Face ----------------------------------- #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
# Converts face connections to white lines.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:face_landmarks_connections_rd"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
# Lips.
|
||||||
|
landmark_connections: 61
|
||||||
|
landmark_connections: 146
|
||||||
|
landmark_connections: 146
|
||||||
|
landmark_connections: 91
|
||||||
|
landmark_connections: 91
|
||||||
|
landmark_connections: 181
|
||||||
|
landmark_connections: 181
|
||||||
|
landmark_connections: 84
|
||||||
|
landmark_connections: 84
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 17
|
||||||
|
landmark_connections: 314
|
||||||
|
landmark_connections: 314
|
||||||
|
landmark_connections: 405
|
||||||
|
landmark_connections: 405
|
||||||
|
landmark_connections: 321
|
||||||
|
landmark_connections: 321
|
||||||
|
landmark_connections: 375
|
||||||
|
landmark_connections: 375
|
||||||
|
landmark_connections: 291
|
||||||
|
landmark_connections: 61
|
||||||
|
landmark_connections: 185
|
||||||
|
landmark_connections: 185
|
||||||
|
landmark_connections: 40
|
||||||
|
landmark_connections: 40
|
||||||
|
landmark_connections: 39
|
||||||
|
landmark_connections: 39
|
||||||
|
landmark_connections: 37
|
||||||
|
landmark_connections: 37
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 267
|
||||||
|
landmark_connections: 267
|
||||||
|
landmark_connections: 269
|
||||||
|
landmark_connections: 269
|
||||||
|
landmark_connections: 270
|
||||||
|
landmark_connections: 270
|
||||||
|
landmark_connections: 409
|
||||||
|
landmark_connections: 409
|
||||||
|
landmark_connections: 291
|
||||||
|
landmark_connections: 78
|
||||||
|
landmark_connections: 95
|
||||||
|
landmark_connections: 95
|
||||||
|
landmark_connections: 88
|
||||||
|
landmark_connections: 88
|
||||||
|
landmark_connections: 178
|
||||||
|
landmark_connections: 178
|
||||||
|
landmark_connections: 87
|
||||||
|
landmark_connections: 87
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 317
|
||||||
|
landmark_connections: 317
|
||||||
|
landmark_connections: 402
|
||||||
|
landmark_connections: 402
|
||||||
|
landmark_connections: 318
|
||||||
|
landmark_connections: 318
|
||||||
|
landmark_connections: 324
|
||||||
|
landmark_connections: 324
|
||||||
|
landmark_connections: 308
|
||||||
|
landmark_connections: 78
|
||||||
|
landmark_connections: 191
|
||||||
|
landmark_connections: 191
|
||||||
|
landmark_connections: 80
|
||||||
|
landmark_connections: 80
|
||||||
|
landmark_connections: 81
|
||||||
|
landmark_connections: 81
|
||||||
|
landmark_connections: 82
|
||||||
|
landmark_connections: 82
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 312
|
||||||
|
landmark_connections: 312
|
||||||
|
landmark_connections: 311
|
||||||
|
landmark_connections: 311
|
||||||
|
landmark_connections: 310
|
||||||
|
landmark_connections: 310
|
||||||
|
landmark_connections: 415
|
||||||
|
landmark_connections: 415
|
||||||
|
landmark_connections: 308
|
||||||
|
# Left eye.
|
||||||
|
landmark_connections: 33
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 163
|
||||||
|
landmark_connections: 163
|
||||||
|
landmark_connections: 144
|
||||||
|
landmark_connections: 144
|
||||||
|
landmark_connections: 145
|
||||||
|
landmark_connections: 145
|
||||||
|
landmark_connections: 153
|
||||||
|
landmark_connections: 153
|
||||||
|
landmark_connections: 154
|
||||||
|
landmark_connections: 154
|
||||||
|
landmark_connections: 155
|
||||||
|
landmark_connections: 155
|
||||||
|
landmark_connections: 133
|
||||||
|
landmark_connections: 33
|
||||||
|
landmark_connections: 246
|
||||||
|
landmark_connections: 246
|
||||||
|
landmark_connections: 161
|
||||||
|
landmark_connections: 161
|
||||||
|
landmark_connections: 160
|
||||||
|
landmark_connections: 160
|
||||||
|
landmark_connections: 159
|
||||||
|
landmark_connections: 159
|
||||||
|
landmark_connections: 158
|
||||||
|
landmark_connections: 158
|
||||||
|
landmark_connections: 157
|
||||||
|
landmark_connections: 157
|
||||||
|
landmark_connections: 173
|
||||||
|
landmark_connections: 173
|
||||||
|
landmark_connections: 133
|
||||||
|
# Left eyebrow.
|
||||||
|
landmark_connections: 46
|
||||||
|
landmark_connections: 53
|
||||||
|
landmark_connections: 53
|
||||||
|
landmark_connections: 52
|
||||||
|
landmark_connections: 52
|
||||||
|
landmark_connections: 65
|
||||||
|
landmark_connections: 65
|
||||||
|
landmark_connections: 55
|
||||||
|
landmark_connections: 70
|
||||||
|
landmark_connections: 63
|
||||||
|
landmark_connections: 63
|
||||||
|
landmark_connections: 105
|
||||||
|
landmark_connections: 105
|
||||||
|
landmark_connections: 66
|
||||||
|
landmark_connections: 66
|
||||||
|
landmark_connections: 107
|
||||||
|
# Right eye.
|
||||||
|
landmark_connections: 263
|
||||||
|
landmark_connections: 249
|
||||||
|
landmark_connections: 249
|
||||||
|
landmark_connections: 390
|
||||||
|
landmark_connections: 390
|
||||||
|
landmark_connections: 373
|
||||||
|
landmark_connections: 373
|
||||||
|
landmark_connections: 374
|
||||||
|
landmark_connections: 374
|
||||||
|
landmark_connections: 380
|
||||||
|
landmark_connections: 380
|
||||||
|
landmark_connections: 381
|
||||||
|
landmark_connections: 381
|
||||||
|
landmark_connections: 382
|
||||||
|
landmark_connections: 382
|
||||||
|
landmark_connections: 362
|
||||||
|
landmark_connections: 263
|
||||||
|
landmark_connections: 466
|
||||||
|
landmark_connections: 466
|
||||||
|
landmark_connections: 388
|
||||||
|
landmark_connections: 388
|
||||||
|
landmark_connections: 387
|
||||||
|
landmark_connections: 387
|
||||||
|
landmark_connections: 386
|
||||||
|
landmark_connections: 386
|
||||||
|
landmark_connections: 385
|
||||||
|
landmark_connections: 385
|
||||||
|
landmark_connections: 384
|
||||||
|
landmark_connections: 384
|
||||||
|
landmark_connections: 398
|
||||||
|
landmark_connections: 398
|
||||||
|
landmark_connections: 362
|
||||||
|
# Right eyebrow.
|
||||||
|
landmark_connections: 276
|
||||||
|
landmark_connections: 283
|
||||||
|
landmark_connections: 283
|
||||||
|
landmark_connections: 282
|
||||||
|
landmark_connections: 282
|
||||||
|
landmark_connections: 295
|
||||||
|
landmark_connections: 295
|
||||||
|
landmark_connections: 285
|
||||||
|
landmark_connections: 300
|
||||||
|
landmark_connections: 293
|
||||||
|
landmark_connections: 293
|
||||||
|
landmark_connections: 334
|
||||||
|
landmark_connections: 334
|
||||||
|
landmark_connections: 296
|
||||||
|
landmark_connections: 296
|
||||||
|
landmark_connections: 336
|
||||||
|
# Face oval.
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 338
|
||||||
|
landmark_connections: 338
|
||||||
|
landmark_connections: 297
|
||||||
|
landmark_connections: 297
|
||||||
|
landmark_connections: 332
|
||||||
|
landmark_connections: 332
|
||||||
|
landmark_connections: 284
|
||||||
|
landmark_connections: 284
|
||||||
|
landmark_connections: 251
|
||||||
|
landmark_connections: 251
|
||||||
|
landmark_connections: 389
|
||||||
|
landmark_connections: 389
|
||||||
|
landmark_connections: 356
|
||||||
|
landmark_connections: 356
|
||||||
|
landmark_connections: 454
|
||||||
|
landmark_connections: 454
|
||||||
|
landmark_connections: 323
|
||||||
|
landmark_connections: 323
|
||||||
|
landmark_connections: 361
|
||||||
|
landmark_connections: 361
|
||||||
|
landmark_connections: 288
|
||||||
|
landmark_connections: 288
|
||||||
|
landmark_connections: 397
|
||||||
|
landmark_connections: 397
|
||||||
|
landmark_connections: 365
|
||||||
|
landmark_connections: 365
|
||||||
|
landmark_connections: 379
|
||||||
|
landmark_connections: 379
|
||||||
|
landmark_connections: 378
|
||||||
|
landmark_connections: 378
|
||||||
|
landmark_connections: 400
|
||||||
|
landmark_connections: 400
|
||||||
|
landmark_connections: 377
|
||||||
|
landmark_connections: 377
|
||||||
|
landmark_connections: 152
|
||||||
|
landmark_connections: 152
|
||||||
|
landmark_connections: 148
|
||||||
|
landmark_connections: 148
|
||||||
|
landmark_connections: 176
|
||||||
|
landmark_connections: 176
|
||||||
|
landmark_connections: 149
|
||||||
|
landmark_connections: 149
|
||||||
|
landmark_connections: 150
|
||||||
|
landmark_connections: 150
|
||||||
|
landmark_connections: 136
|
||||||
|
landmark_connections: 136
|
||||||
|
landmark_connections: 172
|
||||||
|
landmark_connections: 172
|
||||||
|
landmark_connections: 58
|
||||||
|
landmark_connections: 58
|
||||||
|
landmark_connections: 132
|
||||||
|
landmark_connections: 132
|
||||||
|
landmark_connections: 93
|
||||||
|
landmark_connections: 93
|
||||||
|
landmark_connections: 234
|
||||||
|
landmark_connections: 234
|
||||||
|
landmark_connections: 127
|
||||||
|
landmark_connections: 127
|
||||||
|
landmark_connections: 162
|
||||||
|
landmark_connections: 162
|
||||||
|
landmark_connections: 21
|
||||||
|
landmark_connections: 21
|
||||||
|
landmark_connections: 54
|
||||||
|
landmark_connections: 54
|
||||||
|
landmark_connections: 103
|
||||||
|
landmark_connections: 103
|
||||||
|
landmark_connections: 67
|
||||||
|
landmark_connections: 67
|
||||||
|
landmark_connections: 109
|
||||||
|
landmark_connections: 109
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_color { r: 255 g: 255 b: 255 }
|
||||||
|
connection_color { r: 255 g: 255 b: 255 }
|
||||||
|
thickness: 0.5
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts face joints to cyan circles.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
input_stream: "RENDER_SCALE:render_scale"
|
||||||
|
output_stream: "RENDER_DATA:face_landmarks_joints_rd"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 0 g: 217 b: 231 }
|
||||||
|
connection_color { r: 0 g: 217 b: 231 }
|
||||||
|
thickness: 0.5
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Concatenates all render data.
node {
  calculator: "ConcatenateRenderDataVectorCalculator"
  input_stream: "landmarks_render_data"
  input_stream: "landmarks_background_joints_render_data"
  input_stream: "landmarks_left_joints_render_data"
  input_stream: "landmarks_right_joints_render_data"

  # Left hand.
  input_stream: "left_hand_landmarks_connections_rd"
  input_stream: "left_hand_landmarks_joints_rd"

  # Right hand.
  input_stream: "right_hand_landmarks_connections_rd"
  input_stream: "right_hand_landmarks_joints_rd"

  # Face.
  input_stream: "face_landmarks_connections_rd"
  input_stream: "face_landmarks_joints_rd"

  output_stream: "render_data_vector"
}
39
mediapipe/graphs/instant_motion_tracking/BUILD
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
# Copyright 2020 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load(
|
||||||
|
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||||
|
"mediapipe_binary_graph",
|
||||||
|
)
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "instant_motion_tracking_deps",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/graphs/instant_motion_tracking/calculators:matrices_manager_calculator",
|
||||||
|
"//mediapipe/graphs/instant_motion_tracking/calculators:sticker_manager_calculator",
|
||||||
|
"//mediapipe/graphs/instant_motion_tracking/subgraphs:region_tracking",
|
||||||
|
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_binary_graph(
|
||||||
|
name = "instant_motion_tracking_binary_graph",
|
||||||
|
graph = "instant_motion_tracking.pbtxt",
|
||||||
|
output_name = "instant_motion_tracking.binarypb",
|
||||||
|
deps = [":instant_motion_tracking_deps"],
|
||||||
|
)
|
84
mediapipe/graphs/instant_motion_tracking/calculators/BUILD
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
# Copyright 2020 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
proto_library(
|
||||||
|
name = "sticker_buffer_proto",
|
||||||
|
srcs = [
|
||||||
|
"sticker_buffer.proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_cc_proto_library(
|
||||||
|
name = "sticker_buffer_cc_proto",
|
||||||
|
srcs = [
|
||||||
|
"sticker_buffer.proto",
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
":sticker_buffer_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "sticker_manager_calculator",
|
||||||
|
srcs = ["sticker_manager_calculator.cc"],
|
||||||
|
hdrs = ["transformations.h"],
|
||||||
|
deps = [
|
||||||
|
":sticker_buffer_cc_proto",
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework:timestamp",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
],
|
||||||
|
alwayslink = 1,
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "matrices_manager_calculator",
|
||||||
|
srcs = ["matrices_manager_calculator.cc"],
|
||||||
|
hdrs = ["transformations.h"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework:timestamp",
|
||||||
|
"//mediapipe/framework/formats:image_frame",
|
||||||
|
"//mediapipe/framework/port:opencv_imgproc",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/graphs/object_detection_3d/calculators:model_matrix_cc_proto",
|
||||||
|
"//mediapipe/modules/objectron/calculators:box",
|
||||||
|
"@com_google_absl//absl/memory",
|
||||||
|
"@com_google_absl//absl/strings",
|
||||||
|
"@eigen_archive//:eigen3",
|
||||||
|
],
|
||||||
|
alwayslink = 1,
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "tracked_anchor_manager_calculator",
|
||||||
|
srcs = ["tracked_anchor_manager_calculator.cc"],
|
||||||
|
hdrs = ["transformations.h"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/util/tracking:box_tracker_cc_proto",
|
||||||
|
],
|
||||||
|
alwayslink = 1,
|
||||||
|
)
|
|
@ -0,0 +1,393 @@
|
||||||
|
// Copyright 2020 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "Eigen/Core"
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
#include "Eigen/Geometry"
|
||||||
|
#include "absl/memory/memory.h"
|
||||||
|
#include "absl/strings/str_cat.h"
|
||||||
|
#include "absl/strings/str_join.h"
|
||||||
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
|
||||||
|
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
|
||||||
|
#include "mediapipe/modules/objectron/calculators/box.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
using Matrix4fCM = Eigen::Matrix<float, 4, 4, Eigen::ColMajor>;
|
||||||
|
using Vector3f = Eigen::Vector3f;
|
||||||
|
using Matrix3f = Eigen::Matrix3f;
|
||||||
|
using DiagonalMatrix3f = Eigen::DiagonalMatrix<float, 3>;
|
||||||
|
constexpr char kAnchorsTag[] = "ANCHORS";
|
||||||
|
constexpr char kIMUMatrixTag[] = "IMU_ROTATION";
|
||||||
|
constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
|
||||||
|
constexpr char kUserScalingsTag[] = "USER_SCALINGS";
|
||||||
|
constexpr char kRendersTag[] = "RENDER_DATA";
|
||||||
|
constexpr char kGifAspectRatioTag[] = "GIF_ASPECT_RATIO";
|
||||||
|
constexpr char kFOVSidePacketTag[] = "FOV";
|
||||||
|
constexpr char kAspectRatioSidePacketTag[] = "ASPECT_RATIO";
|
||||||
|
// initial Z value (-10 is center point in visual range for OpenGL render)
|
||||||
|
constexpr float kInitialZ = -10.0f;
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Intermediary for rotation and translation data to model matrix usable by
|
||||||
|
// gl_animation_overlay_calculator. For information on the construction of
|
||||||
|
// OpenGL objects and transformations (including a breakdown of model matrices),
|
||||||
|
// please visit: https://open.gl/transformations
|
||||||
|
//
|
||||||
|
// Input Side Packets:
|
||||||
|
// FOV - Vertical field of view for device [REQUIRED - Defines perspective
|
||||||
|
// matrix] ASPECT_RATIO - Aspect ratio of device [REQUIRED - Defines
|
||||||
|
// perspective matrix]
|
||||||
|
//
|
||||||
|
// Input streams:
|
||||||
|
// ANCHORS - Anchor data with x,y,z coordinates (x,y are in [0.0-1.0] range for
|
||||||
|
// position on the device screen, while z is the scaling factor that changes
|
||||||
|
// in proportion to the distance from the tracked region) [REQUIRED]
|
||||||
|
// IMU_ROTATION - float[9] of row-major device rotation matrix [REQUIRED]
|
||||||
|
// USER_ROTATIONS - UserRotations with corresponding radians of rotation
|
||||||
|
// [REQUIRED]
|
||||||
|
// USER_SCALINGS - UserScalings with corresponding scale factor [REQUIRED]
|
||||||
|
// GIF_ASPECT_RATIO - Aspect ratio of GIF image used to dynamically scale
|
||||||
|
// GIF asset defined as width / height [OPTIONAL]
|
||||||
|
// Output:
|
||||||
|
// MATRICES - TimedModelMatrixProtoList of each object type to render
|
||||||
|
// [REQUIRED]
|
||||||
|
//
|
||||||
|
// Example config:
|
||||||
|
// node{
|
||||||
|
// calculator: "MatricesManagerCalculator"
|
||||||
|
// input_stream: "ANCHORS:tracked_scaled_anchor_data"
|
||||||
|
// input_stream: "IMU_ROTATION:imu_rotation_matrix"
|
||||||
|
// input_stream: "USER_ROTATIONS:user_rotation_data"
|
||||||
|
// input_stream: "USER_SCALINGS:user_scaling_data"
|
||||||
|
// input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
|
||||||
|
// output_stream: "MATRICES:0:first_render_matrices"
|
||||||
|
// output_stream: "MATRICES:1:second_render_matrices" [unbounded input size]
|
||||||
|
// input_side_packet: "FOV:vertical_fov_radians"
|
||||||
|
// input_side_packet: "ASPECT_RATIO:aspect_ratio"
|
||||||
|
// }
|
||||||
|
|
||||||
|
class MatricesManagerCalculator : public CalculatorBase {
|
||||||
|
public:
|
||||||
|
static absl::Status GetContract(CalculatorContract* cc);
|
||||||
|
absl::Status Open(CalculatorContext* cc) override;
|
||||||
|
absl::Status Process(CalculatorContext* cc) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Device properties that will be preset by side packets
|
||||||
|
float vertical_fov_radians_ = 0.0f;
|
||||||
|
float aspect_ratio_ = 0.0f;
|
||||||
|
float gif_aspect_ratio_ = 1.0f;
|
||||||
|
|
||||||
|
const Matrix3f GenerateUserRotationMatrix(const float rotation_radians) const;
|
||||||
|
const Matrix4fCM GenerateEigenModelMatrix(
|
||||||
|
const Vector3f& translation_vector,
|
||||||
|
const Matrix3f& rotation_submatrix) const;
|
||||||
|
const Vector3f GenerateAnchorVector(const Anchor& tracked_anchor) const;
|
||||||
|
DiagonalMatrix3f GetDefaultRenderScaleDiagonal(
|
||||||
|
const int render_id, const float user_scale_factor,
|
||||||
|
const float gif_aspect_ratio) const;
|
||||||
|
|
||||||
|
// Returns a user scaling increment associated with the sticker_id
|
||||||
|
// TODO: Adjust lookup function if total number of stickers is uncapped to
|
||||||
|
// improve performance
|
||||||
|
const float GetUserScaler(const std::vector<UserScaling>& scalings,
|
||||||
|
const int sticker_id) const {
|
||||||
|
for (const UserScaling& user_scaling : scalings) {
|
||||||
|
if (user_scaling.sticker_id == sticker_id) {
|
||||||
|
return user_scaling.scale_factor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
|
||||||
|
<< ", returning 1.0f scaling";
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a user rotation in radians associated with the sticker_id
|
||||||
|
const float GetUserRotation(const std::vector<UserRotation>& rotations,
|
||||||
|
const int sticker_id) {
|
||||||
|
for (const UserRotation& rotation : rotations) {
|
||||||
|
if (rotation.sticker_id == sticker_id) {
|
||||||
|
return rotation.rotation_radians;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
|
||||||
|
<< ", returning 0.0f rotation";
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
REGISTER_CALCULATOR(MatricesManagerCalculator);
|
||||||
|
|
||||||
|
absl::Status MatricesManagerCalculator::GetContract(CalculatorContract* cc) {
|
||||||
|
RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
|
||||||
|
cc->Inputs().HasTag(kIMUMatrixTag) &&
|
||||||
|
cc->Inputs().HasTag(kUserRotationsTag) &&
|
||||||
|
cc->Inputs().HasTag(kUserScalingsTag) &&
|
||||||
|
cc->InputSidePackets().HasTag(kFOVSidePacketTag) &&
|
||||||
|
cc->InputSidePackets().HasTag(kAspectRatioSidePacketTag));
|
||||||
|
|
||||||
|
cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
|
||||||
|
cc->Inputs().Tag(kIMUMatrixTag).Set<float[]>();
|
||||||
|
cc->Inputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
|
||||||
|
cc->Inputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
|
||||||
|
cc->Inputs().Tag(kRendersTag).Set<std::vector<int>>();
|
||||||
|
if (cc->Inputs().HasTag(kGifAspectRatioTag)) {
|
||||||
|
cc->Inputs().Tag(kGifAspectRatioTag).Set<float>();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CollectionItemId id = cc->Outputs().BeginId("MATRICES");
|
||||||
|
id < cc->Outputs().EndId("MATRICES"); ++id) {
|
||||||
|
cc->Outputs().Get(id).Set<mediapipe::TimedModelMatrixProtoList>();
|
||||||
|
}
|
||||||
|
cc->InputSidePackets().Tag(kFOVSidePacketTag).Set<float>();
|
||||||
|
cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Set<float>();
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status MatricesManagerCalculator::Open(CalculatorContext* cc) {
|
||||||
|
cc->SetOffset(TimestampDiff(0));
|
||||||
|
// Set device properties from side packets
|
||||||
|
vertical_fov_radians_ =
|
||||||
|
cc->InputSidePackets().Tag(kFOVSidePacketTag).Get<float>();
|
||||||
|
aspect_ratio_ =
|
||||||
|
cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Get<float>();
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status MatricesManagerCalculator::Process(CalculatorContext* cc) {
|
||||||
|
// Define each object's model matrices
|
||||||
|
auto asset_matrices_gif =
|
||||||
|
std::make_unique<mediapipe::TimedModelMatrixProtoList>();
|
||||||
|
auto asset_matrices_1 =
|
||||||
|
std::make_unique<mediapipe::TimedModelMatrixProtoList>();
|
||||||
|
// Clear all model matrices
|
||||||
|
asset_matrices_gif->clear_model_matrix();
|
||||||
|
asset_matrices_1->clear_model_matrix();
|
||||||
|
|
||||||
|
const std::vector<UserRotation> user_rotation_data =
|
||||||
|
cc->Inputs().Tag(kUserRotationsTag).Get<std::vector<UserRotation>>();
|
||||||
|
|
||||||
|
const std::vector<UserScaling> user_scaling_data =
|
||||||
|
cc->Inputs().Tag(kUserScalingsTag).Get<std::vector<UserScaling>>();
|
||||||
|
|
||||||
|
const std::vector<int> render_data =
|
||||||
|
cc->Inputs().Tag(kRendersTag).Get<std::vector<int>>();
|
||||||
|
|
||||||
|
const std::vector<Anchor> anchor_data =
|
||||||
|
cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
|
||||||
|
if (cc->Inputs().HasTag(kGifAspectRatioTag) &&
|
||||||
|
!cc->Inputs().Tag(kGifAspectRatioTag).IsEmpty()) {
|
||||||
|
gif_aspect_ratio_ = cc->Inputs().Tag(kGifAspectRatioTag).Get<float>();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Device IMU rotation submatrix
|
||||||
|
const auto imu_matrix = cc->Inputs().Tag(kIMUMatrixTag).Get<float[]>();
|
||||||
|
Matrix3f imu_rotation_submatrix;
|
||||||
|
int idx = 0;
|
||||||
|
for (int x = 0; x < 3; ++x) {
|
||||||
|
for (int y = 0; y < 3; ++y) {
|
||||||
|
// Input matrix is row-major matrix, it must be reformatted to
|
||||||
|
// column-major via transpose procedure
|
||||||
|
imu_rotation_submatrix(y, x) = imu_matrix[idx++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int render_idx = 0;
|
||||||
|
for (const Anchor& anchor : anchor_data) {
|
||||||
|
const int id = anchor.sticker_id;
|
||||||
|
mediapipe::TimedModelMatrixProto* model_matrix;
|
||||||
|
// Add model matrix to matrices list for defined object render ID
|
||||||
|
if (render_data[render_idx] == 0) { // GIF
|
||||||
|
model_matrix = asset_matrices_gif->add_model_matrix();
|
||||||
|
} else { // Asset 3D
|
||||||
|
if (render_data[render_idx] != 1) {
|
||||||
|
LOG(ERROR) << "render id: " << render_data[render_idx]
|
||||||
|
<< " is not supported. Fall back to using render_id = 1.";
|
||||||
|
}
|
||||||
|
model_matrix = asset_matrices_1->add_model_matrix();
|
||||||
|
}
|
||||||
|
|
||||||
|
model_matrix->set_id(id);
|
||||||
|
|
||||||
|
// The user transformation data associated with this sticker must be defined
|
||||||
|
const float user_rotation_radians = GetUserRotation(user_rotation_data, id);
|
||||||
|
const float user_scale_factor = GetUserScaler(user_scaling_data, id);
|
||||||
|
|
||||||
|
// A vector representative of a user's sticker rotation transformation can
|
||||||
|
// be created
|
||||||
|
const Matrix3f user_rotation_submatrix =
|
||||||
|
GenerateUserRotationMatrix(user_rotation_radians);
|
||||||
|
// Next, the diagonal representative of the combined scaling data
|
||||||
|
const DiagonalMatrix3f scaling_diagonal = GetDefaultRenderScaleDiagonal(
|
||||||
|
render_data[render_idx], user_scale_factor, gif_aspect_ratio_);
|
||||||
|
// Increment to next render id from vector
|
||||||
|
render_idx++;
|
||||||
|
|
||||||
|
// The user transformation data can be concatenated into a final rotation
|
||||||
|
// submatrix with the device IMU rotational data
|
||||||
|
const Matrix3f user_transformed_rotation_submatrix =
|
||||||
|
imu_rotation_submatrix * user_rotation_submatrix * scaling_diagonal;
|
||||||
|
|
||||||
|
// A vector representative of the translation of the object in OpenGL
|
||||||
|
// coordinate space must be generated
|
||||||
|
const Vector3f translation_vector = GenerateAnchorVector(anchor);
|
||||||
|
|
||||||
|
// Concatenate all model matrix data
|
||||||
|
const Matrix4fCM final_model_matrix = GenerateEigenModelMatrix(
|
||||||
|
translation_vector, user_transformed_rotation_submatrix);
|
||||||
|
|
||||||
|
// The generated model matrix must be mapped to TimedModelMatrixProto
|
||||||
|
// (col-wise)
|
||||||
|
for (int x = 0; x < final_model_matrix.rows(); ++x) {
|
||||||
|
for (int y = 0; y < final_model_matrix.cols(); ++y) {
|
||||||
|
model_matrix->add_matrix_entries(final_model_matrix(x, y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output all individual render matrices
|
||||||
|
// TODO: Perform depth ordering with gl_animation_overlay_calculator to render
|
||||||
|
// objects in order by depth to allow occlusion.
|
||||||
|
cc->Outputs()
|
||||||
|
.Get(cc->Outputs().GetId("MATRICES", 0))
|
||||||
|
.Add(asset_matrices_gif.release(), cc->InputTimestamp());
|
||||||
|
cc->Outputs()
|
||||||
|
.Get(cc->Outputs().GetId("MATRICES", 1))
|
||||||
|
.Add(asset_matrices_1.release(), cc->InputTimestamp());
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Using a specified rotation value in radians, generate a rotation matrix for
|
||||||
|
// use with base rotation submatrix
|
||||||
|
const Matrix3f MatricesManagerCalculator::GenerateUserRotationMatrix(
|
||||||
|
const float rotation_radians) const {
|
||||||
|
Eigen::Matrix3f user_rotation_submatrix;
|
||||||
|
user_rotation_submatrix =
|
||||||
|
// The rotation in radians must be inverted to rotate the object
|
||||||
|
// with the direction of finger movement from the user (system dependent)
|
||||||
|
Eigen::AngleAxisf(-rotation_radians, Eigen::Vector3f::UnitY()) *
|
||||||
|
Eigen::AngleAxisf(0.0f, Eigen::Vector3f::UnitZ()) *
|
||||||
|
// Model orientations all assume z-axis is up, but we need y-axis upwards,
|
||||||
|
// therefore, a +(M_PI * 0.5f) transformation must be applied
|
||||||
|
// TODO: Bring default rotations, translations, and scalings into
|
||||||
|
// independent sticker configuration
|
||||||
|
Eigen::AngleAxisf(M_PI * 0.5f, Eigen::Vector3f::UnitX());
|
||||||
|
// Matrix must be transposed due to the method of submatrix generation in
|
||||||
|
// Eigen
|
||||||
|
return user_rotation_submatrix.transpose();
|
||||||
|
}

// TODO: Investigate possible differences in warping of tracking speed across
// screen.
// Using the sticker anchor data, a translation vector can be generated in
// OpenGL coordinate space.
const Vector3f MatricesManagerCalculator::GenerateAnchorVector(
    const Anchor& tracked_anchor) const {
  // Using an initial z-value in OpenGL space, generate a new base z-axis value
  // to mimic scaling by distance.
  const float z = kInitialZ * tracked_anchor.z;

  // Using triangle geometry, the minimum for a y-coordinate that will appear
  // in the view field for the given z value above can be found.
  const float y_half_range = z * (tan(vertical_fov_radians_ * 0.5f));

  // The aspect ratio of the device and y_minimum calculated above can be used
  // to find the minimum value for x that will appear in the view field of the
  // device screen.
  const float x_half_range = y_half_range * aspect_ratio_;

  // Given the minimum bounds of the screen in OpenGL space, the tracked anchor
  // coordinates can be converted to OpenGL coordinate space.
  //
  // (i.e: X and Y will be converted from [0.0-1.0] space to [x_minimum,
  // -x_minimum] space and [y_minimum, -y_minimum] space respectively)
  const float x = (-2.0f * tracked_anchor.x * x_half_range) + x_half_range;
  const float y = (-2.0f * tracked_anchor.y * y_half_range) + y_half_range;

  // A translation transformation vector can be generated via Eigen
  const Vector3f t_vector(x, y, z);
  return t_vector;
}
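// In other words, for an anchor (ax, ay, az) given in [0.0-1.0] screen space,
// the resulting OpenGL-space translation produced above is:
//   z = kInitialZ * az
//   y = (1 - 2 * ay) * z * tan(vertical_fov / 2)
//   x = (1 - 2 * ax) * z * tan(vertical_fov / 2) * aspect_ratio
// For example, an anchor at the screen center (0.5, 0.5) maps to x = y = 0,
// i.e. the object is placed straight ahead of the camera at depth z.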

// Generates a model matrix via Eigen with appropriate transformations
const Matrix4fCM MatricesManagerCalculator::GenerateEigenModelMatrix(
    const Vector3f& translation_vector,
    const Matrix3f& rotation_submatrix) const {
  // Define basic empty model matrix
  Matrix4fCM mvp_matrix;

  // Set the translation vector
  mvp_matrix.topRightCorner<3, 1>() = translation_vector;

  // Set the rotation submatrix
  mvp_matrix.topLeftCorner<3, 3>() = rotation_submatrix;

  // Set trailing 1.0 required by OpenGL to define coordinate space
  mvp_matrix(3, 3) = 1.0f;

  return mvp_matrix;
}
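// Layout of the returned matrix: the top-left 3x3 block carries rotation and
// scale, the top-right column carries translation, and entry (3, 3) is 1.
// Note that only those entries are written here; for a well-formed homogeneous
// transform the remaining bottom-row entries are expected to be zero.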
|
||||||
|
// This returns a scaling matrix to alter the projection matrix for
|
||||||
|
// the specified render id in order to ensure all objects render at a similar
|
||||||
|
// size in the view screen upon initial placement
|
||||||
|
DiagonalMatrix3f MatricesManagerCalculator::GetDefaultRenderScaleDiagonal(
|
||||||
|
const int render_id, const float user_scale_factor,
|
||||||
|
const float gif_aspect_ratio) const {
|
||||||
|
float scale_preset = 1.0f;
|
||||||
|
float x_scalar = 1.0f;
|
||||||
|
float y_scalar = 1.0f;
|
||||||
|
|
||||||
|
switch (render_id) {
|
||||||
|
case 0: { // GIF
|
||||||
|
// 160 is the scaling preset to make the GIF asset appear relatively
|
||||||
|
// similar in size to all other assets
|
||||||
|
scale_preset = 160.0f;
|
||||||
|
if (gif_aspect_ratio >= 1.0f) {
|
||||||
|
// GIF is wider horizontally (scale on x-axis)
|
||||||
|
x_scalar = gif_aspect_ratio;
|
||||||
|
} else {
|
||||||
|
// GIF is wider vertically (scale on y-axis)
|
||||||
|
y_scalar = 1.0f / gif_aspect_ratio;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1: { // 3D asset
|
||||||
|
// 5 is the scaling preset to make the 3D asset appear relatively
|
||||||
|
// similar in size to all other assets
|
||||||
|
scale_preset = 5.0f;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
LOG(INFO) << "Unsupported render_id: " << render_id
|
||||||
|
<< ", returning default render_scale";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DiagonalMatrix3f scaling(scale_preset * user_scale_factor * x_scalar,
|
||||||
|
scale_preset * user_scale_factor * y_scalar,
|
||||||
|
scale_preset * user_scale_factor);
|
||||||
|
return scaling;
|
||||||
|
}
|
||||||
|
} // namespace mediapipe
|
|
@ -0,0 +1,33 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

option java_package = "com.google.mediapipe.graphs.instantmotiontracking";
option java_outer_classname = "StickerBufferProto";

message Sticker {
  optional int32 id = 1;
  optional float x = 2;
  optional float y = 3;
  optional float rotation = 4;
  optional float scale = 5;
  optional int32 render_id = 6;
}

message StickerRoll {
  repeated Sticker sticker = 1;
}
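A sticker payload for the instant-motion-tracking graph is just a serialized StickerRoll. As a rough illustration (not code from this commit), the snippet below builds a single sticker and serializes it to the string that StickerManagerCalculator expects on its PROTO input; the field values are arbitrary example data.

#include <string>

#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"

// Builds a single-sticker StickerRoll and returns it in serialized form,
// ready to be sent as the "PROTO:sticker_proto_string" input packet.
std::string MakeExampleStickerPayload() {
  mediapipe::StickerRoll roll;
  mediapipe::Sticker* sticker = roll.add_sticker();
  sticker->set_id(1);
  sticker->set_x(0.5f);         // Normalized screen coordinates.
  sticker->set_y(0.5f);
  sticker->set_rotation(0.0f);  // Radians.
  sticker->set_scale(1.0f);
  sticker->set_render_id(1);    // 1 = 3D asset, 0 = GIF (see matrices manager).
  std::string bytes;
  roll.SerializeToString(&bytes);
  return bytes;
}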
@@ -0,0 +1,150 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"

namespace mediapipe {

constexpr char kProtoDataString[] = "PROTO";
constexpr char kAnchorsTag[] = "ANCHORS";
constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
constexpr char kUserScalingsTag[] = "USER_SCALINGS";
constexpr char kRenderDescriptorsTag[] = "RENDER_DATA";

// This calculator takes in the sticker protobuffer data and parses each
// individual sticker object into anchors, user rotations and scalings, in
// addition to basic render data represented in integer form.
//
// Input:
//  PROTO - String of sticker data in appropriate protobuf format [REQUIRED]
// Output:
//  ANCHORS - Anchors with initial normalized X,Y coordinates [REQUIRED]
//  USER_ROTATIONS - UserRotations with radians of rotation from user [REQUIRED]
//  USER_SCALINGS - UserScalings with increment of scaling from user [REQUIRED]
//  RENDER_DATA - Descriptors of which objects/animations to render for stickers
//  [REQUIRED]
//
// Example config:
// node {
//   calculator: "StickerManagerCalculator"
//   input_stream: "PROTO:sticker_proto_string"
//   output_stream: "ANCHORS:initial_anchor_data"
//   output_stream: "USER_ROTATIONS:user_rotation_data"
//   output_stream: "USER_SCALINGS:user_scaling_data"
//   output_stream: "RENDER_DATA:sticker_render_data"
// }

class StickerManagerCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    RET_CHECK(cc->Inputs().HasTag(kProtoDataString));
    RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
              cc->Outputs().HasTag(kUserRotationsTag) &&
              cc->Outputs().HasTag(kUserScalingsTag) &&
              cc->Outputs().HasTag(kRenderDescriptorsTag));

    cc->Inputs().Tag(kProtoDataString).Set<std::string>();
    cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
    cc->Outputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
    cc->Outputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
    cc->Outputs().Tag(kRenderDescriptorsTag).Set<std::vector<int>>();

    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override {
    cc->SetOffset(TimestampDiff(0));
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    std::string sticker_proto_string =
        cc->Inputs().Tag(kProtoDataString).Get<std::string>();

    std::vector<Anchor> initial_anchor_data;
    std::vector<UserRotation> user_rotation_data;
    std::vector<UserScaling> user_scaling_data;
    std::vector<int> render_data;

    ::mediapipe::StickerRoll sticker_roll;
    bool parse_success = sticker_roll.ParseFromString(sticker_proto_string);

    // Ensure parsing was a success
    RET_CHECK(parse_success) << "Error parsing sticker protobuf data";

    for (int i = 0; i < sticker_roll.sticker().size(); ++i) {
      // Declare empty structures for sticker data
      Anchor initial_anchor;
      UserRotation user_rotation;
      UserScaling user_scaling;
      // Get individual Sticker object as defined by Protobuffer
      ::mediapipe::Sticker sticker = sticker_roll.sticker(i);
      // Set individual data structure ids to associate with this sticker
      initial_anchor.sticker_id = sticker.id();
      user_rotation.sticker_id = sticker.id();
      user_scaling.sticker_id = sticker.id();
      initial_anchor.x = sticker.x();
      initial_anchor.y = sticker.y();
      initial_anchor.z = 1.0f;  // default to 1.0 in normalized 3d space
      user_rotation.rotation_radians = sticker.rotation();
      user_scaling.scale_factor = sticker.scale();
      const int render_id = sticker.render_id();
      // Set all vector data with sticker attributes
      initial_anchor_data.emplace_back(initial_anchor);
      user_rotation_data.emplace_back(user_rotation);
      user_scaling_data.emplace_back(user_scaling);
      render_data.emplace_back(render_id);
    }

    if (cc->Outputs().HasTag(kAnchorsTag)) {
      cc->Outputs()
          .Tag(kAnchorsTag)
          .AddPacket(MakePacket<std::vector<Anchor>>(initial_anchor_data)
                         .At(cc->InputTimestamp()));
    }
    if (cc->Outputs().HasTag(kUserRotationsTag)) {
      cc->Outputs()
          .Tag(kUserRotationsTag)
          .AddPacket(MakePacket<std::vector<UserRotation>>(user_rotation_data)
                         .At(cc->InputTimestamp()));
    }
    if (cc->Outputs().HasTag(kUserScalingsTag)) {
      cc->Outputs()
          .Tag(kUserScalingsTag)
          .AddPacket(MakePacket<std::vector<UserScaling>>(user_scaling_data)
                         .At(cc->InputTimestamp()));
    }
    if (cc->Outputs().HasTag(kRenderDescriptorsTag)) {
      cc->Outputs()
          .Tag(kRenderDescriptorsTag)
          .AddPacket(MakePacket<std::vector<int>>(render_data)
                         .At(cc->InputTimestamp()));
    }

    return absl::OkStatus();
  }

  absl::Status Close(CalculatorContext* cc) override {
    return absl::OkStatus();
  }
};

REGISTER_CALCULATOR(StickerManagerCalculator);
}  // namespace mediapipe
@@ -0,0 +1,210 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"

namespace mediapipe {

constexpr char kSentinelTag[] = "SENTINEL";
constexpr char kAnchorsTag[] = "ANCHORS";
constexpr char kBoxesInputTag[] = "BOXES";
constexpr char kBoxesOutputTag[] = "START_POS";
constexpr char kCancelTag[] = "CANCEL_ID";
// TODO: Find optimal Height/Width (0.1-0.3)
constexpr float kBoxEdgeSize =
    0.2f;  // Used to establish tracking box dimensions
constexpr float kUsToMs =
    1000.0f;  // Used to convert from microseconds to millis

// This calculator manages the regions being tracked for each individual sticker
// and adjusts the regions being tracked if a change is detected in a sticker's
// initial anchor placement. Regions being tracked that have no associated
// sticker will be automatically removed upon the next iteration of the graph to
// optimize performance and remove all sticker artifacts
//
// Input:
//  SENTINEL - ID of sticker which has an anchor that must be reset (-1 when no
//  anchor must be reset) [REQUIRED]
//  ANCHORS - Initial anchor data (tracks changes and where to re/position)
//  [REQUIRED]
//  BOXES - Used in cycle, boxes being tracked meant to update positions
//  [OPTIONAL - provided by subgraph]
// Output:
//  START_POS - Positions of boxes being tracked (can be overwritten with ID)
//  [REQUIRED]
//  CANCEL_ID - Single integer ID of tracking box to remove from tracker
//  subgraph [OPTIONAL]
//  ANCHORS - Updated set of anchors with tracked and normalized X,Y,Z
//  [REQUIRED]
//
// Example config:
// node {
//   calculator: "TrackedAnchorManagerCalculator"
//   input_stream: "SENTINEL:sticker_sentinel"
//   input_stream: "ANCHORS:initial_anchor_data"
//   input_stream: "BOXES:boxes"
//   input_stream_info: {
//     tag_index: 'BOXES'
//     back_edge: true
//   }
//   output_stream: "START_POS:start_pos"
//   output_stream: "CANCEL_ID:cancel_object_id"
//   output_stream: "ANCHORS:tracked_scaled_anchor_data"
// }

class TrackedAnchorManagerCalculator : public CalculatorBase {
 private:
  // Previous graph iteration anchor data
  std::vector<Anchor> previous_anchor_data_;

 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
              cc->Inputs().HasTag(kSentinelTag));
    RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
              cc->Outputs().HasTag(kBoxesOutputTag));

    cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
    cc->Inputs().Tag(kSentinelTag).Set<int>();

    if (cc->Inputs().HasTag(kBoxesInputTag)) {
      cc->Inputs().Tag(kBoxesInputTag).Set<mediapipe::TimedBoxProtoList>();
    }

    cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
    cc->Outputs().Tag(kBoxesOutputTag).Set<mediapipe::TimedBoxProtoList>();

    if (cc->Outputs().HasTag(kCancelTag)) {
      cc->Outputs().Tag(kCancelTag).Set<int>();
    }

    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override { return absl::OkStatus(); }

  absl::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(TrackedAnchorManagerCalculator);

absl::Status TrackedAnchorManagerCalculator::Process(CalculatorContext* cc) {
  mediapipe::Timestamp timestamp = cc->InputTimestamp();
  const int sticker_sentinel = cc->Inputs().Tag(kSentinelTag).Get<int>();
  std::vector<Anchor> current_anchor_data =
      cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
  auto pos_boxes = absl::make_unique<mediapipe::TimedBoxProtoList>();
  std::vector<Anchor> tracked_scaled_anchor_data;

  // Delete any boxes being tracked without an associated anchor
  for (const mediapipe::TimedBoxProto& box :
       cc->Inputs()
           .Tag(kBoxesInputTag)
           .Get<mediapipe::TimedBoxProtoList>()
           .box()) {
    bool anchor_exists = false;
    for (Anchor anchor : current_anchor_data) {
      if (box.id() == anchor.sticker_id) {
        anchor_exists = true;
        break;
      }
    }
    if (!anchor_exists) {
      cc->Outputs()
          .Tag(kCancelTag)
          .AddPacket(MakePacket<int>(box.id()).At(timestamp++));
    }
  }

  // Perform tracking or updating for each anchor position
  for (const Anchor& anchor : current_anchor_data) {
    Anchor output_anchor = anchor;
    // Check if anchor position is being reset by user in this graph iteration
    if (sticker_sentinel == anchor.sticker_id) {
      // Delete associated tracking box
      // TODO: BoxTrackingSubgraph should accept vector to avoid breaking
      // timestamp rules
      cc->Outputs()
          .Tag(kCancelTag)
          .AddPacket(MakePacket<int>(anchor.sticker_id).At(timestamp++));
      // Add a tracking box
      mediapipe::TimedBoxProto* box = pos_boxes->add_box();
      box->set_left(anchor.x - kBoxEdgeSize * 0.5f);
      box->set_right(anchor.x + kBoxEdgeSize * 0.5f);
      box->set_top(anchor.y - kBoxEdgeSize * 0.5f);
      box->set_bottom(anchor.y + kBoxEdgeSize * 0.5f);
      box->set_id(anchor.sticker_id);
      box->set_time_msec((timestamp++).Microseconds() / kUsToMs);
      // Default value for normalized z (scale factor)
      output_anchor.z = 1.0f;
    } else {
      // Anchor position was not reset by user
      // Attempt to update anchor position from tracking subgraph
      // (TimedBoxProto)
      bool updated_from_tracker = false;
      const mediapipe::TimedBoxProtoList box_list =
          cc->Inputs().Tag(kBoxesInputTag).Get<mediapipe::TimedBoxProtoList>();
      for (const auto& box : box_list.box()) {
        if (box.id() == anchor.sticker_id) {
          // Get center x normalized coordinate [0.0-1.0]
          output_anchor.x = (box.left() + box.right()) * 0.5f;
          // Get center y normalized coordinate [0.0-1.0]
          output_anchor.y = (box.top() + box.bottom()) * 0.5f;
          // Get center z coordinate [z starts at normalized 1.0 and scales
          // inversely with box-width]
          // TODO: Look into issues with uniform scaling on x-axis and y-axis
          output_anchor.z = kBoxEdgeSize / (box.right() - box.left());
          updated_from_tracker = true;
          break;
        }
      }
      // If anchor position was not updated from tracker, create new tracking
      // box at last recorded anchor coordinates. This will allow all current
      // stickers to be tracked at approximately last location even if
      // re-acquisitioning in the BoxTrackingSubgraph encounters errors
      if (!updated_from_tracker) {
        for (const Anchor& prev_anchor : previous_anchor_data_) {
          if (anchor.sticker_id == prev_anchor.sticker_id) {
            mediapipe::TimedBoxProto* box = pos_boxes->add_box();
            box->set_left(prev_anchor.x - kBoxEdgeSize * 0.5f);
            box->set_right(prev_anchor.x + kBoxEdgeSize * 0.5f);
            box->set_top(prev_anchor.y - kBoxEdgeSize * 0.5f);
            box->set_bottom(prev_anchor.y + kBoxEdgeSize * 0.5f);
            box->set_id(prev_anchor.sticker_id);
            box->set_time_msec(cc->InputTimestamp().Microseconds() / kUsToMs);
            output_anchor = prev_anchor;
            // Default value for normalized z (scale factor)
            output_anchor.z = 1.0f;
            break;
          }
        }
      }
    }
    tracked_scaled_anchor_data.emplace_back(output_anchor);
  }
  // Set anchor data for next iteration
  previous_anchor_data_ = tracked_scaled_anchor_data;

  cc->Outputs()
      .Tag(kAnchorsTag)
      .AddPacket(MakePacket<std::vector<Anchor>>(tracked_scaled_anchor_data)
                     .At(cc->InputTimestamp()));
  cc->Outputs()
      .Tag(kBoxesOutputTag)
      .Add(pos_boxes.release(), cc->InputTimestamp());

  return absl::OkStatus();
}
}  // namespace mediapipe
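A quick worked example of the scale recovery in the loop above (the numbers are illustrative, not from this commit): every tracking box is created as a square of edge kBoxEdgeSize = 0.2 in normalized image coordinates, centered on the anchor. If the box tracker later reports that box with width box.right() - box.left() = 0.4, the anchor gets output_anchor.z = 0.2 / 0.4 = 0.5; if the box keeps its original 0.2 width, z stays at 1.0, the same default used whenever a box is created or reset. Per the Anchor comment in transformations.h below, downstream rendering then applies current_scale = z * initial_scale.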
@@ -0,0 +1,42 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_
#define MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_

namespace mediapipe {

// Radians by which to rotate the object (Provided by UI input)
struct UserRotation {
  float rotation_radians;
  int sticker_id;
};

// Scaling factor provided by the UI application end
struct UserScaling {
  float scale_factor;
  int sticker_id;
};

// The normalized anchor coordinates of a sticker
struct Anchor {
  float x;  // [0.0-1.0]
  float y;  // [0.0-1.0]
  float z;  // Centered around 1.0 [current_scale = z * initial_scale]
  int sticker_id;
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_
@@ -0,0 +1,80 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MediaPipe graph that performs region tracking and 3d object (AR sticker) rendering.

# Images in/out of graph with sticker data and IMU information from device
input_stream: "input_video"
input_stream: "sticker_sentinel"
input_stream: "sticker_proto_string"
input_stream: "imu_rotation_matrix"
input_stream: "gif_texture"
input_stream: "gif_aspect_ratio"
output_stream: "output_video"

# Converts sticker data into user data (rotations/scalings), render data, and
# initial anchors.
node {
  calculator: "StickerManagerCalculator"
  input_stream: "PROTO:sticker_proto_string"
  output_stream: "ANCHORS:initial_anchor_data"
  output_stream: "USER_ROTATIONS:user_rotation_data"
  output_stream: "USER_SCALINGS:user_scaling_data"
  output_stream: "RENDER_DATA:sticker_render_data"
}

# Uses box tracking in order to create 'anchors' for associated 3d stickers.
node {
  calculator: "RegionTrackingSubgraph"
  input_stream: "VIDEO:input_video"
  input_stream: "SENTINEL:sticker_sentinel"
  input_stream: "ANCHORS:initial_anchor_data"
  output_stream: "ANCHORS:tracked_anchor_data"
}

# Concatenates all transformations to generate model matrices for the OpenGL
# animation overlay calculator.
node {
  calculator: "MatricesManagerCalculator"
  input_stream: "ANCHORS:tracked_anchor_data"
  input_stream: "IMU_ROTATION:imu_rotation_matrix"
  input_stream: "USER_ROTATIONS:user_rotation_data"
  input_stream: "USER_SCALINGS:user_scaling_data"
  input_stream: "RENDER_DATA:sticker_render_data"
  input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
  output_stream: "MATRICES:0:gif_matrices"
  output_stream: "MATRICES:1:asset_3d_matrices"
  input_side_packet: "FOV:vertical_fov_radians"
  input_side_packet: "ASPECT_RATIO:aspect_ratio"
}

# Renders the final 3d stickers and overlays them on input image.
node {
  calculator: "GlAnimationOverlayCalculator"
  input_stream: "VIDEO:input_video"
  input_stream: "MODEL_MATRICES:gif_matrices"
  input_stream: "TEXTURE:gif_texture"
  input_side_packet: "ANIMATION_ASSET:gif_asset_name"
  output_stream: "asset_gif_rendered"
}

# Renders the final 3d stickers and overlays them on top of the input image.
node {
  calculator: "GlAnimationOverlayCalculator"
  input_stream: "VIDEO:asset_gif_rendered"
  input_stream: "MODEL_MATRICES:asset_3d_matrices"
  input_side_packet: "TEXTURE:texture_3d"
  input_side_packet: "ANIMATION_ASSET:asset_3d"
  output_stream: "output_video"
}
32
mediapipe/graphs/instant_motion_tracking/subgraphs/BUILD
Normal file
@@ -0,0 +1,32 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_simple_subgraph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_simple_subgraph(
    name = "region_tracking",
    graph = "region_tracking.pbtxt",
    register_as = "RegionTrackingSubgraph",
    deps = [
        "//mediapipe/graphs/instant_motion_tracking/calculators:tracked_anchor_manager_calculator",
        "//mediapipe/graphs/tracking/subgraphs:box_tracking_gpu",
    ],
)
@@ -0,0 +1,47 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MediaPipe graph that performs region tracking on initial anchor positions
# provided by the application

# Images in/out of graph with tracked and scaled normalized anchor data
type: "RegionTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "SENTINEL:sticker_sentinel"
input_stream: "ANCHORS:initial_anchor_data"
output_stream: "ANCHORS:tracked_scaled_anchor_data"

# Manages the anchors and tracking if user changes/adds/deletes anchors
node {
  calculator: "TrackedAnchorManagerCalculator"
  input_stream: "SENTINEL:sticker_sentinel"
  input_stream: "ANCHORS:initial_anchor_data"
  input_stream: "BOXES:boxes"
  input_stream_info: {
    tag_index: 'BOXES'
    back_edge: true
  }
  output_stream: "START_POS:start_pos"
  output_stream: "CANCEL_ID:cancel_object_id"
  output_stream: "ANCHORS:tracked_scaled_anchor_data"
}

# Subgraph performs anchor placement and tracking
node {
  calculator: "BoxTrackingSubgraphGpu"
  input_stream: "VIDEO:input_video"
  input_stream: "BOXES:start_pos"
  input_stream: "CANCEL_ID:cancel_object_id"
  output_stream: "BOXES:boxes"
}
86
mediapipe/graphs/iris_tracking/BUILD
Normal file
@@ -0,0 +1,86 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "iris_depth_cpu_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_file_properties_calculator",
        "//mediapipe/calculators/image:opencv_encoded_image_to_image_frame_calculator",
        "//mediapipe/calculators/image:opencv_image_encoder_calculator",
        "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
        "//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_cpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
        "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
    ],
)

cc_library(
    name = "iris_tracking_cpu_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
        "//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
        "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
    ],
)

cc_library(
    name = "iris_tracking_cpu_video_input_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
        "//mediapipe/calculators/video:opencv_video_encoder_calculator",
        "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
        "//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
        "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
    ],
)

cc_library(
    name = "iris_tracking_gpu_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
        "//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_gpu",
        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
        "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_gpu",
    ],
)

mediapipe_binary_graph(
    name = "iris_tracking_gpu_binary_graph",
    graph = "iris_tracking_gpu.pbtxt",
    output_name = "iris_tracking_gpu.binarypb",
    deps = [":iris_tracking_gpu_deps"],
)
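These cc_library targets only bundle the calculator dependencies each iris-tracking graph needs; the demo binaries that link them live elsewhere in MediaPipe. As a rough sketch (assuming a full MediaPipe checkout rather than this repository), a CPU-only desktop build of one of them could look like:

bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 //mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps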
107
mediapipe/graphs/iris_tracking/calculators/BUILD
Normal file
@@ -0,0 +1,107 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

licenses(["notice"])

proto_library(
    name = "iris_to_render_data_calculator_proto",
    srcs = ["iris_to_render_data_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/util:color_proto",
        "//mediapipe/util:render_data_proto",
    ],
)

mediapipe_cc_proto_library(
    name = "iris_to_render_data_calculator_cc_proto",
    srcs = ["iris_to_render_data_calculator.proto"],
    cc_deps = [
        "//mediapipe/framework:calculator_cc_proto",
        "//mediapipe/util:color_cc_proto",
        "//mediapipe/util:render_data_cc_proto",
    ],
    visibility = ["//visibility:public"],
    deps = [":iris_to_render_data_calculator_proto"],
)

cc_library(
    name = "iris_to_render_data_calculator",
    srcs = ["iris_to_render_data_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":iris_to_render_data_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/util:color_cc_proto",
        "//mediapipe/util:render_data_cc_proto",
        "@com_google_absl//absl/strings",
    ],
    alwayslink = 1,
)

proto_library(
    name = "iris_to_depth_calculator_proto",
    srcs = ["iris_to_depth_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_proto",
    ],
)

mediapipe_cc_proto_library(
    name = "iris_to_depth_calculator_cc_proto",
    srcs = ["iris_to_depth_calculator.proto"],
    cc_deps = [
        "//mediapipe/framework:calculator_cc_proto",
    ],
    visibility = ["//visibility:public"],
    deps = [":iris_to_depth_calculator_proto"],
)

cc_library(
    name = "iris_to_depth_calculator",
    srcs = ["iris_to_depth_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":iris_to_depth_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_file_properties_cc_proto",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "@com_google_absl//absl/strings",
    ],
    alwayslink = 1,
)

cc_library(
    name = "update_face_landmarks_calculator",
    srcs = ["update_face_landmarks_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_file_properties_cc_proto",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "@com_google_absl//absl/strings",
    ],
    alwayslink = 1,
)
@@ -0,0 +1,245 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <memory>

#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_file_properties.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/iris_tracking/calculators/iris_to_depth_calculator.pb.h"

namespace mediapipe {

namespace {

constexpr char kIrisTag[] = "IRIS";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr char kFocalLengthPixelTag[] = "FOCAL_LENGTH";
constexpr char kImageFilePropertiesTag[] = "IMAGE_FILE_PROPERTIES";
constexpr char kLeftIrisDepthTag[] = "LEFT_IRIS_DEPTH_MM";
constexpr char kRightIrisDepthTag[] = "RIGHT_IRIS_DEPTH_MM";
constexpr int kNumIrisLandmarksPerEye = 5;
constexpr float kDepthWeightUpdate = 0.1;
// Average fixed iris size across human beings.
constexpr float kIrisSizeInMM = 11.8;

inline float GetDepth(float x0, float y0, float x1, float y1) {
  return std::sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
}

inline float GetLandmarkDepth(const NormalizedLandmark& ld0,
                              const NormalizedLandmark& ld1,
                              const std::pair<int, int>& image_size) {
  return GetDepth(ld0.x() * image_size.first, ld0.y() * image_size.second,
                  ld1.x() * image_size.first, ld1.y() * image_size.second);
}

float CalculateIrisDiameter(const NormalizedLandmarkList& landmarks,
                            const std::pair<int, int>& image_size) {
  const float dist_vert = GetLandmarkDepth(landmarks.landmark(1),
                                           landmarks.landmark(2), image_size);
  const float dist_hori = GetLandmarkDepth(landmarks.landmark(3),
                                           landmarks.landmark(4), image_size);
  return (dist_hori + dist_vert) / 2.0f;
}

float CalculateDepth(const NormalizedLandmark& center, float focal_length,
                     float iris_size, float img_w, float img_h) {
  std::pair<float, float> origin{img_w / 2.f, img_h / 2.f};
  const auto y = GetDepth(origin.first, origin.second, center.x() * img_w,
                          center.y() * img_h);
  const auto x = std::sqrt(focal_length * focal_length + y * y);
  const auto depth = kIrisSizeInMM * x / iris_size;
  return depth;
}

}  // namespace

// Estimates depth from iris to camera given focal length and image size.
//
// Usage example:
// node {
//   calculator: "IrisToDepthCalculator"
//   # A NormalizedLandmarkList contains landmarks for both iris.
//   input_stream: "IRIS:iris_landmarks"
//   input_stream: "IMAGE_SIZE:image_size"
//   # Note: Only one of FOCAL_LENGTH or IMAGE_FILE_PROPERTIES is necessary
//   # to get focal length in pixels. Sending focal length in pixels to
//   # this calculator is optional.
//   input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
//   # OR
//   input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
//   output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
//   output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
// }
class IrisToDepthCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Tag(kIrisTag).Set<NormalizedLandmarkList>();
    cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();

    // At most one of kFocalLengthPixelTag and kImageFilePropertiesTag can be
    // present.
    RET_CHECK(!(cc->InputSidePackets().HasTag(kFocalLengthPixelTag) &&
                cc->InputSidePackets().HasTag(kImageFilePropertiesTag)));
    if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
      cc->InputSidePackets().Tag(kFocalLengthPixelTag).SetAny();
    }
    if (cc->InputSidePackets().HasTag(kImageFilePropertiesTag)) {
      cc->InputSidePackets()
          .Tag(kImageFilePropertiesTag)
          .Set<ImageFileProperties>();
    }
    if (cc->Outputs().HasTag(kLeftIrisDepthTag)) {
      cc->Outputs().Tag(kLeftIrisDepthTag).Set<float>();
    }
    if (cc->Outputs().HasTag(kRightIrisDepthTag)) {
      cc->Outputs().Tag(kRightIrisDepthTag).Set<float>();
    }
    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override;

  absl::Status Process(CalculatorContext* cc) override;

 private:
  float focal_length_pixels_ = -1.f;
  // TODO: Consolidate the logic when switching to input stream for
  // focal length.
  bool compute_depth_from_iris_ = false;
  float smoothed_left_depth_mm_ = -1.f;
  float smoothed_right_depth_mm_ = -1.f;

  void GetLeftIris(const NormalizedLandmarkList& lds,
                   NormalizedLandmarkList* iris);
  void GetRightIris(const NormalizedLandmarkList& lds,
                    NormalizedLandmarkList* iris);
  ::mediapipe::IrisToDepthCalculatorOptions options_;
};
REGISTER_CALCULATOR(IrisToDepthCalculator);

absl::Status IrisToDepthCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));
  if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
#if defined(__APPLE__)
    focal_length_pixels_ = *cc->InputSidePackets()
                                .Tag(kFocalLengthPixelTag)
                                .Get<std::unique_ptr<float>>();
#else
    focal_length_pixels_ =
        cc->InputSidePackets().Tag(kFocalLengthPixelTag).Get<float>();
#endif
    compute_depth_from_iris_ = true;
  } else if (cc->InputSidePackets().HasTag(kImageFilePropertiesTag)) {
    const auto& properties = cc->InputSidePackets()
                                 .Tag(kImageFilePropertiesTag)
                                 .Get<ImageFileProperties>();
    focal_length_pixels_ = properties.focal_length_pixels();
    compute_depth_from_iris_ = true;
  }

  options_ = cc->Options<::mediapipe::IrisToDepthCalculatorOptions>();
  return absl::OkStatus();
}

absl::Status IrisToDepthCalculator::Process(CalculatorContext* cc) {
  // Only process if there's input landmarks.
  if (cc->Inputs().Tag(kIrisTag).IsEmpty()) {
    return absl::OkStatus();
  }

  const auto& iris_landmarks =
      cc->Inputs().Tag(kIrisTag).Get<NormalizedLandmarkList>();
  RET_CHECK_EQ(iris_landmarks.landmark_size(), kNumIrisLandmarksPerEye * 2)
      << "Wrong number of iris landmarks";

  std::pair<int, int> image_size;
  RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
  image_size = cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();

  auto left_iris = absl::make_unique<NormalizedLandmarkList>();
  auto right_iris = absl::make_unique<NormalizedLandmarkList>();
  GetLeftIris(iris_landmarks, left_iris.get());
  GetRightIris(iris_landmarks, right_iris.get());

  const auto left_iris_size = CalculateIrisDiameter(*left_iris, image_size);
  const auto right_iris_size = CalculateIrisDiameter(*right_iris, image_size);

#if defined(__APPLE__)
  if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
    focal_length_pixels_ = *cc->InputSidePackets()
                                .Tag(kFocalLengthPixelTag)
                                .Get<std::unique_ptr<float>>();
  }
#endif

  if (compute_depth_from_iris_ && focal_length_pixels_ > 0) {
    const auto left_depth =
        CalculateDepth(left_iris->landmark(0), focal_length_pixels_,
                       left_iris_size, image_size.first, image_size.second);
    const auto right_depth =
        CalculateDepth(right_iris->landmark(0), focal_length_pixels_,
                       right_iris_size, image_size.first, image_size.second);
    smoothed_left_depth_mm_ =
        smoothed_left_depth_mm_ < 0 || std::isinf(smoothed_left_depth_mm_)
            ? left_depth
            : smoothed_left_depth_mm_ * (1 - kDepthWeightUpdate) +
                  left_depth * kDepthWeightUpdate;
    smoothed_right_depth_mm_ =
        smoothed_right_depth_mm_ < 0 || std::isinf(smoothed_right_depth_mm_)
            ? right_depth
            : smoothed_right_depth_mm_ * (1 - kDepthWeightUpdate) +
                  right_depth * kDepthWeightUpdate;

    if (cc->Outputs().HasTag(kLeftIrisDepthTag)) {
      cc->Outputs()
          .Tag(kLeftIrisDepthTag)
          .AddPacket(MakePacket<float>(smoothed_left_depth_mm_)
                         .At(cc->InputTimestamp()));
    }
    if (cc->Outputs().HasTag(kRightIrisDepthTag)) {
      cc->Outputs()
          .Tag(kRightIrisDepthTag)
          .AddPacket(MakePacket<float>(smoothed_right_depth_mm_)
                         .At(cc->InputTimestamp()));
    }
  }
  return absl::OkStatus();
}

void IrisToDepthCalculator::GetLeftIris(const NormalizedLandmarkList& lds,
                                        NormalizedLandmarkList* iris) {
  // Center, top, bottom, left, right
  *iris->add_landmark() = lds.landmark(options_.left_iris_center_index());
  *iris->add_landmark() = lds.landmark(options_.left_iris_top_index());
  *iris->add_landmark() = lds.landmark(options_.left_iris_bottom_index());
  *iris->add_landmark() = lds.landmark(options_.left_iris_left_index());
  *iris->add_landmark() = lds.landmark(options_.left_iris_right_index());
}

void IrisToDepthCalculator::GetRightIris(const NormalizedLandmarkList& lds,
                                         NormalizedLandmarkList* iris) {
  // Center, top, bottom, left, right
  *iris->add_landmark() = lds.landmark(options_.right_iris_center_index());
  *iris->add_landmark() = lds.landmark(options_.right_iris_top_index());
  *iris->add_landmark() = lds.landmark(options_.right_iris_bottom_index());
  *iris->add_landmark() = lds.landmark(options_.right_iris_left_index());
  *iris->add_landmark() = lds.landmark(options_.right_iris_right_index());
}
}  // namespace mediapipe
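Spelling out the geometry behind CalculateDepth above (a reading of the code, not text from this commit): with focal length f in pixels, a projected iris diameter of d pixels, and an iris center y pixels from the image center, the estimate is

  depth_mm = kIrisSizeInMM * sqrt(f^2 + y^2) / d = 11.8 * sqrt(f^2 + y^2) / d

For example, with an assumed f = 1400 px and an iris of d = 40 px centered in the image (y = 0), depth_mm ≈ 11.8 * 1400 / 40 ≈ 413 mm. Each new estimate is then blended into the running value with an exponential moving average controlled by kDepthWeightUpdate = 0.1, i.e. smoothed = 0.9 * previous + 0.1 * current.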
@@ -0,0 +1,39 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message IrisToDepthCalculatorOptions {
  extend CalculatorOptions {
    optional IrisToDepthCalculatorOptions ext = 303429002;
  }

  // Indices of the corresponding left iris landmarks in the input stream.
  optional int32 left_iris_center_index = 1 [default = 0];
  optional int32 left_iris_top_index = 2 [default = 2];
  optional int32 left_iris_bottom_index = 3 [default = 4];
  optional int32 left_iris_left_index = 4 [default = 3];
  optional int32 left_iris_right_index = 5 [default = 1];

  // Indices of the corresponding right iris landmarks in the input stream.
  optional int32 right_iris_center_index = 6 [default = 5];
  optional int32 right_iris_top_index = 7 [default = 7];
  optional int32 right_iris_bottom_index = 8 [default = 9];
  optional int32 right_iris_left_index = 9 [default = 6];
  optional int32 right_iris_right_index = 10 [default = 8];
}
@ -0,0 +1,318 @@
|
||||||
|
// Copyright 2019 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "absl/strings/str_cat.h"
|
||||||
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/graphs/iris_tracking/calculators/iris_to_render_data_calculator.pb.h"
|
||||||
|
#include "mediapipe/util/color.pb.h"
|
||||||
|
#include "mediapipe/util/render_data.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr char kIrisTag[] = "IRIS";
|
||||||
|
constexpr char kRenderDataTag[] = "RENDER_DATA";
|
||||||
|
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||||
|
constexpr char kLeftIrisDepthTag[] = "LEFT_IRIS_DEPTH_MM";
|
||||||
|
constexpr char kRightIrisDepthTag[] = "RIGHT_IRIS_DEPTH_MM";
|
||||||
|
constexpr char kOvalLabel[] = "OVAL";
|
||||||
|
constexpr float kFontHeightScale = 1.5f;
|
||||||
|
constexpr int kNumIrisLandmarksPerEye = 5;
|
||||||
|
// TODO: Source.
|
||||||
|
constexpr float kIrisSizeInMM = 11.8;
|
||||||
|
|
||||||
|
inline void SetColor(RenderAnnotation* annotation, const Color& color) {
|
||||||
|
annotation->mutable_color()->set_r(color.r());
|
||||||
|
annotation->mutable_color()->set_g(color.g());
|
||||||
|
annotation->mutable_color()->set_b(color.b());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float GetDepth(float x0, float y0, float x1, float y1) {
|
||||||
|
return std::sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float GetLandmarkDepth(const NormalizedLandmark& ld0,
|
||||||
|
const NormalizedLandmark& ld1,
|
||||||
|
const std::pair<int, int>& image_size) {
|
||||||
|
return GetDepth(ld0.x() * image_size.first, ld0.y() * image_size.second,
|
||||||
|
ld1.x() * image_size.first, ld1.y() * image_size.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
float CalculateIrisDiameter(const NormalizedLandmarkList& landmarks,
|
||||||
|
const std::pair<int, int>& image_size) {
|
||||||
|
const float dist_vert = GetLandmarkDepth(landmarks.landmark(1),
|
||||||
|
landmarks.landmark(2), image_size);
|
||||||
|
const float dist_hori = GetLandmarkDepth(landmarks.landmark(3),
|
||||||
|
landmarks.landmark(4), image_size);
|
||||||
|
return (dist_hori + dist_vert) / 2.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
float CalculateDepth(const NormalizedLandmark& center, float focal_length,
|
||||||
|
float iris_size, float img_w, float img_h) {
|
||||||
|
std::pair<float, float> origin{img_w / 2.f, img_h / 2.f};
|
||||||
|
const auto y = GetDepth(origin.first, origin.second, center.x() * img_w,
|
||||||
|
center.y() * img_h);
|
||||||
|
const auto x = std::sqrt(focal_length * focal_length + y * y);
|
||||||
|
const auto depth = kIrisSizeInMM * x / iris_size;
|
||||||
|
return depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Converts iris landmarks to render data and estimates depth from the camera if
|
||||||
|
// focal length and image size. The depth will be rendered as part of the render
|
||||||
|
// data on the frame.
|
||||||
|
//
|
||||||
|
// Usage example:
|
||||||
|
// node {
|
||||||
|
// calculator: "IrisToRenderDataCalculator"
|
||||||
|
// input_stream: "IRIS:iris_landmarks"
|
||||||
|
// input_stream: "IMAGE_SIZE:image_size"
|
||||||
|
// # Note: Only one of FOCAL_LENGTH or IMAGE_FILE_PROPERTIES is necessary
|
||||||
|
// # to get focal length in pixels. Sending focal length in pixels to
|
||||||
|
// # this calculator is optional.
|
||||||
|
// input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||||
|
// # OR
|
||||||
|
// input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
|
||||||
|
// output_stream: "RENDER_DATA:iris_render_data"
|
||||||
|
// output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
// output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
// node_options: {
|
||||||
|
// [type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||||
|
// color { r: 255 g: 255 b: 255 }
|
||||||
|
// thickness: 2.0
|
||||||
|
// font_height_px: 50
|
||||||
|
// horizontal_offset_px: 200
|
||||||
|
// vertical_offset_px: 200
|
||||||
|
// location: TOP_LEFT
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
class IrisToRenderDataCalculator : public CalculatorBase {
|
||||||
|
public:
|
||||||
|
static absl::Status GetContract(CalculatorContract* cc) {
|
||||||
|
cc->Inputs().Tag(kIrisTag).Set<NormalizedLandmarkList>();
|
||||||
|
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
|
||||||
|
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
|
||||||
|
|
||||||
|
if (cc->Inputs().HasTag(kLeftIrisDepthTag)) {
|
||||||
|
cc->Inputs().Tag(kLeftIrisDepthTag).Set<float>();
|
||||||
|
}
|
||||||
|
if (cc->Inputs().HasTag(kRightIrisDepthTag)) {
|
||||||
|
cc->Inputs().Tag(kRightIrisDepthTag).Set<float>();
|
||||||
|
}
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status Open(CalculatorContext* cc) override;
|
||||||
|
|
||||||
|
absl::Status Process(CalculatorContext* cc) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void RenderIris(const NormalizedLandmarkList& iris_landmarks,
|
||||||
|
const IrisToRenderDataCalculatorOptions& options,
|
||||||
|
const std::pair<int, int>& image_size, float iris_size,
|
||||||
|
RenderData* render_data);
|
||||||
|
void GetLeftIris(const NormalizedLandmarkList& lds,
|
||||||
|
NormalizedLandmarkList* iris);
|
||||||
|
void GetRightIris(const NormalizedLandmarkList& lds,
|
||||||
|
NormalizedLandmarkList* iris);
|
||||||
|
|
||||||
|
void AddTextRenderData(const IrisToRenderDataCalculatorOptions& options,
|
||||||
|
const std::pair<int, int>& image_size,
|
||||||
|
const std::vector<std::string>& lines,
|
||||||
|
RenderData* render_data);
|
||||||
|
|
||||||
|
static RenderAnnotation* AddOvalRenderData(
|
||||||
|
const IrisToRenderDataCalculatorOptions& options,
|
||||||
|
RenderData* render_data);
|
||||||
|
static RenderAnnotation* AddPointRenderData(
|
||||||
|
const IrisToRenderDataCalculatorOptions& options,
|
||||||
|
RenderData* render_data);
|
||||||
|
};
|
||||||
|
REGISTER_CALCULATOR(IrisToRenderDataCalculator);
|
||||||
|
|
||||||
|
absl::Status IrisToRenderDataCalculator::Open(CalculatorContext* cc) {
|
||||||
|
cc->SetOffset(TimestampDiff(0));
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status IrisToRenderDataCalculator::Process(CalculatorContext* cc) {
|
||||||
|
// Only process if there's input landmarks.
|
||||||
|
if (cc->Inputs().Tag(kIrisTag).IsEmpty()) {
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
const auto& options =
|
||||||
|
cc->Options<::mediapipe::IrisToRenderDataCalculatorOptions>();
|
||||||
|
|
||||||
|
const auto& iris_landmarks =
|
||||||
|
cc->Inputs().Tag(kIrisTag).Get<NormalizedLandmarkList>();
|
||||||
|
RET_CHECK_EQ(iris_landmarks.landmark_size(), kNumIrisLandmarksPerEye * 2)
|
||||||
|
<< "Wrong number of iris landmarks";
|
||||||
|
|
||||||
|
std::pair<int, int> image_size;
|
||||||
|
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
|
||||||
|
image_size = cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
|
||||||
|
|
||||||
|
auto render_data = absl::make_unique<RenderData>();
|
||||||
|
auto left_iris = absl::make_unique<NormalizedLandmarkList>();
|
||||||
|
auto right_iris = absl::make_unique<NormalizedLandmarkList>();
|
||||||
|
GetLeftIris(iris_landmarks, left_iris.get());
|
||||||
|
GetRightIris(iris_landmarks, right_iris.get());
|
||||||
|
|
||||||
|
const auto left_iris_size = CalculateIrisDiameter(*left_iris, image_size);
|
||||||
|
const auto right_iris_size = CalculateIrisDiameter(*right_iris, image_size);
|
||||||
|
RenderIris(*left_iris, options, image_size, left_iris_size,
|
||||||
|
render_data.get());
|
||||||
|
RenderIris(*right_iris, options, image_size, right_iris_size,
|
||||||
|
render_data.get());
|
||||||
|
|
||||||
|
std::vector<std::string> lines;
|
||||||
|
std::string line;
|
||||||
|
if (cc->Inputs().HasTag(kLeftIrisDepthTag) &&
|
||||||
|
!cc->Inputs().Tag(kLeftIrisDepthTag).IsEmpty()) {
|
||||||
|
const float left_iris_depth =
|
||||||
|
cc->Inputs().Tag(kLeftIrisDepthTag).Get<float>();
|
||||||
|
if (!std::isinf(left_iris_depth)) {
|
||||||
|
line = "Left : ";
|
||||||
|
absl::StrAppend(&line, ":", std::round(left_iris_depth / 10), " cm");
|
||||||
|
lines.emplace_back(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cc->Inputs().HasTag(kRightIrisDepthTag) &&
|
||||||
|
!cc->Inputs().Tag(kRightIrisDepthTag).IsEmpty()) {
|
||||||
|
const float right_iris_depth =
|
||||||
|
cc->Inputs().Tag(kRightIrisDepthTag).Get<float>();
|
||||||
|
if (!std::isinf(right_iris_depth)) {
|
||||||
|
line = "Right : ";
|
||||||
|
absl::StrAppend(&line, ":", std::round(right_iris_depth / 10), " cm");
|
||||||
|
lines.emplace_back(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AddTextRenderData(options, image_size, lines, render_data.get());
|
||||||
|
|
||||||
|
cc->Outputs()
|
||||||
|
.Tag(kRenderDataTag)
|
||||||
|
.Add(render_data.release(), cc->InputTimestamp());
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
void IrisToRenderDataCalculator::AddTextRenderData(
    const IrisToRenderDataCalculatorOptions& options,
    const std::pair<int, int>& image_size,
    const std::vector<std::string>& lines, RenderData* render_data) {
  int label_baseline_px = options.vertical_offset_px();
  float label_height_px =
      std::ceil(options.font_height_px() * kFontHeightScale);
  if (options.location() == IrisToRenderDataCalculatorOptions::TOP_LEFT) {
    label_baseline_px += label_height_px;
  } else if (options.location() ==
             IrisToRenderDataCalculatorOptions::BOTTOM_LEFT) {
    label_baseline_px += image_size.second - label_height_px * lines.size();
  }
  const auto label_left_px = options.horizontal_offset_px();
  for (int i = 0; i < lines.size(); ++i) {
    auto* label_annotation = render_data->add_render_annotations();
    label_annotation->set_thickness(5);

    label_annotation->mutable_color()->set_r(255);
    label_annotation->mutable_color()->set_g(0);
    label_annotation->mutable_color()->set_b(0);
    auto* text = label_annotation->mutable_text();
    text->set_display_text(lines[i]);
    text->set_font_height(options.font_height_px());
    text->set_left(label_left_px);
    text->set_baseline(label_baseline_px + i * label_height_px);
    text->set_font_face(options.font_face());
  }
}

void IrisToRenderDataCalculator::RenderIris(
    const NormalizedLandmarkList& iris_landmarks,
    const IrisToRenderDataCalculatorOptions& options,
    const std::pair<int, int>& image_size, float iris_size,
    RenderData* render_data) {
  auto* oval_data_render = AddOvalRenderData(options, render_data);
  auto* oval_data = oval_data_render->mutable_oval();
  const float iris_radius = iris_size / 2.f;
  const auto& iris_center = iris_landmarks.landmark(0);

  oval_data->mutable_rectangle()->set_top(iris_center.y() -
                                          iris_radius / image_size.second);
  oval_data->mutable_rectangle()->set_bottom(iris_center.y() +
                                             iris_radius / image_size.second);
  oval_data->mutable_rectangle()->set_left(iris_center.x() -
                                           iris_radius / image_size.first);
  oval_data->mutable_rectangle()->set_right(iris_center.x() +
                                            iris_radius / image_size.first);
  oval_data->mutable_rectangle()->set_normalized(true);

  for (int i = 0; i < iris_landmarks.landmark_size(); ++i) {
    const NormalizedLandmark& landmark = iris_landmarks.landmark(i);
    auto* landmark_data_render = AddPointRenderData(options, render_data);
    auto* landmark_data = landmark_data_render->mutable_point();
    landmark_data->set_normalized(true);
    landmark_data->set_x(landmark.x());
    landmark_data->set_y(landmark.y());
  }
}

void IrisToRenderDataCalculator::GetLeftIris(const NormalizedLandmarkList& lds,
                                             NormalizedLandmarkList* iris) {
  // Center, top, bottom, left, right
  *iris->add_landmark() = lds.landmark(0);
  *iris->add_landmark() = lds.landmark(2);
  *iris->add_landmark() = lds.landmark(4);
  *iris->add_landmark() = lds.landmark(3);
  *iris->add_landmark() = lds.landmark(1);
}

void IrisToRenderDataCalculator::GetRightIris(const NormalizedLandmarkList& lds,
                                              NormalizedLandmarkList* iris) {
  // Center, top, bottom, left, right
  *iris->add_landmark() = lds.landmark(5);
  *iris->add_landmark() = lds.landmark(7);
  *iris->add_landmark() = lds.landmark(9);
  *iris->add_landmark() = lds.landmark(6);
  *iris->add_landmark() = lds.landmark(8);
}

RenderAnnotation* IrisToRenderDataCalculator::AddOvalRenderData(
    const IrisToRenderDataCalculatorOptions& options, RenderData* render_data) {
  auto* oval_data_annotation = render_data->add_render_annotations();
  oval_data_annotation->set_scene_tag(kOvalLabel);

  SetColor(oval_data_annotation, options.oval_color());
  oval_data_annotation->set_thickness(options.oval_thickness());
  return oval_data_annotation;
}

RenderAnnotation* IrisToRenderDataCalculator::AddPointRenderData(
    const IrisToRenderDataCalculatorOptions& options, RenderData* render_data) {
  auto* landmark_data_annotation = render_data->add_render_annotations();
  SetColor(landmark_data_annotation, options.landmark_color());
  landmark_data_annotation->set_thickness(options.landmark_thickness());

  return landmark_data_annotation;
}

} // namespace mediapipe
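
The oval that RenderIris draws is simply the iris center offset by the iris radius, normalized by image width and height. A minimal Rust sketch of the same arithmetic (the function name and tuple return are illustrative, not part of the bindings):

```rust
/// Normalized bounding rectangle of an iris oval, mirroring RenderIris above.
/// `center_x`/`center_y` are normalized landmark coordinates, `iris_size` is
/// the iris diameter in pixels, and `(width, height)` is the image size.
fn iris_oval_rect(
    center_x: f32,
    center_y: f32,
    iris_size: f32,
    width: f32,
    height: f32,
) -> (f32, f32, f32, f32) {
    let r = iris_size / 2.0;
    // (left, top, right, bottom), all in normalized [0, 1] coordinates.
    (
        center_x - r / width,
        center_y - r / height,
        center_x + r / width,
        center_y + r / height,
    )
}
```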
@ -0,0 +1,62 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";

message IrisToRenderDataCalculatorOptions {
  extend CalculatorOptions {
    optional IrisToRenderDataCalculatorOptions ext = 289530040;
  }

  // Color of the oval.
  optional Color oval_color = 1;
  // Color of the landmarks.
  optional Color landmark_color = 9;

  // Thickness of the drawing of landmarks and iris oval.
  optional double oval_thickness = 2 [default = 1.0];
  optional double landmark_thickness = 10 [default = 1.0];

  // The font height in absolute pixels.
  optional int32 font_height_px = 3 [default = 50];

  // The offset of the starting text in horizontal direction in absolute pixels.
  optional int32 horizontal_offset_px = 7 [default = 0];
  // The offset of the starting text in vertical direction in absolute pixels.
  optional int32 vertical_offset_px = 8 [default = 0];

  // Specifies the font for the text. Font must be one of the following from
  // OpenCV:
  // cv::FONT_HERSHEY_SIMPLEX (0)
  // cv::FONT_HERSHEY_PLAIN (1)
  // cv::FONT_HERSHEY_DUPLEX (2)
  // cv::FONT_HERSHEY_COMPLEX (3)
  // cv::FONT_HERSHEY_TRIPLEX (4)
  // cv::FONT_HERSHEY_COMPLEX_SMALL (5)
  // cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6)
  // cv::FONT_HERSHEY_SCRIPT_COMPLEX (7)
  optional int32 font_face = 5 [default = 0];

  // Label location.
  enum Location {
    TOP_LEFT = 0;
    BOTTOM_LEFT = 1;
  }
  optional Location location = 6 [default = TOP_LEFT];
}
|
|
@ -0,0 +1,268 @@
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "absl/strings/str_cat.h"
|
||||||
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr char kFaceLandmarksTag[] = "FACE_LANDMARKS";
|
||||||
|
constexpr char kNewEyeLandmarksTag[] = "NEW_EYE_LANDMARKS";
|
||||||
|
constexpr char kUpdatedFaceLandmarksTag[] = "UPDATED_FACE_LANDMARKS";
|
||||||
|
|
||||||
|
constexpr int kNumFaceLandmarks = 468;
|
||||||
|
// 71 landmarks for the left eye and 71 landmarks for the right eye.
|
||||||
|
constexpr int kNumEyeLandmarks = 142;
|
||||||
|
|
||||||
|
constexpr int kEyeLandmarkIndicesInFaceLandmarks[] = {
|
||||||
|
// Left eye
|
||||||
|
// eye lower contour
|
||||||
|
33,
|
||||||
|
7,
|
||||||
|
163,
|
||||||
|
144,
|
||||||
|
145,
|
||||||
|
153,
|
||||||
|
154,
|
||||||
|
155,
|
||||||
|
133,
|
||||||
|
// eye upper contour (excluding corners)
|
||||||
|
246,
|
||||||
|
161,
|
||||||
|
160,
|
||||||
|
159,
|
||||||
|
158,
|
||||||
|
157,
|
||||||
|
173,
|
||||||
|
// halo x2 lower contour
|
||||||
|
130,
|
||||||
|
25,
|
||||||
|
110,
|
||||||
|
24,
|
||||||
|
23,
|
||||||
|
22,
|
||||||
|
26,
|
||||||
|
112,
|
||||||
|
243,
|
||||||
|
// halo x2 upper contour (excluding corners)
|
||||||
|
247,
|
||||||
|
30,
|
||||||
|
29,
|
||||||
|
27,
|
||||||
|
28,
|
||||||
|
56,
|
||||||
|
190,
|
||||||
|
// halo x3 lower contour
|
||||||
|
226,
|
||||||
|
31,
|
||||||
|
228,
|
||||||
|
229,
|
||||||
|
230,
|
||||||
|
231,
|
||||||
|
232,
|
||||||
|
233,
|
||||||
|
244,
|
||||||
|
// halo x3 upper contour (excluding corners)
|
||||||
|
113,
|
||||||
|
225,
|
||||||
|
224,
|
||||||
|
223,
|
||||||
|
222,
|
||||||
|
221,
|
||||||
|
189,
|
||||||
|
// halo x4 upper contour (no lower because of mesh structure)
|
||||||
|
// or eyebrow inner contour
|
||||||
|
35,
|
||||||
|
124,
|
||||||
|
46,
|
||||||
|
53,
|
||||||
|
52,
|
||||||
|
65,
|
||||||
|
// halo x5 lower contour
|
||||||
|
143,
|
||||||
|
111,
|
||||||
|
117,
|
||||||
|
118,
|
||||||
|
119,
|
||||||
|
120,
|
||||||
|
121,
|
||||||
|
128,
|
||||||
|
245,
|
||||||
|
// halo x5 upper contour (excluding corners)
|
||||||
|
// or eyebrow outer contour
|
||||||
|
156,
|
||||||
|
70,
|
||||||
|
63,
|
||||||
|
105,
|
||||||
|
66,
|
||||||
|
107,
|
||||||
|
55,
|
||||||
|
193,
|
||||||
|
|
||||||
|
// Right eye
|
||||||
|
// eye lower contour
|
||||||
|
263,
|
||||||
|
249,
|
||||||
|
390,
|
||||||
|
373,
|
||||||
|
374,
|
||||||
|
380,
|
||||||
|
381,
|
||||||
|
382,
|
||||||
|
362,
|
||||||
|
// eye upper contour (excluding corners)
|
||||||
|
466,
|
||||||
|
388,
|
||||||
|
387,
|
||||||
|
386,
|
||||||
|
385,
|
||||||
|
384,
|
||||||
|
398,
|
||||||
|
// halo x2 lower contour
|
||||||
|
359,
|
||||||
|
255,
|
||||||
|
339,
|
||||||
|
254,
|
||||||
|
253,
|
||||||
|
252,
|
||||||
|
256,
|
||||||
|
341,
|
||||||
|
463,
|
||||||
|
// halo x2 upper contour (excluding corners)
|
||||||
|
467,
|
||||||
|
260,
|
||||||
|
259,
|
||||||
|
257,
|
||||||
|
258,
|
||||||
|
286,
|
||||||
|
414,
|
||||||
|
// halo x3 lower contour
|
||||||
|
446,
|
||||||
|
261,
|
||||||
|
448,
|
||||||
|
449,
|
||||||
|
450,
|
||||||
|
451,
|
||||||
|
452,
|
||||||
|
453,
|
||||||
|
464,
|
||||||
|
// halo x3 upper contour (excluding corners)
|
||||||
|
342,
|
||||||
|
445,
|
||||||
|
444,
|
||||||
|
443,
|
||||||
|
442,
|
||||||
|
441,
|
||||||
|
413,
|
||||||
|
// halo x4 upper contour (no lower because of mesh structure)
|
||||||
|
// or eyebrow inner contour
|
||||||
|
265,
|
||||||
|
353,
|
||||||
|
276,
|
||||||
|
283,
|
||||||
|
282,
|
||||||
|
295,
|
||||||
|
// halo x5 lower contour
|
||||||
|
372,
|
||||||
|
340,
|
||||||
|
346,
|
||||||
|
347,
|
||||||
|
348,
|
||||||
|
349,
|
||||||
|
350,
|
||||||
|
357,
|
||||||
|
465,
|
||||||
|
// halo x5 upper contour (excluding corners)
|
||||||
|
// or eyebrow outer contour
|
||||||
|
383,
|
||||||
|
300,
|
||||||
|
293,
|
||||||
|
334,
|
||||||
|
296,
|
||||||
|
336,
|
||||||
|
285,
|
||||||
|
417,
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Update face landmarks with new (e.g., refined) values. Currently only updates
|
||||||
|
// landmarks around the eyes.
|
||||||
|
//
|
||||||
|
// Usage example:
|
||||||
|
// node {
|
||||||
|
// calculator: "UpdateFaceLandmarksCalculator"
|
||||||
|
// input_stream: "NEW_EYE_LANDMARKS:new_eye_landmarks"
|
||||||
|
// input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
// output_stream: "UPDATED_FACE_LANDMARKS:refine_face_landmarks"
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
class UpdateFaceLandmarksCalculator : public CalculatorBase {
|
||||||
|
public:
|
||||||
|
static absl::Status GetContract(CalculatorContract* cc) {
|
||||||
|
cc->Inputs().Tag(kFaceLandmarksTag).Set<NormalizedLandmarkList>();
|
||||||
|
cc->Inputs().Tag(kNewEyeLandmarksTag).Set<NormalizedLandmarkList>();
|
||||||
|
|
||||||
|
cc->Outputs().Tag(kUpdatedFaceLandmarksTag).Set<NormalizedLandmarkList>();
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
absl::Status Open(CalculatorContext* cc) {
|
||||||
|
cc->SetOffset(TimestampDiff(0));
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status Process(CalculatorContext* cc) override;
|
||||||
|
};
|
||||||
|
REGISTER_CALCULATOR(UpdateFaceLandmarksCalculator);
|
||||||
|
|
||||||
|
absl::Status UpdateFaceLandmarksCalculator::Process(CalculatorContext* cc) {
|
||||||
|
if (cc->Inputs().Tag(kFaceLandmarksTag).IsEmpty() ||
|
||||||
|
cc->Inputs().Tag(kNewEyeLandmarksTag).IsEmpty()) {
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
const auto& face_landmarks =
|
||||||
|
cc->Inputs().Tag(kFaceLandmarksTag).Get<NormalizedLandmarkList>();
|
||||||
|
const auto& new_eye_landmarks =
|
||||||
|
cc->Inputs().Tag(kNewEyeLandmarksTag).Get<NormalizedLandmarkList>();
|
||||||
|
|
||||||
|
RET_CHECK_EQ(face_landmarks.landmark_size(), kNumFaceLandmarks)
|
||||||
|
<< "Wrong number of face landmarks";
|
||||||
|
RET_CHECK_EQ(new_eye_landmarks.landmark_size(), kNumEyeLandmarks)
|
||||||
|
<< "Wrong number of face landmarks";
|
||||||
|
|
||||||
|
auto refined_face_landmarks =
|
||||||
|
absl::make_unique<NormalizedLandmarkList>(face_landmarks);
|
||||||
|
for (int i = 0; i < kNumEyeLandmarks; ++i) {
|
||||||
|
const auto& refined_ld = new_eye_landmarks.landmark(i);
|
||||||
|
const int id = kEyeLandmarkIndicesInFaceLandmarks[i];
|
||||||
|
refined_face_landmarks->mutable_landmark(id)->set_x(refined_ld.x());
|
||||||
|
refined_face_landmarks->mutable_landmark(id)->set_y(refined_ld.y());
|
||||||
|
refined_face_landmarks->mutable_landmark(id)->set_z(refined_ld.z());
|
||||||
|
refined_face_landmarks->mutable_landmark(id)->set_visibility(
|
||||||
|
refined_ld.visibility());
|
||||||
|
}
|
||||||
|
cc->Outputs()
|
||||||
|
.Tag(kUpdatedFaceLandmarksTag)
|
||||||
|
.Add(refined_face_landmarks.release(), cc->InputTimestamp());
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mediapipe
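
The calculator above copies each of the 142 refined eye landmarks into the 468-point face mesh at the index given by kEyeLandmarkIndicesInFaceLandmarks. A minimal Rust sketch of that remapping, assuming an illustrative Landmark struct standing in for mediapipe's NormalizedLandmark:

```rust
/// Illustrative stand-in for mediapipe's NormalizedLandmark.
#[derive(Clone, Copy)]
struct Landmark {
    x: f32,
    y: f32,
    z: f32,
    visibility: f32,
}

/// Overwrites the face-mesh landmarks around the eyes with refined values,
/// following the index table used by UpdateFaceLandmarksCalculator.
fn update_face_landmarks(
    face_landmarks: &mut [Landmark],   // 468 face-mesh landmarks
    new_eye_landmarks: &[Landmark],    // 142 refined eye landmarks
    eye_indices_in_face: &[usize],     // kEyeLandmarkIndicesInFaceLandmarks
) {
    assert_eq!(new_eye_landmarks.len(), eye_indices_in_face.len());
    for (refined, &face_idx) in new_eye_landmarks.iter().zip(eye_indices_in_face) {
        face_landmarks[face_idx] = *refined;
    }
}
```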
|
159
mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
# MediaPipe graph that performs iris distance computation on desktop with
|
||||||
|
# TensorFlow Lite on CPU.
|
||||||
|
# Used in the example in
|
||||||
|
# mediapipe/examples/desktop/iris_tracking:iris_depth_from_image_desktop.
|
||||||
|
|
||||||
|
# Raw image bytes. (std::string)
|
||||||
|
input_stream: "input_image_bytes"
|
||||||
|
|
||||||
|
# Image with all the detections rendered. (ImageFrame)
|
||||||
|
output_stream: "output_image"
|
||||||
|
# Estimated depth in mm from the camera to the left iris of the face (if any) in
|
||||||
|
# the image. (float)
|
||||||
|
output_stream: "left_iris_depth_mm"
|
||||||
|
# Estimated depth in mm from the camera to the right iris of the face (if any)
|
||||||
|
# in the image. (float)
|
||||||
|
output_stream: "right_iris_depth_mm"
|
||||||
|
|
||||||
|
# Computes the focal length in pixels based on EXIF information stored in the
|
||||||
|
# image file. The output is an ImageFileProperties object containing relevant
|
||||||
|
# image EXIF information along with focal length in pixels.
|
||||||
|
node {
|
||||||
|
calculator: "ImageFilePropertiesCalculator"
|
||||||
|
input_stream: "input_image_bytes"
|
||||||
|
output_side_packet: "image_file_properties"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts a raw string with encoded image bytes into an ImageFrame object
|
||||||
|
# via OpenCV so that it can be processed by downstream calculators.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvEncodedImageToImageFrameCalculator"
|
||||||
|
input_stream: "input_image_bytes"
|
||||||
|
output_stream: "input_image"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||||
|
# face (left and right eye), and therefore this should always be set to 1.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:0:num_faces"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontCpu"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||||
|
input_stream: "multi_face_landmarks"
|
||||||
|
output_stream: "face_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||||
|
# vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedRectVectorCalculator"
|
||||||
|
input_stream: "face_rects_from_landmarks"
|
||||||
|
output_stream: "face_rect"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define left eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "left_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 33 end: 34 }
|
||||||
|
ranges: { begin: 133 end: 134 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define right eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "right_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 362 end: 363 }
|
||||||
|
ranges: { begin: 263 end: 264 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||||
|
node {
|
||||||
|
calculator: "IrisLandmarkLeftAndRightCpu"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_eye_contour_landmarks"
|
||||||
|
input_stream: "right_eye_contour_landmarks"
|
||||||
|
output_stream: "refined_eye_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "UpdateFaceLandmarksCalculator"
|
||||||
|
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Renders annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "IrisAndDepthRendererCpu"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:face_rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
}
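
This graph takes the raw encoded bytes of a still image on "input_image_bytes" and reports the estimated iris depths in millimetres. A small Rust sketch of preparing that input; `run_iris_depth_graph` is a hypothetical wrapper, not an API that exists in this repo:

```rust
use std::fs;

/// Reads an image file as raw encoded bytes, which is exactly what the
/// "input_image_bytes" stream of iris_depth_cpu.pbtxt expects.
fn main() -> std::io::Result<()> {
    let image_bytes = fs::read("face.jpg")?; // raw bytes, not a decoded image

    // Hypothetical hand-off into the graph (no such wrapper exists here yet):
    // let (left_mm, right_mm) = run_iris_depth_graph(&image_bytes);
    // The renderer prints these rounded to centimetres, i.e. depth_mm / 10.0.
    Ok(())
}
```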
|
142
mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
# MediaPipe graph that performs iris tracking on desktop with TensorFlow Lite
|
||||||
|
# on CPU.
|
||||||
|
# Used in the example in
|
||||||
|
# mediapipe/examples/desktop/iris_tracking:iris_tracking_cpu.
|
||||||
|
|
||||||
|
# CPU image. (ImageFrame)
|
||||||
|
input_stream: "input_video"
|
||||||
|
|
||||||
|
# CPU image. (ImageFrame)
|
||||||
|
output_stream: "output_video"
|
||||||
|
# Face landmarks with iris. (NormalizedLandmarkList)
|
||||||
|
output_stream: "face_landmarks_with_iris"
|
||||||
|
|
||||||
|
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||||
|
# face (left and right eye), and therefore this should always be set to 1.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:0:num_faces"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||||
|
input_stream: "multi_face_landmarks"
|
||||||
|
output_stream: "face_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||||
|
# vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedRectVectorCalculator"
|
||||||
|
input_stream: "face_rects_from_landmarks"
|
||||||
|
output_stream: "face_rect"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define left eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "left_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 33 end: 34 }
|
||||||
|
ranges: { begin: 133 end: 134 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define right eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "right_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 362 end: 363 }
|
||||||
|
ranges: { begin: 263 end: 264 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||||
|
node {
|
||||||
|
calculator: "IrisLandmarkLeftAndRightCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_eye_contour_landmarks"
|
||||||
|
input_stream: "right_eye_contour_landmarks"
|
||||||
|
output_stream: "refined_eye_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "UpdateFaceLandmarksCalculator"
|
||||||
|
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Renders annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "IrisRendererCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:face_rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "updated_face_landmarks"
|
||||||
|
input_stream: "iris_landmarks"
|
||||||
|
output_stream: "face_landmarks_with_iris"
|
||||||
|
}
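
iris_tracking_cpu.pbtxt consumes ImageFrames on "input_video"; in this repo those frames would typically come from the opencv crate's VideoCapture. A minimal capture loop sketch using only the opencv crate API (the packet hand-off into the graph is omitted, since that bridge is not shown in this commit):

```rust
use opencv::{highgui, prelude::*, videoio};

/// Grabs webcam frames; each BGR `Mat` is what would be wrapped as an
/// ImageFrame packet for the "input_video" stream of iris_tracking_cpu.pbtxt.
fn main() -> opencv::Result<()> {
    let mut cam = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
    let mut frame = Mat::default();
    loop {
        cam.read(&mut frame)?;
        if frame.size()?.width == 0 {
            continue; // skip empty grabs
        }
        // TODO: wrap `frame` as a graph packet and send it to "input_video".
        highgui::imshow("iris_tracking_cpu input", &frame)?;
        if highgui::wait_key(1)? == 27 {
            break; // ESC quits
        }
    }
    Ok(())
}
```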
|
|
@ -0,0 +1,153 @@
|
||||||
|
# MediaPipe graph that performs iris tracking on desktop with TensorFlow Lite
|
||||||
|
# on CPU.
|
||||||
|
|
||||||
|
# max_queue_size limits the number of packets enqueued on any input stream
|
||||||
|
# by throttling inputs to the graph. This makes the graph only process one
|
||||||
|
# frame per time.
|
||||||
|
max_queue_size: 1
|
||||||
|
|
||||||
|
# Decodes an input video file into images and a video header.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoDecoderCalculator"
|
||||||
|
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||||
|
output_stream: "VIDEO:input_video"
|
||||||
|
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||||
|
# face (left and right eye), and therefore this should always be set to 1.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:0:num_faces"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||||
|
input_stream: "multi_face_landmarks"
|
||||||
|
output_stream: "face_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||||
|
# vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedRectVectorCalculator"
|
||||||
|
input_stream: "face_rects_from_landmarks"
|
||||||
|
output_stream: "face_rect"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define left eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "left_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 33 end: 34 }
|
||||||
|
ranges: { begin: 133 end: 134 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define right eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "right_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 362 end: 363 }
|
||||||
|
ranges: { begin: 263 end: 264 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||||
|
node {
|
||||||
|
calculator: "IrisLandmarkLeftAndRightCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_eye_contour_landmarks"
|
||||||
|
input_stream: "right_eye_contour_landmarks"
|
||||||
|
output_stream: "refined_eye_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "UpdateFaceLandmarksCalculator"
|
||||||
|
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Renders annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "IrisRendererCpu"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:face_rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Encodes the annotated images into a video file, adopting properties specified
|
||||||
|
# in the input video header, e.g., video framerate.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoEncoderCalculator"
|
||||||
|
input_stream: "VIDEO:output_video"
|
||||||
|
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||||
|
codec: "avc1"
|
||||||
|
video_format: "mp4"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
163
mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt
Normal file
|
@ -0,0 +1,163 @@
|
||||||
|
# MediaPipe graph that performs iris tracking with TensorFlow Lite on GPU.
|
||||||
|
# Used in the examples in
|
||||||
|
# mediapipe/examples/android/src/java/com/mediapipe/apps/iristrackinggpu and
|
||||||
|
|
||||||
|
# GPU buffer. (GpuBuffer)
|
||||||
|
input_stream: "input_video"
|
||||||
|
|
||||||
|
# GPU buffer. (GpuBuffer)
|
||||||
|
output_stream: "output_video"
|
||||||
|
# Face landmarks with iris. (NormalizedLandmarkList)
|
||||||
|
output_stream: "face_landmarks_with_iris"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for downstream nodes
|
||||||
|
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||||
|
# passes through another image. All images that come in while waiting are
|
||||||
|
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||||
|
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||||
|
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||||
|
# real-time mobile applications. It also eliminates unnecessarily computation,
|
||||||
|
# e.g., the output produced by a node may get dropped downstream if the
|
||||||
|
# subsequent nodes are still busy processing previous inputs.
|
||||||
|
node {
|
||||||
|
calculator: "FlowLimiterCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
input_stream: "FINISHED:output_video"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "FINISHED"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "throttled_input_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||||
|
# face (left and right eye), and therefore this should always be set to 1.
|
||||||
|
node {
|
||||||
|
calculator: "ConstantSidePacketCalculator"
|
||||||
|
output_side_packet: "PACKET:num_faces"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||||
|
packet { int_value: 1 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects faces and corresponding landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarkFrontGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||||
|
input_stream: "multi_face_landmarks"
|
||||||
|
output_stream: "face_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||||
|
# vector.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedRectVectorCalculator"
|
||||||
|
input_stream: "face_rects_from_landmarks"
|
||||||
|
output_stream: "face_rect"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 1 }
|
||||||
|
element_only: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define left eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "left_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 33 end: 34 }
|
||||||
|
ranges: { begin: 133 end: 134 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gets two landmarks which define right eye boundary.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "face_landmarks"
|
||||||
|
output_stream: "right_eye_boundary_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 362 end: 363 }
|
||||||
|
ranges: { begin: 263 end: 264 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||||
|
node {
|
||||||
|
calculator: "IrisLandmarkLeftAndRightGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||||
|
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||||
|
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_eye_contour_landmarks"
|
||||||
|
input_stream: "right_eye_contour_landmarks"
|
||||||
|
output_stream: "refined_eye_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "UpdateFaceLandmarksCalculator"
|
||||||
|
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Renders annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "IrisAndDepthRendererGpu"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:face_rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
input_stream: "DETECTIONS:face_detections"
|
||||||
|
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "updated_face_landmarks"
|
||||||
|
input_stream: "iris_landmarks"
|
||||||
|
output_stream: "face_landmarks_with_iris"
|
||||||
|
}
|
67
mediapipe/graphs/iris_tracking/subgraphs/BUILD
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
# Copyright 2019 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load(
|
||||||
|
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||||
|
"mediapipe_simple_subgraph",
|
||||||
|
)
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "renderer_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
|
||||||
|
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||||
|
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||||
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
|
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||||
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||||
|
"//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator",
|
||||||
|
"//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_simple_subgraph(
|
||||||
|
name = "iris_and_depth_renderer_gpu",
|
||||||
|
graph = "iris_and_depth_renderer_gpu.pbtxt",
|
||||||
|
register_as = "IrisAndDepthRendererGpu",
|
||||||
|
deps = [
|
||||||
|
":renderer_calculators",
|
||||||
|
"//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_simple_subgraph(
|
||||||
|
name = "iris_renderer_cpu",
|
||||||
|
graph = "iris_renderer_cpu.pbtxt",
|
||||||
|
register_as = "IrisRendererCpu",
|
||||||
|
deps = [
|
||||||
|
":renderer_calculators",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_simple_subgraph(
|
||||||
|
name = "iris_and_depth_renderer_cpu",
|
||||||
|
graph = "iris_and_depth_renderer_cpu.pbtxt",
|
||||||
|
register_as = "IrisAndDepthRendererCpu",
|
||||||
|
deps = [
|
||||||
|
":renderer_calculators",
|
||||||
|
"//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,267 @@
|
||||||
|
# MediaPipe iris tracking rendering subgraph.
|
||||||
|
|
||||||
|
type: "IrisAndDepthRendererCpu"
|
||||||
|
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "all_left_eye_contour_landmarks"
|
||||||
|
output_stream: "left_eye_contour_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 15 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "all_right_eye_contour_landmarks"
|
||||||
|
output_stream: "right_eye_contour_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 15 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Concatenate iris landmarks from both eyes.
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_iris_landmarks"
|
||||||
|
input_stream: "right_iris_landmarks"
|
||||||
|
output_stream: "iris_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:face_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 150 g: 0 b: 0 }
|
||||||
|
connection_color { r: 0 g: 150 b: 0 }
|
||||||
|
thickness: 2
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ImagePropertiesCalculator"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
output_stream: "SIZE:image_size"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Maps detection label IDs to the corresponding label text ("Face").
|
||||||
|
node {
|
||||||
|
calculator: "DetectionLabelIdToTextCalculator"
|
||||||
|
input_stream: "detections"
|
||||||
|
output_stream: "labeled_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||||
|
label: "Face"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:labeled_detections"
|
||||||
|
output_stream: "RENDER_DATA:detection_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 0 g: 255 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 255 g: 0 b: 0 }
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
thickness: 1.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 255 g: 0 b: 0 }
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
thickness: 1.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:rect"
|
||||||
|
output_stream: "RENDER_DATA:rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:right_eye_rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:left_eye_rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "IrisToDepthCalculator"
|
||||||
|
input_stream: "IRIS:iris_landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:image_size"
|
||||||
|
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
|
||||||
|
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "IrisToRenderDataCalculator"
|
||||||
|
input_stream: "IRIS:iris_landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:image_size"
|
||||||
|
input_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
input_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
output_stream: "RENDER_DATA:iris_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||||
|
oval_color { r: 0 g: 0 b: 255 }
|
||||||
|
landmark_color { r: 0 g: 255 b: 0 }
|
||||||
|
oval_thickness: 2.0
|
||||||
|
landmark_thickness: 1.0
|
||||||
|
font_height_px: 50
|
||||||
|
horizontal_offset_px: 200
|
||||||
|
vertical_offset_px: 200
|
||||||
|
location: TOP_LEFT
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "detection_render_data"
|
||||||
|
input_stream: "face_landmarks_render_data"
|
||||||
|
input_stream: "right_eye_contour_landmarks_render_data"
|
||||||
|
input_stream: "left_eye_contour_landmarks_render_data"
|
||||||
|
input_stream: "iris_render_data"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
}
|
|
@ -0,0 +1,270 @@
|
||||||
|
# MediaPipe iris tracking rendering subgraph.
|
||||||
|
|
||||||
|
type: "IrisAndDepthRendererGpu"
|
||||||
|
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "all_left_eye_contour_landmarks"
|
||||||
|
output_stream: "left_eye_contour_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 15 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "all_right_eye_contour_landmarks"
|
||||||
|
output_stream: "right_eye_contour_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 15 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Concatenate iris landmarks from both eyes.
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_iris_landmarks"
|
||||||
|
input_stream: "right_iris_landmarks"
|
||||||
|
output_stream: "iris_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:face_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 150 g: 0 b: 0 }
|
||||||
|
connection_color { r: 0 g: 150 b: 0 }
|
||||||
|
thickness: 2
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ImagePropertiesCalculator"
|
||||||
|
input_stream: "IMAGE_GPU:input_image"
|
||||||
|
output_stream: "SIZE:image_size"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Maps detection label IDs to the corresponding label text ("Face").
|
||||||
|
node {
|
||||||
|
calculator: "DetectionLabelIdToTextCalculator"
|
||||||
|
input_stream: "detections"
|
||||||
|
output_stream: "labeled_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||||
|
label: "Face"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:labeled_detections"
|
||||||
|
output_stream: "RENDER_DATA:detection_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 0 g: 255 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 255 g: 0 b: 0 }
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
thickness: 2.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 255 g: 0 b: 0 }
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
thickness: 2.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:rect"
|
||||||
|
output_stream: "RENDER_DATA:rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:right_eye_rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:left_eye_rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "IrisToDepthCalculator"
|
||||||
|
input_stream: "IRIS:iris_landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:image_size"
|
||||||
|
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||||
|
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "IrisToRenderDataCalculator"
|
||||||
|
input_stream: "IRIS:iris_landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:image_size"
|
||||||
|
input_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||||
|
input_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||||
|
output_stream: "RENDER_DATA:iris_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||||
|
oval_color { r: 0 g: 0 b: 255 }
|
||||||
|
landmark_color { r: 0 g: 255 b: 0 }
|
||||||
|
oval_thickness: 4.0
|
||||||
|
landmark_thickness: 2.0
|
||||||
|
font_height_px: 50
|
||||||
|
horizontal_offset_px: 200
|
||||||
|
vertical_offset_px: 200
|
||||||
|
location: TOP_LEFT
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE_GPU:input_image"
|
||||||
|
input_stream: "detection_render_data"
|
||||||
|
input_stream: "face_landmarks_render_data"
|
||||||
|
input_stream: "right_eye_contour_landmarks_render_data"
|
||||||
|
input_stream: "left_eye_contour_landmarks_render_data"
|
||||||
|
input_stream: "iris_render_data"
|
||||||
|
output_stream: "IMAGE_GPU:output_image"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.AnnotationOverlayCalculatorOptions] {
|
||||||
|
gpu_scale_factor: 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
254 mediapipe/graphs/iris_tracking/subgraphs/iris_renderer_cpu.pbtxt Normal file

@ -0,0 +1,254 @@
|
||||||
|
# MediaPipe iris tracking rendering subgraph.
|
||||||
|
|
||||||
|
type: "IrisRendererCpu"
|
||||||
|
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "DETECTIONS:detections"
|
||||||
|
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
|
||||||
|
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||||
|
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||||
|
input_stream: "NORM_RECT:rect"
|
||||||
|
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||||
|
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||||
|
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "all_left_eye_contour_landmarks"
|
||||||
|
output_stream: "left_eye_contour_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 15 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "all_right_eye_contour_landmarks"
|
||||||
|
output_stream: "right_eye_contour_landmarks"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||||
|
ranges: { begin: 0 end: 15 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Concatenate iris landmarks from both eyes.
|
||||||
|
node {
|
||||||
|
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "left_iris_landmarks"
|
||||||
|
input_stream: "right_iris_landmarks"
|
||||||
|
output_stream: "iris_landmarks"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:face_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_color { r: 150 g: 0 b: 0 }
|
||||||
|
connection_color { r: 0 g: 150 b: 0 }
|
||||||
|
thickness: 2
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "ImagePropertiesCalculator"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
output_stream: "SIZE:image_size"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Maps detection label IDs to the corresponding label text ("Face").
|
||||||
|
node {
|
||||||
|
calculator: "DetectionLabelIdToTextCalculator"
|
||||||
|
input_stream: "detections"
|
||||||
|
output_stream: "labeled_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||||
|
label: "Face"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:labeled_detections"
|
||||||
|
output_stream: "RENDER_DATA:detection_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 0 g: 255 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 255 g: 0 b: 0 }
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
thickness: 1.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts landmarks to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "LandmarksToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 1
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 2
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 3
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 4
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 5
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 6
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 7
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 10
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 11
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 12
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 13
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_connections: 0
|
||||||
|
landmark_connections: 9
|
||||||
|
landmark_connections: 8
|
||||||
|
landmark_connections: 14
|
||||||
|
landmark_color { r: 255 g: 0 b: 0 }
|
||||||
|
connection_color { r: 255 g: 0 b: 0 }
|
||||||
|
visualize_landmark_depth: false
|
||||||
|
thickness: 1.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:rect"
|
||||||
|
output_stream: "RENDER_DATA:rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:right_eye_rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "RectToRenderDataCalculator"
|
||||||
|
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
|
||||||
|
output_stream: "RENDER_DATA:left_eye_rect_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||||
|
filled: false
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
thickness: 4.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node {
|
||||||
|
calculator: "IrisToRenderDataCalculator"
|
||||||
|
input_stream: "IRIS:iris_landmarks"
|
||||||
|
input_stream: "IMAGE_SIZE:image_size"
|
||||||
|
output_stream: "RENDER_DATA:iris_render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||||
|
oval_color { r: 0 g: 0 b: 255 }
|
||||||
|
landmark_color { r: 0 g: 255 b: 0 }
|
||||||
|
oval_thickness: 4.0
|
||||||
|
landmark_thickness: 2.0
|
||||||
|
font_height_px: 50
|
||||||
|
horizontal_offset_px: 200
|
||||||
|
vertical_offset_px: 200
|
||||||
|
location: TOP_LEFT
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "detection_render_data"
|
||||||
|
input_stream: "face_landmarks_render_data"
|
||||||
|
input_stream: "right_eye_contour_landmarks_render_data"
|
||||||
|
input_stream: "left_eye_contour_landmarks_render_data"
|
||||||
|
input_stream: "iris_render_data"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
}
|
47 mediapipe/graphs/media_sequence/BUILD Normal file

@ -0,0 +1,47 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "clipped_images_from_file_at_24fps_calculators",
    deps = [
        "//mediapipe/calculators/core:packet_resampler_calculator",
        "//mediapipe/calculators/image:opencv_image_encoder_calculator",
        "//mediapipe/calculators/image:scale_image_calculator",
        "//mediapipe/calculators/tensorflow:pack_media_sequence_calculator",
        "//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator",
        "//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
    ],
)

cc_library(
    name = "tvl1_flow_and_rgb_from_file_calculators",
    deps = [
        "//mediapipe/calculators/core:packet_inner_join_calculator",
        "//mediapipe/calculators/core:packet_resampler_calculator",
        "//mediapipe/calculators/core:sequence_shift_calculator",
        "//mediapipe/calculators/image:opencv_image_encoder_calculator",
        "//mediapipe/calculators/image:scale_image_calculator",
        "//mediapipe/calculators/tensorflow:pack_media_sequence_calculator",
        "//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator",
        "//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator",
        "//mediapipe/calculators/video:flow_to_image_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
        "//mediapipe/calculators/video:tvl1_optical_flow_calculator",
    ],
)

@ -0,0 +1,78 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Convert the string input into a decoded SequenceExample.
node {
  calculator: "StringToSequenceExampleCalculator"
  input_side_packet: "STRING:input_sequence_example"
  output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
}

# Unpack the data path and clip timing from the SequenceExample.
node {
  calculator: "UnpackMediaSequenceCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
  output_side_packet: "DATA_PATH:input_video_path"
  output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
  node_options: {
    [type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
      base_packet_resampler_options: {
        frame_rate: 24.0
        base_timestamp: 0
      }
    }
  }
}

# Decode the entire video.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:decoded_frames"
}

# Extract the subset of frames we want to keep.
node {
  calculator: "PacketResamplerCalculator"
  input_stream: "decoded_frames"
  output_stream: "sampled_frames"
  input_side_packet: "OPTIONS:packet_resampler_options"
}

# Encode the images to store in the SequenceExample.
node {
  calculator: "OpenCvImageEncoderCalculator"
  input_stream: "sampled_frames"
  output_stream: "encoded_frames"
  node_options: {
    [type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
      quality: 80
    }
  }
}

# Store the images in the SequenceExample.
node {
  calculator: "PackMediaSequenceCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
  output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
  input_stream: "IMAGE:encoded_frames"
}

# Serialize the SequenceExample to a string for storage.
node {
  calculator: "StringToSequenceExampleCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
  output_side_packet: "STRING:output_sequence_example"
}

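The PacketResamplerCalculator above selects frames so that downstream consumers see a fixed 24 fps clip regardless of the source frame rate. A rough Rust sketch of nearest-frame resampling to a fixed rate (an assumed simplification of the calculator's behaviour, timestamps in microseconds):

// For each output tick at `target_fps`, pick the input frame whose timestamp
// is closest, a simplified stand-in for PacketResamplerCalculator.
fn resample(input_ts_us: &[i64], duration_us: i64, target_fps: f64) -> Vec<i64> {
    let period_us = (1_000_000.0 / target_fps) as i64;
    (0..duration_us / period_us)
        .map(|i| {
            let t = i * period_us;
            *input_ts_us
                .iter()
                .min_by_key(|&&ts| (ts - t).abs())
                .expect("need at least one input frame")
        })
        .collect()
}

fn main() {
    // A 30 fps source resampled to 24 fps repeats some frames and skips others.
    let src: Vec<i64> = (0..30).map(|i| i * 33_333).collect();
    println!("{} frames kept", resample(&src, 1_000_000, 24.0).len()); // 24
}
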
@ -0,0 +1,153 @@
|
||||||
|
# Copyright 2019 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Convert the string input into a decoded SequenceExample.
|
||||||
|
node {
|
||||||
|
calculator: "StringToSequenceExampleCalculator"
|
||||||
|
input_side_packet: "STRING:input_sequence_example"
|
||||||
|
output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Unpack the data path and clip timing from the SequenceExample.
|
||||||
|
node {
|
||||||
|
calculator: "UnpackMediaSequenceCalculator"
|
||||||
|
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||||
|
output_side_packet: "DATA_PATH:input_video_path"
|
||||||
|
output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
|
||||||
|
base_packet_resampler_options: {
|
||||||
|
frame_rate: 25.0
|
||||||
|
base_timestamp: 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Decode the entire video.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoDecoderCalculator"
|
||||||
|
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||||
|
output_stream: "VIDEO:decoded_frames"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract the subset of frames we want to keep.
|
||||||
|
node {
|
||||||
|
calculator: "PacketResamplerCalculator"
|
||||||
|
input_stream: "decoded_frames"
|
||||||
|
output_stream: "sampled_frames"
|
||||||
|
input_side_packet: "OPTIONS:packet_resampler_options"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fit the images into the target size.
|
||||||
|
node: {
|
||||||
|
calculator: "ScaleImageCalculator"
|
||||||
|
input_stream: "sampled_frames"
|
||||||
|
output_stream: "scaled_frames"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ScaleImageCalculatorOptions]: {
|
||||||
|
target_height: 256
|
||||||
|
preserve_aspect_ratio: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Shifts the timestamps of packets along a stream.
|
||||||
|
# With a packet_offset of -1, the first packet will be dropped, the second will
|
||||||
|
# be output with the timestamp of the first, the third with the timestamp of
|
||||||
|
# the second, and so on.
|
||||||
|
node: {
|
||||||
|
calculator: "SequenceShiftCalculator"
|
||||||
|
input_stream: "scaled_frames"
|
||||||
|
output_stream: "shifted_scaled_frames"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SequenceShiftCalculatorOptions]: {
|
||||||
|
packet_offset: -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
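
With packet_offset: -1, every frame is effectively paired with its successor, which is exactly what the optical-flow node needs (frame i as FIRST_FRAME, frame i+1 as SECOND_FRAME). A small Rust sketch of that re-timestamping (illustrative only; the frame and timestamp types are made up):

// Shift packets back by one: drop the first packet and give each remaining
// packet the timestamp of its predecessor, mirroring packet_offset: -1.
fn shift_by_minus_one<T: Clone>(packets: &[(i64, T)]) -> Vec<(i64, T)> {
    packets
        .windows(2)
        .map(|w| (w[0].0, w[1].1.clone())) // predecessor's timestamp, successor's payload
        .collect()
}

fn main() {
    let frames = vec![(0, "f0"), (40, "f1"), (80, "f2")];
    // => [(0, "f1"), (40, "f2")]: at timestamp t we now see frame t+1, so
    // joining with the original stream yields (frame t, frame t+1) pairs.
    println!("{:?}", shift_by_minus_one(&frames));
}
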
|
||||||
|
|
||||||
|
# Join the original input stream and the one that is shifted by one packet.
|
||||||
|
node: {
|
||||||
|
calculator: "PacketInnerJoinCalculator"
|
||||||
|
input_stream: "scaled_frames"
|
||||||
|
input_stream: "shifted_scaled_frames"
|
||||||
|
output_stream: "first_frames"
|
||||||
|
output_stream: "second_frames"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute the forward optical flow.
|
||||||
|
node {
|
||||||
|
calculator: "Tvl1OpticalFlowCalculator"
|
||||||
|
input_stream: "FIRST_FRAME:first_frames"
|
||||||
|
input_stream: "SECOND_FRAME:second_frames"
|
||||||
|
output_stream: "FORWARD_FLOW:forward_flow"
|
||||||
|
max_in_flight: 32
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts an optical flow field into an image frame with 2 channels (v_x and
# v_y); each channel is quantized to 0-255.
|
||||||
|
node: {
|
||||||
|
calculator: "FlowToImageCalculator"
|
||||||
|
input_stream: "forward_flow"
|
||||||
|
output_stream: "flow_frames"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.FlowToImageCalculatorOptions]: {
|
||||||
|
min_value: -20.0
|
||||||
|
max_value: 20.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
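
FlowToImageCalculator clips each flow component to [min_value, max_value] and maps it linearly onto a byte, so the flow can be stored with an ordinary image codec. A minimal Rust sketch of that mapping (the exact rounding is assumed, not taken from the calculator):

// Quantize one optical-flow component from [min_v, max_v] to a 0-255 byte.
fn quantize_flow(v: f32, min_v: f32, max_v: f32) -> u8 {
    let clamped = v.clamp(min_v, max_v);
    (((clamped - min_v) / (max_v - min_v)) * 255.0).round() as u8
}

fn main() {
    // With min_value: -20 and max_value: 20, zero motion maps to ~128.
    assert_eq!(quantize_flow(0.0, -20.0, 20.0), 128);
    assert_eq!(quantize_flow(-20.0, -20.0, 20.0), 0);
    assert_eq!(quantize_flow(25.0, -20.0, 20.0), 255); // clipped
}
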
|
||||||
|
|
||||||
|
# Encode the optical flow images to store in the SequenceExample.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvImageEncoderCalculator"
|
||||||
|
input_stream: "flow_frames"
|
||||||
|
output_stream: "encoded_flow_frames"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
|
||||||
|
quality: 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Encode the rgb images to store in the SequenceExample.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvImageEncoderCalculator"
|
||||||
|
input_stream: "scaled_frames"
|
||||||
|
output_stream: "encoded_frames"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
|
||||||
|
quality: 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Store the images in the SequenceExample.
|
||||||
|
node {
|
||||||
|
calculator: "PackMediaSequenceCalculator"
|
||||||
|
input_stream: "IMAGE:encoded_frames"
|
||||||
|
input_stream: "FORWARD_FLOW_ENCODED:encoded_flow_frames"
|
||||||
|
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||||
|
output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Serialize the SequenceExample to a string for storage.
|
||||||
|
node {
|
||||||
|
calculator: "StringToSequenceExampleCalculator"
|
||||||
|
input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
|
||||||
|
output_side_packet: "STRING:output_sequence_example"
|
||||||
|
}
|
||||||
|
|
||||||
|
num_threads: 32
|
94 mediapipe/graphs/object_detection/BUILD Normal file

@ -0,0 +1,94 @@
|
||||||
|
# Copyright 2019 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "mobile_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||||
|
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
|
||||||
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
|
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||||
|
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
|
||||||
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||||
|
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
|
||||||
|
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "desktop_tensorflow_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/tensorflow:image_frame_to_tensor_calculator",
|
||||||
|
"//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator",
|
||||||
|
"//mediapipe/calculators/tensorflow:object_detection_tensors_to_detections_calculator",
|
||||||
|
"//mediapipe/calculators/tensorflow:tensor_squeeze_dimensions_calculator",
|
||||||
|
"//mediapipe/calculators/tensorflow:tensorflow_inference_calculator",
|
||||||
|
"//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_calculator",
|
||||||
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
|
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||||
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||||
|
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||||
|
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "desktop_tflite_calculators",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||||
|
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||||
|
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||||
|
"//mediapipe/calculators/core:split_vector_calculator",
|
||||||
|
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||||
|
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
|
||||||
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
|
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||||
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
|
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||||
|
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||||
|
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
load(
|
||||||
|
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||||
|
"mediapipe_binary_graph",
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_binary_graph(
|
||||||
|
name = "mobile_cpu_binary_graph",
|
||||||
|
graph = "object_detection_mobile_cpu.pbtxt",
|
||||||
|
output_name = "mobile_cpu.binarypb",
|
||||||
|
deps = [":mobile_calculators"],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_binary_graph(
|
||||||
|
name = "mobile_gpu_binary_graph",
|
||||||
|
graph = "object_detection_mobile_gpu.pbtxt",
|
||||||
|
output_name = "mobile_gpu.binarypb",
|
||||||
|
deps = [":mobile_calculators"],
|
||||||
|
)
|
|
@ -0,0 +1,174 @@
|
||||||
|
# MediaPipe graph that performs object detection with TensorFlow Lite on CPU.
|
||||||
|
# Used in the examples in
|
||||||
|
# mediapipe/examples/desktop/object_detection:object_detection_cpu.
|
||||||
|
|
||||||
|
# Images on CPU coming into and out of the graph.
|
||||||
|
input_stream: "input_video"
|
||||||
|
output_stream: "output_video"
|
||||||
|
|
||||||
|
# Throttles the images flowing downstream for flow control. It passes through
|
||||||
|
# the very first incoming image unaltered, and waits for
|
||||||
|
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
|
||||||
|
# generating the corresponding detections before it passes through another
|
||||||
|
# image. All images that come in while waiting are dropped, limiting the number
|
||||||
|
# of in-flight images between this calculator and
|
||||||
|
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
|
||||||
|
# from queuing up incoming images and data excessively, which leads to increased
|
||||||
|
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||||
|
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||||
|
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||||
|
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||||
|
# processing previous inputs.
|
||||||
|
node {
|
||||||
|
calculator: "FlowLimiterCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
input_stream: "FINISHED:detections"
|
||||||
|
input_stream_info: {
|
||||||
|
tag_index: "FINISHED"
|
||||||
|
back_edge: true
|
||||||
|
}
|
||||||
|
output_stream: "throttled_input_video"
|
||||||
|
}
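
The same back-edge idea can be mimicked outside MediaPipe with a single flag that is set when a frame is admitted and cleared when the FINISHED signal arrives. A minimal Rust sketch of that "at most one frame in flight" policy (illustrative only, not the calculator's implementation):

use std::sync::atomic::{AtomicBool, Ordering};

// Admit a new frame only when the previous one has reported FINISHED,
// mirroring FlowLimiterCalculator's single-in-flight policy.
struct FlowLimiter {
    busy: AtomicBool,
}

impl FlowLimiter {
    fn new() -> Self {
        Self { busy: AtomicBool::new(false) }
    }
    // Returns true if the frame may enter the graph; false means drop it.
    fn try_admit(&self) -> bool {
        self.busy
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .is_ok()
    }
    // Called when the downstream detections for the admitted frame arrive.
    fn finished(&self) {
        self.busy.store(false, Ordering::Release);
    }
}

fn main() {
    let limiter = FlowLimiter::new();
    assert!(limiter.try_admit());  // first frame goes through
    assert!(!limiter.try_admit()); // next frame is dropped while busy
    limiter.finished();
    assert!(limiter.try_admit());  // admitted again after FINISHED
}
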
|
||||||
|
|
||||||
|
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
|
||||||
|
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||||
|
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||||
|
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||||
|
# detection model used in this graph is agnostic to that deformation.
|
||||||
|
node: {
|
||||||
|
calculator: "ImageTransformationCalculator"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
output_stream: "IMAGE:transformed_input_video"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||||
|
output_width: 320
|
||||||
|
output_height: 320
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the transformed input image on CPU into an image tensor stored as a
|
||||||
|
# TfLiteTensor.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteConverterCalculator"
|
||||||
|
input_stream: "IMAGE:transformed_input_video"
|
||||||
|
output_stream: "TENSORS:image_tensor"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||||
|
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||||
|
# scores.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteInferenceCalculator"
|
||||||
|
input_stream: "TENSORS:image_tensor"
|
||||||
|
output_stream: "TENSORS:detection_tensors"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||||
|
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generates a single side packet containing a vector of SSD anchors based on
|
||||||
|
# the specification in the options.
|
||||||
|
node {
|
||||||
|
calculator: "SsdAnchorsCalculator"
|
||||||
|
output_side_packet: "anchors"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||||
|
num_layers: 6
|
||||||
|
min_scale: 0.2
|
||||||
|
max_scale: 0.95
|
||||||
|
input_size_height: 320
|
||||||
|
input_size_width: 320
|
||||||
|
anchor_offset_x: 0.5
|
||||||
|
anchor_offset_y: 0.5
|
||||||
|
strides: 16
|
||||||
|
strides: 32
|
||||||
|
strides: 64
|
||||||
|
strides: 128
|
||||||
|
strides: 256
|
||||||
|
strides: 512
|
||||||
|
aspect_ratios: 1.0
|
||||||
|
aspect_ratios: 2.0
|
||||||
|
aspect_ratios: 0.5
|
||||||
|
aspect_ratios: 3.0
|
||||||
|
aspect_ratios: 0.3333
|
||||||
|
reduce_boxes_in_lowest_layer: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||||
|
# the SSD anchors and the specification in the options, into a vector of
|
||||||
|
# detections. Each detection describes a detected object.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||||
|
input_stream: "TENSORS:detection_tensors"
|
||||||
|
input_side_packet: "ANCHORS:anchors"
|
||||||
|
output_stream: "DETECTIONS:detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||||
|
num_classes: 91
|
||||||
|
num_boxes: 2034
|
||||||
|
num_coords: 4
|
||||||
|
ignore_classes: 0
|
||||||
|
sigmoid_score: true
|
||||||
|
apply_exponential_on_box_size: true
|
||||||
|
x_scale: 10.0
|
||||||
|
y_scale: 10.0
|
||||||
|
h_scale: 5.0
|
||||||
|
w_scale: 5.0
|
||||||
|
min_score_thresh: 0.6
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
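
Each row of the detection tensor encodes a box relative to its SSD anchor; the x/y/h/w scales above and apply_exponential_on_box_size undo that encoding. A hedged Rust sketch of the standard SSD decode step (the field order and formula are the usual SSD convention, assumed rather than read from this calculator's source):

// One SSD anchor and one raw box prediction, in normalized image coordinates.
struct Anchor { x_center: f32, y_center: f32, w: f32, h: f32 }
struct RawBox { dx: f32, dy: f32, dw: f32, dh: f32 }

// Decode with the scales used in the graph above:
// x_scale = y_scale = 10, w_scale = h_scale = 5, exponential box size.
fn decode(a: &Anchor, r: &RawBox) -> (f32, f32, f32, f32) {
    let xc = r.dx / 10.0 * a.w + a.x_center;
    let yc = r.dy / 10.0 * a.h + a.y_center;
    let w = (r.dw / 5.0).exp() * a.w;
    let h = (r.dh / 5.0).exp() * a.h;
    (xc - w / 2.0, yc - h / 2.0, xc + w / 2.0, yc + h / 2.0) // xmin, ymin, xmax, ymax
}

fn main() {
    let anchor = Anchor { x_center: 0.5, y_center: 0.5, w: 0.2, h: 0.2 };
    let raw = RawBox { dx: 0.0, dy: 0.0, dw: 0.0, dh: 0.0 };
    // A zero prediction decodes back to the anchor itself.
    println!("{:?}", decode(&anchor, &raw)); // approximately (0.4, 0.4, 0.6, 0.6)
}
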
|
||||||
|
|
||||||
|
# Performs non-max suppression to remove excessive detections.
|
||||||
|
node {
|
||||||
|
calculator: "NonMaxSuppressionCalculator"
|
||||||
|
input_stream: "detections"
|
||||||
|
output_stream: "filtered_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||||
|
min_suppression_threshold: 0.4
|
||||||
|
max_num_detections: 3
|
||||||
|
overlap_type: INTERSECTION_OVER_UNION
|
||||||
|
return_empty_detections: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
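
Non-max suppression keeps the highest-scoring detection and discards any other whose intersection-over-union with an already-kept box exceeds min_suppression_threshold, up to max_num_detections. A compact Rust sketch of that greedy loop (the calculator additionally supports other overlap types and empty-output handling):

#[derive(Clone, Copy, Debug)]
struct Det { xmin: f32, ymin: f32, xmax: f32, ymax: f32, score: f32 }

fn iou(a: Det, b: Det) -> f32 {
    let iw = (a.xmax.min(b.xmax) - a.xmin.max(b.xmin)).max(0.0);
    let ih = (a.ymax.min(b.ymax) - a.ymin.max(b.ymin)).max(0.0);
    let inter = iw * ih;
    let area = |d: Det| (d.xmax - d.xmin) * (d.ymax - d.ymin);
    inter / (area(a) + area(b) - inter)
}

// Greedy NMS: keep boxes in score order, dropping heavy overlaps.
fn nms(mut dets: Vec<Det>, iou_thresh: f32, max_keep: usize) -> Vec<Det> {
    dets.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
    let mut kept: Vec<Det> = Vec::new();
    for d in dets {
        if kept.len() == max_keep {
            break;
        }
        if kept.iter().all(|k| iou(*k, d) <= iou_thresh) {
            kept.push(d);
        }
    }
    kept
}

fn main() {
    let dets = vec![
        Det { xmin: 0.0, ymin: 0.0, xmax: 1.0, ymax: 1.0, score: 0.9 },
        Det { xmin: 0.05, ymin: 0.05, xmax: 1.0, ymax: 1.0, score: 0.8 }, // suppressed
        Det { xmin: 2.0, ymin: 2.0, xmax: 3.0, ymax: 3.0, score: 0.7 },
    ];
    println!("{} detections kept", nms(dets, 0.4, 3).len()); // 2
}
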
|
||||||
|
|
||||||
|
# Maps detection label IDs to the corresponding label text. The label map is
|
||||||
|
# provided in the label_map_path option.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionLabelIdToTextCalculator"
|
||||||
|
input_stream: "filtered_detections"
|
||||||
|
output_stream: "output_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||||
|
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:output_detections"
|
||||||
|
output_stream: "RENDER_DATA:render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE:throttled_input_video"
|
||||||
|
input_stream: "render_data"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
|
@ -0,0 +1,130 @@
|
||||||
|
# MediaPipe graph that performs object detection on desktop with TensorFlow
|
||||||
|
# on CPU.
|
||||||
|
# Used in the example in
|
||||||
|
# mediapipe/examples/desktop/object_detection:object_detection_tensorflow.
|
||||||
|
|
||||||
|
# Decodes an input video file into images and a video header.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoDecoderCalculator"
|
||||||
|
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||||
|
output_stream: "VIDEO:input_video"
|
||||||
|
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the input image into an image tensor as a tensorflow::Tensor.
|
||||||
|
node {
|
||||||
|
calculator: "ImageFrameToTensorCalculator"
|
||||||
|
input_stream: "input_video"
|
||||||
|
output_stream: "image_tensor"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generates a single side packet containing a TensorFlow session from a saved
|
||||||
|
# model. The directory path that contains the saved model is specified in the
|
||||||
|
# saved_model_path option, and the name of the saved model file has to be
|
||||||
|
# "saved_model.pb".
|
||||||
|
node {
|
||||||
|
calculator: "TensorFlowSessionFromSavedModelCalculator"
|
||||||
|
output_side_packet: "SESSION:object_detection_session"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: {
|
||||||
|
saved_model_path: "mediapipe/models/object_detection_saved_model"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Runs a TensorFlow session (specified as an input side packet) that takes an
|
||||||
|
# image tensor and outputs multiple tensors that describe the objects detected
|
||||||
|
# in the image. The batch_size option is set to 1 to disable batching entirely.
|
||||||
|
# Note that the particular TensorFlow model used in this session handles image
|
||||||
|
# scaling internally before the object-detection inference, and therefore no
|
||||||
|
# additional calculator for image transformation is needed in this MediaPipe
|
||||||
|
# graph.
|
||||||
|
node: {
|
||||||
|
calculator: "TensorFlowInferenceCalculator"
|
||||||
|
input_side_packet: "SESSION:object_detection_session"
|
||||||
|
input_stream: "INPUTS:image_tensor"
|
||||||
|
output_stream: "DETECTION_BOXES:detection_boxes_tensor"
|
||||||
|
output_stream: "DETECTION_CLASSES:detection_classes_tensor"
|
||||||
|
output_stream: "DETECTION_SCORES:detection_scores_tensor"
|
||||||
|
output_stream: "NUM_DETECTIONS:num_detections_tensor"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: {
|
||||||
|
batch_size: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Decodes the detection tensors from the TensorFlow model into a vector of
|
||||||
|
# detections. Each detection describes a detected object.
|
||||||
|
node {
|
||||||
|
calculator: "ObjectDetectionTensorsToDetectionsCalculator"
|
||||||
|
input_stream: "BOXES:detection_boxes_tensor"
|
||||||
|
input_stream: "SCORES:detection_scores_tensor"
|
||||||
|
input_stream: "CLASSES:detection_classes_tensor"
|
||||||
|
input_stream: "NUM_DETECTIONS:num_detections_tensor"
|
||||||
|
output_stream: "DETECTIONS:detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Performs non-max suppression to remove excessive detections.
|
||||||
|
node {
|
||||||
|
calculator: "NonMaxSuppressionCalculator"
|
||||||
|
input_stream: "detections"
|
||||||
|
output_stream: "filtered_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||||
|
min_suppression_threshold: 0.4
|
||||||
|
min_score_threshold: 0.6
|
||||||
|
max_num_detections: 10
|
||||||
|
overlap_type: INTERSECTION_OVER_UNION
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Maps detection label IDs to the corresponding label text. The label map is
|
||||||
|
# provided in the label_map_path option.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionLabelIdToTextCalculator"
|
||||||
|
input_stream: "filtered_detections"
|
||||||
|
output_stream: "output_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||||
|
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:output_detections"
|
||||||
|
output_stream: "RENDER_DATA:render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "render_data"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Encodes the annotated images into a video file, adopting properties specified
|
||||||
|
# in the input video header, e.g., video framerate.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoEncoderCalculator"
|
||||||
|
input_stream: "VIDEO:output_video"
|
||||||
|
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||||
|
codec: "avc1"
|
||||||
|
video_format: "mp4"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,180 @@
|
||||||
|
# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
|
||||||
|
# on CPU.
|
||||||
|
# Used in the example in
|
||||||
|
# mediapipe/examples/desktop/object_detection:object_detection_tflite.
|
||||||
|
|
||||||
|
# max_queue_size limits the number of packets enqueued on any input stream
|
||||||
|
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
|
||||||
|
max_queue_size: 1
|
||||||
|
|
||||||
|
# Decodes an input video file into images and a video header.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoDecoderCalculator"
|
||||||
|
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||||
|
output_stream: "VIDEO:input_video"
|
||||||
|
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
|
||||||
|
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||||
|
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||||
|
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||||
|
# detection model used in this graph is agnostic to that deformation.
|
||||||
|
node: {
|
||||||
|
calculator: "ImageTransformationCalculator"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
output_stream: "IMAGE:transformed_input_video"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||||
|
output_width: 320
|
||||||
|
output_height: 320
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the transformed input image on CPU into an image tensor as a
|
||||||
|
# TfLiteTensor. The zero_center option is set to true to normalize the
|
||||||
|
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteConverterCalculator"
|
||||||
|
input_stream: "IMAGE:transformed_input_video"
|
||||||
|
output_stream: "TENSORS:image_tensor"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
|
||||||
|
zero_center: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
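
With zero_center: true the converter maps 8-bit pixel values onto [-1, 1] rather than [0, 1]. The mapping is a one-liner in Rust; whether the midpoint is 127.5 or 128 is an assumption here:

// Normalize an 8-bit pixel value the way a zero-centered converter would.
fn zero_center(p: u8) -> f32 {
    (p as f32 - 127.5) / 127.5 // 0 -> -1.0, 255 -> 1.0
}

fn main() {
    assert_eq!(zero_center(0), -1.0);
    assert_eq!(zero_center(255), 1.0);
}
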
|
||||||
|
|
||||||
|
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||||
|
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||||
|
# scores.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteInferenceCalculator"
|
||||||
|
input_stream: "TENSORS:image_tensor"
|
||||||
|
output_stream: "TENSORS:detection_tensors"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||||
|
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generates a single side packet containing a vector of SSD anchors based on
|
||||||
|
# the specification in the options.
|
||||||
|
node {
|
||||||
|
calculator: "SsdAnchorsCalculator"
|
||||||
|
output_side_packet: "anchors"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||||
|
num_layers: 6
|
||||||
|
min_scale: 0.2
|
||||||
|
max_scale: 0.95
|
||||||
|
input_size_height: 320
|
||||||
|
input_size_width: 320
|
||||||
|
anchor_offset_x: 0.5
|
||||||
|
anchor_offset_y: 0.5
|
||||||
|
strides: 16
|
||||||
|
strides: 32
|
||||||
|
strides: 64
|
||||||
|
strides: 128
|
||||||
|
strides: 256
|
||||||
|
strides: 512
|
||||||
|
aspect_ratios: 1.0
|
||||||
|
aspect_ratios: 2.0
|
||||||
|
aspect_ratios: 0.5
|
||||||
|
aspect_ratios: 3.0
|
||||||
|
aspect_ratios: 0.3333
|
||||||
|
reduce_boxes_in_lowest_layer: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
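
The per-layer anchor scales are spread evenly between min_scale and max_scale across the six feature-map layers. The usual formula from the SSD recipe, assumed to match what the calculator computes, in Rust:

// Evenly spaced anchor scales between min_scale and max_scale, one per layer.
fn layer_scales(min_scale: f32, max_scale: f32, num_layers: usize) -> Vec<f32> {
    (0..num_layers)
        .map(|i| min_scale + (max_scale - min_scale) * i as f32 / (num_layers - 1) as f32)
        .collect()
}

fn main() {
    // With min_scale 0.2 and max_scale 0.95: 0.20, 0.35, 0.50, 0.65, 0.80, 0.95.
    println!("{:?}", layer_scales(0.2, 0.95, 6));
}
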
|
||||||
|
|
||||||
|
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||||
|
# the SSD anchors and the specification in the options, into a vector of
|
||||||
|
# detections. Each detection describes a detected object.
|
||||||
|
node {
|
||||||
|
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||||
|
input_stream: "TENSORS:detection_tensors"
|
||||||
|
input_side_packet: "ANCHORS:anchors"
|
||||||
|
output_stream: "DETECTIONS:detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||||
|
num_classes: 91
|
||||||
|
num_boxes: 2034
|
||||||
|
num_coords: 4
|
||||||
|
ignore_classes: 0
|
||||||
|
apply_exponential_on_box_size: true
|
||||||
|
|
||||||
|
x_scale: 10.0
|
||||||
|
y_scale: 10.0
|
||||||
|
h_scale: 5.0
|
||||||
|
w_scale: 5.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Performs non-max suppression to remove excessive detections.
|
||||||
|
node {
|
||||||
|
calculator: "NonMaxSuppressionCalculator"
|
||||||
|
input_stream: "detections"
|
||||||
|
output_stream: "filtered_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||||
|
min_suppression_threshold: 0.4
|
||||||
|
min_score_threshold: 0.6
|
||||||
|
max_num_detections: 5
|
||||||
|
overlap_type: INTERSECTION_OVER_UNION
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Maps detection label IDs to the corresponding label text. The label map is
|
||||||
|
# provided in the label_map_path option.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionLabelIdToTextCalculator"
|
||||||
|
input_stream: "filtered_detections"
|
||||||
|
output_stream: "output_detections"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||||
|
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Converts the detections to drawing primitives for annotation overlay.
|
||||||
|
node {
|
||||||
|
calculator: "DetectionsToRenderDataCalculator"
|
||||||
|
input_stream: "DETECTIONS:output_detections"
|
||||||
|
output_stream: "RENDER_DATA:render_data"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||||
|
thickness: 4.0
|
||||||
|
color { r: 255 g: 0 b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Draws annotations and overlays them on top of the input images.
|
||||||
|
node {
|
||||||
|
calculator: "AnnotationOverlayCalculator"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "render_data"
|
||||||
|
output_stream: "IMAGE:output_video"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Encodes the annotated images into a video file, adopting properties specified
|
||||||
|
# in the input video header, e.g., video framerate.
|
||||||
|
node {
|
||||||
|
calculator: "OpenCvVideoEncoderCalculator"
|
||||||
|
input_stream: "VIDEO:output_video"
|
||||||
|
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||||
|
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||||
|
node_options: {
|
||||||
|
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||||
|
codec: "avc1"
|
||||||
|
video_format: "mp4"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
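For reference, the TfLiteTensorsToDetectionsCalculator options above (x_scale, y_scale, h_scale, w_scale, apply_exponential_on_box_size) configure a standard SSD-style anchor-relative box decode. The sketch below illustrates that decode in isolation; the raw-tensor field ordering and the struct names are assumptions made for illustration, not the calculator's actual code.

// Illustrative sketch only: the rough SSD decode that the options above
// configure. The [dy, dx, dh, dw] ordering of the raw values is an assumption.
#include <cmath>

struct Anchor { float x_center, y_center, w, h; };
struct Box    { float x_center, y_center, w, h; };

Box DecodeBox(const float raw[4], const Anchor& a) {
  Box b;
  b.y_center = raw[0] / 10.0f * a.h + a.y_center;  // y_scale: 10.0
  b.x_center = raw[1] / 10.0f * a.w + a.x_center;  // x_scale: 10.0
  b.h = std::exp(raw[2] / 5.0f) * a.h;             // h_scale: 5.0, exponential box size
  b.w = std::exp(raw[3] / 5.0f) * a.w;             // w_scale: 5.0, exponential box size
  return b;
}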
@ -0,0 +1,193 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectioncpu and
# mediapipe/examples/ios/objectdetectioncpu.

# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Transfers the input image from GPU to CPU memory for the purpose of
# demonstrating a CPU-based pipeline. Note that the input image on GPU has the
# origin defined at the bottom-left corner (OpenGL convention). As a result,
# the transferred image on CPU also shares the same representation.
node: {
  calculator: "GpuBufferToImageFrameCalculator"
  input_stream: "input_video"
  output_stream: "input_video_cpu"
}

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video_cpu"
  input_stream: "FINISHED:detections"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video_cpu"
}

# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE:throttled_input_video_cpu"
  output_stream: "IMAGE:transformed_input_video_cpu"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 320
      output_height: 320
    }
  }
}

# Converts the transformed input image on CPU into an image tensor stored as a
# TfLiteTensor.
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE:transformed_input_video_cpu"
  output_stream: "TENSORS:image_tensor"
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS:image_tensor"
  output_stream: "TENSORS:detection_tensors"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/ssdlite_object_detection.tflite"
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  node_options: {
    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
      num_layers: 6
      min_scale: 0.2
      max_scale: 0.95
      input_size_height: 320
      input_size_width: 320
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 16
      strides: 32
      strides: 64
      strides: 128
      strides: 256
      strides: 512
      aspect_ratios: 1.0
      aspect_ratios: 2.0
      aspect_ratios: 0.5
      aspect_ratios: 3.0
      aspect_ratios: 0.3333
      reduce_boxes_in_lowest_layer: true
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TfLiteTensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
      num_classes: 91
      num_boxes: 2034
      num_coords: 4
      ignore_classes: 0
      sigmoid_score: true
      apply_exponential_on_box_size: true
      x_scale: 10.0
      y_scale: 10.0
      h_scale: 5.0
      w_scale: 5.0
      min_score_thresh: 0.6
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "detections"
  output_stream: "filtered_detections"
  node_options: {
    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
      min_suppression_threshold: 0.4
      max_num_detections: 3
      overlap_type: INTERSECTION_OVER_UNION
      return_empty_detections: true
    }
  }
}

# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
  calculator: "DetectionLabelIdToTextCalculator"
  input_stream: "filtered_detections"
  output_stream: "output_detections"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
      label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
    }
  }
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:output_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video_cpu"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video_cpu"
}

# Transfers the annotated image from CPU back to GPU memory, to be sent out of
# the graph.
node: {
  calculator: "ImageFrameToGpuBufferCalculator"
  input_stream: "output_video_cpu"
  output_stream: "output_video"
}
@ -0,0 +1,175 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectiongpu and
# mediapipe/examples/ios/objectdetectiongpu.

# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:detections"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Transforms the input image on GPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  output_stream: "IMAGE_GPU:transformed_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 320
      output_height: 320
    }
  }
}

# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE_GPU:transformed_input_video"
  output_stream: "TENSORS_GPU:image_tensor"
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS_GPU:image_tensor"
  output_stream: "TENSORS_GPU:detection_tensors"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/ssdlite_object_detection.tflite"
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  node_options: {
    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
      num_layers: 6
      min_scale: 0.2
      max_scale: 0.95
      input_size_height: 320
      input_size_width: 320
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 16
      strides: 32
      strides: 64
      strides: 128
      strides: 256
      strides: 512
      aspect_ratios: 1.0
      aspect_ratios: 2.0
      aspect_ratios: 0.5
      aspect_ratios: 3.0
      aspect_ratios: 0.3333
      reduce_boxes_in_lowest_layer: true
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TfLiteTensorsToDetectionsCalculator"
  input_stream: "TENSORS_GPU:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
      num_classes: 91
      num_boxes: 2034
      num_coords: 4
      ignore_classes: 0
      sigmoid_score: true
      apply_exponential_on_box_size: true
      x_scale: 10.0
      y_scale: 10.0
      h_scale: 5.0
      w_scale: 5.0
      min_score_thresh: 0.6
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "detections"
  output_stream: "filtered_detections"
  node_options: {
    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
      min_suppression_threshold: 0.4
      max_num_detections: 3
      overlap_type: INTERSECTION_OVER_UNION
      return_empty_detections: true
    }
  }
}

# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
  calculator: "DetectionLabelIdToTextCalculator"
  input_stream: "filtered_detections"
  output_stream: "output_detections"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
      label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
    }
  }
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:output_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE_GPU:output_video"
}
80
mediapipe/graphs/object_detection_3d/BUILD
Normal file
@ -0,0 +1,80 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

exports_files(glob([
    "*.pbtxt",
]))

cc_library(
    name = "mobile_calculators",
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_cropping_calculator",
        "//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
        "//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
        "//mediapipe/modules/objectron:objectron_gpu",
    ],
)

cc_library(
    name = "mobile_calculators_1stage",
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/calculators/core:packet_resampler_calculator",
        "//mediapipe/calculators/image:image_cropping_calculator",
        "//mediapipe/gpu:gl_scaler_calculator",
        "//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
        "//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
        "//mediapipe/modules/objectron:objectron_detection_1stage_gpu",
        "//mediapipe/modules/objectron:objectron_tracking_1stage_gpu",
    ],
)

cc_library(
    name = "desktop_cpu_calculators",
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
        "//mediapipe/calculators/video:opencv_video_encoder_calculator",
        "//mediapipe/graphs/object_detection_3d/subgraphs:renderer_cpu",
        "//mediapipe/modules/objectron:objectron_cpu",
    ],
)

mediapipe_binary_graph(
    name = "mobile_gpu_binary_graph",
    graph = "object_occlusion_tracking.pbtxt",
    output_name = "mobile_gpu_binary_graph.binarypb",
    visibility = ["//visibility:public"],
    deps = [":mobile_calculators"],
)

mediapipe_binary_graph(
    name = "mobile_gpu_1stage_binary_graph",
    graph = "object_occlusion_tracking_1stage.pbtxt",
    output_name = "mobile_gpu_1stage_binary_graph.binarypb",
    visibility = ["//visibility:public"],
    deps = [":mobile_calculators_1stage"],
)
113
mediapipe/graphs/object_detection_3d/calculators/BUILD
Normal file
@ -0,0 +1,113 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_proto_library(
    name = "gl_animation_overlay_calculator_proto",
    srcs = ["gl_animation_overlay_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_proto",
    ],
)

mediapipe_proto_library(
    name = "annotations_to_model_matrices_calculator_proto",
    srcs = ["annotations_to_model_matrices_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_proto",
    ],
)

mediapipe_proto_library(
    name = "model_matrix_proto",
    srcs = ["model_matrix.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_proto",
    ],
)

mediapipe_proto_library(
    name = "annotations_to_render_data_calculator_proto",
    srcs = ["annotations_to_render_data_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/util:color_proto",
    ],
)

cc_library(
    name = "gl_animation_overlay_calculator",
    srcs = ["gl_animation_overlay_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":gl_animation_overlay_calculator_cc_proto",
        ":model_matrix_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/gpu:gl_calculator_helper",
        "//mediapipe/gpu:shader_util",
        "//mediapipe/modules/objectron/calculators:camera_parameters_cc_proto",
        "//mediapipe/util/android:asset_manager_util",
    ],
    alwayslink = 1,
)

cc_library(
    name = "annotations_to_model_matrices_calculator",
    srcs = ["annotations_to_model_matrices_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":annotations_to_model_matrices_calculator_cc_proto",
        ":model_matrix_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_options_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/modules/objectron/calculators:annotation_cc_proto",
        "//mediapipe/modules/objectron/calculators:box",
        "//mediapipe/util:color_cc_proto",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@eigen_archive//:eigen3",
    ],
    alwayslink = 1,
)

cc_library(
    name = "annotations_to_render_data_calculator",
    srcs = ["annotations_to_render_data_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":annotations_to_render_data_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_options_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/modules/objectron/calculators:annotation_cc_proto",
        "//mediapipe/util:color_cc_proto",
        "//mediapipe/util:render_data_cc_proto",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
    ],
    alwayslink = 1,
)
@ -0,0 +1,215 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>

#include "Eigen/Core"
#include "Eigen/Dense"
#include "Eigen/Geometry"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_model_matrices_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
#include "mediapipe/modules/objectron/calculators/box.h"
#include "mediapipe/util/color.pb.h"

namespace mediapipe {

namespace {

constexpr char kAnnotationTag[] = "ANNOTATIONS";
constexpr char kModelMatricesTag[] = "MODEL_MATRICES";

using Matrix3fRM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;

}  // namespace

// Converts the box prediction from Objectron Model to the Model matrices
// to be rendered.
//
// Input:
//   ANNOTATIONS - Frame annotations with lifted 3D points, the points are in
//   Objectron coordinate system.
// Output:
//   MODEL_MATRICES - Result ModelMatrices, in OpenGL coordinate system.
//
// Usage example:
// node {
//   calculator: "AnnotationsToModelMatricesCalculator"
//   input_stream: "ANNOTATIONS:objects"
//   output_stream: "MODEL_MATRICES:model_matrices"
// }

class AnnotationsToModelMatricesCalculator : public CalculatorBase {
 public:
  AnnotationsToModelMatricesCalculator() {}
  ~AnnotationsToModelMatricesCalculator() override {}
  AnnotationsToModelMatricesCalculator(
      const AnnotationsToModelMatricesCalculator&) = delete;
  AnnotationsToModelMatricesCalculator& operator=(
      const AnnotationsToModelMatricesCalculator&) = delete;

  static absl::Status GetContract(CalculatorContract* cc);

  absl::Status Open(CalculatorContext* cc) override;

  absl::Status Process(CalculatorContext* cc) override;

 private:
  absl::Status GetModelMatricesForAnnotations(
      const FrameAnnotation& annotations,
      TimedModelMatrixProtoList* model_matrix_list);

  AnnotationsToModelMatricesCalculatorOptions options_;
  Eigen::Vector3f model_scale_;
  Matrix4fRM model_transformation_;
};
REGISTER_CALCULATOR(AnnotationsToModelMatricesCalculator);

absl::Status AnnotationsToModelMatricesCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
  if (cc->Inputs().HasTag(kAnnotationTag)) {
    cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
  }

  if (cc->Outputs().HasTag(kModelMatricesTag)) {
    cc->Outputs().Tag(kModelMatricesTag).Set<TimedModelMatrixProtoList>();
  }

  if (cc->InputSidePackets().HasTag("MODEL_SCALE")) {
    cc->InputSidePackets().Tag("MODEL_SCALE").Set<float[]>();
  }

  if (cc->InputSidePackets().HasTag("MODEL_TRANSFORMATION")) {
    cc->InputSidePackets().Tag("MODEL_TRANSFORMATION").Set<float[]>();
  }
  return absl::OkStatus();
}

absl::Status AnnotationsToModelMatricesCalculator::Open(CalculatorContext* cc) {
  RET_CHECK(cc->Inputs().HasTag(kAnnotationTag));

  cc->SetOffset(TimestampDiff(0));
  options_ = cc->Options<AnnotationsToModelMatricesCalculatorOptions>();

  if (cc->InputSidePackets().HasTag("MODEL_SCALE")) {
    model_scale_ = Eigen::Map<const Eigen::Vector3f>(
        cc->InputSidePackets().Tag("MODEL_SCALE").Get<float[]>());
  } else if (options_.model_scale_size() == 3) {
    model_scale_ =
        Eigen::Map<const Eigen::Vector3f>(options_.model_scale().data());
  } else {
    model_scale_.setOnes();
  }

  if (cc->InputSidePackets().HasTag("MODEL_TRANSFORMATION")) {
    model_transformation_ = Eigen::Map<const Matrix4fRM>(
        cc->InputSidePackets().Tag("MODEL_TRANSFORMATION").Get<float[]>());
  } else if (options_.model_transformation_size() == 16) {
    model_transformation_ =
        Eigen::Map<const Matrix4fRM>(options_.model_transformation().data());
  } else {
    model_transformation_.setIdentity();
  }

  return absl::OkStatus();
}

absl::Status AnnotationsToModelMatricesCalculator::Process(
    CalculatorContext* cc) {
  auto model_matrices = std::make_unique<TimedModelMatrixProtoList>();

  const FrameAnnotation& annotations =
      cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();

  if (!GetModelMatricesForAnnotations(annotations, model_matrices.get()).ok()) {
    return absl::InvalidArgumentError("Error in GetModelMatricesForBoxes");
  }
  cc->Outputs()
      .Tag(kModelMatricesTag)
      .Add(model_matrices.release(), cc->InputTimestamp());

  return absl::OkStatus();
}

absl::Status
AnnotationsToModelMatricesCalculator::GetModelMatricesForAnnotations(
    const FrameAnnotation& annotations,
    TimedModelMatrixProtoList* model_matrix_list) {
  if (model_matrix_list == nullptr) {
    return absl::InvalidArgumentError("model_matrix_list is nullptr");
  }
  model_matrix_list->clear_model_matrix();

  for (const auto& object : annotations.annotations()) {
    TimedModelMatrixProto* model_matrix = model_matrix_list->add_model_matrix();
    model_matrix->set_id(object.object_id());

    // Get object rotation, translation and scale.
    const auto object_rotation =
        Eigen::Map<const Matrix3fRM>(object.rotation().data());
    const auto object_translation =
        Eigen::Map<const Eigen::Vector3f>(object.translation().data());
    const auto object_scale =
        Eigen::Map<const Eigen::Vector3f>(object.scale().data());

    // Compose object transformation matrix.
    Matrix4fRM object_transformation;
    object_transformation.setIdentity();
    object_transformation.topLeftCorner<3, 3>() = object_rotation;
    object_transformation.topRightCorner<3, 1>() = object_translation;

    Matrix4fRM model_view;
    Matrix4fRM objectron_model;
    // The reference view is
    //
    // ref << 0., 0., 1., 0.,
    //        -1., 0., 0., 0.,
    //        0., -1., 0., 0.,
    //        0., 0., 0., 1.;
    // We have objectron_model * model = model_view, to get objectron_model:
    // objectron_model = model_view * model^-1
    // clang-format off
    objectron_model << 1.0,  0.0, 0.0, 0.0,
                       0.0, -1.0, 0.0, 0.0,
                       0.0,  0.0, 1.0, 0.0,
                       0.0,  0.0, 0.0, 1.0;
    // clang-format on

    // Re-scale the CAD model to the scale of the estimated bounding box.
    const Eigen::Vector3f scale = model_scale_.cwiseProduct(object_scale);
    const Matrix4fRM model =
        model_transformation_.array().colwise() * scale.homogeneous().array();

    // Finally compute the model_view matrix.
    model_view = objectron_model * object_transformation * model;

    for (int i = 0; i < model_view.rows(); ++i) {
      for (int j = 0; j < model_view.cols(); ++j) {
        model_matrix->add_matrix_entries(model_view(i, j));
      }
    }
  }
  return absl::OkStatus();
}

}  // namespace mediapipe
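The model_view composition in GetModelMatricesForAnnotations above can be hard to follow from the calculator alone. The following self-contained Eigen sketch (not part of the commit; all numeric values are invented for illustration) reproduces the same composition order: the axis flip from the Objectron convention to OpenGL, times the estimated object pose, times the scaled CAD-model transform.

// Standalone illustration of the matrix composition performed above,
// assuming Eigen is available. With model_transformation_ == identity the
// scaled model reduces to a diagonal scale matrix, which is what this uses.
#include <iostream>
#include "Eigen/Dense"

int main() {
  using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;

  // Hypothetical object pose: identity rotation, 1 m in front of the camera.
  Matrix4fRM object_transformation = Matrix4fRM::Identity();
  object_transformation(2, 3) = -1.0f;

  // Flip the Y axis to move from the Objectron convention to OpenGL.
  Matrix4fRM objectron_model = Matrix4fRM::Identity();
  objectron_model(1, 1) = -1.0f;

  // Re-scale the unit CAD model to a made-up estimated box size (x, y, z).
  const Eigen::Vector3f scale(0.1f, 0.2f, 0.1f);
  Matrix4fRM model = Matrix4fRM::Identity();
  model(0, 0) = scale.x();
  model(1, 1) = scale.y();
  model(2, 2) = scale.z();

  // Same order as the calculator: flip * pose * scaled model.
  const Matrix4fRM model_view = objectron_model * object_transformation * model;
  std::cout << model_view << std::endl;
  return 0;
}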
@ -0,0 +1,33 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message AnnotationsToModelMatricesCalculatorOptions {
  extend CalculatorOptions {
    optional AnnotationsToModelMatricesCalculatorOptions ext = 290166283;
  }

  // Vector of size 3 indicating the scale vector [x, y, z]. We will re-scale
  // the model size with this vector. (Defaults to [1., 1., 1.])
  repeated float model_scale = 1;

  // 4x4 Row major matrix denoting the transformation from the model to the
  // Deep Pursuit 3D coordinate system (where front is +z, and up is +y).
  repeated float model_transformation = 2;
}
@ -0,0 +1,271 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_render_data_calculator.pb.h"
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"

namespace mediapipe {

namespace {

constexpr char kAnnotationTag[] = "ANNOTATIONS";
constexpr char kRenderDataTag[] = "RENDER_DATA";
constexpr char kKeypointLabel[] = "KEYPOINT";
constexpr int kMaxLandmarkThickness = 18;

inline void SetColor(RenderAnnotation* annotation, const Color& color) {
  annotation->mutable_color()->set_r(color.r());
  annotation->mutable_color()->set_g(color.g());
  annotation->mutable_color()->set_b(color.b());
}

// Remap x from range [lo hi] to range [0 1] then multiply by scale.
inline float Remap(float x, float lo, float hi, float scale) {
  return (x - lo) / (hi - lo + 1e-6) * scale;
}

inline void GetMinMaxZ(const FrameAnnotation& annotations, float* z_min,
                       float* z_max) {
  *z_min = std::numeric_limits<float>::max();
  *z_max = std::numeric_limits<float>::min();
  // Use a global depth scale for all the objects in the scene.
  for (const auto& object : annotations.annotations()) {
    for (const auto& keypoint : object.keypoints()) {
      *z_min = std::min(keypoint.point_2d().depth(), *z_min);
      *z_max = std::max(keypoint.point_2d().depth(), *z_max);
    }
  }
}

void SetColorSizeValueFromZ(float z, float z_min, float z_max,
                            RenderAnnotation* render_annotation) {
  const int color_value = 255 - static_cast<int>(Remap(z, z_min, z_max, 255));
  ::mediapipe::Color color;
  color.set_r(color_value);
  color.set_g(color_value);
  color.set_b(color_value);
  SetColor(render_annotation, color);
  const int thickness = static_cast<int>((1.f - Remap(z, z_min, z_max, 1)) *
                                         kMaxLandmarkThickness);
  render_annotation->set_thickness(thickness);
}

}  // namespace

// A calculator that converts FrameAnnotation proto to RenderData proto for
// visualization. The input should be the FrameAnnotation proto buffer. It is
// also possible to specify the connections between landmarks.
//
// Example config:
// node {
//   calculator: "AnnotationsToRenderDataCalculator"
//   input_stream: "ANNOTATIONS:annotations"
//   output_stream: "RENDER_DATA:render_data"
//   options {
//     [AnnotationsToRenderDataCalculator.ext] {
//       landmark_connections: [0, 1, 1, 2]
//       landmark_color { r: 0 g: 255 b: 0 }
//       connection_color { r: 0 g: 255 b: 0 }
//       thickness: 4.0
//     }
//   }
// }
class AnnotationsToRenderDataCalculator : public CalculatorBase {
 public:
  AnnotationsToRenderDataCalculator() {}
  ~AnnotationsToRenderDataCalculator() override {}
  AnnotationsToRenderDataCalculator(const AnnotationsToRenderDataCalculator&) =
      delete;
  AnnotationsToRenderDataCalculator& operator=(
      const AnnotationsToRenderDataCalculator&) = delete;

  static absl::Status GetContract(CalculatorContract* cc);

  absl::Status Open(CalculatorContext* cc) override;

  absl::Status Process(CalculatorContext* cc) override;

 private:
  static void SetRenderAnnotationColorThickness(
      const AnnotationsToRenderDataCalculatorOptions& options,
      RenderAnnotation* render_annotation);
  static RenderAnnotation* AddPointRenderData(
      const AnnotationsToRenderDataCalculatorOptions& options,
      RenderData* render_data);

  // Adds a command to draw a line to the rendering queue. The line is drawn
  // from (start_x, start_y) to (end_x, end_y). The input x,y can either be in
  // pixel or normalized coordinates [0, 1] as indicated by the normalized flag.
  static void AddConnectionToRenderData(
      float start_x, float start_y, float end_x, float end_y,
      const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
      RenderData* render_data);

  // Same as the function above, except that instead of using the options'
  // color data, it colors the line according to the two depth values:
  // gray_val1 is the color of the starting point and gray_val2 is the color of
  // the ending point, and the line is drawn with a gradient from gray_val1 to
  // gray_val2. The gray values range from 0 to 255 (black to white).
  static void AddConnectionToRenderData(
      float start_x, float start_y, float end_x, float end_y,
      const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
      int gray_val1, int gray_val2, RenderData* render_data);

  AnnotationsToRenderDataCalculatorOptions options_;
};
REGISTER_CALCULATOR(AnnotationsToRenderDataCalculator);

absl::Status AnnotationsToRenderDataCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
  if (cc->Inputs().HasTag(kAnnotationTag)) {
    cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
  }
  cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();

  return absl::OkStatus();
}

absl::Status AnnotationsToRenderDataCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));
  options_ = cc->Options<AnnotationsToRenderDataCalculatorOptions>();

  return absl::OkStatus();
}

absl::Status AnnotationsToRenderDataCalculator::Process(CalculatorContext* cc) {
  auto render_data = absl::make_unique<RenderData>();
  bool visualize_depth = options_.visualize_landmark_depth();
  float z_min = 0.f;
  float z_max = 0.f;

  if (cc->Inputs().HasTag(kAnnotationTag)) {
    const auto& annotations =
        cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
    RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
        << "Number of entries in landmark connections must be a multiple of 2";

    if (visualize_depth) {
      GetMinMaxZ(annotations, &z_min, &z_max);
      // Only change rendering if there are actually z values other than 0.
      visualize_depth &= ((z_max - z_min) > 1e-3);
    }

    for (const auto& object : annotations.annotations()) {
      for (const auto& keypoint : object.keypoints()) {
        auto* keypoint_data_render =
            AddPointRenderData(options_, render_data.get());
        auto* point = keypoint_data_render->mutable_point();
        if (visualize_depth) {
          SetColorSizeValueFromZ(keypoint.point_2d().depth(), z_min, z_max,
                                 keypoint_data_render);
        }

        point->set_normalized(true);
        point->set_x(keypoint.point_2d().x());
        point->set_y(keypoint.point_2d().y());
      }

      // Add edges.
      for (int i = 0; i < options_.landmark_connections_size(); i += 2) {
        const auto& ld0 =
            object.keypoints(options_.landmark_connections(i)).point_2d();
        const auto& ld1 =
            object.keypoints(options_.landmark_connections(i + 1)).point_2d();
        const bool normalized = true;

        if (visualize_depth) {
          const int gray_val1 =
              255 - static_cast<int>(Remap(ld0.depth(), z_min, z_max, 255));
          const int gray_val2 =
              255 - static_cast<int>(Remap(ld1.depth(), z_min, z_max, 255));
          AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
                                    options_, normalized, gray_val1, gray_val2,
                                    render_data.get());
        } else {
          AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
                                    options_, normalized, render_data.get());
        }
      }
    }
  }

  cc->Outputs()
      .Tag(kRenderDataTag)
      .Add(render_data.release(), cc->InputTimestamp());

  return absl::OkStatus();
}

void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
    float start_x, float start_y, float end_x, float end_y,
    const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
    int gray_val1, int gray_val2, RenderData* render_data) {
  auto* connection_annotation = render_data->add_render_annotations();
  RenderAnnotation::GradientLine* line =
      connection_annotation->mutable_gradient_line();
  line->set_x_start(start_x);
  line->set_y_start(start_y);
  line->set_x_end(end_x);
  line->set_y_end(end_y);
  line->set_normalized(normalized);
  line->mutable_color1()->set_r(gray_val1);
  line->mutable_color1()->set_g(gray_val1);
  line->mutable_color1()->set_b(gray_val1);
  line->mutable_color2()->set_r(gray_val2);
  line->mutable_color2()->set_g(gray_val2);
  line->mutable_color2()->set_b(gray_val2);
  connection_annotation->set_thickness(options.thickness());
}

void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
    float start_x, float start_y, float end_x, float end_y,
    const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
    RenderData* render_data) {
  auto* connection_annotation = render_data->add_render_annotations();
  RenderAnnotation::Line* line = connection_annotation->mutable_line();
  line->set_x_start(start_x);
  line->set_y_start(start_y);
  line->set_x_end(end_x);
  line->set_y_end(end_y);
  line->set_normalized(normalized);
  SetColor(connection_annotation, options.connection_color());
  connection_annotation->set_thickness(options.thickness());
}

RenderAnnotation* AnnotationsToRenderDataCalculator::AddPointRenderData(
    const AnnotationsToRenderDataCalculatorOptions& options,
    RenderData* render_data) {
  auto* landmark_data_annotation = render_data->add_render_annotations();
  landmark_data_annotation->set_scene_tag(kKeypointLabel);
  SetRenderAnnotationColorThickness(options, landmark_data_annotation);
  return landmark_data_annotation;
}

void AnnotationsToRenderDataCalculator::SetRenderAnnotationColorThickness(
    const AnnotationsToRenderDataCalculatorOptions& options,
    RenderAnnotation* render_annotation) {
  SetColor(render_annotation, options.landmark_color());
  render_annotation->set_thickness(options.thickness());
}

}  // namespace mediapipe
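As a quick check of the depth shading above: Remap() sends the scene's nearest depth to 0 and its farthest depth to 1, so the nearest keypoints are drawn brightest and thickest and the farthest ones darkest and thinnest. A tiny standalone example with made-up depth values (an illustration only, not part of the commit):

// Mirrors Remap() and SetColorSizeValueFromZ() for three hypothetical depths.
#include <cstdio>

int main() {
  const float z_min = 0.5f, z_max = 2.0f;        // assumed scene depth range
  const float depths[] = {0.5f, 1.25f, 2.0f};    // near, middle, far keypoints
  for (float z : depths) {
    const float t = (z - z_min) / (z_max - z_min + 1e-6f);   // Remap to [0, 1]
    const int gray = 255 - static_cast<int>(t * 255);         // 255 = nearest
    const int thickness = static_cast<int>((1.f - t) * 18);   // kMaxLandmarkThickness
    std::printf("z=%.2f -> gray=%d thickness=%d\n", z, gray, thickness);
  }
  return 0;
}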
@ -0,0 +1,43 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";

message AnnotationsToRenderDataCalculatorOptions {
  extend CalculatorOptions {
    optional AnnotationsToRenderDataCalculatorOptions ext = 267644238;
  }

  // Specifies the landmarks to be connected in the drawing. For example, the
  // landmark_connections value of [0, 1, 1, 2] specifies two connections: one
  // that connects landmarks with index 0 and 1, and another that connects
  // landmarks with index 1 and 2.
  repeated int32 landmark_connections = 1;

  // Color of the landmarks.
  optional Color landmark_color = 2;
  // Color of the connections.
  optional Color connection_color = 3;

  // Thickness of the drawing of landmarks and connections.
  optional double thickness = 4 [default = 1.0];

  // Change color and size of rendered landmarks based on their z values.
  optional bool visualize_landmark_depth = 5 [default = true];
}
@ -0,0 +1,947 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#if defined(__ANDROID__)
#include "mediapipe/util/android/asset_manager_util.h"
#else
#include <fstream>
#include <iostream>
#endif

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/shader_util.h"
#include "mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/modules/objectron/calculators/camera_parameters.pb.h"

namespace mediapipe {

namespace {

#if defined(GL_DEBUG)
#define GLCHECK(command) \
  command;               \
  if (int err = glGetError()) LOG(ERROR) << "GL error detected: " << err;
#else
#define GLCHECK(command) command
#endif

// For ease of use, we prefer ImageFrame on Android and GpuBuffer otherwise.
#if defined(__ANDROID__)
typedef ImageFrame AssetTextureFormat;
#else
typedef GpuBuffer AssetTextureFormat;
#endif

enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, ATTRIB_NORMAL, NUM_ATTRIBUTES };
static const int kNumMatrixEntries = 16;

// Hard-coded MVP Matrix for testing.
static const float kModelMatrix[] = {0.83704215,  -0.36174262, 0.41049102, 0.0,
                                     0.06146407,  0.8076706,   0.5864218,  0.0,
                                     -0.54367524, -0.4656292,  0.69828844, 0.0,
                                     0.0,         0.0,         -98.64117,  1.0};

// Loads a texture from an input side packet, and streams in an animation file
// from a filename given in another input side packet, and renders the animation
// over the screen according to the input timestamp and desired animation FPS.
//
// Inputs:
//   VIDEO (GpuBuffer, optional):
//     If provided, the input buffer will be assumed to be unique, and will be
//     consumed by this calculator and rendered to directly. The output video
//     buffer will then be the released reference to the input video buffer.
//   MODEL_MATRICES (TimedModelMatrixProtoList, optional):
//     If provided, will set the model matrices for the objects to be rendered
//     during future rendering calls.
//   TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
//     Texture to use with animation file. Texture is REQUIRED to be passed into
//     the calculator, but can be passed in as a Side Packet OR Input Stream.
//
// Input side packets:
//   TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
//     Texture to use with animation file. Texture is REQUIRED to be passed into
//     the calculator, but can be passed in as a Side Packet OR Input Stream.
//   ANIMATION_ASSET (String, required):
//     Path of animation file to load and render. The file format expects an
//     arbitrary number of animation frames, concatenated directly together,
//     with each animation frame looking like:
//       HEADER
//       VERTICES
//       TEXTURE_COORDS
//       INDICES
//     The header consists of 3 int32 lengths, the sizes of the vertex data,
//     the texcoord data, and the index data, respectively. Let us call those
//     N1, N2, and N3. Then we expect N1 float32's for vertex information
//     (x1,y1,z1,x2,y2,z2,etc.), followed by N2 float32's for texcoord
//     information (u1,v1,u2,v2,u3,v3,etc.), followed by N3 shorts/int16's
//     for triangle indices (a1,b1,c1,a2,b2,c2,etc.).
//   CAMERA_PARAMETERS_PROTO_STRING (String, optional):
//     Serialized proto std::string of CameraParametersProto. We need this to
//     get the right aspect ratio and field of view.
// Options:
//   aspect_ratio: the ratio between the rendered image width and height.
//     It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
//     is provided.
//   vertical_fov_degrees: vertical field of view in degrees.
//     It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
//     is provided.
//   z_clipping_plane_near: near plane value for z-clipping.
//   z_clipping_plane_far: far plane value for z-clipping.
//   animation_speed_fps: speed at which to cycle through animation frames (in
//     frames per second).
//
// Outputs:
//   OUTPUT, or index 0 (GpuBuffer):
//     Frames filled with the given texture.

// Simple helper-struct for containing the parsed geometry data from a 3D
// animation frame for rendering.
struct TriangleMesh {
  int index_count = 0;  // Needed for glDrawElements rendering call
  std::unique_ptr<float[]> normals = nullptr;
  std::unique_ptr<float[]> vertices = nullptr;
  std::unique_ptr<float[]> texture_coords = nullptr;
  std::unique_ptr<int16[]> triangle_indices = nullptr;
};

typedef std::unique_ptr<float[]> ModelMatrix;

}  // namespace
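The ANIMATION_ASSET layout described in the comment above (an int32 header of three lengths N1/N2/N3 followed by N1 float32 vertices, N2 float32 texcoords, and N3 int16 triangle indices) can be read frame by frame roughly as in the following sketch. This is an illustration under those stated assumptions, not the loader used by the calculator (which goes through the LoadAnimation/LoadAnimationAndroid routines declared below), and it omits endianness and error handling.

// Sketch only: read one animation frame in the documented layout from a stream.
#include <cstdint>
#include <fstream>
#include <vector>

struct FrameData {
  std::vector<float> vertices;
  std::vector<float> texture_coords;
  std::vector<int16_t> triangle_indices;
};

bool ReadOneFrame(std::ifstream& in, FrameData* frame) {
  int32_t lengths[3];  // N1, N2, N3: element counts of the three sections.
  if (!in.read(reinterpret_cast<char*>(lengths), sizeof(lengths))) return false;
  frame->vertices.resize(lengths[0]);
  frame->texture_coords.resize(lengths[1]);
  frame->triangle_indices.resize(lengths[2]);
  in.read(reinterpret_cast<char*>(frame->vertices.data()),
          lengths[0] * sizeof(float));
  in.read(reinterpret_cast<char*>(frame->texture_coords.data()),
          lengths[1] * sizeof(float));
  in.read(reinterpret_cast<char*>(frame->triangle_indices.data()),
          lengths[2] * sizeof(int16_t));
  return static_cast<bool>(in);
}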
class GlAnimationOverlayCalculator : public CalculatorBase {
 public:
  GlAnimationOverlayCalculator() {}
  ~GlAnimationOverlayCalculator();

  static absl::Status GetContract(CalculatorContract *cc);

  absl::Status Open(CalculatorContext *cc) override;
  absl::Status Process(CalculatorContext *cc) override;

 private:
  bool has_video_stream_ = false;
  bool has_model_matrix_stream_ = false;
  bool has_mask_model_matrix_stream_ = false;
  bool has_occlusion_mask_ = false;

  GlCalculatorHelper helper_;
  bool initialized_ = false;
  GlTexture texture_;
  GlTexture mask_texture_;

  GLuint renderbuffer_ = 0;
  bool depth_buffer_created_ = false;

  GLuint program_ = 0;
  GLint texture_uniform_ = -1;
  GLint perspective_matrix_uniform_ = -1;
  GLint model_matrix_uniform_ = -1;

  std::vector<TriangleMesh> triangle_meshes_;
  std::vector<TriangleMesh> mask_meshes_;
  Timestamp animation_start_time_;
  int frame_count_ = 0;
  float animation_speed_fps_;

  std::vector<ModelMatrix> current_model_matrices_;
  std::vector<ModelMatrix> current_mask_model_matrices_;

  // Perspective matrix for rendering, to be applied to all model matrices
  // prior to passing through to the shader as a MVP matrix. Initialized during
  // first image packet read.
  float perspective_matrix_[kNumMatrixEntries];

  void ComputeAspectRatioAndFovFromCameraParameters(
      const CameraParametersProto &camera_parameters, float *aspect_ratio,
      float *vertical_fov_degrees);

  int GetAnimationFrameIndex(Timestamp timestamp);
  absl::Status GlSetup();
  absl::Status GlBind(const TriangleMesh &triangle_mesh,
                      const GlTexture &texture);
  absl::Status GlRender(const TriangleMesh &triangle_mesh,
                        const float *model_matrix);
  void InitializePerspectiveMatrix(float aspect_ratio,
                                   float vertical_fov_degrees, float z_near,
                                   float z_far);
  void LoadModelMatrices(const TimedModelMatrixProtoList &model_matrices,
                         std::vector<ModelMatrix> *current_model_matrices);
  void CalculateTriangleMeshNormals(int normals_len,
                                    TriangleMesh *triangle_mesh);
  void Normalize3f(float input[3]);

#if !defined(__ANDROID__)
  // Asset loading routine for all non-Android platforms.
  bool LoadAnimation(const std::string &filename);
#else
  // Asset loading for all Android platforms.
  bool LoadAnimationAndroid(const std::string &filename,
                            std::vector<TriangleMesh> *mesh);
  bool ReadBytesFromAsset(AAsset *asset, void *buffer, int num_bytes_to_read);
#endif
};
REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
|
||||||
|
|
||||||
|
// static
|
||||||
|
absl::Status GlAnimationOverlayCalculator::GetContract(CalculatorContract *cc) {
|
||||||
|
MP_RETURN_IF_ERROR(
|
||||||
|
GlCalculatorHelper::SetupInputSidePackets(&(cc->InputSidePackets())));
|
||||||
|
if (cc->Inputs().HasTag("VIDEO")) {
|
||||||
|
// Currently used only for size and timestamp.
|
||||||
|
cc->Inputs().Tag("VIDEO").Set<GpuBuffer>();
|
||||||
|
}
|
||||||
|
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0).Set<GpuBuffer>();
|
||||||
|
|
||||||
|
if (cc->Inputs().HasTag("MODEL_MATRICES")) {
|
||||||
|
cc->Inputs().Tag("MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
|
||||||
|
}
|
||||||
|
if (cc->Inputs().HasTag("MASK_MODEL_MATRICES")) {
|
||||||
|
cc->Inputs().Tag("MASK_MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must have texture as Input Stream or Side Packet
|
||||||
|
if (cc->InputSidePackets().HasTag("TEXTURE")) {
|
||||||
|
cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
|
||||||
|
} else {
|
||||||
|
cc->Inputs().Tag("TEXTURE").Set<AssetTextureFormat>();
|
||||||
|
}
|
||||||
|
|
||||||
|
cc->InputSidePackets().Tag("ANIMATION_ASSET").Set<std::string>();
|
||||||
|
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
|
||||||
|
cc->InputSidePackets()
|
||||||
|
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
|
||||||
|
.Set<std::string>();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
|
||||||
|
cc->InputSidePackets().Tag("MASK_TEXTURE").Set<AssetTextureFormat>();
|
||||||
|
}
|
||||||
|
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
|
||||||
|
cc->InputSidePackets().Tag("MASK_ASSET").Set<std::string>();
|
||||||
|
}
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
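
// For reference (editorial note, not from the original source): a graph node
// consistent with the contract above could look like the following pbtxt;
// all stream and side-packet names here are hypothetical placeholders.
//
//   node {
//     calculator: "GlAnimationOverlayCalculator"
//     input_stream: "VIDEO:input_video"
//     input_stream: "MODEL_MATRICES:model_matrices"
//     input_side_packet: "TEXTURE:box_texture"
//     input_side_packet: "ANIMATION_ASSET:box_asset_name"
//     output_stream: "OUTPUT:output_video"
//   }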
|
||||||
|
|
||||||
|
void GlAnimationOverlayCalculator::CalculateTriangleMeshNormals(
|
||||||
|
int normals_len, TriangleMesh *triangle_mesh) {
|
||||||
|
// Set triangle_mesh normals for shader usage
|
||||||
|
triangle_mesh->normals.reset(new float[normals_len]);
|
||||||
|
// Used for storing the vertex normals prior to averaging
|
||||||
|
std::vector<float> vertex_normals_sum(normals_len, 0.0f);
|
||||||
|
// Compute every triangle surface normal and store them for averaging
|
||||||
|
for (int idx = 0; idx < triangle_mesh->index_count; idx += 3) {
|
||||||
|
int v_idx[3];
|
||||||
|
v_idx[0] = triangle_mesh->triangle_indices.get()[idx];
|
||||||
|
v_idx[1] = triangle_mesh->triangle_indices.get()[idx + 1];
|
||||||
|
v_idx[2] = triangle_mesh->triangle_indices.get()[idx + 2];
|
||||||
|
// (V1) vertex X,Y,Z indices in triangle_mesh.vertices
|
||||||
|
const float v1x = triangle_mesh->vertices[v_idx[0] * 3];
|
||||||
|
const float v1y = triangle_mesh->vertices[v_idx[0] * 3 + 1];
|
||||||
|
const float v1z = triangle_mesh->vertices[v_idx[0] * 3 + 2];
|
||||||
|
// (V2) vertex X,Y,Z indices in triangle_mesh.vertices
|
||||||
|
const float v2x = triangle_mesh->vertices[v_idx[1] * 3];
|
||||||
|
const float v2y = triangle_mesh->vertices[v_idx[1] * 3 + 1];
|
||||||
|
const float v2z = triangle_mesh->vertices[v_idx[1] * 3 + 2];
|
||||||
|
// (V3) vertex X,Y,Z indices in triangle_mesh.vertices
|
||||||
|
const float v3x = triangle_mesh->vertices[v_idx[2] * 3];
|
||||||
|
const float v3y = triangle_mesh->vertices[v_idx[2] * 3 + 1];
|
||||||
|
const float v3z = triangle_mesh->vertices[v_idx[2] * 3 + 2];
|
||||||
|
// Calculate normals from vertices
|
||||||
|
// V2 - V1
|
||||||
|
const float ax = v2x - v1x;
|
||||||
|
const float ay = v2y - v1y;
|
||||||
|
const float az = v2z - v1z;
|
||||||
|
// V3 - V1
|
||||||
|
const float bx = v3x - v1x;
|
||||||
|
const float by = v3y - v1y;
|
||||||
|
const float bz = v3z - v1z;
|
||||||
|
// Calculate cross product
|
||||||
|
const float normal_x = ay * bz - az * by;
|
||||||
|
const float normal_y = az * bx - ax * bz;
|
||||||
|
const float normal_z = ax * by - ay * bx;
|
||||||
|
// The normals calculated above must be normalized if we wish to prevent
|
||||||
|
// triangles with a larger surface area from dominating the normal
|
||||||
|
// calculations; however, none of our current models require this
|
||||||
|
// normalization.
|
||||||
|
|
||||||
|
// Add connected normal to each associated vertex
|
||||||
|
// It is also necessary to increment each vertex denominator for averaging
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
vertex_normals_sum[v_idx[i] * 3] += normal_x;
|
||||||
|
vertex_normals_sum[v_idx[i] * 3 + 1] += normal_y;
|
||||||
|
vertex_normals_sum[v_idx[i] * 3 + 2] += normal_z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine all triangle normals connected to each vertex by adding the X,Y,Z
|
||||||
|
// value of each adjacent triangle surface normal to every vertex and then
|
||||||
|
// averaging the combined value.
|
||||||
|
for (int idx = 0; idx < normals_len; idx += 3) {
|
||||||
|
float normal[3];
|
||||||
|
normal[0] = vertex_normals_sum[idx];
|
||||||
|
normal[1] = vertex_normals_sum[idx + 1];
|
||||||
|
normal[2] = vertex_normals_sum[idx + 2];
|
||||||
|
Normalize3f(normal);
|
||||||
|
triangle_mesh->normals.get()[idx] = normal[0];
|
||||||
|
triangle_mesh->normals.get()[idx + 1] = normal[1];
|
||||||
|
triangle_mesh->normals.get()[idx + 2] = normal[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlAnimationOverlayCalculator::Normalize3f(float input[3]) {
|
||||||
|
float product = 0.0;
|
||||||
|
product += input[0] * input[0];
|
||||||
|
product += input[1] * input[1];
|
||||||
|
product += input[2] * input[2];
|
||||||
|
float magnitude = sqrt(product);
|
||||||
|
input[0] /= magnitude;
|
||||||
|
input[1] /= magnitude;
|
||||||
|
input[2] /= magnitude;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function for initializing our perspective matrix.
|
||||||
|
void GlAnimationOverlayCalculator::InitializePerspectiveMatrix(
|
||||||
|
float aspect_ratio, float fov_degrees, float z_near, float z_far) {
|
||||||
|
// Standard perspective projection matrix calculations.
|
||||||
|
const float f = 1.0f / std::tan(fov_degrees * M_PI / 360.0f);
|
||||||
|
for (int i = 0; i < kNumMatrixEntries; i++) {
|
||||||
|
perspective_matrix_[i] = 0;
|
||||||
|
}
|
||||||
|
const float denom = 1.0f / (z_near - z_far);
|
||||||
|
perspective_matrix_[0] = f / aspect_ratio;
|
||||||
|
perspective_matrix_[5] = f;
|
||||||
|
perspective_matrix_[10] = (z_near + z_far) * denom;
|
||||||
|
perspective_matrix_[11] = -1.0f;
|
||||||
|
perspective_matrix_[14] = 2.0f * z_far * z_near * denom;
|
||||||
|
}
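
// For reference (editorial note, not from the original source): with OpenGL's
// column-major storage, the entries set above form the standard perspective
// projection matrix
//
//   [ c/aspect   0        0             0        ]
//   [ 0          c        0             0        ]
//   [ 0          0   (n+f)/(n-f)   2*f*n/(n-f)   ]
//   [ 0          0       -1             0        ]
//
// where n = z_near, f = z_far and c = 1/tan(fov_degrees/2, in radians).
// Flat indices 0, 5, 10 and 14 hold those terms, and index 11 holds the -1
// that produces the perspective divide.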
|
||||||
|
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
// Helper function for reading in a specified number of bytes from an Android
|
||||||
|
// asset. Returns true if successfully reads in all bytes into buffer.
|
||||||
|
bool GlAnimationOverlayCalculator::ReadBytesFromAsset(AAsset *asset,
|
||||||
|
void *buffer,
|
||||||
|
int num_bytes_to_read) {
|
||||||
|
// Most file systems use block sizes of 4KB or 8KB; ideally we'd choose a
|
||||||
|
// small multiple of the block size for best input streaming performance, so
|
||||||
|
// we go for a reasonably safe buffer size of 8KB = 8*1024 bytes.
|
||||||
|
static const int kMaxChunkSize = 8192;
|
||||||
|
|
||||||
|
int bytes_left = num_bytes_to_read;
|
||||||
|
int bytes_read = 1; // any value > 0 here just to start looping.
|
||||||
|
|
||||||
|
// Treat as uint8_t array so we can deal in single byte arithmetic easily.
|
||||||
|
uint8_t *currBufferIndex = reinterpret_cast<uint8_t *>(buffer);
|
||||||
|
while (bytes_read > 0 && bytes_left > 0) {
|
||||||
|
bytes_read = AAsset_read(asset, (void *)currBufferIndex,
|
||||||
|
std::min(bytes_left, kMaxChunkSize));
|
||||||
|
bytes_left -= bytes_read;
|
||||||
|
currBufferIndex += bytes_read;
|
||||||
|
}
|
||||||
|
// At least log any I/O errors encountered.
|
||||||
|
if (bytes_read < 0) {
|
||||||
|
LOG(ERROR) << "Error reading from AAsset: " << bytes_read;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (bytes_left > 0) {
|
||||||
|
// Reached EOF before reading in specified number of bytes.
|
||||||
|
LOG(WARNING) << "Reached EOF before reading in specified number of bytes.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The below asset streaming code is Android-only, making use of the platform
|
||||||
|
// JNI helper classes AAssetManager and AAsset.
|
||||||
|
bool GlAnimationOverlayCalculator::LoadAnimationAndroid(
|
||||||
|
const std::string &filename, std::vector<TriangleMesh> *meshes) {
|
||||||
|
mediapipe::AssetManager *mediapipe_asset_manager =
|
||||||
|
Singleton<mediapipe::AssetManager>::get();
|
||||||
|
AAssetManager *asset_manager = mediapipe_asset_manager->GetAssetManager();
|
||||||
|
if (!asset_manager) {
|
||||||
|
LOG(ERROR) << "Failed to access Android asset manager.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// New read-bytes stuff here! First we open file for streaming.
|
||||||
|
AAsset *asset = AAssetManager_open(asset_manager, filename.c_str(),
|
||||||
|
AASSET_MODE_STREAMING);
|
||||||
|
if (!asset) {
|
||||||
|
LOG(ERROR) << "Failed to open animation asset: " << filename;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// And now, while we are able to stream in more frames, we do so.
|
||||||
|
frame_count_ = 0;
|
||||||
|
int32 lengths[3];
|
||||||
|
while (ReadBytesFromAsset(asset, (void *)lengths, sizeof(lengths[0]) * 3)) {
|
||||||
|
// About to start reading the next animation frame. Stream it in here.
|
||||||
|
// Each frame stores first the object counts of its three arrays
|
||||||
|
// (vertices, texture coordinates, triangle indices; respectively), and
|
||||||
|
// then stores each of those arrays as a byte dump, in order.
|
||||||
|
meshes->emplace_back();
|
||||||
|
TriangleMesh &triangle_mesh = meshes->back();
|
||||||
|
// Try to read in vertices (4-byte floats)
|
||||||
|
triangle_mesh.vertices.reset(new float[lengths[0]]);
|
||||||
|
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.vertices.get(),
|
||||||
|
sizeof(float) * lengths[0])) {
|
||||||
|
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Try to read in texture coordinates (4-byte floats)
|
||||||
|
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
|
||||||
|
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.texture_coords.get(),
|
||||||
|
sizeof(float) * lengths[1])) {
|
||||||
|
LOG(ERROR) << "Failed to read tex-coords for frame " << frame_count_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Try to read in indices (2-byte shorts)
|
||||||
|
triangle_mesh.index_count = lengths[2];
|
||||||
|
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
|
||||||
|
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.triangle_indices.get(),
|
||||||
|
sizeof(int16) * lengths[2])) {
|
||||||
|
LOG(ERROR) << "Failed to read indices for frame " << frame_count_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the normals for this triangle_mesh
|
||||||
|
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
|
||||||
|
|
||||||
|
frame_count_++;
|
||||||
|
}
|
||||||
|
AAsset_close(asset);
|
||||||
|
|
||||||
|
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
|
||||||
|
if (meshes->empty()) {
|
||||||
|
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // defined(__ANDROID__)
|
||||||
|
|
||||||
|
bool GlAnimationOverlayCalculator::LoadAnimation(const std::string &filename) {
|
||||||
|
std::ifstream infile(filename.c_str(), std::ifstream::binary);
|
||||||
|
if (!infile) {
|
||||||
|
LOG(ERROR) << "Error opening asset with filename: " << filename;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
frame_count_ = 0;
|
||||||
|
int32 lengths[3];
|
||||||
|
while (true) {
|
||||||
|
// See if we have more initial size counts to read in.
|
||||||
|
infile.read((char *)(lengths), sizeof(lengths[0]) * 3);
|
||||||
|
if (!infile) {
|
||||||
|
// No more frames to read. Close out.
|
||||||
|
infile.close();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
triangle_meshes_.emplace_back();
|
||||||
|
TriangleMesh &triangle_mesh = triangle_meshes_.back();
|
||||||
|
|
||||||
|
// Try to read in vertices (4-byte floats).
|
||||||
|
triangle_mesh.vertices.reset(new float[lengths[0]]);
|
||||||
|
infile.read((char *)(triangle_mesh.vertices.get()),
|
||||||
|
sizeof(float) * lengths[0]);
|
||||||
|
if (!infile) {
|
||||||
|
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to read in texture coordinates (4-byte floats)
|
||||||
|
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
|
||||||
|
infile.read((char *)(triangle_mesh.texture_coords.get()),
|
||||||
|
sizeof(float) * lengths[1]);
|
||||||
|
if (!infile) {
|
||||||
|
LOG(ERROR) << "Failed to read texture coordinates for frame "
|
||||||
|
<< frame_count_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to read in the triangle indices (2-byte shorts)
|
||||||
|
triangle_mesh.index_count = lengths[2];
|
||||||
|
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
|
||||||
|
infile.read((char *)(triangle_mesh.triangle_indices.get()),
|
||||||
|
sizeof(int16) * lengths[2]);
|
||||||
|
if (!infile) {
|
||||||
|
LOG(ERROR) << "Failed to read triangle indices for frame "
|
||||||
|
<< frame_count_;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the normals for this triangle_mesh
|
||||||
|
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
|
||||||
|
|
||||||
|
frame_count_++;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
|
||||||
|
if (triangle_meshes_.empty()) {
|
||||||
|
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void GlAnimationOverlayCalculator::ComputeAspectRatioAndFovFromCameraParameters(
|
||||||
|
const CameraParametersProto &camera_parameters, float *aspect_ratio,
|
||||||
|
float *vertical_fov_degrees) {
|
||||||
|
CHECK(aspect_ratio != nullptr);
|
||||||
|
CHECK(vertical_fov_degrees != nullptr);
|
||||||
|
*aspect_ratio =
|
||||||
|
camera_parameters.portrait_width() / camera_parameters.portrait_height();
|
||||||
|
*vertical_fov_degrees =
|
||||||
|
std::atan(camera_parameters.portrait_height() * 0.5f) * 2 * 180 / M_PI;
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status GlAnimationOverlayCalculator::Open(CalculatorContext *cc) {
|
||||||
|
cc->SetOffset(TimestampDiff(0));
|
||||||
|
MP_RETURN_IF_ERROR(helper_.Open(cc));
|
||||||
|
|
||||||
|
const auto &options = cc->Options<GlAnimationOverlayCalculatorOptions>();
|
||||||
|
|
||||||
|
animation_speed_fps_ = options.animation_speed_fps();
|
||||||
|
|
||||||
|
// Construct the projection matrix using input side packets or options.
|
||||||
|
float aspect_ratio;
|
||||||
|
float vertical_fov_degrees;
|
||||||
|
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
|
||||||
|
const std::string &camera_parameters_proto_string =
|
||||||
|
cc->InputSidePackets()
|
||||||
|
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
|
||||||
|
.Get<std::string>();
|
||||||
|
CameraParametersProto camera_parameters_proto;
|
||||||
|
camera_parameters_proto.ParseFromString(camera_parameters_proto_string);
|
||||||
|
ComputeAspectRatioAndFovFromCameraParameters(
|
||||||
|
camera_parameters_proto, &aspect_ratio, &vertical_fov_degrees);
|
||||||
|
} else {
|
||||||
|
aspect_ratio = options.aspect_ratio();
|
||||||
|
vertical_fov_degrees = options.vertical_fov_degrees();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the chosen aspect ratio and field of view to construct the projection matrix.
|
||||||
|
InitializePerspectiveMatrix(aspect_ratio, vertical_fov_degrees,
|
||||||
|
options.z_clipping_plane_near(),
|
||||||
|
options.z_clipping_plane_far());
|
||||||
|
|
||||||
|
// See what streams we have.
|
||||||
|
has_video_stream_ = cc->Inputs().HasTag("VIDEO");
|
||||||
|
has_model_matrix_stream_ = cc->Inputs().HasTag("MODEL_MATRICES");
|
||||||
|
has_mask_model_matrix_stream_ = cc->Inputs().HasTag("MASK_MODEL_MATRICES");
|
||||||
|
|
||||||
|
// Try to load in the animation asset in a platform-specific manner.
|
||||||
|
const std::string &asset_name =
|
||||||
|
cc->InputSidePackets().Tag("ANIMATION_ASSET").Get<std::string>();
|
||||||
|
bool loaded_animation = false;
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
|
||||||
|
has_occlusion_mask_ = true;
|
||||||
|
const std::string &mask_asset_name =
|
||||||
|
cc->InputSidePackets().Tag("MASK_ASSET").Get<std::string>();
|
||||||
|
loaded_animation = LoadAnimationAndroid(mask_asset_name, &mask_meshes_);
|
||||||
|
if (!loaded_animation) {
|
||||||
|
LOG(ERROR) << "Failed to load mask asset.";
|
||||||
|
return absl::UnknownError("Failed to load mask asset.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
loaded_animation = LoadAnimationAndroid(asset_name, &triangle_meshes_);
|
||||||
|
#else
|
||||||
|
loaded_animation = LoadAnimation(asset_name);
|
||||||
|
#endif
|
||||||
|
if (!loaded_animation) {
|
||||||
|
LOG(ERROR) << "Failed to load animation asset.";
|
||||||
|
return absl::UnknownError("Failed to load animation asset.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
|
||||||
|
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
|
||||||
|
const auto &mask_texture =
|
||||||
|
cc->InputSidePackets().Tag("MASK_TEXTURE").Get<AssetTextureFormat>();
|
||||||
|
mask_texture_ = helper_.CreateSourceTexture(mask_texture);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load in all static texture data if it exists
|
||||||
|
if (cc->InputSidePackets().HasTag("TEXTURE")) {
|
||||||
|
const auto &input_texture =
|
||||||
|
cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
|
||||||
|
texture_ = helper_.CreateSourceTexture(input_texture);
|
||||||
|
}
|
||||||
|
|
||||||
|
VLOG(2) << "Input texture size: " << texture_.width() << ", "
|
||||||
|
<< texture_.height() << std::endl;
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
int GlAnimationOverlayCalculator::GetAnimationFrameIndex(Timestamp timestamp) {
|
||||||
|
double seconds_delta = timestamp.Seconds() - animation_start_time_.Seconds();
|
||||||
|
int64_t frame_index =
|
||||||
|
static_cast<int64_t>(seconds_delta * animation_speed_fps_);
|
||||||
|
frame_index %= frame_count_;
|
||||||
|
return static_cast<int>(frame_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlAnimationOverlayCalculator::LoadModelMatrices(
|
||||||
|
const TimedModelMatrixProtoList &model_matrices,
|
||||||
|
std::vector<ModelMatrix> *current_model_matrices) {
|
||||||
|
current_model_matrices->clear();
|
||||||
|
for (int i = 0; i < model_matrices.model_matrix_size(); ++i) {
|
||||||
|
const auto &model_matrix = model_matrices.model_matrix(i);
|
||||||
|
CHECK(model_matrix.matrix_entries_size() == kNumMatrixEntries)
|
||||||
|
<< "Invalid Model Matrix";
|
||||||
|
current_model_matrices->emplace_back();
|
||||||
|
ModelMatrix &new_matrix = current_model_matrices->back();
|
||||||
|
new_matrix.reset(new float[kNumMatrixEntries]);
|
||||||
|
for (int j = 0; j < kNumMatrixEntries; j++) {
|
||||||
|
// Model matrices streamed in using ROW-MAJOR format, but we want
|
||||||
|
// COLUMN-MAJOR for rendering, so we transpose here.
|
||||||
|
int col = j % 4;
|
||||||
|
int row = j / 4;
|
||||||
|
new_matrix[row + col * 4] = model_matrix.matrix_entries(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
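
// Illustrative helper (editorial addition, not part of the original file):
// the index mapping used in LoadModelMatrices above, written out as a
// standalone transpose. A row-major entry at flat index j = row * 4 + col is
// stored at column-major flat index row + col * 4, which is exactly what
// new_matrix[row + col * 4] = model_matrix.matrix_entries(j) does.
void TransposeRowMajorToColumnMajorForIllustration(const float in[16],
                                                   float out[16]) {
  for (int j = 0; j < 16; ++j) {
    const int col = j % 4;  // column index in the row-major source
    const int row = j / 4;  // row index in the row-major source
    out[row + col * 4] = in[j];
  }
}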
|
||||||
|
|
||||||
|
absl::Status GlAnimationOverlayCalculator::Process(CalculatorContext *cc) {
|
||||||
|
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
|
||||||
|
if (!initialized_) {
|
||||||
|
MP_RETURN_IF_ERROR(GlSetup());
|
||||||
|
initialized_ = true;
|
||||||
|
animation_start_time_ = cc->InputTimestamp();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process model matrices, if any are being streamed in, and update our
|
||||||
|
// list.
|
||||||
|
current_model_matrices_.clear();
|
||||||
|
if (has_model_matrix_stream_ &&
|
||||||
|
!cc->Inputs().Tag("MODEL_MATRICES").IsEmpty()) {
|
||||||
|
const TimedModelMatrixProtoList &model_matrices =
|
||||||
|
cc->Inputs().Tag("MODEL_MATRICES").Get<TimedModelMatrixProtoList>();
|
||||||
|
LoadModelMatrices(model_matrices, ¤t_model_matrices_);
|
||||||
|
}
|
||||||
|
|
||||||
|
current_mask_model_matrices_.clear();
|
||||||
|
if (has_mask_model_matrix_stream_ &&
|
||||||
|
!cc->Inputs().Tag("MASK_MODEL_MATRICES").IsEmpty()) {
|
||||||
|
const TimedModelMatrixProtoList &model_matrices =
|
||||||
|
cc->Inputs()
|
||||||
|
.Tag("MASK_MODEL_MATRICES")
|
||||||
|
.Get<TimedModelMatrixProtoList>();
|
||||||
|
LoadModelMatrices(model_matrices, ¤t_mask_model_matrices_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Arbitrary default width and height for output destination texture, in the
|
||||||
|
// event that we don't have a valid and unique input buffer to overlay.
|
||||||
|
int width = 640;
|
||||||
|
int height = 480;
|
||||||
|
|
||||||
|
GlTexture dst;
|
||||||
|
std::unique_ptr<GpuBuffer> input_frame(nullptr);
|
||||||
|
if (has_video_stream_ && !(cc->Inputs().Tag("VIDEO").IsEmpty())) {
|
||||||
|
auto result = cc->Inputs().Tag("VIDEO").Value().Consume<GpuBuffer>();
|
||||||
|
if (result.ok()) {
|
||||||
|
input_frame = std::move(result).value();
|
||||||
|
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||||
|
input_frame->GetGlTextureBufferSharedPtr()->Reuse();
|
||||||
|
#endif
|
||||||
|
width = input_frame->width();
|
||||||
|
height = input_frame->height();
|
||||||
|
dst = helper_.CreateSourceTexture(*input_frame);
|
||||||
|
} else {
|
||||||
|
LOG(ERROR) << "Unable to consume input video frame for overlay!";
|
||||||
|
LOG(ERROR) << "Status returned was: " << result.status();
|
||||||
|
dst = helper_.CreateDestinationTexture(width, height);
|
||||||
|
}
|
||||||
|
} else if (!has_video_stream_) {
|
||||||
|
dst = helper_.CreateDestinationTexture(width, height);
|
||||||
|
} else {
|
||||||
|
// We have an input video stream, but not for this frame. Don't render!
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
helper_.BindFramebuffer(dst);
|
||||||
|
|
||||||
|
if (!depth_buffer_created_) {
|
||||||
|
// Create our private depth buffer.
|
||||||
|
GLCHECK(glGenRenderbuffers(1, &renderbuffer_));
|
||||||
|
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
|
||||||
|
GLCHECK(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
|
||||||
|
width, height));
|
||||||
|
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
|
||||||
|
GL_RENDERBUFFER, renderbuffer_));
|
||||||
|
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
|
||||||
|
depth_buffer_created_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-bind our depth renderbuffer to our FBO depth attachment here.
|
||||||
|
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
|
||||||
|
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
|
||||||
|
GL_RENDERBUFFER, renderbuffer_));
|
||||||
|
GLenum status = GLCHECK(glCheckFramebufferStatus(GL_FRAMEBUFFER));
|
||||||
|
if (status != GL_FRAMEBUFFER_COMPLETE) {
|
||||||
|
LOG(ERROR) << "Incomplete framebuffer with status: " << status;
|
||||||
|
}
|
||||||
|
GLCHECK(glClear(GL_DEPTH_BUFFER_BIT));
|
||||||
|
|
||||||
|
if (has_occlusion_mask_) {
|
||||||
|
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
|
||||||
|
const TriangleMesh &mask_frame = mask_meshes_.front();
|
||||||
|
MP_RETURN_IF_ERROR(GlBind(mask_frame, mask_texture_));
|
||||||
|
// Draw objects using our latest model matrix stream packet.
|
||||||
|
for (const ModelMatrix &model_matrix : current_mask_model_matrices_) {
|
||||||
|
MP_RETURN_IF_ERROR(GlRender(mask_frame, model_matrix.get()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||||
|
int frame_index = GetAnimationFrameIndex(cc->InputTimestamp());
|
||||||
|
const TriangleMesh ¤t_frame = triangle_meshes_[frame_index];
|
||||||
|
|
||||||
|
// Load dynamic texture if it exists
|
||||||
|
if (cc->Inputs().HasTag("TEXTURE")) {
|
||||||
|
const auto &input_texture =
|
||||||
|
cc->Inputs().Tag("TEXTURE").Get<AssetTextureFormat>();
|
||||||
|
texture_ = helper_.CreateSourceTexture(input_texture);
|
||||||
|
}
|
||||||
|
|
||||||
|
MP_RETURN_IF_ERROR(GlBind(current_frame, texture_));
|
||||||
|
if (has_model_matrix_stream_) {
|
||||||
|
// Draw objects using our latest model matrix stream packet.
|
||||||
|
for (const ModelMatrix &model_matrix : current_model_matrices_) {
|
||||||
|
MP_RETURN_IF_ERROR(GlRender(current_frame, model_matrix.get()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Just draw one object to a static model matrix.
|
||||||
|
MP_RETURN_IF_ERROR(GlRender(current_frame, kModelMatrix));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disable vertex attributes
|
||||||
|
GLCHECK(glDisableVertexAttribArray(ATTRIB_VERTEX));
|
||||||
|
GLCHECK(glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
|
||||||
|
GLCHECK(glDisableVertexAttribArray(ATTRIB_NORMAL));
|
||||||
|
|
||||||
|
// Disable depth test
|
||||||
|
GLCHECK(glDisable(GL_DEPTH_TEST));
|
||||||
|
|
||||||
|
// Unbind texture
|
||||||
|
GLCHECK(glActiveTexture(GL_TEXTURE1));
|
||||||
|
GLCHECK(glBindTexture(texture_.target(), 0));
|
||||||
|
|
||||||
|
// Unbind depth buffer
|
||||||
|
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
|
||||||
|
|
||||||
|
GLCHECK(glFlush());
|
||||||
|
|
||||||
|
auto output = dst.GetFrame<GpuBuffer>();
|
||||||
|
dst.Release();
|
||||||
|
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0)
|
||||||
|
.Add(output.release(), cc->InputTimestamp());
|
||||||
|
GLCHECK(glFrontFace(GL_CCW));
|
||||||
|
return absl::OkStatus();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status GlAnimationOverlayCalculator::GlSetup() {
|
||||||
|
// Load vertex and fragment shaders
|
||||||
|
const GLint attr_location[NUM_ATTRIBUTES] = {
|
||||||
|
ATTRIB_VERTEX,
|
||||||
|
ATTRIB_TEXTURE_POSITION,
|
||||||
|
ATTRIB_NORMAL,
|
||||||
|
};
|
||||||
|
const GLchar *attr_name[NUM_ATTRIBUTES] = {
|
||||||
|
"position",
|
||||||
|
"texture_coordinate",
|
||||||
|
"normal",
|
||||||
|
};
|
||||||
|
|
||||||
|
const GLchar *vert_src = R"(
|
||||||
|
// Perspective projection matrix for rendering / clipping
|
||||||
|
uniform mat4 perspectiveMatrix;
|
||||||
|
|
||||||
|
// Matrix defining the currently rendered object model
|
||||||
|
uniform mat4 modelMatrix;
|
||||||
|
|
||||||
|
// vertex position in threespace
|
||||||
|
attribute vec4 position;
|
||||||
|
attribute vec3 normal;
|
||||||
|
|
||||||
|
// texture coordinate for each vertex in normalized texture space (0..1)
|
||||||
|
attribute mediump vec4 texture_coordinate;
|
||||||
|
|
||||||
|
// texture coordinate for fragment shader (will be interpolated)
|
||||||
|
varying mediump vec2 sampleCoordinate;
|
||||||
|
varying mediump vec3 vNormal;
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
sampleCoordinate = texture_coordinate.xy;
|
||||||
|
mat4 mvpMatrix = perspectiveMatrix * modelMatrix;
|
||||||
|
gl_Position = mvpMatrix * position;
|
||||||
|
|
||||||
|
// TODO: Pass in rotation submatrix with no scaling or transforms to prevent
|
||||||
|
// breaking vNormal in case of model matrix having non-uniform scaling
|
||||||
|
vec4 tmpNormal = mvpMatrix * vec4(normal, 1.0);
|
||||||
|
vec4 transformedZero = mvpMatrix * vec4(0.0, 0.0, 0.0, 1.0);
|
||||||
|
tmpNormal = tmpNormal - transformedZero;
|
||||||
|
vNormal = normalize(tmpNormal.xyz);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
const GLchar *frag_src = R"(
|
||||||
|
precision mediump float;
|
||||||
|
|
||||||
|
varying vec2 sampleCoordinate; // texture coordinate (0..1)
|
||||||
|
varying vec3 vNormal;
|
||||||
|
uniform sampler2D texture; // texture to shade with
|
||||||
|
const float kPi = 3.14159265359;
|
||||||
|
|
||||||
|
// Define ambient lighting factor that is applied to our texture in order to
|
||||||
|
// generate ambient lighting of the scene on the object. Range is [0.0-1.0],
|
||||||
|
// with the factor being proportional to the brightness of the lighting in the
|
||||||
|
// scene being applied to the object
|
||||||
|
const float kAmbientLighting = 0.75;
|
||||||
|
|
||||||
|
// Define RGB values for light source
|
||||||
|
const vec3 kLightColor = vec3(0.25);
|
||||||
|
// Exponent for directional lighting that governs diffusion of surface light
|
||||||
|
const float kExponent = 1.0;
|
||||||
|
// Define direction of lighting effect source
|
||||||
|
const vec3 lightDir = vec3(0.0, -1.0, -0.6);
|
||||||
|
// Hard-coded view direction
|
||||||
|
const vec3 viewDir = vec3(0.0, 0.0, -1.0);
|
||||||
|
|
||||||
|
// DirectionalLighting procedure imported from Lullaby @ https://github.com/google/lullaby
|
||||||
|
// Calculate and return the color (diffuse and specular together) reflected by
|
||||||
|
// a directional light.
|
||||||
|
vec3 GetDirectionalLight(vec3 pos, vec3 normal, vec3 viewDir, vec3 lightDir, vec3 lightColor, float exponent) {
|
||||||
|
// Intensity of the diffuse light. Saturate to keep within the 0-1 range.
|
||||||
|
float normal_dot_light_dir = dot(-normal, -lightDir);
|
||||||
|
float intensity = clamp(normal_dot_light_dir, 0.0, 1.0);
|
||||||
|
// Calculate the diffuse light
|
||||||
|
vec3 diffuse = intensity * lightColor;
|
||||||
|
// http://www.rorydriscoll.com/2009/01/25/energy-conservation-in-games/
|
||||||
|
float kEnergyConservation = (2.0 + exponent) / (2.0 * kPi);
|
||||||
|
vec3 reflect_dir = reflect(lightDir, -normal);
|
||||||
|
// Intensity of the specular light
|
||||||
|
float view_dot_reflect = dot(-viewDir, reflect_dir);
|
||||||
|
// Use an epsilon for pow because pow(x,y) is undefined if x < 0 or x == 0
|
||||||
|
// and y <= 0 (GLSL Spec 8.2)
|
||||||
|
const float kEpsilon = 1e-5;
|
||||||
|
intensity = kEnergyConservation * pow(clamp(view_dot_reflect, kEpsilon, 1.0),
|
||||||
|
exponent);
|
||||||
|
// Specular color:
|
||||||
|
vec3 specular = intensity * lightColor;
|
||||||
|
return diffuse + specular;
|
||||||
|
}
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// Sample the texture, retrieving an rgba pixel value
|
||||||
|
vec4 pixel = texture2D(texture, sampleCoordinate);
|
||||||
|
// If the alpha (background) value is near transparent, then discard the
|
||||||
|
// pixel, this allows the rendering of transparent background GIFs
|
||||||
|
// TODO: Adding a toggle to perform pixel alpha discarding for transparent
|
||||||
|
// GIFs (prevent interference with Objectron system).
|
||||||
|
if (pixel.a < 0.2) discard;
|
||||||
|
|
||||||
|
// Generate directional lighting effect
|
||||||
|
vec3 lighting = GetDirectionalLight(gl_FragCoord.xyz, vNormal, viewDir, lightDir, kLightColor, kExponent);
|
||||||
|
// Apply both ambient and directional lighting to our texture
|
||||||
|
gl_FragColor = vec4((vec3(kAmbientLighting) + lighting) * pixel.rgb, 1.0);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
// Shader program
|
||||||
|
GLCHECK(GlhCreateProgram(vert_src, frag_src, NUM_ATTRIBUTES,
|
||||||
|
(const GLchar **)&attr_name[0], attr_location,
|
||||||
|
&program_));
|
||||||
|
RET_CHECK(program_) << "Problem initializing the program.";
|
||||||
|
texture_uniform_ = GLCHECK(glGetUniformLocation(program_, "texture"));
|
||||||
|
perspective_matrix_uniform_ =
|
||||||
|
GLCHECK(glGetUniformLocation(program_, "perspectiveMatrix"));
|
||||||
|
model_matrix_uniform_ =
|
||||||
|
GLCHECK(glGetUniformLocation(program_, "modelMatrix"));
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status GlAnimationOverlayCalculator::GlBind(
|
||||||
|
const TriangleMesh &triangle_mesh, const GlTexture &texture) {
|
||||||
|
GLCHECK(glUseProgram(program_));
|
||||||
|
|
||||||
|
// Disable backface culling to allow occlusion effects.
|
||||||
|
// Some options for solid arbitrary 3D geometry rendering
|
||||||
|
GLCHECK(glEnable(GL_BLEND));
|
||||||
|
GLCHECK(glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA));
|
||||||
|
GLCHECK(glEnable(GL_DEPTH_TEST));
|
||||||
|
GLCHECK(glFrontFace(GL_CW));
|
||||||
|
GLCHECK(glDepthMask(GL_TRUE));
|
||||||
|
GLCHECK(glDepthFunc(GL_LESS));
|
||||||
|
|
||||||
|
// Set up the per-vertex attribute arrays for this mesh before drawing.
|
||||||
|
GLCHECK(glVertexAttribPointer(ATTRIB_VERTEX, 3, GL_FLOAT, 0, 0,
|
||||||
|
triangle_mesh.vertices.get()));
|
||||||
|
GLCHECK(glEnableVertexAttribArray(ATTRIB_VERTEX));
|
||||||
|
GLCHECK(glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
|
||||||
|
triangle_mesh.texture_coords.get()));
|
||||||
|
GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
|
||||||
|
GLCHECK(glVertexAttribPointer(ATTRIB_NORMAL, 3, GL_FLOAT, 0, 0,
|
||||||
|
triangle_mesh.normals.get()));
|
||||||
|
GLCHECK(glEnableVertexAttribArray(ATTRIB_NORMAL));
|
||||||
|
GLCHECK(glActiveTexture(GL_TEXTURE1));
|
||||||
|
GLCHECK(glBindTexture(texture.target(), texture.name()));
|
||||||
|
|
||||||
|
// We previously bound it to GL_TEXTURE1
|
||||||
|
GLCHECK(glUniform1i(texture_uniform_, 1));
|
||||||
|
|
||||||
|
GLCHECK(glUniformMatrix4fv(perspective_matrix_uniform_, 1, GL_FALSE,
|
||||||
|
perspective_matrix_));
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status GlAnimationOverlayCalculator::GlRender(
|
||||||
|
const TriangleMesh &triangle_mesh, const float *model_matrix) {
|
||||||
|
GLCHECK(glUniformMatrix4fv(model_matrix_uniform_, 1, GL_FALSE, model_matrix));
|
||||||
|
GLCHECK(glDrawElements(GL_TRIANGLES, triangle_mesh.index_count,
|
||||||
|
GL_UNSIGNED_SHORT,
|
||||||
|
triangle_mesh.triangle_indices.get()));
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
GlAnimationOverlayCalculator::~GlAnimationOverlayCalculator() {
|
||||||
|
helper_.RunInGlContext([this] {
|
||||||
|
if (program_) {
|
||||||
|
GLCHECK(glDeleteProgram(program_));
|
||||||
|
program_ = 0;
|
||||||
|
}
|
||||||
|
if (depth_buffer_created_) {
|
||||||
|
GLCHECK(glDeleteRenderbuffers(1, &renderbuffer_));
|
||||||
|
renderbuffer_ = 0;
|
||||||
|
}
|
||||||
|
if (texture_.width() > 0) {
|
||||||
|
texture_.Release();
|
||||||
|
}
|
||||||
|
if (mask_texture_.width() > 0) {
|
||||||
|
mask_texture_.Release();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mediapipe
|
|
@ -0,0 +1,41 @@
|
||||||
|
// Copyright 2019 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe;
|
||||||
|
|
||||||
|
import "mediapipe/framework/calculator.proto";
|
||||||
|
|
||||||
|
message GlAnimationOverlayCalculatorOptions {
|
||||||
|
extend CalculatorOptions {
|
||||||
|
optional GlAnimationOverlayCalculatorOptions ext = 174760573;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default aspect ratio of rendering target width over height.
|
||||||
|
// This specific value is for 3:4 view. Do not change this default value.
|
||||||
|
optional float aspect_ratio = 1 [default = 0.75];
|
||||||
|
// Default vertical field of view in degrees. This specific default value
|
||||||
|
// is arbitrary. Do not change this default value. If you want to use
|
||||||
|
// a different vertical_fov_degrees, set it in the options.
|
||||||
|
optional float vertical_fov_degrees = 2 [default = 70.0];
|
||||||
|
|
||||||
|
// Perspective projection matrix z-clipping near plane value.
|
||||||
|
optional float z_clipping_plane_near = 3 [default = 0.1];
|
||||||
|
// Perspective projection matrix z-clipping far plane value.
|
||||||
|
optional float z_clipping_plane_far = 4 [default = 1000.0];
|
||||||
|
|
||||||
|
// Speed at which to play the animation (in frames per second).
|
||||||
|
optional float animation_speed_fps = 5 [default = 25.0];
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
// Copyright 2020 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe;
|
||||||
|
|
||||||
|
message TimedModelMatrixProto {
|
||||||
|
// 4x4 model matrix stored in ROW major order.
|
||||||
|
repeated float matrix_entries = 1 [packed = true];
|
||||||
|
// Timestamp of this model matrix in milliseconds.
|
||||||
|
optional int64 time_msec = 2 [default = 0];
|
||||||
|
// Unique per object id
|
||||||
|
optional int32 id = 3 [default = -1];
|
||||||
|
}
|
||||||
|
|
||||||
|
message TimedModelMatrixProtoList {
|
||||||
|
repeated TimedModelMatrixProto model_matrix = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For convenience, when the desired information or transformation can be
// encoded into vectors (e.g. when the matrix represents a scale or
// Euler-angle-based rotation operation).
|
||||||
|
message TimedVectorProto {
|
||||||
|
// The vector values themselves.
|
||||||
|
repeated float vector_entries = 1 [packed = true];
|
||||||
|
|
||||||
|
// Timestamp of this vector in milliseconds.
|
||||||
|
optional int64 time_msec = 2 [default = 0];
|
||||||
|
|
||||||
|
// Unique per object id
|
||||||
|
optional int32 id = 3 [default = -1];
|
||||||
|
}
|
||||||
|
|
||||||
|
message TimedVectorProtoList {
|
||||||
|
repeated TimedVectorProto vector_list = 1;
|
||||||
|
}
|
33
mediapipe/graphs/object_detection_3d/obj_parser/BUILD
Normal file
33
mediapipe/graphs/object_detection_3d/obj_parser/BUILD
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
# Copyright 2021 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
java_library(
|
||||||
|
name = "obj_parser_lib",
|
||||||
|
srcs = [
|
||||||
|
"ObjParserMain.java",
|
||||||
|
"SimpleObjParser.java",
|
||||||
|
],
|
||||||
|
javacopts = ["-Xep:DefaultPackage:OFF"],
|
||||||
|
)
|
||||||
|
|
||||||
|
java_binary(
|
||||||
|
name = "ObjParser",
|
||||||
|
javacopts = ["-Xep:DefaultPackage:OFF"],
|
||||||
|
main_class = "ObjParserMain",
|
||||||
|
runtime_deps = [
|
||||||
|
":obj_parser_lib",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,205 @@
|
||||||
|
// Copyright 2021 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileFilter;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class for running desktop-side parsing/packing routines on .obj AR assets. Usage: ObjParser
|
||||||
|
* --input_dir=[INPUT_DIRECTORY] --output_dir=[OUTPUT_DIRECTORY] where INPUT_DIRECTORY is the folder
|
||||||
|
 * with asset obj files to process, and OUTPUT_DIRECTORY is the folder where the processed
 * asset .uuu files should be placed.
|
||||||
|
*
|
||||||
|
* <p>NOTE: Directories are assumed to be absolute paths.
|
||||||
|
*/
|
||||||
|
public final class ObjParserMain {
|
||||||
|
// Simple FileFilter implementation to let us walk over only our .obj files in a particular
|
||||||
|
// directory.
|
||||||
|
private static final class ObjFileFilter implements FileFilter {
|
||||||
|
ObjFileFilter() {
|
||||||
|
// Nothing to do here.
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean accept(File file) {
|
||||||
|
return file.getName().endsWith(".obj");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// File extension for binary output files; tagged onto end of initial file extension.
|
||||||
|
private static final String BINARY_FILE_EXT = ".uuu";
|
||||||
|
private static final String INPUT_DIR_FLAG = "--input_dir=";
|
||||||
|
private static final String OUTPUT_DIR_FLAG = "--output_dir=";
|
||||||
|
private static final float DEFAULT_VERTEX_SCALE_FACTOR = 30.0f;
|
||||||
|
private static final double NS_TO_SECONDS = 1e9;
|
||||||
|
|
||||||
|
public final PrintWriter writer;
|
||||||
|
|
||||||
|
public ObjParserMain() {
|
||||||
|
super();
|
||||||
|
this.writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(System.out, UTF_8)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simple overridable logging function.
|
||||||
|
protected void logString(String infoLog) {
|
||||||
|
writer.println(infoLog);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Main program logic: parse command-line arguments and perform actions.
|
||||||
|
*/
|
||||||
|
public void run(String inDirectory, String outDirectory) {
|
||||||
|
if (inDirectory.isEmpty()) {
|
||||||
|
logString("Error: Must provide input directory with " + INPUT_DIR_FLAG);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (outDirectory.isEmpty()) {
|
||||||
|
logString("Error: Must provide output directory with " + OUTPUT_DIR_FLAG);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
File dirAsFile = new File(inDirectory);
|
||||||
|
ObjFileFilter objFileFilter = new ObjFileFilter();
|
||||||
|
File[] objFiles = dirAsFile.listFiles(objFileFilter);
|
||||||
|
|
||||||
|
FileOutputStream outputStream = null;
|
||||||
|
logString("Parsing directory: " + inDirectory);
|
||||||
|
// We need frames processed in correct order.
|
||||||
|
Arrays.sort(objFiles);
|
||||||
|
|
||||||
|
for (File objFile : objFiles) {
|
||||||
|
String fileName = objFile.getAbsolutePath();
|
||||||
|
|
||||||
|
// Just take the file name of the first processed frame.
|
||||||
|
if (outputStream == null) {
|
||||||
|
String outputFileName = outDirectory + objFile.getName() + BINARY_FILE_EXT;
|
||||||
|
try {
|
||||||
|
// Create new file here, if we can.
|
||||||
|
outputStream = new FileOutputStream(outputFileName);
|
||||||
|
logString("Created outfile: " + outputFileName);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logString("Error creating outfile: " + e.toString());
|
||||||
|
e.printStackTrace(writer);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process each file into the stream.
|
||||||
|
logString("Processing file: " + fileName);
|
||||||
|
processFile(fileName, outputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally close the stream out.
|
||||||
|
try {
|
||||||
|
if (outputStream != null) {
|
||||||
|
outputStream.close();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
logString("Error trying to close output stream: " + e.toString());
|
||||||
|
e.printStackTrace(writer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Entrypoint for command-line executable.
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) {
|
||||||
|
// Parse flags
|
||||||
|
String inDirectory = "";
|
||||||
|
String outDirectory = "";
|
||||||
|
for (int i = 0; i < args.length; i++) {
|
||||||
|
if (args[i].startsWith(INPUT_DIR_FLAG)) {
|
||||||
|
inDirectory = args[i].substring(INPUT_DIR_FLAG.length());
|
||||||
|
// Make sure this will be treated as a directory
|
||||||
|
if (!inDirectory.endsWith("/")) {
|
||||||
|
inDirectory += "/";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (args[i].startsWith(OUTPUT_DIR_FLAG)) {
|
||||||
|
outDirectory = args[i].substring(OUTPUT_DIR_FLAG.length());
|
||||||
|
// Make sure this will be treated as a directory
|
||||||
|
if (!outDirectory.endsWith("/")) {
|
||||||
|
outDirectory += "/";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ObjParserMain parser = new ObjParserMain();
|
||||||
|
parser.run(inDirectory, outDirectory);
|
||||||
|
parser.writer.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Internal helper function to parse a .obj from an infile name and stream the resulting data
|
||||||
|
* directly out in binary-dump format to outputStream.
|
||||||
|
*/
|
||||||
|
private void processFile(String infileName, OutputStream outputStream) {
|
||||||
|
long start = System.nanoTime();
|
||||||
|
|
||||||
|
// First we parse the obj.
|
||||||
|
SimpleObjParser objParser = new SimpleObjParser(infileName, DEFAULT_VERTEX_SCALE_FACTOR);
|
||||||
|
if (!objParser.parse()) {
|
||||||
|
logString("Error parsing .obj file before processing");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final float[] vertices = objParser.getVertices();
|
||||||
|
final float[] textureCoords = objParser.getTextureCoords();
|
||||||
|
final ArrayList<Short> triangleList = objParser.getTriangles();
|
||||||
|
|
||||||
|
// Overall byte count to stream: 12 for the 3 list-length ints, and then 4 for each vertex and
|
||||||
|
// texCoord float, and finally 2 for each triangle index short.
|
||||||
|
final int bbSize =
|
||||||
|
12 + 4 * vertices.length + 4 * textureCoords.length + 2 * triangleList.size();
|
||||||
|
|
||||||
|
// Ensure ByteBuffer is native order, just like we want to read it in, but is NOT direct, so
|
||||||
|
// we can call .array() on it.
|
||||||
|
ByteBuffer bb = ByteBuffer.allocate(bbSize);
|
||||||
|
bb.order(ByteOrder.nativeOrder());
|
||||||
|
|
||||||
|
bb.putInt(vertices.length);
|
||||||
|
bb.putInt(textureCoords.length);
|
||||||
|
bb.putInt(triangleList.size());
|
||||||
|
logString(String.format("Writing... Vertices: %d, TextureCoords: %d, Indices: %d.%n",
|
||||||
|
vertices.length, textureCoords.length, triangleList.size()));
|
||||||
|
for (float vertex : vertices) {
|
||||||
|
bb.putFloat(vertex);
|
||||||
|
}
|
||||||
|
for (float textureCoord : textureCoords) {
|
||||||
|
bb.putFloat(textureCoord);
|
||||||
|
}
|
||||||
|
for (Short vertexIndex : triangleList) {
|
||||||
|
bb.putShort(vertexIndex.shortValue());
|
||||||
|
}
|
||||||
|
bb.position(0);
|
||||||
|
try {
|
||||||
|
outputStream.write(bb.array(), 0, bbSize);
|
||||||
|
logString(String.format("Processing successful! Took %.4f seconds.%n",
|
||||||
|
(System.nanoTime() - start) / NS_TO_SECONDS));
|
||||||
|
} catch (Exception e) {
|
||||||
|
logString("Error writing during processing: " + e.toString());
|
||||||
|
e.printStackTrace(writer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,386 @@
|
||||||
|
// Copyright 2021 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class for parsing a single .obj file into openGL-usable pieces.
|
||||||
|
*
|
||||||
|
* <p>Usage:
|
||||||
|
*
|
||||||
|
* <p>SimpleObjParser objParser = new SimpleObjParser("animations/cow/cow320.obj", .015f);
|
||||||
|
*
|
||||||
|
* <p>if (objParser.parse()) { ... }
|
||||||
|
*/
|
||||||
|
public class SimpleObjParser {
|
||||||
|
private static class ShortPair {
|
||||||
|
private final Short first;
|
||||||
|
private final Short second;
|
||||||
|
|
||||||
|
public ShortPair(Short newFirst, Short newSecond) {
|
||||||
|
first = newFirst;
|
||||||
|
second = newSecond;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Short getFirst() {
|
||||||
|
return first;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Short getSecond() {
|
||||||
|
return second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final String TAG = SimpleObjParser.class.getSimpleName();
|
||||||
|
private static final boolean DEBUG = false;
|
||||||
|
private static final int INVALID_INDEX = -1;
|
||||||
|
private static final int POSITIONS_COORDS_PER_VERTEX = 3;
|
||||||
|
private static final int TEXTURE_COORDS_PER_VERTEX = 2;
|
||||||
|
private final String fileName;
|
||||||
|
|
||||||
|
// Since .obj doesn't tie together texture coordinates and vertex
|
||||||
|
// coordinates, but OpenGL does, we need to keep a map of all such pairings that occur in
|
||||||
|
// our face list.
|
||||||
|
private final HashMap<ShortPair, Short> vertexTexCoordMap;
|
||||||
|
|
||||||
|
// Internal (de-coupled) unique vertices and texture coordinates
|
||||||
|
private ArrayList<Float> vertices;
|
||||||
|
private ArrayList<Float> textureCoords;
|
||||||
|
|
||||||
|
// Data we expose to openGL for rendering
|
||||||
|
private float[] finalizedVertices;
|
||||||
|
private float[] finalizedTextureCoords;
|
||||||
|
private ArrayList<Short> finalizedTriangles;
|
||||||
|
|
||||||
|
// So we only display warnings about dropped w-coordinates once
|
||||||
|
private boolean vertexCoordIgnoredWarning;
|
||||||
|
private boolean textureCoordIgnoredWarning;
|
||||||
|
private boolean startedProcessingFaces;
|
||||||
|
|
||||||
|
private int numPrimitiveVertices;
|
||||||
|
private int numPrimitiveTextureCoords;
|
||||||
|
private int numPrimitiveFaces;
|
||||||
|
|
||||||
|
// For scratchwork, so we don't have to keep reallocating
|
||||||
|
private float[] tempCoords;
|
||||||
|
|
||||||
|
// We scale all our position coordinates uniformly by this factor
|
||||||
|
private float objectUniformScaleFactor;
|
||||||
|
|
||||||
|
public SimpleObjParser(String objFile, float scaleFactor) {
|
||||||
|
objectUniformScaleFactor = scaleFactor;
|
||||||
|
|
||||||
|
fileName = objFile;
|
||||||
|
vertices = new ArrayList<Float>();
|
||||||
|
textureCoords = new ArrayList<Float>();
|
||||||
|
|
||||||
|
vertexTexCoordMap = new HashMap<ShortPair, Short>();
|
||||||
|
finalizedTriangles = new ArrayList<Short>();
|
||||||
|
|
||||||
|
tempCoords = new float[Math.max(POSITIONS_COORDS_PER_VERTEX, TEXTURE_COORDS_PER_VERTEX)];
|
||||||
|
numPrimitiveFaces = 0;
|
||||||
|
|
||||||
|
vertexCoordIgnoredWarning = false;
|
||||||
|
textureCoordIgnoredWarning = false;
|
||||||
|
startedProcessingFaces = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simple helper wrapper function
|
||||||
|
private void debugLogString(String message) {
|
||||||
|
if (DEBUG) {
|
||||||
|
System.out.println(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
  private void parseVertex(String[] linePieces) {
    // Note: Traditionally xyzw is acceptable as a format, with w defaulting to 1.0, but for now
    // we only parse xyz.
    if (linePieces.length < POSITIONS_COORDS_PER_VERTEX + 1
        || linePieces.length > POSITIONS_COORDS_PER_VERTEX + 2) {
      System.out.println("Malformed vertex coordinate specification, assuming xyz format only.");
      return;
    } else if (linePieces.length == POSITIONS_COORDS_PER_VERTEX + 2 && !vertexCoordIgnoredWarning) {
      System.out.println(
          "Only x, y, and z parsed for vertex coordinates; w coordinates will be ignored.");
      vertexCoordIgnoredWarning = true;
    }

    boolean success = true;
    try {
      for (int i = 1; i < POSITIONS_COORDS_PER_VERTEX + 1; i++) {
        tempCoords[i - 1] = Float.parseFloat(linePieces[i]);
      }
    } catch (NumberFormatException e) {
      success = false;
      System.out.println("Malformed vertex coordinate error: " + e.toString());
    }

    if (success) {
      for (int i = 0; i < POSITIONS_COORDS_PER_VERTEX; i++) {
        vertices.add(Float.valueOf(tempCoords[i] * objectUniformScaleFactor));
      }
    }
  }
  private void parseTextureCoordinate(String[] linePieces) {
    // Similar to vertices, uvw is acceptable as a format, with w defaulting to 0.0, but for now we
    // only parse uv.
    if (linePieces.length < TEXTURE_COORDS_PER_VERTEX + 1
        || linePieces.length > TEXTURE_COORDS_PER_VERTEX + 2) {
      System.out.println("Malformed texture coordinate specification, assuming uv format only.");
      return;
    } else if (linePieces.length == (TEXTURE_COORDS_PER_VERTEX + 2)
        && !textureCoordIgnoredWarning) {
      debugLogString("Only u and v parsed for texture coordinates; w coordinates will be ignored.");
      textureCoordIgnoredWarning = true;
    }

    boolean success = true;
    try {
      for (int i = 1; i < TEXTURE_COORDS_PER_VERTEX + 1; i++) {
        tempCoords[i - 1] = Float.parseFloat(linePieces[i]);
      }
    } catch (NumberFormatException e) {
      success = false;
      System.out.println("Malformed texture coordinate error: " + e.toString());
    }

    if (success) {
      // .obj files treat (0,0) as top-left, compared to bottom-left for openGL. So invert "v"
      // texture coordinate only here.
      textureCoords.add(Float.valueOf(tempCoords[0]));
      textureCoords.add(Float.valueOf(1.0f - tempCoords[1]));
    }
  }
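
  // For example, the texture coordinate line "vt 0.25 0.75" is stored above as (0.25, 0.25) once
  // the v coordinate has been flipped.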

  // Will return INVALID_INDEX if error occurs, and otherwise will return finalized (combined)
  // index, adding and hashing new combinations as it sees them.
  private short parseAndProcessCombinedVertexCoord(String coordString) {
    String[] coords = coordString.split("/");
    try {
      // Parse vertex and texture indices; 1-indexed from front if positive and from end of list if
      // negative.
      short vertexIndex = Short.parseShort(coords[0]);
      short textureIndex = Short.parseShort(coords[1]);
      if (vertexIndex > 0) {
        vertexIndex--;
      } else {
        vertexIndex = (short) (vertexIndex + numPrimitiveVertices);
      }
      if (textureIndex > 0) {
        textureIndex--;
      } else {
        textureIndex = (short) (textureIndex + numPrimitiveTextureCoords);
      }

      // Combine indices and look up in pair map.
      ShortPair indexPair = new ShortPair(Short.valueOf(vertexIndex), Short.valueOf(textureIndex));
      Short combinedIndex = vertexTexCoordMap.get(indexPair);
      if (combinedIndex == null) {
        short numIndexPairs = (short) vertexTexCoordMap.size();
        vertexTexCoordMap.put(indexPair, numIndexPairs);
        return numIndexPairs;
      } else {
        return combinedIndex.shortValue();
      }
    } catch (NumberFormatException e) {
      // Failure to parse coordinates as shorts
      return INVALID_INDEX;
    }
  }
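
  // For example, the face token "3/7" parses to zero-based vertex index 2 and texture index 6; the
  // first time a given (vertex, texCoord) pair is seen it is assigned the next free combined index,
  // and later occurrences of the same pair reuse that index.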

  // Note: it is assumed that face list occurs AFTER vertex and texture coordinate lists finish in
  // the obj file format.
  private void parseFace(String[] linePieces) {
    if (linePieces.length < 4) {
      System.out.println("Malformed face index list: there must be at least 3 indices per face");
      return;
    }

    short[] faceIndices = new short[linePieces.length - 1];
    boolean success = true;
    for (int i = 1; i < linePieces.length; i++) {
      short faceIndex = parseAndProcessCombinedVertexCoord(linePieces[i]);

      if (faceIndex < 0) {
        System.out.println(faceIndex);
        System.out.println("Malformed face index: " + linePieces[i]);
        success = false;
        break;
      }
      faceIndices[i - 1] = faceIndex;
    }

    if (success) {
      numPrimitiveFaces++;
      // Manually triangulate the face under the assumption that the points are coplanar, the poly
      // is convex, and the points are listed in either clockwise or anti-clockwise orientation.
      for (int i = 1; i < faceIndices.length - 1; i++) {
        // We use a triangle fan here, so first point is part of all triangles
        finalizedTriangles.add(faceIndices[0]);
        finalizedTriangles.add(faceIndices[i]);
        finalizedTriangles.add(faceIndices[i + 1]);
      }
    }
  }
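
  // For example, a quad face with combined indices (a, b, c, d) is triangulated above into the two
  // triangles (a, b, c) and (a, c, d).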

  // Iterate over map and reconstruct proper vertex/texture coordinate pairings.
  private boolean constructFinalCoordinatesFromMap() {
    final int numIndexPairs = vertexTexCoordMap.size();
    // XYZ vertices and UV texture coordinates
    finalizedVertices = new float[POSITIONS_COORDS_PER_VERTEX * numIndexPairs];
    finalizedTextureCoords = new float[TEXTURE_COORDS_PER_VERTEX * numIndexPairs];
    try {
      for (Map.Entry<ShortPair, Short> entry : vertexTexCoordMap.entrySet()) {
        ShortPair indexPair = entry.getKey();
        short rawVertexIndex = indexPair.getFirst().shortValue();
        short rawTexCoordIndex = indexPair.getSecond().shortValue();
        short finalIndex = entry.getValue().shortValue();
        for (int i = 0; i < POSITIONS_COORDS_PER_VERTEX; i++) {
          finalizedVertices[POSITIONS_COORDS_PER_VERTEX * finalIndex + i]
              = vertices.get(rawVertexIndex * POSITIONS_COORDS_PER_VERTEX + i);
        }
        for (int i = 0; i < TEXTURE_COORDS_PER_VERTEX; i++) {
          finalizedTextureCoords[TEXTURE_COORDS_PER_VERTEX * finalIndex + i]
              = textureCoords.get(rawTexCoordIndex * TEXTURE_COORDS_PER_VERTEX + i);
        }
      }
    } catch (NumberFormatException e) {
      System.out.println("Malformed index in vertex/texture coordinate mapping.");
      return false;
    }
    return true;
  }

  /**
   * Returns the vertex position coordinate list (x1, y1, z1, x2, y2, z2, ...) after a successful
   * call to parse().
   */
  public float[] getVertices() {
    return finalizedVertices;
  }

  /**
   * Returns the vertex texture coordinate list (u1, v1, u2, v2, ...) after a successful call to
   * parse().
   */
  public float[] getTextureCoords() {
    return finalizedTextureCoords;
  }

  /**
   * Returns the list of indices (a1, b1, c1, a2, b2, c2, ...) after a successful call to parse().
   * Each (a, b, c) triplet specifies a triangle to be rendered, with a, b, and c Short objects used
   * to index into the coordinates returned by getVertices() and getTextureCoords().<p></p>
   * For example, a Short index representing 5 should be used to index into vertices[15],
   * vertices[16], and vertices[17], as well as textureCoords[10] and textureCoords[11].
   */
  public ArrayList<Short> getTriangles() {
    return finalizedTriangles;
  }

  /**
   * Attempts to locate and read the specified .obj file, and parse it accordingly. None of the
   * getter functions in this class will return valid results until a value of true is returned
   * from this function.
   * @return true on success.
   */
  public boolean parse() {
    boolean success = true;
    BufferedReader reader = null;
    try {
      reader = Files.newBufferedReader(Paths.get(fileName), UTF_8);
      String line;
      while ((line = reader.readLine()) != null) {
        // Skip over lines with no characters
        if (line.length() < 1) {
          continue;
        }

        // Ignore comment lines entirely
        if (line.charAt(0) == '#') {
          continue;
        }

        // Split into pieces based on whitespace, and process according to first command piece
        String[] linePieces = line.split(" +");
        switch (linePieces[0]) {
          case "v":
            // Add vertex
            if (startedProcessingFaces) {
              throw new IOException("Vertices must all be declared before faces in obj files.");
            }
            parseVertex(linePieces);
            break;
          case "vt":
            // Add texture coordinate
            if (startedProcessingFaces) {
              throw new IOException(
                  "Texture coordinates must all be declared before faces in obj files.");
            }
            parseTextureCoordinate(linePieces);
            break;
          case "f":
            // Vertex and texture coordinate lists should be locked into place by now
            if (!startedProcessingFaces) {
              startedProcessingFaces = true;
              numPrimitiveVertices = vertices.size() / POSITIONS_COORDS_PER_VERTEX;
              numPrimitiveTextureCoords = textureCoords.size() / TEXTURE_COORDS_PER_VERTEX;
            }
            // Add face
            parseFace(linePieces);
            break;
          default:
            // Unknown or unused directive: ignoring
            // Note: We do not yet process vertex normals or curves, so we ignore {vp, vn, s}
            // Note: We assume only a single object, so we ignore {g, o}
            // Note: We also assume a single texture, which we process independently, so we ignore
            // {mtllib, usemtl}
            break;
        }
      }

      // If we made it all the way through, then we have a vertex-to-tex-coord pair mapping, so
      // construct our final vertex and texture coordinate lists now.
      success = constructFinalCoordinatesFromMap();

    } catch (IOException e) {
      success = false;
      System.out.println("Failure to parse obj file: " + e.toString());
    } finally {
      try {
        if (reader != null) {
          reader.close();
        }
      } catch (IOException e) {
        System.out.println("Couldn't close reader");
      }
    }
    if (success) {
      debugLogString("Successfully parsed " + numPrimitiveVertices + " vertices and "
          + numPrimitiveTextureCoords + " texture coordinates into " + vertexTexCoordMap.size()
          + " combined vertices and " + numPrimitiveFaces + " faces, represented as a mesh of "
          + finalizedTriangles.size() / 3 + " triangles.");
    }
    return success;
  }
}
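
A minimal usage sketch (not part of this commit): it shows how the arrays returned by SimpleObjParser could be packed into native-order NIO buffers for OpenGL-style rendering. The class name ObjParserUsageSketch and the buffer handling are illustrative assumptions, not code from this repository.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.nio.ShortBuffer;
import java.util.ArrayList;

public final class ObjParserUsageSketch {
  public static void main(String[] args) {
    // Parse the sample asset referenced in the class javadoc, scaling positions by 0.015.
    SimpleObjParser objParser = new SimpleObjParser("animations/cow/cow320.obj", .015f);
    if (!objParser.parse()) {
      System.out.println("Failed to parse .obj file.");
      return;
    }

    // Positions and texture coordinates go into direct, native-order float buffers.
    float[] vertices = objParser.getVertices();
    FloatBuffer vertexBuffer =
        ByteBuffer.allocateDirect(vertices.length * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
    vertexBuffer.put(vertices).position(0);

    float[] texCoords = objParser.getTextureCoords();
    FloatBuffer texCoordBuffer =
        ByteBuffer.allocateDirect(texCoords.length * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
    texCoordBuffer.put(texCoords).position(0);

    // Triangle indices go into a short buffer, three entries per triangle.
    ArrayList<Short> triangles = objParser.getTriangles();
    ShortBuffer indexBuffer =
        ByteBuffer.allocateDirect(triangles.size() * 2).order(ByteOrder.nativeOrder()).asShortBuffer();
    for (Short index : triangles) {
      indexBuffer.put(index.shortValue());
    }
    indexBuffer.position(0);

    System.out.println("Prepared " + triangles.size() / 3 + " triangles for rendering.");
  }
}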