code fill

Victor Dudochkin 2022-03-01 19:04:01 +07:00
parent d109328198
commit 5578aa50e8
351 changed files with 80993 additions and 1 deletion

1
.gitignore vendored

@@ -8,3 +8,4 @@ Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
/refs/

15
Cargo.toml Normal file

@@ -0,0 +1,15 @@
[package]
name = "ux-mediapipe"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "mediapipe"
[dependencies]
cgmath = "0.18.0"
libc = "0.2.0"
opencv = {version = "0.63.0", default-features = false, features = ["videoio", "highgui", "imgproc"]}
protobuf = "2.23.0"
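The three OpenCV features enabled above are exactly what examples/hello.rs (added later in this commit) relies on: `videoio` for camera capture, `highgui` for the preview window, and `imgproc` for the BGR-to-RGB conversion; `cgmath` and `protobuf` presumably back the landmark types and the MediaPipe graph protos.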


@@ -1,2 +1,4 @@
# ux-media
# ux-mediapipe
Rust and mediapipe
bazel build --define MEDIAPIPE_DISABLE_GPU=1 mediapipe:mediagraph
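As a quick orientation, here is a minimal sketch of what driving the crate looks like once the native graph library above has been built. It simply condenses the `face_mesh` demo from `examples/hello.rs` added later in this commit; the `FaceMesh` and `face_mesh::FaceMeshDetector` names are taken from there rather than from any published API.

```rust
use mediapipe::*;
use opencv::prelude::*;
use opencv::{imgproc, videoio, Result};

fn main() -> Result<()> {
    // Open the default camera.
    let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
    let mut detector = face_mesh::FaceMeshDetector::default();
    let mut mesh = FaceMesh::default();
    let (mut raw, mut rgb) = (Mat::default(), Mat::default());
    // Process a fixed number of frames and print the first landmark of each.
    for _ in 0..100 {
        cap.read(&mut raw)?;
        if raw.size()?.width == 0 {
            continue; // skip empty frames
        }
        // MediaPipe expects RGB input; OpenCV captures BGR.
        imgproc::cvt_color(&raw, &mut rgb, imgproc::COLOR_BGR2RGB, 0)?;
        detector.process(&rgb, &mut mesh);
        println!("first landmark: {} {} {}", mesh.data[0].x, mesh.data[0].y, mesh.data[0].z);
    }
    Ok(())
}
```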


@@ -0,0 +1,48 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.
# GPU image. (GpuBuffer)
input_stream: "input_video"
# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"
# Generates side packet containing max number of hands to detect/track.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_hands"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 2 }
}
}
}
# Detects/tracks hand landmarks.
node {
calculator: "HandLandmarkTrackingGpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "HANDEDNESS:handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
calculator: "HandRendererSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "DETECTIONS:palm_detections"
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "HANDEDNESS:handedness"
input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
output_stream: "IMAGE:output_video"
}
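This appears to be the graph behind `hands::HandDetector` in `examples/hello.rs` below: its `hand_landmarks` output (a `std::vector<NormalizedLandmarkList>`) is what the example reads back through `Hand::data` for the left and right hands.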

212
examples/hello.rs Normal file

@@ -0,0 +1,212 @@
#![allow(unused_variables)]
#![allow(dead_code)]
use mediapipe::*;
mod examples {
use super::*;
use opencv::prelude::*;
use opencv::{highgui, imgproc, videoio, Result};
pub fn corner_rectangle() -> Result<()> {
let window = "video capture";
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
if !cap.is_opened()? {
panic!("Unable to open default cam")
}
let detector = hands::HandDetector::default();
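// Display camera frames until any key is pressed; the detector above is created but not used in this demo.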
loop {
let mut frame = Mat::default();
cap.read(&mut frame)?;
let size = frame.size()?;
if size.width > 0 {
highgui::imshow(window, &mut frame)?
}
let key = highgui::wait_key(10)?;
if key > 0 && key != 255 {
break;
}
}
Ok(())
}
// pub fn face_detection() -> Result<()> {
// let window = "video capture";
// highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
// let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
// if !cap.is_opened()? {
// panic!("Unable to open default cam")
// }
// let detector = mediapipe::face_detection::FaceDetector::default();
// loop {
// let mut frame = Mat::default();
// cap.read(&mut frame)?;
// let size = frame.size()?;
// if size.width > 0 {
// highgui::imshow(window, &mut frame)?
// }
// let key = highgui::wait_key(10)?;
// if key > 0 && key != 255 {
// break;
// }
// }
// Ok(())
// }
pub fn face_mesh() -> Result<()> {
let window = "video capture";
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
if !cap.is_opened()? {
panic!("Unable to open default cam")
}
cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
cap.set(videoio::CAP_PROP_FPS, 30.0)?;
let mut mesh = FaceMesh::default();
let mut detector = face_mesh::FaceMeshDetector::default();
let mut raw_frame = Mat::default();
let mut rgb_frame = Mat::default();
let mut flip_frame = Mat::default();
loop {
cap.read(&mut raw_frame)?;
let size = raw_frame.size()?;
if size.width > 0 && !raw_frame.empty() {
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
detector.process(&flip_frame, &mut mesh);
highgui::imshow(window, &mut flip_frame)?;
println!(
"LANDMARK: {} {} {}",
mesh.data[0].x, mesh.data[0].y, mesh.data[0].z
);
} else {
println!("WARN: Skip empty frame");
}
let key = highgui::wait_key(10)?;
if key > 0 && key != 255 {
break;
}
}
Ok(())
}
pub fn hand_tracking() -> Result<()> {
let window = "video capture";
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
if !cap.is_opened()? {
panic!("Unable to open default cam")
}
cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
cap.set(videoio::CAP_PROP_FPS, 30.0)?;
let mut left = Hand::default();
let mut right = Hand::default();
let mut detector = hands::HandDetector::default();
let mut raw_frame = Mat::default();
let mut rgb_frame = Mat::default();
let mut flip_frame = Mat::default();
loop {
cap.read(&mut raw_frame)?;
let size = raw_frame.size()?;
if size.width > 0 && !raw_frame.empty() {
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
detector.process(&flip_frame, &mut left, &mut right);
highgui::imshow(window, &mut flip_frame)?;
println!(
"LANDMARK: {} {} {}",
left.data[0].x, left.data[0].y, left.data[0].z
);
} else {
println!("WARN: Skip empty frame");
}
let key = highgui::wait_key(10)?;
if key > 0 && key != 255 {
break;
}
}
Ok(())
}
pub fn pose_estimation() -> Result<()> {
let window = "video capture";
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
if !cap.is_opened()? {
panic!("Unable to open default cam")
}
cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
cap.set(videoio::CAP_PROP_FPS, 30.0)?;
let mut pose = Pose::default();
let mut detector = pose::PoseDetector::default();
let mut raw_frame = Mat::default();
let mut rgb_frame = Mat::default();
let mut flip_frame = Mat::default();
loop {
cap.read(&mut raw_frame)?;
let size = raw_frame.size()?;
if size.width > 0 && !raw_frame.empty() {
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
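// Note: unlike the hand and face-mesh demos, the un-flipped `rgb_frame` is what gets processed and displayed below, so `flip_frame` is left unused here.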
detector.process(&rgb_frame, &mut pose);
highgui::imshow(window, &mut rgb_frame)?;
println!(
"LANDMARK: {} {} {}",
pose.data[0].x, pose.data[0].y, pose.data[0].z
);
} else {
println!("WARN: Skip empty frame");
}
let key = highgui::wait_key(10)?;
if key > 0 && key != 255 {
break;
}
}
Ok(())
}
}
fn main() {
// examples::pose_estimation().unwrap()
// examples::hand_tracking().unwrap()
examples::face_mesh().unwrap()
}
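Assuming the usual Cargo layout, the demos above can be run with `cargo run --example hello` once the native `mediagraph` target has been built with the Bazel command from the README; switching between the pose, hand-tracking and face-mesh demos is a matter of changing which call in `main` is uncommented.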


@@ -0,0 +1,36 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/image:luminance_calculator",
"//mediapipe/calculators/image:sobel_edges_calculator",
],
)
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph",
graph = "edge_detection_mobile_gpu.pbtxt",
output_name = "mobile_gpu.binarypb",
)


@@ -0,0 +1,22 @@
# MediaPipe graph that performs GPU Sobel edge detection on a live video stream.
# Used in the examples in
# mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:helloworld
# and mediapipe/examples/ios/helloworld.
# Images coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Converts RGB images into luminance images, still stored in RGB format.
node: {
calculator: "LuminanceCalculator"
input_stream: "input_video"
output_stream: "luma_video"
}
# Applies the Sobel filter to luminance images stored in RGB format.
node: {
calculator: "SobelEdgesCalculator"
input_stream: "luma_video"
output_stream: "output_video"
}


@@ -0,0 +1,95 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
],
)
cc_library(
name = "desktop_live_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
],
)
cc_library(
name = "desktop_live_gpu_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
],
)
mediapipe_binary_graph(
name = "face_detection_mobile_cpu_binary_graph",
graph = "face_detection_mobile_cpu.pbtxt",
output_name = "face_detection_mobile_cpu.binarypb",
deps = [":mobile_calculators"],
)
mediapipe_binary_graph(
name = "face_detection_mobile_gpu_binary_graph",
graph = "face_detection_mobile_gpu.pbtxt",
output_name = "face_detection_mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)
cc_library(
name = "face_detection_full_range_mobile_gpu_deps",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_full_range_gpu",
],
)
mediapipe_binary_graph(
name = "face_detection_full_range_mobile_gpu_binary_graph",
graph = "face_detection_full_range_mobile_gpu.pbtxt",
output_name = "face_detection_full_range_mobile_gpu.binarypb",
deps = [":face_detection_full_range_mobile_gpu_deps"],
)
cc_library(
name = "face_detection_full_range_desktop_live_deps",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_full_range_cpu",
],
)


@@ -0,0 +1,58 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
# CPU buffer. (ImageFrame)
input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Subgraph that detects faces.
node {
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:face_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:face_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}


@@ -0,0 +1,60 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/desktop/face_detection:face_detection_cpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Detects faces.
node {
calculator: "FaceDetectionFullRangeCpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}


@@ -0,0 +1,60 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
# mediapipe/examples/ios/facedetectiongpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Detects faces.
node {
calculator: "FaceDetectionFullRangeGpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE_GPU:output_video"
}


@@ -0,0 +1,76 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transfers the input image from GPU to CPU memory for the purpose of
# demonstrating a CPU-based pipeline. Note that the input image on GPU has the
# origin defined at the bottom-left corner (OpenGL convention). As a result,
# the transferred image on CPU also shares the same representation.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "throttled_input_video"
output_stream: "input_video_cpu"
}
# Subgraph that detects faces.
node {
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:input_video_cpu"
output_stream: "DETECTIONS:face_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:face_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_video_cpu"
input_stream: "render_data"
output_stream: "IMAGE:output_video_cpu"
}
# Transfers the annotated image from CPU back to GPU memory, to be sent out of
# the graph.
node: {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "output_video_cpu"
output_stream: "output_video"
}


@@ -0,0 +1,58 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Subgraph that detects faces.
node {
calculator: "FaceDetectionShortRangeGpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:face_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:face_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE_GPU:output_video"
}


@@ -0,0 +1,44 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "face_effect_gpu_deps",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:immediate_mux_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/framework/tool:switch_container",
"//mediapipe/graphs/face_effect/subgraphs:single_face_geometry_from_detection_gpu",
"//mediapipe/graphs/face_effect/subgraphs:single_face_geometry_from_landmarks_gpu",
"//mediapipe/modules/face_geometry:effect_renderer_calculator",
"//mediapipe/modules/face_geometry:env_generator_calculator",
],
)
mediapipe_binary_graph(
name = "face_effect_gpu_binary_graph",
graph = "face_effect_gpu.pbtxt",
output_name = "face_effect_gpu.binarypb",
deps = [":face_effect_gpu_deps"],
)


@@ -0,0 +1,47 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
encode_binary_proto(
name = "axis",
input = "axis.pbtxt",
message_type = "mediapipe.face_geometry.Mesh3d",
output = "axis.binarypb",
deps = [
"//mediapipe/modules/face_geometry/protos:mesh_3d_proto",
],
)
encode_binary_proto(
name = "glasses",
input = "glasses.pbtxt",
message_type = "mediapipe.face_geometry.Mesh3d",
output = "glasses.binarypb",
deps = [
"//mediapipe/modules/face_geometry/protos:mesh_3d_proto",
],
)
# `.pngblob` is used instead of `.png` to prevent iOS build from preprocessing the image.
# OpenCV is unable to read a PNG file preprocessed by the iOS build.
exports_files([
"axis.pngblob",
"facepaint.pngblob",
"glasses.pngblob",
])


@@ -0,0 +1,320 @@
vertex_type: VERTEX_PT
primitive_type: TRIANGLE
vertex_buffer: -0.100000
vertex_buffer: -0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.873006
vertex_buffer: 1.000000
vertex_buffer: 0.100000
vertex_buffer: -0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.928502
vertex_buffer: 1.000000
vertex_buffer: 0.100000
vertex_buffer: 0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.928502
vertex_buffer: 0.750000
vertex_buffer: -0.100000
vertex_buffer: 0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.873006
vertex_buffer: 0.750000
vertex_buffer: 0.100000
vertex_buffer: 0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.928502
vertex_buffer: 0.500000
vertex_buffer: -0.100000
vertex_buffer: 0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.873006
vertex_buffer: 0.500000
vertex_buffer: 0.100000
vertex_buffer: -0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.928502
vertex_buffer: 0.250000
vertex_buffer: -0.100000
vertex_buffer: -0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.873006
vertex_buffer: 0.250000
vertex_buffer: 0.100000
vertex_buffer: -0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.928502
vertex_buffer: 0.000000
vertex_buffer: -0.100000
vertex_buffer: -0.100000
vertex_buffer: 11.500000
vertex_buffer: 0.873006
vertex_buffer: 0.000000
vertex_buffer: 0.100000
vertex_buffer: -0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.983999
vertex_buffer: 1.000000
vertex_buffer: 0.100000
vertex_buffer: 0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.983999
vertex_buffer: 0.750000
vertex_buffer: -0.100000
vertex_buffer: -0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.817509
vertex_buffer: 1.000000
vertex_buffer: -0.100000
vertex_buffer: 0.100000
vertex_buffer: 8.500000
vertex_buffer: 0.817509
vertex_buffer: 0.750000
vertex_buffer: 3.000000
vertex_buffer: -0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.069341
vertex_buffer: 1.000000
vertex_buffer: 3.000000
vertex_buffer: -0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.123429
vertex_buffer: 1.000000
vertex_buffer: 3.000000
vertex_buffer: 0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.123429
vertex_buffer: 0.750000
vertex_buffer: 3.000000
vertex_buffer: 0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.069341
vertex_buffer: 0.750000
vertex_buffer: 0.000000
vertex_buffer: 0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.123419
vertex_buffer: 0.499992
vertex_buffer: 0.000000
vertex_buffer: 0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.069341
vertex_buffer: 0.500000
vertex_buffer: 0.000000
vertex_buffer: -0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.123429
vertex_buffer: 0.250000
vertex_buffer: 0.000000
vertex_buffer: -0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.069341
vertex_buffer: 0.250000
vertex_buffer: 3.000000
vertex_buffer: -0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.123429
vertex_buffer: 0.000000
vertex_buffer: 3.000000
vertex_buffer: -0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.069341
vertex_buffer: 0.000000
vertex_buffer: 0.000000
vertex_buffer: -0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.177516
vertex_buffer: 1.000000
vertex_buffer: 0.000000
vertex_buffer: 0.100000
vertex_buffer: 8.400000
vertex_buffer: 0.177516
vertex_buffer: 0.750000
vertex_buffer: 0.000000
vertex_buffer: -0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.015254
vertex_buffer: 1.000000
vertex_buffer: 0.000000
vertex_buffer: 0.100000
vertex_buffer: 8.600000
vertex_buffer: 0.015254
vertex_buffer: 0.750000
vertex_buffer: -0.100000
vertex_buffer: 0.000000
vertex_buffer: 8.400000
vertex_buffer: 0.472252
vertex_buffer: 1.000000
vertex_buffer: 0.100000
vertex_buffer: 0.000000
vertex_buffer: 8.400000
vertex_buffer: 0.527748
vertex_buffer: 1.000000
vertex_buffer: 0.100000
vertex_buffer: 0.000000
vertex_buffer: 8.600000
vertex_buffer: 0.527748
vertex_buffer: 0.750000
vertex_buffer: -0.100000
vertex_buffer: 0.000000
vertex_buffer: 8.600000
vertex_buffer: 0.472252
vertex_buffer: 0.750000
vertex_buffer: 0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.600000
vertex_buffer: 0.527748
vertex_buffer: 0.500000
vertex_buffer: -0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.600000
vertex_buffer: 0.472252
vertex_buffer: 0.500000
vertex_buffer: 0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.400000
vertex_buffer: 0.527748
vertex_buffer: 0.250000
vertex_buffer: -0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.400000
vertex_buffer: 0.472252
vertex_buffer: 0.250000
vertex_buffer: 0.100000
vertex_buffer: 0.000000
vertex_buffer: 8.400000
vertex_buffer: 0.527748
vertex_buffer: 0.000000
vertex_buffer: -0.100000
vertex_buffer: 0.000000
vertex_buffer: 8.400000
vertex_buffer: 0.472252
vertex_buffer: 0.000000
vertex_buffer: 0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.400000
vertex_buffer: 0.583245
vertex_buffer: 1.000000
vertex_buffer: 0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.600000
vertex_buffer: 0.583245
vertex_buffer: 0.750000
vertex_buffer: -0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.400000
vertex_buffer: 0.416755
vertex_buffer: 1.000000
vertex_buffer: -0.100000
vertex_buffer: 3.000000
vertex_buffer: 8.600000
vertex_buffer: 0.416755
vertex_buffer: 0.750000
index_buffer: 0
index_buffer: 1
index_buffer: 2
index_buffer: 0
index_buffer: 2
index_buffer: 3
index_buffer: 3
index_buffer: 2
index_buffer: 4
index_buffer: 3
index_buffer: 4
index_buffer: 5
index_buffer: 5
index_buffer: 4
index_buffer: 6
index_buffer: 5
index_buffer: 6
index_buffer: 7
index_buffer: 7
index_buffer: 6
index_buffer: 8
index_buffer: 7
index_buffer: 8
index_buffer: 9
index_buffer: 1
index_buffer: 10
index_buffer: 11
index_buffer: 1
index_buffer: 11
index_buffer: 2
index_buffer: 12
index_buffer: 0
index_buffer: 3
index_buffer: 12
index_buffer: 3
index_buffer: 13
index_buffer: 14
index_buffer: 15
index_buffer: 16
index_buffer: 14
index_buffer: 16
index_buffer: 17
index_buffer: 17
index_buffer: 16
index_buffer: 18
index_buffer: 17
index_buffer: 18
index_buffer: 19
index_buffer: 19
index_buffer: 18
index_buffer: 20
index_buffer: 19
index_buffer: 20
index_buffer: 21
index_buffer: 21
index_buffer: 20
index_buffer: 22
index_buffer: 21
index_buffer: 22
index_buffer: 23
index_buffer: 15
index_buffer: 24
index_buffer: 25
index_buffer: 15
index_buffer: 25
index_buffer: 16
index_buffer: 26
index_buffer: 14
index_buffer: 17
index_buffer: 26
index_buffer: 17
index_buffer: 27
index_buffer: 28
index_buffer: 29
index_buffer: 30
index_buffer: 28
index_buffer: 30
index_buffer: 31
index_buffer: 31
index_buffer: 30
index_buffer: 32
index_buffer: 31
index_buffer: 32
index_buffer: 33
index_buffer: 33
index_buffer: 32
index_buffer: 34
index_buffer: 33
index_buffer: 34
index_buffer: 35
index_buffer: 35
index_buffer: 34
index_buffer: 36
index_buffer: 35
index_buffer: 36
index_buffer: 37
index_buffer: 29
index_buffer: 38
index_buffer: 39
index_buffer: 29
index_buffer: 39
index_buffer: 30
index_buffer: 40
index_buffer: 28
index_buffer: 31
index_buffer: 40
index_buffer: 31
index_buffer: 41
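The mesh above is two flat arrays: with `vertex_type: VERTEX_PT`, each vertex should be five floats (an XYZ position followed by a UV texture coordinate, per the face_geometry `Mesh3d` convention), and every consecutive triple in `index_buffer` names one triangle. A small illustrative Rust sketch of regrouping such buffers (these helpers are hypothetical, not part of the commit):

```rust
/// Regroup a flat VERTEX_PT vertex buffer (5 floats per vertex:
/// x, y, z position followed by u, v texture coordinates) into tuples.
fn group_vertices(vertex_buffer: &[f32]) -> Vec<([f32; 3], [f32; 2])> {
    vertex_buffer
        .chunks_exact(5)
        .map(|v| ([v[0], v[1], v[2]], [v[3], v[4]]))
        .collect()
}

/// Each consecutive triple of indices describes one triangle of the mesh.
fn triangles(index_buffer: &[u32]) -> impl Iterator<Item = [u32; 3]> + '_ {
    index_buffer.chunks_exact(3).map(|t| [t[0], t[1], t[2]])
}
```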

Binary file not shown (new file, 492 B).

Binary file not shown (new file, 593 KiB).

File diff suppressed because it is too large.

Binary file not shown (new file, 293 KiB).


@@ -0,0 +1,130 @@
# MediaPipe graph that applies a face effect to the input video stream.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# An integer, which indicates which effect is selected. (int)
#
# If `selected_effect_id` is `0`, the Axis effect is selected.
# If `selected_effect_id` is `1`, the Facepaint effect is selected.
# If `selected_effect_id` is `2`, the Glasses effect is selected.
#
# No other values are allowed for `selected_effect_id`.
input_stream: "selected_effect_id"
# Indicates whether to use the face detection as the input source. (bool)
#
# If `true`, the face detection pipeline will be used to produce landmarks.
# If `false`, the face landmark pipeline will be used to produce landmarks.
input_side_packet: "use_face_detection_input_source"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# A list of geometry data for a single detected face.
#
# NOTE: there will not be an output packet in this stream for this particular
# timestamp if no faces are detected.
#
# (std::vector<face_geometry::FaceGeometry>)
output_stream: "multi_face_geometry"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Generates an environment that describes the current virtual scene.
node {
calculator: "FaceGeometryEnvGeneratorCalculator"
output_side_packet: "ENVIRONMENT:environment"
node_options: {
[type.googleapis.com/mediapipe.FaceGeometryEnvGeneratorCalculatorOptions] {
environment: {
origin_point_location: TOP_LEFT_CORNER
perspective_camera: {
vertical_fov_degrees: 63.0 # 63 degrees
near: 1.0 # 1cm
far: 10000.0 # 100m
}
}
}
}
}
# Computes the face geometry for a single face. The input source is defined
# through `use_face_detection_input_source`.
node {
calculator: "SwitchContainer"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "ENABLE:use_face_detection_input_source"
input_side_packet: "ENVIRONMENT:environment"
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
node_options: {
[type.googleapis.com/mediapipe.SwitchContainerOptions] {
contained_node: {
calculator: "SingleFaceGeometryFromLandmarksGpu"
}
contained_node: {
calculator: "SingleFaceGeometryFromDetectionGpu"
}
}
}
}
# Renders the selected effect based on `selected_effect_id`.
node {
calculator: "SwitchContainer"
input_stream: "SELECT:selected_effect_id"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
input_side_packet: "ENVIRONMENT:environment"
output_stream: "IMAGE_GPU:output_video"
node_options: {
[type.googleapis.com/mediapipe.SwitchContainerOptions] {
contained_node: {
calculator: "FaceGeometryEffectRendererCalculator"
node_options: {
[type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
effect_texture_path: "mediapipe/graphs/face_effect/data/axis.pngblob"
effect_mesh_3d_path: "mediapipe/graphs/face_effect/data/axis.binarypb"
}
}
}
contained_node: {
calculator: "FaceGeometryEffectRendererCalculator"
node_options: {
[type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
effect_texture_path: "mediapipe/graphs/face_effect/data/facepaint.pngblob"
}
}
}
contained_node: {
calculator: "FaceGeometryEffectRendererCalculator"
node_options: {
[type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
effect_texture_path: "mediapipe/graphs/face_effect/data/glasses.pngblob"
effect_mesh_3d_path: "mediapipe/graphs/face_effect/data/glasses.binarypb"
}
}
}
}
}
}


@@ -0,0 +1,61 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "face_landmarks_smoothing",
graph = "face_landmarks_smoothing.pbtxt",
register_as = "FaceLandmarksSmoothing",
deps = [
"//mediapipe/calculators/util:landmarks_smoothing_calculator",
],
)
mediapipe_simple_subgraph(
name = "single_face_geometry_from_detection_gpu",
graph = "single_face_geometry_from_detection_gpu.pbtxt",
register_as = "SingleFaceGeometryFromDetectionGpu",
deps = [
":face_landmarks_smoothing",
"//mediapipe/calculators/core:concatenate_detection_vector_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
"//mediapipe/modules/face_geometry:face_geometry_from_detection",
],
)
mediapipe_simple_subgraph(
name = "single_face_geometry_from_landmarks_gpu",
graph = "single_face_geometry_from_landmarks_gpu.pbtxt",
register_as = "SingleFaceGeometryFromLandmarksGpu",
deps = [
":face_landmarks_smoothing",
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:landmarks_smoothing_calculator",
"//mediapipe/modules/face_geometry:face_geometry_from_landmarks",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)


@@ -0,0 +1,24 @@
# MediaPipe subgraph that smoothes face landmarks.
type: "FaceLandmarksSmoothing"
input_stream: "NORM_LANDMARKS:landmarks"
input_stream: "IMAGE_SIZE:input_image_size"
output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks"
# Applies smoothing to a face landmark list. The filter options were handpicked
# to achieve better visual results.
node {
calculator: "LandmarksSmoothingCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
input_stream: "IMAGE_SIZE:input_image_size"
output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks"
node_options: {
[type.googleapis.com/mediapipe.LandmarksSmoothingCalculatorOptions] {
velocity_filter: {
window_size: 5
velocity_scale: 20.0
}
}
}
}


@@ -0,0 +1,91 @@
# MediaPipe subgraph that extracts geometry from a single face using the face
# detection pipeline on an input GPU image. The face landmarks are also
# "smoothed" to achieve better visual results.
type: "SingleFaceGeometryFromDetectionGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"
# A list of geometry data for a single detected face. The size of this
# collection is at most 1 because of the single-face use in this graph.
# (std::vector<face_geometry::FaceGeometry>)
#
# NOTE: if no face is detected at a particular timestamp, there will not be an
# output packet in the `MULTI_FACE_GEOMETRY` stream for this timestamp. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Subgraph that detects faces and corresponding landmarks using the face
# detection pipeline.
node {
calculator: "FaceDetectionShortRangeGpu"
input_stream: "IMAGE:input_image"
output_stream: "DETECTIONS:multi_face_detection"
}
# Extracts the first face detection associated with the most prominent face from
# a collection.
node {
calculator: "SplitDetectionVectorCalculator"
input_stream: "multi_face_detection"
output_stream: "face_detection"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Extracts face detection keypoints as normalized landmarks.
node {
calculator: "DetectionToLandmarksCalculator"
input_stream: "DETECTION:face_detection"
output_stream: "LANDMARKS:face_landmarks"
}
# Extracts the input image frame dimensions as a separate packet.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_image"
output_stream: "SIZE:input_image_size"
}
# Applies smoothing to the face landmarks previously extracted from the face
# detection keypoints.
node {
calculator: "FaceLandmarksSmoothing"
input_stream: "NORM_LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:input_image_size"
output_stream: "NORM_FILTERED_LANDMARKS:smoothed_face_landmarks"
}
# Converts smoothed face landmarks back into the detection format.
node {
calculator: "LandmarksToDetectionCalculator"
input_stream: "NORM_LANDMARKS:smoothed_face_landmarks"
output_stream: "DETECTION:smoothed_face_detection"
}
# Puts the smoothed single face detection back into a collection to simplify
# passing the result into the `FaceGeometryFromDetection` subgraph.
node {
calculator: "ConcatenateDetectionVectorCalculator"
input_stream: "smoothed_face_detection"
output_stream: "multi_smoothed_face_detection"
}
# Computes face geometry from the single face detection.
node {
calculator: "FaceGeometryFromDetection"
input_stream: "MULTI_FACE_DETECTION:multi_smoothed_face_detection"
input_stream: "IMAGE_SIZE:input_image_size"
input_side_packet: "ENVIRONMENT:environment"
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
}


@@ -0,0 +1,89 @@
# MediaPipe subgraph that extracts geometry from a single face using the face
# landmark pipeline on an input GPU image. The face landmarks are also
# "smoothed" to achieve better visual results.
type: "SingleFaceGeometryFromLandmarksGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"
# A list of geometry data for a single detected face. The size of this
# collection is at most 1 because of the single-face use in this graph.
# (std::vector<face_geometry::FaceGeometry>)
#
# NOTE: if no face is detected at a particular timestamp, there will not be an
# output packet in the `MULTI_FACE_GEOMETRY` stream for this timestamp. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Creates a packet to inform the `FaceLandmarkFrontGpu` subgraph to detect at
# most 1 face.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Subgraph that detects faces and corresponding landmarks using the face
# landmark pipeline.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:input_image"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
}
# Extracts a single set of face landmarks associated with the most prominent
# face detected from a collection.
node {
calculator: "SplitNormalizedLandmarkListVectorCalculator"
input_stream: "multi_face_landmarks"
output_stream: "face_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Extracts the input image frame dimensions as a separate packet.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_image"
output_stream: "SIZE:input_image_size"
}
# Applies smoothing to the single set of face landmarks.
node {
calculator: "FaceLandmarksSmoothing"
input_stream: "NORM_LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:input_image_size"
output_stream: "NORM_FILTERED_LANDMARKS:smoothed_face_landmarks"
}
# Puts the single set of smoothed landmarks back into a collection to simplify
# passing the result into the `FaceGeometryFromLandmarks` subgraph.
node {
calculator: "ConcatenateLandmarListVectorCalculator"
input_stream: "smoothed_face_landmarks"
output_stream: "multi_smoothed_face_landmarks"
}
# Computes face geometry from face landmarks for a single face.
node {
calculator: "FaceGeometryFromLandmarks"
input_stream: "MULTI_FACE_LANDMARKS:multi_smoothed_face_landmarks"
input_stream: "IMAGE_SIZE:input_image_size"
input_side_packet: "ENVIRONMENT:environment"
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
}


@@ -0,0 +1,69 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "desktop_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
],
)
cc_library(
name = "desktop_live_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
],
)
cc_library(
name = "desktop_live_gpu_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)
mediapipe_binary_graph(
name = "face_mesh_mobile_gpu_binary_graph",
graph = "face_mesh_mobile.pbtxt",
output_name = "face_mesh_mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)


@@ -0,0 +1,37 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "face_landmarks_to_render_data_calculator",
srcs = ["face_landmarks_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)


@@ -0,0 +1,104 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h"
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
namespace mediapipe {
namespace {
constexpr int kNumFaceLandmarkConnections = 132;
// Pairs of landmark indices to be rendered with connections.
constexpr int kFaceLandmarkConnections[] = {
// Lips.
61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321,
321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267,
269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14,
14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81,
81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308,
// Left eye.
33, 7, 7, 163, 163, 144, 144, 145, 145, 153, 153, 154, 154, 155, 155, 133,
33, 246, 246, 161, 161, 160, 160, 159, 159, 158, 158, 157, 157, 173, 173,
133,
// Left eyebrow.
46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107,
// Left iris.
474, 475, 475, 476, 476, 477, 477, 474,
// Right eye.
263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382,
362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398,
398, 362,
// Right eyebrow.
276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296,
336,
// Right iris.
469, 470, 470, 471, 471, 472, 472, 469,
// Face oval.
10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
} // namespace
// A calculator that converts face landmarks to RenderData proto for
// visualization. Ignores landmark_connections specified in
// LandmarksToRenderDataCalculatorOptions, if any, and always uses a fixed set
// of landmark connections specific to face landmark (defined in
// kFaceLandmarkConnections[] above).
//
// Example config:
// node {
// calculator: "FaceLandmarksToRenderDataCalculator"
// input_stream: "NORM_LANDMARKS:landmarks"
// output_stream: "RENDER_DATA:render_data"
// options {
// [LandmarksToRenderDataCalculatorOptions.ext] {
// landmark_color { r: 0 g: 255 b: 0 }
// connection_color { r: 0 g: 255 b: 0 }
// thickness: 4.0
// }
// }
// }
class FaceLandmarksToRenderDataCalculator
: public LandmarksToRenderDataCalculator {
public:
absl::Status Open(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(FaceLandmarksToRenderDataCalculator);
absl::Status FaceLandmarksToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<mediapipe::LandmarksToRenderDataCalculatorOptions>();
for (int i = 0; i < kNumFaceLandmarkConnections; ++i) {
landmark_connections_.push_back(kFaceLandmarkConnections[i * 2]);
landmark_connections_.push_back(kFaceLandmarkConnections[i * 2 + 1]);
}
return absl::OkStatus();
}
} // namespace mediapipe


@@ -0,0 +1,70 @@
# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
# on CPU.
# Path to the input video file. (string)
input_side_packet: "input_video_path"
# Path to the output video file. (string)
input_side_packet: "output_video_path"
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input video.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,66 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
# Input image. (ImageFrame)
input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}
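The FlowLimiterCalculator pattern used above is plain back-pressure: at most one frame is in flight, and frames that arrive while the graph is still busy are dropped rather than queued. A minimal Rust sketch of the same idea outside of MediaPipe, using only the standard library (Frame and process_frame are hypothetical placeholders):

use std::sync::mpsc::{sync_channel, TrySendError};
use std::thread;

// Hypothetical stand-ins for a camera frame and the expensive graph work.
type Frame = Vec<u8>;
fn process_frame(_frame: Frame) { /* run the graph on the frame */ }

fn main() {
    // Capacity 1 mirrors the FlowLimiter: at most one in-flight frame.
    let (tx, rx) = sync_channel::<Frame>(1);

    // Consumer thread plays the role of the downstream nodes that may lag.
    let worker = thread::spawn(move || {
        for frame in rx {
            process_frame(frame);
        }
    });

    for i in 0..100u8 {
        let frame = vec![i; 4];
        // try_send never blocks; a full channel means the worker is still
        // busy, so the frame is dropped instead of piling up in a queue.
        if let Err(TrySendError::Full(_dropped)) = tx.try_send(frame) {
            // Dropped, exactly like frames that never reach throttled_input_video.
        }
    }
    drop(tx);
    worker.join().unwrap();
}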

View File

@ -0,0 +1,66 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# Input image. (GpuBuffer)
input_stream: "input_video"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,67 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# Max number of faces to detect/process. (int)
input_side_packet: "num_faces"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,52 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "renderer_calculators",
deps = [
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
"//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_renderer_gpu",
graph = "face_renderer_gpu.pbtxt",
register_as = "FaceRendererGpu",
deps = [
":renderer_calculators",
],
)
mediapipe_simple_subgraph(
name = "face_renderer_cpu",
graph = "face_renderer_cpu.pbtxt",
register_as = "FaceRendererCpu",
deps = [
":renderer_calculators",
],
)

View File

@ -0,0 +1,96 @@
# MediaPipe face mesh rendering subgraph.
type: "FaceRendererCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# CPU image with rendered data. (ImageFrame)
output_stream: "IMAGE:output_image"
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_image"
output_stream: "SIZE:image_size"
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_face_landmarks"
output_stream: "ITEM:face_landmarks"
output_stream: "BATCH_END:landmark_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:landmarks_render_data"
input_stream: "BATCH_END:landmark_timestamp"
output_stream: "ITERABLE:multi_face_landmarks_render_data"
}
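The BeginLoop/EndLoop pair above applies the per-face calculator to every element of the vector packet and reassembles the results at the BATCH_END timestamp; conceptually it is just a map over the vector. A tiny Rust sketch with hypothetical stand-in types:

// Hypothetical stand-ins for NormalizedLandmarkList and RenderData.
struct NormalizedLandmarkList;
struct RenderData;

fn landmarks_to_render_data(_lm: &NormalizedLandmarkList) -> RenderData {
    RenderData
}

fn main() {
    let multi_face_landmarks = vec![NormalizedLandmarkList, NormalizedLandmarkList];

    // BeginLoop -> per-element calculator -> EndLoop behaves like map/collect.
    let render_data: Vec<RenderData> = multi_face_landmarks
        .iter()
        .map(landmarks_to_render_data)
        .collect();

    assert_eq!(render_data.len(), 2);
}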
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:rects"
output_stream: "RENDER_DATA:rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_image"
input_stream: "detections_render_data"
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
input_stream: "rects_render_data"
output_stream: "IMAGE:output_image"
}
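For reference, the overlay produced by FaceLandmarksToRenderDataCalculator plus AnnotationOverlayCalculator can be approximated directly with the Rust opencv bindings. This is only an illustrative sketch under assumptions: NormLandmark is a hypothetical (x, y) tuple normalized to [0.0, 1.0], not the type the renderer actually consumes.

use opencv::core::{Mat, Point, Scalar};
use opencv::imgproc;
use opencv::prelude::*;
use opencv::Result;

// Hypothetical normalized landmark: (x, y) in [0.0, 1.0].
type NormLandmark = (f32, f32);

/// Draws each landmark as a small filled red dot, mirroring the
/// landmark_color { r: 255 g: 0 b: 0 } setting in the graph above.
fn draw_face_landmarks(frame: &mut Mat, landmarks: &[NormLandmark]) -> Result<()> {
    let size = frame.size()?;
    for &(x, y) in landmarks {
        let center = Point::new(
            (x * size.width as f32) as i32,
            (y * size.height as f32) as i32,
        );
        // OpenCV expects BGR ordering, so red is (0, 0, 255).
        imgproc::circle(
            frame,
            center,
            2,                                 // radius in pixels
            Scalar::new(0.0, 0.0, 255.0, 0.0),
            -1,                                // filled
            imgproc::LINE_AA,
            0,
        )?;
    }
    Ok(())
}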

View File

@ -0,0 +1,96 @@
# MediaPipe face mesh rendering subgraph.
type: "FaceRendererGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# GPU image with rendered data. (GpuBuffer)
output_stream: "IMAGE:output_image"
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_image"
output_stream: "SIZE:image_size"
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_face_landmarks"
output_stream: "ITEM:face_landmarks"
output_stream: "BATCH_END:end_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:landmarks_render_data"
input_stream: "BATCH_END:end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks_render_data"
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:rects"
output_stream: "RENDER_DATA:rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:input_image"
input_stream: "detections_render_data"
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
input_stream: "rects_render_data"
output_stream: "IMAGE_GPU:output_image"
}

View File

@ -0,0 +1,61 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/image:recolor_calculator",
"//mediapipe/calculators/image:set_alpha_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
],
)
cc_library(
name = "desktop_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/image:recolor_calculator",
"//mediapipe/calculators/image:set_alpha_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph",
graph = "hair_segmentation_mobile_gpu.pbtxt",
output_name = "mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,152 @@
# MediaPipe graph that performs hair segmentation with TensorFlow Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/hair_segmentation:hair_segmentation_cpu
# Images on CPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish
# generating the corresponding hair mask before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:hair_mask"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on CPU to a 512x512 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the hair
# segmentation model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:throttled_input_video"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 512
output_height: 512
}
}
}
# Caches a mask fed back from the previous round of hair segmentation, and upon
# the arrival of the next input image sends out the cached mask with the
# timestamp replaced by that of the input image, essentially generating a packet
# that carries the previous mask. Note that upon the arrival of the very first
# input image, an empty packet is sent out to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:throttled_input_video"
input_stream: "LOOP:hair_mask"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:previous_hair_mask"
}
# Embeds the hair mask generated from the previous round of hair segmentation
# as the alpha channel of the current input image.
node {
calculator: "SetAlphaCalculator"
input_stream: "IMAGE:transformed_input_video"
input_stream: "ALPHA:previous_hair_mask"
output_stream: "IMAGE:mask_embedded_input_video"
}
# Converts the transformed input image on CPU into an image tensor stored in
# TfLiteTensor. The zero_center option is set to false to normalize the
# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the
# max_num_channels option set to 4, all 4 RGBA channels are contained in the
# image tensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:mask_embedded_input_video"
output_stream: "TENSORS:image_tensor"
node_options: {
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
zero_center: false
max_num_channels: 4
}
}
}
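The converter node above packs the RGBA image (previous hair mask in the alpha channel) into a float tensor, and zero_center: false selects [0.0, 1.0] normalization instead of [-1.0, 1.0]. A plain-Rust sketch of that pixel conversion, independent of any MediaPipe or TFLite API:

/// Converts interleaved 8-bit RGBA pixels into a flat f32 tensor.
/// zero_center = false maps 0..=255 onto [0.0, 1.0];
/// zero_center = true maps it onto [-1.0, 1.0].
fn rgba_to_tensor(pixels: &[u8], zero_center: bool) -> Vec<f32> {
    pixels
        .iter()
        .map(|&v| {
            let unit = v as f32 / 255.0;
            if zero_center { unit * 2.0 - 1.0 } else { unit }
        })
        .collect()
}

fn main() {
    // One RGBA pixel: mid-gray with a fully set previous mask in alpha.
    let pixels = [128u8, 128, 128, 255];
    let tensor = rgba_to_tensor(&pixels, false);
    assert!((tensor[0] - 128.0 / 255.0).abs() < 1e-6);
    assert_eq!(tensor[3], 1.0);
}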
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "op_resolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
use_gpu: false
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# tensor representing the hair segmentation, which has the same width and height
# as the input image tensor.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:image_tensor"
output_stream: "TENSORS:segmentation_tensor"
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/hair_segmentation.tflite"
use_gpu: false
}
}
}
# Decodes the segmentation tensor generated by the TensorFlow Lite model into a
# mask of values in [0, 255], stored in a CPU buffer. It also
# takes the mask generated previously as another input to improve the temporal
# consistency.
node {
calculator: "TfLiteTensorsToSegmentationCalculator"
input_stream: "TENSORS:segmentation_tensor"
input_stream: "PREV_MASK:previous_hair_mask"
output_stream: "MASK:hair_mask"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
tensor_width: 512
tensor_height: 512
tensor_channels: 2
combine_with_previous_ratio: 0.9
output_layer_index: 1
}
}
}
# Colors the hair segmentation with the color specified in the option.
node {
calculator: "RecolorCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "MASK:hair_mask"
output_stream: "IMAGE:output_video"
node_options: {
[type.googleapis.com/mediapipe.RecolorCalculatorOptions] {
color { r: 0 g: 0 b: 255 }
mask_channel: RED
}
}
}
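The RecolorCalculator at the end tints the frame wherever the hair mask is set, taking the mask weight from its RED channel and blending in the configured blue. A rough per-pixel sketch of what such a recoloring amounts to; the blending rule below is an illustrative assumption, not the calculator's exact formula:

/// Blends `color` over one BGR pixel proportionally to a mask weight in [0.0, 1.0].
fn recolor(pixel: [u8; 3], mask: f32, color: [u8; 3]) -> [u8; 3] {
    let w = mask.clamp(0.0, 1.0);
    let mut out = [0u8; 3];
    for c in 0..3 {
        let blended = pixel[c] as f32 * (1.0 - w) + color[c] as f32 * w;
        out[c] = blended.round() as u8;
    }
    out
}

fn main() {
    let hair_blue = [255u8, 0, 0]; // BGR for color { r: 0 g: 0 b: 255 }
    // A fully masked pixel takes the tint; an unmasked pixel is left untouched.
    assert_eq!(recolor([10, 20, 30], 1.0, hair_blue), hair_blue);
    assert_eq!(recolor([10, 20, 30], 0.0, hair_blue), [10, 20, 30]);
}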

View File

@ -0,0 +1,152 @@
# MediaPipe graph that performs hair segmentation with TensorFlow Lite on GPU.
# Used in the example in
# mediapipe/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish
# generating the corresponding hair mask before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:hair_mask"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on GPU to a 512x512 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the hair
# segmentation model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 512
output_height: 512
}
}
}
# Caches a mask fed back from the previous round of hair segmentation, and upon
# the arrival of the next input image sends out the cached mask with the
# timestamp replaced by that of the input image, essentially generating a packet
# that carries the previous mask. Note that upon the arrival of the very first
# input image, an empty packet is sent out to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:throttled_input_video"
input_stream: "LOOP:hair_mask"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:previous_hair_mask"
}
# Embeds the hair mask generated from the previous round of hair segmentation
# as the alpha channel of the current input image.
node {
calculator: "SetAlphaCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
input_stream: "ALPHA_GPU:previous_hair_mask"
output_stream: "IMAGE_GPU:mask_embedded_input_video"
}
# Converts the transformed input image on GPU into an image tensor stored in
# tflite::gpu::GlBuffer. The zero_center option is set to false to normalize the
# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the
# max_num_channels option set to 4, all 4 RGBA channels are contained in the
# image tensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:mask_embedded_input_video"
output_stream: "TENSORS_GPU:image_tensor"
node_options: {
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
zero_center: false
max_num_channels: 4
}
}
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "op_resolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
use_gpu: true
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# tensor representing the hair segmentation, which has the same width and height
# as the input image tensor.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS_GPU:segmentation_tensor"
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/hair_segmentation.tflite"
use_gpu: true
}
}
}
# Decodes the segmentation tensor generated by the TensorFlow Lite model into a
# mask of values in [0.f, 1.f], stored in the R channel of a GPU buffer. It also
# takes the mask generated previously as another input to improve the temporal
# consistency.
node {
calculator: "TfLiteTensorsToSegmentationCalculator"
input_stream: "TENSORS_GPU:segmentation_tensor"
input_stream: "PREV_MASK_GPU:previous_hair_mask"
output_stream: "MASK_GPU:hair_mask"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
tensor_width: 512
tensor_height: 512
tensor_channels: 2
combine_with_previous_ratio: 0.9
output_layer_index: 1
}
}
}
# Colors the hair segmentation with the color specified in the option.
node {
calculator: "RecolorCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "MASK_GPU:hair_mask"
output_stream: "IMAGE_GPU:output_video"
node_options: {
[type.googleapis.com/mediapipe.RecolorCalculatorOptions] {
color { r: 0 g: 0 b: 255 }
mask_channel: RED
}
}
}

View File

@ -0,0 +1,91 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
exports_files(glob([
"*.pbtxt",
]))
cc_library(
name = "desktop_offline_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:immediate_mux_calculator",
"//mediapipe/calculators/core:packet_inner_join_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
],
)
cc_library(
name = "desktop_tflite_calculators",
deps = [
":desktop_offline_calculators",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:merge_calculator",
"//mediapipe/graphs/hand_tracking/subgraphs:hand_renderer_cpu",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu",
],
)
mediapipe_binary_graph(
name = "hand_tracking_desktop_live_binary_graph",
graph = "hand_tracking_desktop_live.pbtxt",
output_name = "hand_tracking_desktop_live.binarypb",
deps = [":desktop_tflite_calculators"],
)
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/hand_tracking/subgraphs:hand_renderer_gpu",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu",
],
)
mediapipe_binary_graph(
name = "hand_tracking_mobile_gpu_binary_graph",
graph = "hand_tracking_mobile.pbtxt",
output_name = "hand_tracking_mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)
cc_library(
name = "detection_mobile_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/palm_detection:palm_detection_gpu",
],
)
mediapipe_binary_graph(
name = "hand_detection_mobile_gpu_binary_graph",
graph = "hand_detection_mobile.pbtxt",
output_name = "hand_detection_mobile_gpu.binarypb",
deps = [":detection_mobile_calculators"],
)

View File

@ -0,0 +1,17 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])

View File

@ -0,0 +1,61 @@
# MediaPipe graph that performs hand detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_detection_tflite.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Detects palms.
node {
calculator: "PalmDetectionCpu"
input_stream: "IMAGE:input_video"
output_stream: "DETECTIONS:output_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the original image coming into
# the graph.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,39 @@
# MediaPipe graph that performs hand detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_detection_cpu.
# CPU image. (ImageFrame)
input_stream: "input_video"
# CPU image. (ImageFrame)
output_stream: "output_video"
# Detects palms.
node {
calculator: "PalmDetectionCpu"
input_stream: "IMAGE:input_video"
output_stream: "DETECTIONS:output_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the original image coming into
# the graph.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,59 @@
# MediaPipe graph that performs hand detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handdetectiongpu and
# mediapipe/examples/ios/handdetectiongpu.
# GPU image. (GpuBuffer)
input_stream: "input_video"
# GPU image. (GpuBuffer)
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for PalmDetectionGpu
# downstream in the graph to finish its tasks before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images in PalmDetectionGpu to 1. This prevents the nodes in
# PalmDetectionGpu from queuing up incoming images and data excessively, which
# leads to increased latency and memory usage, unwanted in real-time mobile
# applications. It also eliminates unnecessary computation, e.g., the output
# produced by a node in the subgraph may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Detects palms.
node {
calculator: "PalmDetectionGpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:palm_detections"
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:palm_detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "detection_render_data"
output_stream: "IMAGE_GPU:output_video"
}

View File

@ -0,0 +1,68 @@
# MediaPipe graph that performs hand tracking on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_tracking_tflite.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Generates a side packet containing the max number of hands to detect/track.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_hands"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 2 }
}
}
}
# Detects/tracks hand landmarks.
node {
calculator: "HandLandmarkTrackingCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:landmarks"
output_stream: "HANDEDNESS:handedness"
output_stream: "PALM_DETECTIONS:multi_palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:multi_hand_rects"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:multi_palm_rects"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_cpu.pbtxt).
node {
calculator: "HandRendererSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "DETECTIONS:multi_palm_detections"
input_stream: "LANDMARKS:landmarks"
input_stream: "HANDEDNESS:handedness"
input_stream: "NORM_RECTS:0:multi_palm_rects"
input_stream: "NORM_RECTS:1:multi_hand_rects"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,46 @@
# MediaPipe graph that performs hand tracking on desktop with TensorFlow
# Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu.
# CPU image. (ImageFrame)
input_stream: "input_video"
# CPU image. (ImageFrame)
output_stream: "output_video"
# Generates a side packet containing the max number of hands to detect/track.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_hands"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 2 }
}
}
}
# Detects/tracks hand landmarks.
node {
calculator: "HandLandmarkTrackingCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:landmarks"
output_stream: "HANDEDNESS:handedness"
output_stream: "PALM_DETECTIONS:multi_palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:multi_hand_rects"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:multi_palm_rects"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_cpu.pbtxt).
node {
calculator: "HandRendererSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "DETECTIONS:multi_palm_detections"
input_stream: "LANDMARKS:landmarks"
input_stream: "HANDEDNESS:handedness"
input_stream: "NORM_RECTS:0:multi_palm_rects"
input_stream: "NORM_RECTS:1:multi_hand_rects"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,48 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.
# GPU image. (GpuBuffer)
input_stream: "input_video"
# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"
# Generates a side packet containing the max number of hands to detect/track.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_hands"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 2 }
}
}
}
# Detects/tracks hand landmarks.
node {
calculator: "HandLandmarkTrackingGpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "HANDEDNESS:handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
calculator: "HandRendererSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "DETECTIONS:palm_detections"
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "HANDEDNESS:handedness"
input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,65 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.
# GPU image. (GpuBuffer)
input_stream: "input_video"
# Max number of hands to detect/process. (int)
input_side_packet: "num_hands"
# Model complexity (0 or 1). (int)
input_side_packet: "model_complexity"
# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Detects/tracks hand landmarks.
node {
calculator: "HandLandmarkTrackingGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "HANDEDNESS:handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
calculator: "HandRendererSubgraph"
input_stream: "IMAGE:throttled_input_video"
input_stream: "DETECTIONS:palm_detections"
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "HANDEDNESS:handedness"
input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,58 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "hand_renderer_cpu",
graph = "hand_renderer_cpu.pbtxt",
register_as = "HandRendererSubgraph",
deps = [
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:labels_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_renderer_gpu",
graph = "hand_renderer_gpu.pbtxt",
register_as = "HandRendererSubgraph",
deps = [
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:labels_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
],
)

View File

@ -0,0 +1,209 @@
# MediaPipe graph to render hand landmarks and some related debug information.
type: "HandRendererSubgraph"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_hand_landmarks"
# Handedness of the detected hand (i.e. whether the hand is left or right).
# (std::vector<ClassificationList>)
input_stream: "HANDEDNESS:multi_handedness"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:0:multi_palm_rects"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:1:multi_hand_rects"
# Detected palms. (std::vector<Detection>)
input_stream: "DETECTIONS:palm_detections"
# Updated CPU image. (ImageFrame)
output_stream: "IMAGE:output_image"
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:palm_detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:multi_hand_rects"
output_stream: "RENDER_DATA:multi_hand_rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:multi_palm_rects"
output_stream: "RENDER_DATA:multi_palm_rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 125 g: 0 b: 122 }
thickness: 4.0
}
}
}
# Outputs each element of multi_hand_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_hand_landmarks"
output_stream: "ITEM:single_hand_landmarks"
output_stream: "BATCH_END:landmark_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:single_hand_landmarks"
output_stream: "RENDER_DATA:single_hand_landmark_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 0
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 5
landmark_connections: 9
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 9
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 14
landmark_connections: 15
landmark_connections: 15
landmark_connections: 16
landmark_connections: 13
landmark_connections: 17
landmark_connections: 0
landmark_connections: 17
landmark_connections: 17
landmark_connections: 18
landmark_connections: 18
landmark_connections: 19
landmark_connections: 19
landmark_connections: 20
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 4.0
}
}
}
# Collects a RenderData object for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:single_hand_landmark_render_data"
input_stream: "BATCH_END:landmark_timestamp"
output_stream: "ITERABLE:multi_hand_landmarks_render_data"
}
# Don't render handedness if more than one handedness is reported.
node {
calculator: "ClassificationListVectorHasMinSizeCalculator"
input_stream: "ITERABLE:multi_handedness"
output_stream: "disallow_handedness_rendering"
node_options: {
[type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
min_size: 2
}
}
}
node {
calculator: "GateCalculator"
input_stream: "multi_handedness"
input_stream: "DISALLOW:disallow_handedness_rendering"
output_stream: "allowed_multi_handedness"
node_options: {
[type.googleapis.com/mediapipe.GateCalculatorOptions] {
empty_packets_as_allow: false
}
}
}
node {
calculator: "SplitClassificationListVectorCalculator"
input_stream: "allowed_multi_handedness"
output_stream: "handedness"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
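Together, the min-size check, the gate, and the split above implement "only label handedness when a single hand is visible". The same decision in straight-line Rust, with ClassificationList as a hypothetical stand-in type:

#[derive(Clone, Debug, PartialEq)]
struct ClassificationList(String);

/// Returns the handedness to render, or None when two or more hands are
/// present (mirroring min_size: 2 feeding the DISALLOW input of the gate).
fn handedness_to_render(multi_handedness: &[ClassificationList]) -> Option<ClassificationList> {
    if multi_handedness.len() >= 2 {
        None
    } else {
        multi_handedness.first().cloned()
    }
}

fn main() {
    let one_hand = vec![ClassificationList("Left".into())];
    let two_hands = vec![
        ClassificationList("Left".into()),
        ClassificationList("Right".into()),
    ];
    assert_eq!(
        handedness_to_render(&one_hand),
        Some(ClassificationList("Left".into()))
    );
    assert_eq!(handedness_to_render(&two_hands), None);
}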
# Converts classification to drawing primitives for annotation overlay.
node {
calculator: "LabelsToRenderDataCalculator"
input_stream: "CLASSIFICATIONS:handedness"
output_stream: "RENDER_DATA:handedness_render_data"
node_options: {
[type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: {
color { r: 255 g: 0 b: 0 }
thickness: 10.0
font_height_px: 50
horizontal_offset_px: 30
vertical_offset_px: 50
max_num_labels: 1
location: TOP_LEFT
}
}
}
# Draws annotations and overlays them on top of the input images. Consumes
# a vector of RenderData objects and draws each of them on the input frame.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_image"
input_stream: "detection_render_data"
input_stream: "multi_hand_rects_render_data"
input_stream: "multi_palm_rects_render_data"
input_stream: "handedness_render_data"
input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
output_stream: "IMAGE:output_image"
}
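The landmark_connections entries above form a flat list of index pairs describing the 21-landmark hand skeleton. Keeping the same topology as an array of pairs, the green connection overlay can be sketched with the Rust opencv bindings; NormLandmark is again a hypothetical stand-in for the real landmark type:

use opencv::core::{Mat, Point, Scalar};
use opencv::imgproc;
use opencv::prelude::*;
use opencv::Result;

/// The same index pairs as the landmark_connections entries in the graph above.
const HAND_CONNECTIONS: [(usize, usize); 21] = [
    (0, 1), (1, 2), (2, 3), (3, 4),                  // thumb
    (0, 5), (5, 6), (6, 7), (7, 8),                  // index finger
    (5, 9), (9, 10), (10, 11), (11, 12),             // middle finger (+ palm edge 5-9)
    (9, 13), (13, 14), (14, 15), (15, 16),           // ring finger (+ palm edge 9-13)
    (13, 17), (0, 17), (17, 18), (18, 19), (19, 20), // pinky and palm edges
];

// Hypothetical normalized landmark: (x, y) in [0.0, 1.0].
type NormLandmark = (f32, f32);

/// Draws the hand skeleton in green, matching connection_color above.
fn draw_hand_connections(frame: &mut Mat, landmarks: &[NormLandmark; 21]) -> Result<()> {
    let size = frame.size()?;
    let to_px = |(x, y): NormLandmark| {
        Point::new(
            (x * size.width as f32) as i32,
            (y * size.height as f32) as i32,
        )
    };
    for &(a, b) in HAND_CONNECTIONS.iter() {
        imgproc::line(
            frame,
            to_px(landmarks[a]),
            to_px(landmarks[b]),
            Scalar::new(0.0, 255.0, 0.0, 0.0), // BGR green
            2,
            imgproc::LINE_AA,
            0,
        )?;
    }
    Ok(())
}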

View File

@ -0,0 +1,209 @@
# MediaPipe graph to render hand landmarks and some related debug information.
type: "HandRendererSubgraph"
# GPU buffer. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_hand_landmarks"
# Handedness of the detected hand (i.e. whether the hand is left or right).
# (std::vector<ClassificationList>)
input_stream: "HANDEDNESS:multi_handedness"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:0:multi_palm_rects"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:1:multi_hand_rects"
# Detected palms. (std::vector<Detection>)
input_stream: "DETECTIONS:palm_detections"
# Updated GPU buffer. (GpuBuffer)
output_stream: "IMAGE:output_image"
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:palm_detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:multi_hand_rects"
output_stream: "RENDER_DATA:multi_hand_rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:multi_palm_rects"
output_stream: "RENDER_DATA:multi_palm_rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 125 g: 0 b: 122 }
thickness: 4.0
}
}
}
# Outputs each element of multi_hand_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_hand_landmarks"
output_stream: "ITEM:single_hand_landmarks"
output_stream: "BATCH_END:landmark_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:single_hand_landmarks"
output_stream: "RENDER_DATA:single_hand_landmark_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 0
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 5
landmark_connections: 9
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 9
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 14
landmark_connections: 15
landmark_connections: 15
landmark_connections: 16
landmark_connections: 13
landmark_connections: 17
landmark_connections: 0
landmark_connections: 17
landmark_connections: 17
landmark_connections: 18
landmark_connections: 18
landmark_connections: 19
landmark_connections: 19
landmark_connections: 20
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 4.0
}
}
}
# Collects a RenderData object for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:single_hand_landmark_render_data"
input_stream: "BATCH_END:landmark_timestamp"
output_stream: "ITERABLE:multi_hand_landmarks_render_data"
}
# Don't render handedness if more than one handedness is reported.
node {
calculator: "ClassificationListVectorHasMinSizeCalculator"
input_stream: "ITERABLE:multi_handedness"
output_stream: "disallow_handedness_rendering"
node_options: {
[type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
min_size: 2
}
}
}
node {
calculator: "GateCalculator"
input_stream: "multi_handedness"
input_stream: "DISALLOW:disallow_handedness_rendering"
output_stream: "allowed_multi_handedness"
node_options: {
[type.googleapis.com/mediapipe.GateCalculatorOptions] {
empty_packets_as_allow: false
}
}
}
node {
calculator: "SplitClassificationListVectorCalculator"
input_stream: "allowed_multi_handedness"
output_stream: "handedness"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Converts classification to drawing primitives for annotation overlay.
node {
calculator: "LabelsToRenderDataCalculator"
input_stream: "CLASSIFICATIONS:handedness"
output_stream: "RENDER_DATA:handedness_render_data"
node_options: {
[type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: {
color { r: 255 g: 0 b: 0 }
thickness: 10.0
font_height_px: 50
horizontal_offset_px: 30
vertical_offset_px: 50
max_num_labels: 1
location: TOP_LEFT
}
}
}
# Draws annotations and overlays them on top of the input images. Consumes
# a vector of RenderData objects and draws each of them on the input frame.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:input_image"
input_stream: "detection_render_data"
input_stream: "multi_hand_rects_render_data"
input_stream: "multi_palm_rects_render_data"
input_stream: "handedness_render_data"
input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
output_stream: "IMAGE_GPU:output_image"
}
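Note on the LandmarksToRenderDataCalculator options above: landmark_connections is a flat list in which each consecutive pair of indices defines one segment of the hand skeleton (0-1, 1-2, ..., 19-20). The following standalone C++ sketch is not part of this repository; it simply decodes such a flat list into index pairs to make that convention explicit.

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Excerpt of the connections listed above: the thumb chain 0-1, 1-2, 2-3, 3-4.
  const std::vector<int> landmark_connections = {0, 1, 1, 2, 2, 3, 3, 4};
  std::vector<std::pair<int, int>> edges;
  for (std::size_t i = 0; i + 1 < landmark_connections.size(); i += 2) {
    edges.emplace_back(landmark_connections[i], landmark_connections[i + 1]);
  }
  for (const auto& edge : edges) {
    std::printf("draw segment %d -> %d\n", edge.first, edge.second);
  }
  return 0;
}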

View File

@@ -0,0 +1,70 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
"mediapipe_simple_subgraph",
)
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
mediapipe_simple_subgraph(
name = "holistic_tracking_to_render_data",
graph = "holistic_tracking_to_render_data.pbtxt",
register_as = "HolisticTrackingToRenderData",
deps = [
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:merge_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_scale_calculator",
"//mediapipe/modules/holistic_landmark:hand_wrist_for_pose",
],
)
cc_library(
name = "holistic_tracking_gpu_deps",
deps = [
":holistic_tracking_to_render_data",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/modules/holistic_landmark:holistic_landmark_gpu",
],
)
mediapipe_binary_graph(
name = "holistic_tracking_gpu",
graph = "holistic_tracking_gpu.pbtxt",
output_name = "holistic_tracking_gpu.binarypb",
deps = [":holistic_tracking_gpu_deps"],
)
cc_library(
name = "holistic_tracking_cpu_graph_deps",
deps = [
":holistic_tracking_to_render_data",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
],
)

View File

@@ -0,0 +1,75 @@
# Tracks and renders pose + hands + face landmarks.
# CPU image. (ImageFrame)
input_stream: "input_video"
# CPU image with rendered results. (ImageFrame)
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, both
# unwanted in real-time mobile applications. It also eliminates unnecessary
# computation, e.g., the output produced by a node may get dropped downstream
# if the subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
node_options: {
[type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
max_in_flight: 1
max_in_queue: 1
# Timeout is disabled (set to 0) as first frame processing can take more
# than 1 second.
in_flight_timeout: 0
}
}
}
node {
calculator: "HolisticLandmarkCpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "POSE_LANDMARKS:pose_landmarks"
output_stream: "POSE_ROI:pose_roi"
output_stream: "POSE_DETECTION:pose_detection"
output_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}
# Gets image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:throttled_input_video"
output_stream: "SIZE:image_size"
}
# Converts pose, hands and face landmarks to a render data vector.
node {
calculator: "HolisticTrackingToRenderData"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "POSE_LANDMARKS:pose_landmarks"
input_stream: "POSE_ROI:pose_roi"
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA_VECTOR:render_data_vector"
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "VECTOR:render_data_vector"
output_stream: "IMAGE:output_video"
}
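The graph above is a complete CalculatorGraphConfig, so it can be driven directly from C++ with mediapipe::CalculatorGraph. The sketch below is a hedged illustration of that driver loop, assuming MediaPipe is available as a build dependency and that the graph text above is passed in as a string; the placeholder blank frames and the fixed 640x480 size are assumptions for illustration only, and a real caller would feed camera frames and interleave polling as in MediaPipe's desktop demos.

#include <cstdint>
#include <string>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

absl::Status RunHolisticCpuGraph(const std::string& graph_text) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(graph_text);
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("output_video"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  // Feed a few placeholder frames (blank SRGB images used purely as stand-ins
  // for camera frames).
  for (int64_t ts = 0; ts < 10; ++ts) {
    auto frame = absl::make_unique<mediapipe::ImageFrame>(
        mediapipe::ImageFormat::SRGB, /*width=*/640, /*height=*/480,
        mediapipe::ImageFrame::kDefaultAlignmentBoundary);
    MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
        "input_video",
        mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(ts))));
  }
  MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));

  // Drain rendered frames; Next() returns false once the graph has finished.
  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    const auto& rendered = packet.Get<mediapipe::ImageFrame>();
    (void)rendered;  // e.g. convert to a cv::Mat and display it.
  }
  return graph.WaitUntilDone();
}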

View File

@@ -0,0 +1,75 @@
# Tracks and renders pose + hands + face landmarks.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# GPU image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, both
# unwanted in real-time mobile applications. It also eliminates unnecessary
# computation, e.g., the output produced by a node may get dropped downstream
# if the subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
node_options: {
[type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
max_in_flight: 1
max_in_queue: 1
# Timeout is disabled (set to 0) as first frame processing can take more
# than 1 second.
in_flight_timeout: 0
}
}
}
node {
calculator: "HolisticLandmarkGpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "POSE_LANDMARKS:pose_landmarks"
output_stream: "POSE_ROI:pose_roi"
output_stream: "POSE_DETECTION:pose_detection"
output_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}
# Gets image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "SIZE:image_size"
}
# Converts pose, hands and face landmarks to a render data vector.
node {
calculator: "HolisticTrackingToRenderData"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "POSE_LANDMARKS:pose_landmarks"
input_stream: "POSE_ROI:pose_roi"
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA_VECTOR:render_data_vector"
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "VECTOR:render_data_vector"
output_stream: "IMAGE_GPU:output_video"
}

View File

@@ -0,0 +1,757 @@
# Converts pose + hands + face landmarks to a render data vector.
type: "HolisticTrackingToRenderData"
# Image size. (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Pose landmarks. (NormalizedLandmarkList)
input_stream: "POSE_LANDMARKS:landmarks"
# Region of interest calculated based on pose landmarks. (NormalizedRect)
input_stream: "POSE_ROI:roi"
# Left hand landmarks. (NormalizedLandmarkList)
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# Right hand landmarks. (NormalizedLandmarkList)
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# Face landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS:face_landmarks"
# Render data vector. (std::vector<RenderData>)
output_stream: "RENDER_DATA_VECTOR:render_data_vector"
# --------------------------------------------------------------------------- #
# ------------------ Calculates scale for render objects -------------------- #
# --------------------------------------------------------------------------- #
# Calculates rendering scale based on the pose bounding box.
node {
calculator: "RectToRenderScaleCalculator"
input_stream: "NORM_RECT:roi"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "RENDER_SCALE:render_scale"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderScaleCalculatorOptions] {
multiplier: 0.0008
}
}
}
# --------------------------------------------------------------------------- #
# --------------- Combines pose and hands into pose skeleton ---------------- #
# --------------------------------------------------------------------------- #
# Gets pose landmarks before wrists.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks"
output_stream: "landmarks_before_wrist"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 11 end: 15 }
}
}
}
# Gets pose left wrist landmark.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks"
output_stream: "landmarks_left_wrist"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 15 end: 16 }
}
}
}
# Gets pose right wrist landmark.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks"
output_stream: "landmarks_right_wrist"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 16 end: 17 }
}
}
}
# Gets pose landmarks after wrists.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks"
output_stream: "landmarks_after_wrist"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 23 end: 33 }
}
}
}
# Gets left hand wrist landmark.
node {
calculator: "HandWristForPose"
input_stream: "HAND_LANDMARKS:left_hand_landmarks"
output_stream: "WRIST_LANDMARK:left_hand_wrist_landmark"
}
# Gets left hand wrist landmark or keep pose wrist landmark if hand was not
# predicted.
node {
calculator: "MergeCalculator"
input_stream: "left_hand_wrist_landmark"
input_stream: "landmarks_left_wrist"
output_stream: "merged_left_hand_wrist_landmark"
}
# Gets right hand wrist landmark.
node {
calculator: "HandWristForPose"
input_stream: "HAND_LANDMARKS:right_hand_landmarks"
output_stream: "WRIST_LANDMARK:right_hand_wrist_landmark"
}
# Gets right hand wrist landmark or keep pose wrist landmark if hand was not
# predicted.
node {
calculator: "MergeCalculator"
input_stream: "right_hand_wrist_landmark"
input_stream: "landmarks_right_wrist"
output_stream: "merged_right_hand_wrist_landmark"
}
# Combines pose landmarks all together.
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "landmarks_before_wrist"
input_stream: "merged_left_hand_wrist_landmark"
input_stream: "merged_right_hand_wrist_landmark"
input_stream: "landmarks_after_wrist"
output_stream: "landmarks_merged"
node_options: {
[type.googleapis.com/mediapipe.ConcatenateVectorCalculatorOptions] {
only_emit_if_all_present: true
}
}
}
# Takes left pose landmarks.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_merged"
output_stream: "landmarks_left_side"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 4 end: 5 }
ranges: { begin: 6 end: 7 }
ranges: { begin: 8 end: 9 }
ranges: { begin: 10 end: 11 }
ranges: { begin: 12 end: 13 }
ranges: { begin: 14 end: 15 }
combine_outputs: true
}
}
}
# Takes right pose landmarks.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_merged"
output_stream: "landmarks_right_side"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 1 end: 2 }
ranges: { begin: 3 end: 4 }
ranges: { begin: 5 end: 6 }
ranges: { begin: 7 end: 8 }
ranges: { begin: 9 end: 10 }
ranges: { begin: 11 end: 12 }
ranges: { begin: 13 end: 14 }
ranges: { begin: 15 end: 16 }
combine_outputs: true
}
}
}
# --------------------------------------------------------------------------- #
# ---------------------------------- Pose ----------------------------------- #
# --------------------------------------------------------------------------- #
# Converts pose connections to white lines.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks_merged"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 0
landmark_connections: 2
landmark_connections: 2
landmark_connections: 4
landmark_connections: 1
landmark_connections: 3
landmark_connections: 3
landmark_connections: 5
landmark_connections: 0
landmark_connections: 6
landmark_connections: 1
landmark_connections: 7
landmark_connections: 6
landmark_connections: 7
landmark_connections: 6
landmark_connections: 8
landmark_connections: 7
landmark_connections: 9
landmark_connections: 8
landmark_connections: 10
landmark_connections: 9
landmark_connections: 11
landmark_connections: 10
landmark_connections: 12
landmark_connections: 11
landmark_connections: 13
landmark_connections: 12
landmark_connections: 14
landmark_connections: 13
landmark_connections: 15
landmark_connections: 10
landmark_connections: 14
landmark_connections: 11
landmark_connections: 15
landmark_color { r: 255 g: 255 b: 255 }
connection_color { r: 255 g: 255 b: 255 }
thickness: 3.0
visualize_landmark_depth: false
utilize_visibility: true
visibility_threshold: 0.1
}
}
}
# Converts pose joints to big white circles.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks_merged"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:landmarks_background_joints_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 255 b: 255 }
connection_color { r: 255 g: 255 b: 255 }
thickness: 5.0
visualize_landmark_depth: false
utilize_visibility: true
visibility_threshold: 0.5
}
}
}
# Converts pose left side joints to orange circles (inside white ones).
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks_left_side"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:landmarks_left_joints_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 138 b: 0 }
connection_color { r: 255 g: 138 b: 0 }
thickness: 3.0
visualize_landmark_depth: false
utilize_visibility: true
visibility_threshold: 0.5
}
}
}
# Converts pose right side joints to cyan circles (inside white ones).
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks_right_side"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:landmarks_right_joints_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 0 g: 217 b: 231 }
connection_color { r: 0 g: 217 b: 231 }
thickness: 3.0
visualize_landmark_depth: false
utilize_visibility: true
visibility_threshold: 0.5
}
}
}
# --------------------------------------------------------------------------- #
# ------------------------------- Left hand --------------------------------- #
# --------------------------------------------------------------------------- #
# Converts left hand connections to white lines.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:left_hand_landmarks"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:left_hand_landmarks_connections_rd"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 0
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 5
landmark_connections: 9
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 9
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 14
landmark_connections: 15
landmark_connections: 15
landmark_connections: 16
landmark_connections: 13
landmark_connections: 17
landmark_connections: 0
landmark_connections: 17
landmark_connections: 17
landmark_connections: 18
landmark_connections: 18
landmark_connections: 19
landmark_connections: 19
landmark_connections: 20
landmark_color { r: 255 g: 255 b: 255 }
connection_color { r: 255 g: 255 b: 255 }
thickness: 4.0
visualize_landmark_depth: false
}
}
}
# Converts left hand color joints.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:left_hand_landmarks"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:left_hand_landmarks_joints_rd"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 138 b: 0 }
connection_color { r: 255 g: 138 b: 0 }
thickness: 3.0
visualize_landmark_depth: false
}
}
}
# --------------------------------------------------------------------------- #
# -------------------------------- Right hand ------------------------------- #
# --------------------------------------------------------------------------- #
# Converts right hand connections to white lines.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:right_hand_landmarks"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:right_hand_landmarks_connections_rd"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 0
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 5
landmark_connections: 9
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 9
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 14
landmark_connections: 15
landmark_connections: 15
landmark_connections: 16
landmark_connections: 13
landmark_connections: 17
landmark_connections: 0
landmark_connections: 17
landmark_connections: 17
landmark_connections: 18
landmark_connections: 18
landmark_connections: 19
landmark_connections: 19
landmark_connections: 20
landmark_color { r: 255 g: 255 b: 255 }
connection_color { r: 255 g: 255 b: 255 }
thickness: 4.0
visualize_landmark_depth: false
}
}
}
# Converts right hand color joints.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:right_hand_landmarks"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:right_hand_landmarks_joints_rd"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 0 g: 217 b: 231 }
connection_color { r: 0 g: 217 b: 231 }
thickness: 3.0
visualize_landmark_depth: false
}
}
}
# --------------------------------------------------------------------------- #
# ---------------------------------- Face ----------------------------------- #
# --------------------------------------------------------------------------- #
# Converts face connections to white lines.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:face_landmarks_connections_rd"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
# Lips.
landmark_connections: 61
landmark_connections: 146
landmark_connections: 146
landmark_connections: 91
landmark_connections: 91
landmark_connections: 181
landmark_connections: 181
landmark_connections: 84
landmark_connections: 84
landmark_connections: 17
landmark_connections: 17
landmark_connections: 314
landmark_connections: 314
landmark_connections: 405
landmark_connections: 405
landmark_connections: 321
landmark_connections: 321
landmark_connections: 375
landmark_connections: 375
landmark_connections: 291
landmark_connections: 61
landmark_connections: 185
landmark_connections: 185
landmark_connections: 40
landmark_connections: 40
landmark_connections: 39
landmark_connections: 39
landmark_connections: 37
landmark_connections: 37
landmark_connections: 0
landmark_connections: 0
landmark_connections: 267
landmark_connections: 267
landmark_connections: 269
landmark_connections: 269
landmark_connections: 270
landmark_connections: 270
landmark_connections: 409
landmark_connections: 409
landmark_connections: 291
landmark_connections: 78
landmark_connections: 95
landmark_connections: 95
landmark_connections: 88
landmark_connections: 88
landmark_connections: 178
landmark_connections: 178
landmark_connections: 87
landmark_connections: 87
landmark_connections: 14
landmark_connections: 14
landmark_connections: 317
landmark_connections: 317
landmark_connections: 402
landmark_connections: 402
landmark_connections: 318
landmark_connections: 318
landmark_connections: 324
landmark_connections: 324
landmark_connections: 308
landmark_connections: 78
landmark_connections: 191
landmark_connections: 191
landmark_connections: 80
landmark_connections: 80
landmark_connections: 81
landmark_connections: 81
landmark_connections: 82
landmark_connections: 82
landmark_connections: 13
landmark_connections: 13
landmark_connections: 312
landmark_connections: 312
landmark_connections: 311
landmark_connections: 311
landmark_connections: 310
landmark_connections: 310
landmark_connections: 415
landmark_connections: 415
landmark_connections: 308
# Left eye.
landmark_connections: 33
landmark_connections: 7
landmark_connections: 7
landmark_connections: 163
landmark_connections: 163
landmark_connections: 144
landmark_connections: 144
landmark_connections: 145
landmark_connections: 145
landmark_connections: 153
landmark_connections: 153
landmark_connections: 154
landmark_connections: 154
landmark_connections: 155
landmark_connections: 155
landmark_connections: 133
landmark_connections: 33
landmark_connections: 246
landmark_connections: 246
landmark_connections: 161
landmark_connections: 161
landmark_connections: 160
landmark_connections: 160
landmark_connections: 159
landmark_connections: 159
landmark_connections: 158
landmark_connections: 158
landmark_connections: 157
landmark_connections: 157
landmark_connections: 173
landmark_connections: 173
landmark_connections: 133
# Left eyebrow.
landmark_connections: 46
landmark_connections: 53
landmark_connections: 53
landmark_connections: 52
landmark_connections: 52
landmark_connections: 65
landmark_connections: 65
landmark_connections: 55
landmark_connections: 70
landmark_connections: 63
landmark_connections: 63
landmark_connections: 105
landmark_connections: 105
landmark_connections: 66
landmark_connections: 66
landmark_connections: 107
# Right eye.
landmark_connections: 263
landmark_connections: 249
landmark_connections: 249
landmark_connections: 390
landmark_connections: 390
landmark_connections: 373
landmark_connections: 373
landmark_connections: 374
landmark_connections: 374
landmark_connections: 380
landmark_connections: 380
landmark_connections: 381
landmark_connections: 381
landmark_connections: 382
landmark_connections: 382
landmark_connections: 362
landmark_connections: 263
landmark_connections: 466
landmark_connections: 466
landmark_connections: 388
landmark_connections: 388
landmark_connections: 387
landmark_connections: 387
landmark_connections: 386
landmark_connections: 386
landmark_connections: 385
landmark_connections: 385
landmark_connections: 384
landmark_connections: 384
landmark_connections: 398
landmark_connections: 398
landmark_connections: 362
# Right eyebrow.
landmark_connections: 276
landmark_connections: 283
landmark_connections: 283
landmark_connections: 282
landmark_connections: 282
landmark_connections: 295
landmark_connections: 295
landmark_connections: 285
landmark_connections: 300
landmark_connections: 293
landmark_connections: 293
landmark_connections: 334
landmark_connections: 334
landmark_connections: 296
landmark_connections: 296
landmark_connections: 336
# Face oval.
landmark_connections: 10
landmark_connections: 338
landmark_connections: 338
landmark_connections: 297
landmark_connections: 297
landmark_connections: 332
landmark_connections: 332
landmark_connections: 284
landmark_connections: 284
landmark_connections: 251
landmark_connections: 251
landmark_connections: 389
landmark_connections: 389
landmark_connections: 356
landmark_connections: 356
landmark_connections: 454
landmark_connections: 454
landmark_connections: 323
landmark_connections: 323
landmark_connections: 361
landmark_connections: 361
landmark_connections: 288
landmark_connections: 288
landmark_connections: 397
landmark_connections: 397
landmark_connections: 365
landmark_connections: 365
landmark_connections: 379
landmark_connections: 379
landmark_connections: 378
landmark_connections: 378
landmark_connections: 400
landmark_connections: 400
landmark_connections: 377
landmark_connections: 377
landmark_connections: 152
landmark_connections: 152
landmark_connections: 148
landmark_connections: 148
landmark_connections: 176
landmark_connections: 176
landmark_connections: 149
landmark_connections: 149
landmark_connections: 150
landmark_connections: 150
landmark_connections: 136
landmark_connections: 136
landmark_connections: 172
landmark_connections: 172
landmark_connections: 58
landmark_connections: 58
landmark_connections: 132
landmark_connections: 132
landmark_connections: 93
landmark_connections: 93
landmark_connections: 234
landmark_connections: 234
landmark_connections: 127
landmark_connections: 127
landmark_connections: 162
landmark_connections: 162
landmark_connections: 21
landmark_connections: 21
landmark_connections: 54
landmark_connections: 54
landmark_connections: 103
landmark_connections: 103
landmark_connections: 67
landmark_connections: 67
landmark_connections: 109
landmark_connections: 109
landmark_connections: 10
landmark_color { r: 255 g: 255 b: 255 }
connection_color { r: 255 g: 255 b: 255 }
thickness: 0.5
visualize_landmark_depth: false
}
}
}
# Converts face joints to cyan circles.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
input_stream: "RENDER_SCALE:render_scale"
output_stream: "RENDER_DATA:face_landmarks_joints_rd"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 0 g: 217 b: 231 }
connection_color { r: 0 g: 217 b: 231 }
thickness: 0.5
visualize_landmark_depth: false
}
}
}
# Concatenates all render data.
node {
calculator: "ConcatenateRenderDataVectorCalculator"
input_stream: "landmarks_render_data"
input_stream: "landmarks_background_joints_render_data"
input_stream: "landmarks_left_joints_render_data"
input_stream: "landmarks_right_joints_render_data"
# Left hand.
input_stream: "left_hand_landmarks_connections_rd"
input_stream: "left_hand_landmarks_joints_rd"
# Right hand.
input_stream: "right_hand_landmarks_connections_rd"
input_stream: "right_hand_landmarks_joints_rd"
# Face.
input_stream: "face_landmarks_connections_rd"
input_stream: "face_landmarks_joints_rd"
output_stream: "render_data_vector"
}
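A note on the Split*Calculator nodes above: each ranges { begin end } entry selects the half-open index range [begin, end) from the input list, and combine_outputs: true concatenates all selected ranges into one output. The short standalone C++ sketch below (an illustration of those semantics, not MediaPipe code) mimics the pose-landmark ranges used above.

#include <cstdio>
#include <utility>
#include <vector>

// Selects the half-open ranges [begin, end) and concatenates them, mimicking
// ranges {...} plus combine_outputs: true in SplitVectorCalculatorOptions.
std::vector<int> SplitAndCombine(const std::vector<int>& input,
                                 const std::vector<std::pair<int, int>>& ranges) {
  std::vector<int> output;
  for (const auto& range : ranges) {
    for (int i = range.first; i < range.second; ++i) output.push_back(input[i]);
  }
  return output;
}

int main() {
  // 33 indices standing in for the 33 pose landmarks in a NormalizedLandmarkList.
  std::vector<int> pose(33);
  for (int i = 0; i < 33; ++i) pose[i] = i;
  // Mirrors the graph above: shoulders/elbows 11..14 and hips/legs 23..32.
  const auto before_wrist = SplitAndCombine(pose, {{11, 15}});
  const auto after_wrist = SplitAndCombine(pose, {{23, 33}});
  std::printf("before_wrist: %zu landmarks (11..14), after_wrist: %zu (23..32)\n",
              before_wrist.size(), after_wrist.size());
  return 0;
}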

View File

@@ -0,0 +1,39 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "instant_motion_tracking_deps",
deps = [
"//mediapipe/graphs/instant_motion_tracking/calculators:matrices_manager_calculator",
"//mediapipe/graphs/instant_motion_tracking/calculators:sticker_manager_calculator",
"//mediapipe/graphs/instant_motion_tracking/subgraphs:region_tracking",
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
],
)
mediapipe_binary_graph(
name = "instant_motion_tracking_binary_graph",
graph = "instant_motion_tracking.pbtxt",
output_name = "instant_motion_tracking.binarypb",
deps = [":instant_motion_tracking_deps"],
)

View File

@@ -0,0 +1,84 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
proto_library(
name = "sticker_buffer_proto",
srcs = [
"sticker_buffer.proto",
],
)
mediapipe_cc_proto_library(
name = "sticker_buffer_cc_proto",
srcs = [
"sticker_buffer.proto",
],
visibility = ["//visibility:public"],
deps = [
":sticker_buffer_proto",
],
)
cc_library(
name = "sticker_manager_calculator",
srcs = ["sticker_manager_calculator.cc"],
hdrs = ["transformations.h"],
deps = [
":sticker_buffer_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
cc_library(
name = "matrices_manager_calculator",
srcs = ["matrices_manager_calculator.cc"],
hdrs = ["transformations.h"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/graphs/object_detection_3d/calculators:model_matrix_cc_proto",
"//mediapipe/modules/objectron/calculators:box",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@eigen_archive//:eigen3",
],
alwayslink = 1,
)
cc_library(
name = "tracked_anchor_manager_calculator",
srcs = ["tracked_anchor_manager_calculator.cc"],
hdrs = ["transformations.h"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util/tracking:box_tracker_cc_proto",
],
alwayslink = 1,
)

View File

@@ -0,0 +1,393 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <memory>
#include "Eigen/Core"
#include "Eigen/Dense"
#include "Eigen/Geometry"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/modules/objectron/calculators/box.h"
namespace mediapipe {
namespace {
using Matrix4fCM = Eigen::Matrix<float, 4, 4, Eigen::ColMajor>;
using Vector3f = Eigen::Vector3f;
using Matrix3f = Eigen::Matrix3f;
using DiagonalMatrix3f = Eigen::DiagonalMatrix<float, 3>;
constexpr char kAnchorsTag[] = "ANCHORS";
constexpr char kIMUMatrixTag[] = "IMU_ROTATION";
constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
constexpr char kUserScalingsTag[] = "USER_SCALINGS";
constexpr char kRendersTag[] = "RENDER_DATA";
constexpr char kGifAspectRatioTag[] = "GIF_ASPECT_RATIO";
constexpr char kFOVSidePacketTag[] = "FOV";
constexpr char kAspectRatioSidePacketTag[] = "ASPECT_RATIO";
// initial Z value (-10 is center point in visual range for OpenGL render)
constexpr float kInitialZ = -10.0f;
} // namespace
// Intermediary for rotation and translation data to model matrix usable by
// gl_animation_overlay_calculator. For information on the construction of
// OpenGL objects and transformations (including a breakdown of model matrices),
// please visit: https://open.gl/transformations
//
// Input Side Packets:
// FOV - Vertical field of view for device [REQUIRED - Defines perspective
//   matrix]
// ASPECT_RATIO - Aspect ratio of device [REQUIRED - Defines perspective
//   matrix]
//
// Input streams:
// ANCHORS - Anchor data with x,y,z coordinates (x,y are in [0.0-1.0] range for
// position on the device screen, while z is the scaling factor that changes
// in proportion to the distance from the tracked region) [REQUIRED]
// IMU_ROTATION - float[9] of row-major device rotation matrix [REQUIRED]
// USER_ROTATIONS - UserRotations with corresponding radians of rotation
// [REQUIRED]
// USER_SCALINGS - UserScalings with corresponding scale factor [REQUIRED]
// GIF_ASPECT_RATIO - Aspect ratio of GIF image used to dynamically scale
// GIF asset defined as width / height [OPTIONAL]
// Output:
// MATRICES - TimedModelMatrixProtoList of each object type to render
// [REQUIRED]
//
// Example config:
// node{
// calculator: "MatricesManagerCalculator"
// input_stream: "ANCHORS:tracked_scaled_anchor_data"
// input_stream: "IMU_ROTATION:imu_rotation_matrix"
// input_stream: "USER_ROTATIONS:user_rotation_data"
// input_stream: "USER_SCALINGS:user_scaling_data"
// input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
// output_stream: "MATRICES:0:first_render_matrices"
// output_stream: "MATRICES:1:second_render_matrices" [unbounded input size]
// input_side_packet: "FOV:vertical_fov_radians"
// input_side_packet: "ASPECT_RATIO:aspect_ratio"
// }
class MatricesManagerCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
// Device properties that will be preset by side packets
float vertical_fov_radians_ = 0.0f;
float aspect_ratio_ = 0.0f;
float gif_aspect_ratio_ = 1.0f;
const Matrix3f GenerateUserRotationMatrix(const float rotation_radians) const;
const Matrix4fCM GenerateEigenModelMatrix(
const Vector3f& translation_vector,
const Matrix3f& rotation_submatrix) const;
const Vector3f GenerateAnchorVector(const Anchor& tracked_anchor) const;
DiagonalMatrix3f GetDefaultRenderScaleDiagonal(
const int render_id, const float user_scale_factor,
const float gif_aspect_ratio) const;
// Returns a user scaling increment associated with the sticker_id
// TODO: Adjust lookup function if total number of stickers is uncapped to
// improve performance
const float GetUserScaler(const std::vector<UserScaling>& scalings,
const int sticker_id) const {
for (const UserScaling& user_scaling : scalings) {
if (user_scaling.sticker_id == sticker_id) {
return user_scaling.scale_factor;
}
}
LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
<< ", returning 1.0f scaling";
return 1.0f;
}
// Returns a user rotation in radians associated with the sticker_id
const float GetUserRotation(const std::vector<UserRotation>& rotations,
const int sticker_id) {
for (const UserRotation& rotation : rotations) {
if (rotation.sticker_id == sticker_id) {
return rotation.rotation_radians;
}
}
LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
<< ", returning 0.0f rotation";
return 0.0f;
}
};
REGISTER_CALCULATOR(MatricesManagerCalculator);
absl::Status MatricesManagerCalculator::GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
cc->Inputs().HasTag(kIMUMatrixTag) &&
cc->Inputs().HasTag(kUserRotationsTag) &&
cc->Inputs().HasTag(kUserScalingsTag) &&
cc->InputSidePackets().HasTag(kFOVSidePacketTag) &&
cc->InputSidePackets().HasTag(kAspectRatioSidePacketTag));
cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
cc->Inputs().Tag(kIMUMatrixTag).Set<float[]>();
cc->Inputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
cc->Inputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
cc->Inputs().Tag(kRendersTag).Set<std::vector<int>>();
if (cc->Inputs().HasTag(kGifAspectRatioTag)) {
cc->Inputs().Tag(kGifAspectRatioTag).Set<float>();
}
for (CollectionItemId id = cc->Outputs().BeginId("MATRICES");
id < cc->Outputs().EndId("MATRICES"); ++id) {
cc->Outputs().Get(id).Set<mediapipe::TimedModelMatrixProtoList>();
}
cc->InputSidePackets().Tag(kFOVSidePacketTag).Set<float>();
cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Set<float>();
return absl::OkStatus();
}
absl::Status MatricesManagerCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
// Set device properties from side packets
vertical_fov_radians_ =
cc->InputSidePackets().Tag(kFOVSidePacketTag).Get<float>();
aspect_ratio_ =
cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Get<float>();
return absl::OkStatus();
}
absl::Status MatricesManagerCalculator::Process(CalculatorContext* cc) {
// Define each object's model matrices
auto asset_matrices_gif =
std::make_unique<mediapipe::TimedModelMatrixProtoList>();
auto asset_matrices_1 =
std::make_unique<mediapipe::TimedModelMatrixProtoList>();
// Clear all model matrices
asset_matrices_gif->clear_model_matrix();
asset_matrices_1->clear_model_matrix();
const std::vector<UserRotation> user_rotation_data =
cc->Inputs().Tag(kUserRotationsTag).Get<std::vector<UserRotation>>();
const std::vector<UserScaling> user_scaling_data =
cc->Inputs().Tag(kUserScalingsTag).Get<std::vector<UserScaling>>();
const std::vector<int> render_data =
cc->Inputs().Tag(kRendersTag).Get<std::vector<int>>();
const std::vector<Anchor> anchor_data =
cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
if (cc->Inputs().HasTag(kGifAspectRatioTag) &&
!cc->Inputs().Tag(kGifAspectRatioTag).IsEmpty()) {
gif_aspect_ratio_ = cc->Inputs().Tag(kGifAspectRatioTag).Get<float>();
}
// Device IMU rotation submatrix
const auto imu_matrix = cc->Inputs().Tag(kIMUMatrixTag).Get<float[]>();
Matrix3f imu_rotation_submatrix;
int idx = 0;
for (int x = 0; x < 3; ++x) {
for (int y = 0; y < 3; ++y) {
// Input matrix is row-major matrix, it must be reformatted to
// column-major via transpose procedure
imu_rotation_submatrix(y, x) = imu_matrix[idx++];
}
}
int render_idx = 0;
for (const Anchor& anchor : anchor_data) {
const int id = anchor.sticker_id;
mediapipe::TimedModelMatrixProto* model_matrix;
// Add model matrix to matrices list for defined object render ID
if (render_data[render_idx] == 0) { // GIF
model_matrix = asset_matrices_gif->add_model_matrix();
} else { // Asset 3D
if (render_data[render_idx] != 1) {
LOG(ERROR) << "render id: " << render_data[render_idx]
<< " is not supported. Fall back to using render_id = 1.";
}
model_matrix = asset_matrices_1->add_model_matrix();
}
model_matrix->set_id(id);
// The user transformation data associated with this sticker must be defined
const float user_rotation_radians = GetUserRotation(user_rotation_data, id);
const float user_scale_factor = GetUserScaler(user_scaling_data, id);
// A vector representative of a user's sticker rotation transformation can
// be created
const Matrix3f user_rotation_submatrix =
GenerateUserRotationMatrix(user_rotation_radians);
// Next, the diagonal representative of the combined scaling data
const DiagonalMatrix3f scaling_diagonal = GetDefaultRenderScaleDiagonal(
render_data[render_idx], user_scale_factor, gif_aspect_ratio_);
// Increment to next render id from vector
render_idx++;
// The user transformation data can be concatenated into a final rotation
// submatrix with the device IMU rotational data
const Matrix3f user_transformed_rotation_submatrix =
imu_rotation_submatrix * user_rotation_submatrix * scaling_diagonal;
// A vector representative of the translation of the object in OpenGL
// coordinate space must be generated
const Vector3f translation_vector = GenerateAnchorVector(anchor);
// Concatenate all model matrix data
const Matrix4fCM final_model_matrix = GenerateEigenModelMatrix(
translation_vector, user_transformed_rotation_submatrix);
// The generated model matrix must be mapped to TimedModelMatrixProto
// (col-wise)
for (int x = 0; x < final_model_matrix.rows(); ++x) {
for (int y = 0; y < final_model_matrix.cols(); ++y) {
model_matrix->add_matrix_entries(final_model_matrix(x, y));
}
}
}
// Output all individual render matrices
// TODO: Perform depth ordering with gl_animation_overlay_calculator to render
// objects in order by depth to allow occlusion.
cc->Outputs()
.Get(cc->Outputs().GetId("MATRICES", 0))
.Add(asset_matrices_gif.release(), cc->InputTimestamp());
cc->Outputs()
.Get(cc->Outputs().GetId("MATRICES", 1))
.Add(asset_matrices_1.release(), cc->InputTimestamp());
return absl::OkStatus();
}
// Using a specified rotation value in radians, generate a rotation matrix for
// use with base rotation submatrix
const Matrix3f MatricesManagerCalculator::GenerateUserRotationMatrix(
const float rotation_radians) const {
Eigen::Matrix3f user_rotation_submatrix;
user_rotation_submatrix =
// The rotation in radians must be inverted to rotate the object
// with the direction of finger movement from the user (system dependent)
Eigen::AngleAxisf(-rotation_radians, Eigen::Vector3f::UnitY()) *
Eigen::AngleAxisf(0.0f, Eigen::Vector3f::UnitZ()) *
// Model orientations all assume z-axis is up, but we need y-axis upwards,
// therefore, a +(M_PI * 0.5f) transformation must be applied
// TODO: Bring default rotations, translations, and scalings into
// independent sticker configuration
Eigen::AngleAxisf(M_PI * 0.5f, Eigen::Vector3f::UnitX());
// Matrix must be transposed due to the method of submatrix generation in
// Eigen
return user_rotation_submatrix.transpose();
}
// TODO: Investigate possible differences in warping of tracking speed across
// the screen.
// Using the sticker anchor data, a translation vector can be generated in
// OpenGL coordinate space.
const Vector3f MatricesManagerCalculator::GenerateAnchorVector(
const Anchor& tracked_anchor) const {
// Using an initial z-value in OpenGL space, generate a new base z-axis value
// to mimic scaling by distance.
const float z = kInitialZ * tracked_anchor.z;
// Using triangle geometry, the minimum for a y-coordinate that will appear in
// the view field for the given z value above can be found.
const float y_half_range = z * (tan(vertical_fov_radians_ * 0.5f));
// The aspect ratio of the device and the y half-range calculated above can be
// used to find the x half-range that will appear in the view field of the
// device screen.
const float x_half_range = y_half_range * aspect_ratio_;
// Given the minimum bounds of the screen in OpenGL space, the tracked anchor
// coordinates can be converted to OpenGL coordinate space.
//
// (i.e. X and Y are converted from [0.0, 1.0] space to [x_half_range,
// -x_half_range] and [y_half_range, -y_half_range] space respectively)
const float x = (-2.0f * tracked_anchor.x * x_half_range) + x_half_range;
const float y = (-2.0f * tracked_anchor.y * y_half_range) + y_half_range;
// A translation transformation vector can be generated via Eigen
const Vector3f t_vector(x, y, z);
return t_vector;
}
// Generates a model matrix via Eigen with appropriate transformations
const Matrix4fCM MatricesManagerCalculator::GenerateEigenModelMatrix(
const Vector3f& translation_vector,
const Matrix3f& rotation_submatrix) const {
// Define basic empty model matrix
Matrix4fCM mvp_matrix;
// Set the translation vector
mvp_matrix.topRightCorner<3, 1>() = translation_vector;
// Set the rotation submatrix
mvp_matrix.topLeftCorner<3, 3>() = rotation_submatrix;
// Set trailing 1.0 required by OpenGL to define coordinate space
mvp_matrix(3, 3) = 1.0f;
return mvp_matrix;
}
// This returns a scaling matrix to alter the projection matrix for
// the specified render id in order to ensure all objects render at a similar
// size in the view screen upon initial placement
DiagonalMatrix3f MatricesManagerCalculator::GetDefaultRenderScaleDiagonal(
const int render_id, const float user_scale_factor,
const float gif_aspect_ratio) const {
float scale_preset = 1.0f;
float x_scalar = 1.0f;
float y_scalar = 1.0f;
switch (render_id) {
case 0: { // GIF
// 160 is the scaling preset to make the GIF asset appear relatively
// similar in size to all other assets
scale_preset = 160.0f;
if (gif_aspect_ratio >= 1.0f) {
// GIF is wider horizontally (scale on x-axis)
x_scalar = gif_aspect_ratio;
} else {
// GIF is wider vertically (scale on y-axis)
y_scalar = 1.0f / gif_aspect_ratio;
}
break;
}
case 1: { // 3D asset
// 5 is the scaling preset to make the 3D asset appear relatively
// similar in size to all other assets
scale_preset = 5.0f;
break;
}
default: {
LOG(INFO) << "Unsupported render_id: " << render_id
<< ", returning default render_scale";
break;
}
}
DiagonalMatrix3f scaling(scale_preset * user_scale_factor * x_scalar,
scale_preset * user_scale_factor * y_scalar,
scale_preset * user_scale_factor);
return scaling;
}
} // namespace mediapipe
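To make the GenerateAnchorVector() arithmetic above concrete, the standalone sketch below replays it with sample numbers. The 60-degree vertical FOV and 16:9 aspect ratio are assumptions chosen for illustration; everything else mirrors the code above.

#include <cmath>
#include <cstdio>

int main() {
  const float kInitialZ = -10.0f;  // same constant as in the calculator above
  const float kPi = 3.14159265f;
  // Assumed device properties for illustration only.
  const float vertical_fov_radians = 60.0f * kPi / 180.0f;
  const float aspect_ratio = 16.0f / 9.0f;

  // Anchor at the screen center with the default scaling factor.
  const float anchor_x = 0.5f, anchor_y = 0.5f, anchor_z = 1.0f;

  const float z = kInitialZ * anchor_z;                               // -10
  const float y_half_range = z * std::tan(vertical_fov_radians * 0.5f);
  const float x_half_range = y_half_range * aspect_ratio;
  const float x = (-2.0f * anchor_x * x_half_range) + x_half_range;   // 0
  const float y = (-2.0f * anchor_y * y_half_range) + y_half_range;   // 0

  // anchor_x = 0 maps to +x_half_range and anchor_x = 1 maps to -x_half_range,
  // i.e. the [0.0, 1.0] screen range maps onto [x_half_range, -x_half_range].
  std::printf("translation vector = (%.3f, %.3f, %.3f)\n", x, y, z);
  return 0;
}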

View File

@@ -0,0 +1,33 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
option java_package = "com.google.mediapipe.graphs.instantmotiontracking";
option java_outer_classname = "StickerBufferProto";
message Sticker {
optional int32 id = 1;
optional float x = 2;
optional float y = 3;
optional float rotation = 4;
optional float scale = 5;
optional int32 render_id = 6;
}
message StickerRoll {
repeated Sticker sticker = 1;
}
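The StickerManagerCalculator in the next file parses a serialized StickerRoll from its PROTO input stream. As a hedged usage sketch, the C++ below builds and serializes one sticker using the generated proto API for the message definition above; the .pb.h include path is the one already used by sticker_manager_calculator.cc, and the sample field values are illustrative.

#include <string>

#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"

std::string BuildStickerProtoString() {
  mediapipe::StickerRoll roll;
  mediapipe::Sticker* sticker = roll.add_sticker();
  sticker->set_id(1);
  sticker->set_x(0.5f);         // normalized screen position
  sticker->set_y(0.5f);
  sticker->set_rotation(0.0f);  // user rotation in radians
  sticker->set_scale(1.0f);     // user scale increment
  sticker->set_render_id(1);    // 1 = 3D asset, 0 = GIF (see MatricesManager)
  std::string bytes;
  roll.SerializeToString(&bytes);
  return bytes;  // feed as the PROTO packet of StickerManagerCalculator
}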

View File

@@ -0,0 +1,150 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
namespace mediapipe {
constexpr char kProtoDataString[] = "PROTO";
constexpr char kAnchorsTag[] = "ANCHORS";
constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
constexpr char kUserScalingsTag[] = "USER_SCALINGS";
constexpr char kRenderDescriptorsTag[] = "RENDER_DATA";
// This calculator takes in the sticker protobuffer data and parses each
// individual sticker object into anchors, user rotations and scalings, in
// addition to basic render data represented in integer form.
//
// Input:
// PROTO - String of sticker data in appropriate protobuf format [REQUIRED]
// Output:
// ANCHORS - Anchors with initial normalized X,Y coordinates [REQUIRED]
// USER_ROTATIONS - UserRotations with radians of rotation from user [REQUIRED]
// USER_SCALINGS - UserScalings with increment of scaling from user [REQUIRED]
// RENDER_DATA - Descriptors of which objects/animations to render for stickers
// [REQUIRED]
//
// Example config:
// node {
// calculator: "StickerManagerCalculator"
// input_stream: "PROTO:sticker_proto_string"
// output_stream: "ANCHORS:initial_anchor_data"
// output_stream: "USER_ROTATIONS:user_rotation_data"
// output_stream: "USER_SCALINGS:user_scaling_data"
// output_stream: "RENDER_DATA:sticker_render_data"
// }
class StickerManagerCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kProtoDataString));
RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
cc->Outputs().HasTag(kUserRotationsTag) &&
cc->Outputs().HasTag(kUserScalingsTag) &&
cc->Outputs().HasTag(kRenderDescriptorsTag));
cc->Inputs().Tag(kProtoDataString).Set<std::string>();
cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
cc->Outputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
cc->Outputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
cc->Outputs().Tag(kRenderDescriptorsTag).Set<std::vector<int>>();
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
std::string sticker_proto_string =
cc->Inputs().Tag(kProtoDataString).Get<std::string>();
std::vector<Anchor> initial_anchor_data;
std::vector<UserRotation> user_rotation_data;
std::vector<UserScaling> user_scaling_data;
std::vector<int> render_data;
::mediapipe::StickerRoll sticker_roll;
bool parse_success = sticker_roll.ParseFromString(sticker_proto_string);
// Ensure parsing was a success
RET_CHECK(parse_success) << "Error parsing sticker protobuf data";
for (int i = 0; i < sticker_roll.sticker().size(); ++i) {
// Declare empty structures for sticker data
Anchor initial_anchor;
UserRotation user_rotation;
UserScaling user_scaling;
// Get individual Sticker object as defined by Protobuffer
::mediapipe::Sticker sticker = sticker_roll.sticker(i);
// Set individual data structure ids to associate with this sticker
initial_anchor.sticker_id = sticker.id();
user_rotation.sticker_id = sticker.id();
user_scaling.sticker_id = sticker.id();
initial_anchor.x = sticker.x();
initial_anchor.y = sticker.y();
initial_anchor.z = 1.0f; // default to 1.0 in normalized 3d space
user_rotation.rotation_radians = sticker.rotation();
user_scaling.scale_factor = sticker.scale();
const int render_id = sticker.render_id();
// Set all vector data with sticker attributes
initial_anchor_data.emplace_back(initial_anchor);
user_rotation_data.emplace_back(user_rotation);
user_scaling_data.emplace_back(user_scaling);
render_data.emplace_back(render_id);
}
if (cc->Outputs().HasTag(kAnchorsTag)) {
cc->Outputs()
.Tag(kAnchorsTag)
.AddPacket(MakePacket<std::vector<Anchor>>(initial_anchor_data)
.At(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kUserRotationsTag)) {
cc->Outputs()
.Tag(kUserRotationsTag)
.AddPacket(MakePacket<std::vector<UserRotation>>(user_rotation_data)
.At(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kUserScalingsTag)) {
cc->Outputs()
.Tag(kUserScalingsTag)
.AddPacket(MakePacket<std::vector<UserScaling>>(user_scaling_data)
.At(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kRenderDescriptorsTag)) {
cc->Outputs()
.Tag(kRenderDescriptorsTag)
.AddPacket(MakePacket<std::vector<int>>(render_data)
.At(cc->InputTimestamp()));
}
return absl::OkStatus();
}
absl::Status Close(CalculatorContext* cc) override {
return absl::OkStatus();
}
};
REGISTER_CALCULATOR(StickerManagerCalculator);
} // namespace mediapipe

View File

@@ -0,0 +1,210 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
constexpr char kSentinelTag[] = "SENTINEL";
constexpr char kAnchorsTag[] = "ANCHORS";
constexpr char kBoxesInputTag[] = "BOXES";
constexpr char kBoxesOutputTag[] = "START_POS";
constexpr char kCancelTag[] = "CANCEL_ID";
// TODO: Find optimal Height/Width (0.1-0.3)
constexpr float kBoxEdgeSize =
0.2f; // Used to establish tracking box dimensions
constexpr float kUsToMs =
1000.0f; // Used to convert from microseconds to millis
// This calculator manages the regions being tracked for each individual sticker
// and adjusts the regions being tracked if a change is detected in a sticker's
// initial anchor placement. Regions being tracked that have no associated
// sticker will be automatically removed upon the next iteration of the graph to
// optimize performance and remove all sticker artifacts
//
// Input:
// SENTINEL - ID of sticker which has an anchor that must be reset (-1 when no
// anchor must be reset) [REQUIRED]
// ANCHORS - Initial anchor data (tracks changes and where to re/position)
//   [REQUIRED]
// BOXES - Boxes being tracked, used in a cycle to update anchor positions
//   [OPTIONAL - provided by subgraph]
// Output:
// START_POS - Positions of boxes being tracked (can be overwritten with ID)
//   [REQUIRED]
// CANCEL_ID - Single integer ID of tracking box to remove from tracker
//   subgraph [OPTIONAL]
// ANCHORS - Updated set of anchors with tracked and normalized X,Y,Z
//   [REQUIRED]
//
// Example config:
// node {
// calculator: "TrackedAnchorManagerCalculator"
// input_stream: "SENTINEL:sticker_sentinel"
// input_stream: "ANCHORS:initial_anchor_data"
// input_stream: "BOXES:boxes"
// input_stream_info: {
// tag_index: 'BOXES'
// back_edge: true
// }
// output_stream: "START_POS:start_pos"
// output_stream: "CANCEL_ID:cancel_object_id"
// output_stream: "ANCHORS:tracked_scaled_anchor_data"
// }
class TrackedAnchorManagerCalculator : public CalculatorBase {
private:
// Previous graph iteration anchor data
std::vector<Anchor> previous_anchor_data_;
public:
static absl::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
cc->Inputs().HasTag(kSentinelTag));
RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
cc->Outputs().HasTag(kBoxesOutputTag));
cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
cc->Inputs().Tag(kSentinelTag).Set<int>();
if (cc->Inputs().HasTag(kBoxesInputTag)) {
cc->Inputs().Tag(kBoxesInputTag).Set<mediapipe::TimedBoxProtoList>();
}
cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
cc->Outputs().Tag(kBoxesOutputTag).Set<mediapipe::TimedBoxProtoList>();
if (cc->Outputs().HasTag(kCancelTag)) {
cc->Outputs().Tag(kCancelTag).Set<int>();
}
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override { return absl::OkStatus(); }
absl::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(TrackedAnchorManagerCalculator);
absl::Status TrackedAnchorManagerCalculator::Process(CalculatorContext* cc) {
mediapipe::Timestamp timestamp = cc->InputTimestamp();
const int sticker_sentinel = cc->Inputs().Tag(kSentinelTag).Get<int>();
std::vector<Anchor> current_anchor_data =
cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
auto pos_boxes = absl::make_unique<mediapipe::TimedBoxProtoList>();
std::vector<Anchor> tracked_scaled_anchor_data;
// Delete any boxes being tracked without an associated anchor
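// Each CANCEL_ID packet is emitted at its own incremented timestamp
// (timestamp++) so that several cancellations issued during a single Process()
// call still carry strictly increasing timestamps on the output stream.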
for (const mediapipe::TimedBoxProto& box :
cc->Inputs()
.Tag(kBoxesInputTag)
.Get<mediapipe::TimedBoxProtoList>()
.box()) {
bool anchor_exists = false;
for (Anchor anchor : current_anchor_data) {
if (box.id() == anchor.sticker_id) {
anchor_exists = true;
break;
}
}
if (!anchor_exists) {
cc->Outputs()
.Tag(kCancelTag)
.AddPacket(MakePacket<int>(box.id()).At(timestamp++));
}
}
// Perform tracking or updating for each anchor position
for (const Anchor& anchor : current_anchor_data) {
Anchor output_anchor = anchor;
// Check if anchor position is being reset by user in this graph iteration
if (sticker_sentinel == anchor.sticker_id) {
// Delete associated tracking box
// TODO: BoxTrackingSubgraph should accept vector to avoid breaking
// timestamp rules
cc->Outputs()
.Tag(kCancelTag)
.AddPacket(MakePacket<int>(anchor.sticker_id).At(timestamp++));
// Add a tracking box
mediapipe::TimedBoxProto* box = pos_boxes->add_box();
box->set_left(anchor.x - kBoxEdgeSize * 0.5f);
box->set_right(anchor.x + kBoxEdgeSize * 0.5f);
box->set_top(anchor.y - kBoxEdgeSize * 0.5f);
box->set_bottom(anchor.y + kBoxEdgeSize * 0.5f);
box->set_id(anchor.sticker_id);
box->set_time_msec((timestamp++).Microseconds() / kUsToMs);
// Default value for normalized z (scale factor)
output_anchor.z = 1.0f;
} else {
// Anchor position was not reset by user
// Attempt to update anchor position from tracking subgraph
// (TimedBoxProto)
bool updated_from_tracker = false;
const mediapipe::TimedBoxProtoList box_list =
cc->Inputs().Tag(kBoxesInputTag).Get<mediapipe::TimedBoxProtoList>();
for (const auto& box : box_list.box()) {
if (box.id() == anchor.sticker_id) {
// Get center x normalized coordinate [0.0-1.0]
output_anchor.x = (box.left() + box.right()) * 0.5f;
// Get center y normalized coordinate [0.0-1.0]
output_anchor.y = (box.top() + box.bottom()) * 0.5f;
// Get center z coordinate [z starts at normalized 1.0 and scales
// inversely with box-width]
// TODO: Look into issues with uniform scaling on x-axis and y-axis
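// For example, a tracked box twice as wide as kBoxEdgeSize gives z = 0.5,
// while one half as wide gives z = 2.0.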
output_anchor.z = kBoxEdgeSize / (box.right() - box.left());
updated_from_tracker = true;
break;
}
}
// If anchor position was not updated from tracker, create new tracking
// box at last recorded anchor coordinates. This will allow all current
// stickers to be tracked at approximately last location even if
// re-acquisitioning in the BoxTrackingSubgraph encounters errors
if (!updated_from_tracker) {
for (const Anchor& prev_anchor : previous_anchor_data_) {
if (anchor.sticker_id == prev_anchor.sticker_id) {
mediapipe::TimedBoxProto* box = pos_boxes->add_box();
box->set_left(prev_anchor.x - kBoxEdgeSize * 0.5f);
box->set_right(prev_anchor.x + kBoxEdgeSize * 0.5f);
box->set_top(prev_anchor.y - kBoxEdgeSize * 0.5f);
box->set_bottom(prev_anchor.y + kBoxEdgeSize * 0.5f);
box->set_id(prev_anchor.sticker_id);
box->set_time_msec(cc->InputTimestamp().Microseconds() / kUsToMs);
output_anchor = prev_anchor;
// Default value for normalized z (scale factor)
output_anchor.z = 1.0f;
break;
}
}
}
}
tracked_scaled_anchor_data.emplace_back(output_anchor);
}
// Set anchor data for next iteration
previous_anchor_data_ = tracked_scaled_anchor_data;
cc->Outputs()
.Tag(kAnchorsTag)
.AddPacket(MakePacket<std::vector<Anchor>>(tracked_scaled_anchor_data)
.At(cc->InputTimestamp()));
cc->Outputs()
.Tag(kBoxesOutputTag)
.Add(pos_boxes.release(), cc->InputTimestamp());
return absl::OkStatus();
}
} // namespace mediapipe

View File

@@ -0,0 +1,42 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_
#define MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_
namespace mediapipe {
// Radians by which to rotate the object (Provided by UI input)
struct UserRotation {
float rotation_radians;
int sticker_id;
};
// Scaling factor provided by the UI application end
struct UserScaling {
float scale_factor;
int sticker_id;
};
// The normalized anchor coordinates of a sticker
struct Anchor {
float x; // [0.0-1.0]
float y; // [0.0-1.0]
float z; // Centered around 1.0 [current_scale = z * initial_scale]
int sticker_id;
};
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_

View File

@@ -0,0 +1,80 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# MediaPipe graph that performs region tracking and 3d object (AR sticker) rendering.
# Images in/out of graph with sticker data and IMU information from device
input_stream: "input_video"
input_stream: "sticker_sentinel"
input_stream: "sticker_proto_string"
input_stream: "imu_rotation_matrix"
input_stream: "gif_texture"
input_stream: "gif_aspect_ratio"
output_stream: "output_video"
# Converts sticker data into user data (rotations/scalings), render data, and
# initial anchors.
node {
calculator: "StickerManagerCalculator"
input_stream: "PROTO:sticker_proto_string"
output_stream: "ANCHORS:initial_anchor_data"
output_stream: "USER_ROTATIONS:user_rotation_data"
output_stream: "USER_SCALINGS:user_scaling_data"
output_stream: "RENDER_DATA:sticker_render_data"
}
# Uses box tracking in order to create 'anchors' for associated 3d stickers.
node {
calculator: "RegionTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "SENTINEL:sticker_sentinel"
input_stream: "ANCHORS:initial_anchor_data"
output_stream: "ANCHORS:tracked_anchor_data"
}
# Concatenates all transformations to generate model matrices for the OpenGL
# animation overlay calculator.
node {
calculator: "MatricesManagerCalculator"
input_stream: "ANCHORS:tracked_anchor_data"
input_stream: "IMU_ROTATION:imu_rotation_matrix"
input_stream: "USER_ROTATIONS:user_rotation_data"
input_stream: "USER_SCALINGS:user_scaling_data"
input_stream: "RENDER_DATA:sticker_render_data"
input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
output_stream: "MATRICES:0:gif_matrices"
output_stream: "MATRICES:1:asset_3d_matrices"
input_side_packet: "FOV:vertical_fov_radians"
input_side_packet: "ASPECT_RATIO:aspect_ratio"
}
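# The two GlAnimationOverlayCalculator passes below are chained: the first
# renders onto the camera frame, and the second renders on top of the
# intermediate asset_gif_rendered image to produce output_video.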
# Renders the GIF stickers and overlays them on top of the input image.
node {
calculator: "GlAnimationOverlayCalculator"
input_stream: "VIDEO:input_video"
input_stream: "MODEL_MATRICES:gif_matrices"
input_stream: "TEXTURE:gif_texture"
input_side_packet: "ANIMATION_ASSET:gif_asset_name"
output_stream: "asset_gif_rendered"
}
# Renders the final 3d stickers and overlays them on top of the input image.
node {
calculator: "GlAnimationOverlayCalculator"
input_stream: "VIDEO:asset_gif_rendered"
input_stream: "MODEL_MATRICES:asset_3d_matrices"
input_side_packet: "TEXTURE:texture_3d"
input_side_packet: "ANIMATION_ASSET:asset_3d"
output_stream: "output_video"
}

View File

@@ -0,0 +1,32 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "region_tracking",
graph = "region_tracking.pbtxt",
register_as = "RegionTrackingSubgraph",
deps = [
"//mediapipe/graphs/instant_motion_tracking/calculators:tracked_anchor_manager_calculator",
"//mediapipe/graphs/tracking/subgraphs:box_tracking_gpu",
],
)

View File

@@ -0,0 +1,47 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# MediaPipe graph that performs region tracking on initial anchor positions
# provided by the application
# Images in/out of graph with tracked and scaled normalized anchor data
type: "RegionTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "SENTINEL:sticker_sentinel"
input_stream: "ANCHORS:initial_anchor_data"
output_stream: "ANCHORS:tracked_scaled_anchor_data"
# Manages the anchors and tracking if user changes/adds/deletes anchors
node {
calculator: "TrackedAnchorManagerCalculator"
input_stream: "SENTINEL:sticker_sentinel"
input_stream: "ANCHORS:initial_anchor_data"
input_stream: "BOXES:boxes"
input_stream_info: {
tag_index: 'BOXES'
back_edge: true
}
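  # Marking BOXES as a back edge tells MediaPipe that this input closes the
  # cycle with BoxTrackingSubgraphGpu below, so the calculator is not blocked
  # waiting for BOXES on the first iteration.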
output_stream: "START_POS:start_pos"
output_stream: "CANCEL_ID:cancel_object_id"
output_stream: "ANCHORS:tracked_scaled_anchor_data"
}
# Subgraph performs anchor placement and tracking
node {
calculator: "BoxTrackingSubgraphGpu"
input_stream: "VIDEO:input_video"
input_stream: "BOXES:start_pos"
input_stream: "CANCEL_ID:cancel_object_id"
output_stream: "BOXES:boxes"
}

View File

@@ -0,0 +1,86 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "iris_depth_cpu_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_file_properties_calculator",
"//mediapipe/calculators/image:opencv_encoded_image_to_image_frame_calculator",
"//mediapipe/calculators/image:opencv_image_encoder_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
],
)
cc_library(
name = "iris_tracking_cpu_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
],
)
cc_library(
name = "iris_tracking_cpu_video_input_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
],
)
cc_library(
name = "iris_tracking_gpu_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_gpu",
],
)
mediapipe_binary_graph(
name = "iris_tracking_gpu_binary_graph",
graph = "iris_tracking_gpu.pbtxt",
output_name = "iris_tracking_gpu.binarypb",
deps = [":iris_tracking_gpu_deps"],
)

View File

@@ -0,0 +1,107 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
licenses(["notice"])
proto_library(
name = "iris_to_render_data_calculator_proto",
srcs = ["iris_to_render_data_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util:color_proto",
"//mediapipe/util:render_data_proto",
],
)
mediapipe_cc_proto_library(
name = "iris_to_render_data_calculator_cc_proto",
srcs = ["iris_to_render_data_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":iris_to_render_data_calculator_proto"],
)
cc_library(
name = "iris_to_render_data_calculator",
srcs = ["iris_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":iris_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
proto_library(
name = "iris_to_depth_calculator_proto",
srcs = ["iris_to_depth_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "iris_to_depth_calculator_cc_proto",
srcs = ["iris_to_depth_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":iris_to_depth_calculator_proto"],
)
cc_library(
name = "iris_to_depth_calculator",
srcs = ["iris_to_depth_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":iris_to_depth_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_file_properties_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
cc_library(
name = "update_face_landmarks_calculator",
srcs = ["update_face_landmarks_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_file_properties_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)

View File

@@ -0,0 +1,245 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_file_properties.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/iris_tracking/calculators/iris_to_depth_calculator.pb.h"
namespace mediapipe {
namespace {
constexpr char kIrisTag[] = "IRIS";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr char kFocalLengthPixelTag[] = "FOCAL_LENGTH";
constexpr char kImageFilePropertiesTag[] = "IMAGE_FILE_PROPERTIES";
constexpr char kLeftIrisDepthTag[] = "LEFT_IRIS_DEPTH_MM";
constexpr char kRightIrisDepthTag[] = "RIGHT_IRIS_DEPTH_MM";
constexpr int kNumIrisLandmarksPerEye = 5;
constexpr float kDepthWeightUpdate = 0.1;
// Average fixed iris size across human beings.
constexpr float kIrisSizeInMM = 11.8;
inline float GetDepth(float x0, float y0, float x1, float y1) {
return std::sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
}
inline float GetLandmarkDepth(const NormalizedLandmark& ld0,
const NormalizedLandmark& ld1,
const std::pair<int, int>& image_size) {
return GetDepth(ld0.x() * image_size.first, ld0.y() * image_size.second,
ld1.x() * image_size.first, ld1.y() * image_size.second);
}
float CalculateIrisDiameter(const NormalizedLandmarkList& landmarks,
const std::pair<int, int>& image_size) {
const float dist_vert = GetLandmarkDepth(landmarks.landmark(1),
landmarks.landmark(2), image_size);
const float dist_hori = GetLandmarkDepth(landmarks.landmark(3),
landmarks.landmark(4), image_size);
return (dist_hori + dist_vert) / 2.0f;
}
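// Pinhole-camera estimate: assuming the iris is kIrisSizeInMM wide, its
// distance is kIrisSizeInMM * sqrt(focal_length^2 + y^2) / iris_size, where y
// is the iris center's pixel offset from the image center. For example, with
// an (assumed) focal length of 1400 px and an iris measuring 40 px near the
// image center, depth ~= 11.8 * 1400 / 40 ~= 413 mm.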
float CalculateDepth(const NormalizedLandmark& center, float focal_length,
float iris_size, float img_w, float img_h) {
std::pair<float, float> origin{img_w / 2.f, img_h / 2.f};
const auto y = GetDepth(origin.first, origin.second, center.x() * img_w,
center.y() * img_h);
const auto x = std::sqrt(focal_length * focal_length + y * y);
const auto depth = kIrisSizeInMM * x / iris_size;
return depth;
}
} // namespace
// Estimates depth from iris to camera given focal length and image size.
//
// Usage example:
// node {
// calculator: "IrisToDepthCalculator"
// # A NormalizedLandmarkList contains landmarks for both iris.
// input_stream: "IRIS:iris_landmarks"
// input_stream: "IMAGE_SIZE:image_size"
// # Note: Only one of FOCAL_LENGTH or IMAGE_FILE_PROPERTIES is necessary
// # to get focal length in pixels. Sending focal length in pixels to
// # this calculator is optional.
// input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
// # OR
// input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
// output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
// output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
// }
class IrisToDepthCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kIrisTag).Set<NormalizedLandmarkList>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
// At most one of kFocalLengthPixelTag or kImageFilePropertiesTag may be
// present.
RET_CHECK(!(cc->InputSidePackets().HasTag(kFocalLengthPixelTag) &&
cc->InputSidePackets().HasTag(kImageFilePropertiesTag)));
if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
cc->InputSidePackets().Tag(kFocalLengthPixelTag).SetAny();
}
if (cc->InputSidePackets().HasTag(kImageFilePropertiesTag)) {
cc->InputSidePackets()
.Tag(kImageFilePropertiesTag)
.Set<ImageFileProperties>();
}
if (cc->Outputs().HasTag(kLeftIrisDepthTag)) {
cc->Outputs().Tag(kLeftIrisDepthTag).Set<float>();
}
if (cc->Outputs().HasTag(kRightIrisDepthTag)) {
cc->Outputs().Tag(kRightIrisDepthTag).Set<float>();
}
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
float focal_length_pixels_ = -1.f;
// TODO: Consolidate the logic when switching to input stream for
// focal length.
bool compute_depth_from_iris_ = false;
float smoothed_left_depth_mm_ = -1.f;
float smoothed_right_depth_mm_ = -1.f;
void GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
void GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
::mediapipe::IrisToDepthCalculatorOptions options_;
};
REGISTER_CALCULATOR(IrisToDepthCalculator);
absl::Status IrisToDepthCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
#if defined(__APPLE__)
focal_length_pixels_ = *cc->InputSidePackets()
.Tag(kFocalLengthPixelTag)
.Get<std::unique_ptr<float>>();
#else
focal_length_pixels_ =
cc->InputSidePackets().Tag(kFocalLengthPixelTag).Get<float>();
#endif
compute_depth_from_iris_ = true;
} else if (cc->InputSidePackets().HasTag(kImageFilePropertiesTag)) {
const auto& properties = cc->InputSidePackets()
.Tag(kImageFilePropertiesTag)
.Get<ImageFileProperties>();
focal_length_pixels_ = properties.focal_length_pixels();
compute_depth_from_iris_ = true;
}
options_ = cc->Options<::mediapipe::IrisToDepthCalculatorOptions>();
return absl::OkStatus();
}
absl::Status IrisToDepthCalculator::Process(CalculatorContext* cc) {
// Only process if there are input landmarks.
if (cc->Inputs().Tag(kIrisTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& iris_landmarks =
cc->Inputs().Tag(kIrisTag).Get<NormalizedLandmarkList>();
RET_CHECK_EQ(iris_landmarks.landmark_size(), kNumIrisLandmarksPerEye * 2)
<< "Wrong number of iris landmarks";
std::pair<int, int> image_size;
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
image_size = cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
auto left_iris = absl::make_unique<NormalizedLandmarkList>();
auto right_iris = absl::make_unique<NormalizedLandmarkList>();
GetLeftIris(iris_landmarks, left_iris.get());
GetRightIris(iris_landmarks, right_iris.get());
const auto left_iris_size = CalculateIrisDiameter(*left_iris, image_size);
const auto right_iris_size = CalculateIrisDiameter(*right_iris, image_size);
#if defined(__APPLE__)
if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
focal_length_pixels_ = *cc->InputSidePackets()
.Tag(kFocalLengthPixelTag)
.Get<std::unique_ptr<float>>();
}
#endif
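// On Apple platforms the focal length side packet is a unique_ptr<float>, so
// the pointed-to value can be updated by the app after Open(); re-reading it
// here picks up the latest value before computing depth.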
if (compute_depth_from_iris_ && focal_length_pixels_ > 0) {
const auto left_depth =
CalculateDepth(left_iris->landmark(0), focal_length_pixels_,
left_iris_size, image_size.first, image_size.second);
const auto right_depth =
CalculateDepth(right_iris->landmark(0), focal_length_pixels_,
right_iris_size, image_size.first, image_size.second);
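// Exponentially smooth the per-frame estimates: each new depth contributes
// kDepthWeightUpdate (10%) and the running value keeps the remaining 90%,
// which damps frame-to-frame jitter.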
smoothed_left_depth_mm_ =
smoothed_left_depth_mm_ < 0 || std::isinf(smoothed_left_depth_mm_)
? left_depth
: smoothed_left_depth_mm_ * (1 - kDepthWeightUpdate) +
left_depth * kDepthWeightUpdate;
smoothed_right_depth_mm_ =
smoothed_right_depth_mm_ < 0 || std::isinf(smoothed_right_depth_mm_)
? right_depth
: smoothed_right_depth_mm_ * (1 - kDepthWeightUpdate) +
right_depth * kDepthWeightUpdate;
if (cc->Outputs().HasTag(kLeftIrisDepthTag)) {
cc->Outputs()
.Tag(kLeftIrisDepthTag)
.AddPacket(MakePacket<float>(smoothed_left_depth_mm_)
.At(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kRightIrisDepthTag)) {
cc->Outputs()
.Tag(kRightIrisDepthTag)
.AddPacket(MakePacket<float>(smoothed_right_depth_mm_)
.At(cc->InputTimestamp()));
}
}
return absl::OkStatus();
}
void IrisToDepthCalculator::GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(options_.left_iris_center_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_top_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_bottom_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_left_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_right_index());
}
void IrisToDepthCalculator::GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(options_.right_iris_center_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_top_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_bottom_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_left_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_right_index());
}
} // namespace mediapipe

View File

@@ -0,0 +1,39 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message IrisToDepthCalculatorOptions {
extend CalculatorOptions {
optional IrisToDepthCalculatorOptions ext = 303429002;
}
// Indices of the corresponding left iris landmarks in the input stream.
optional int32 left_iris_center_index = 1 [default = 0];
optional int32 left_iris_top_index = 2 [default = 2];
optional int32 left_iris_bottom_index = 3 [default = 4];
optional int32 left_iris_left_index = 4 [default = 3];
optional int32 left_iris_right_index = 5 [default = 1];
// Indices of the corresponding right iris landmarks in the input stream.
optional int32 right_iris_center_index = 6 [default = 5];
optional int32 right_iris_top_index = 7 [default = 7];
optional int32 right_iris_bottom_index = 8 [default = 9];
optional int32 right_iris_left_index = 9 [default = 6];
optional int32 right_iris_right_index = 10 [default = 8];
}

View File

@@ -0,0 +1,318 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/iris_tracking/calculators/iris_to_render_data_calculator.pb.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
namespace mediapipe {
namespace {
constexpr char kIrisTag[] = "IRIS";
constexpr char kRenderDataTag[] = "RENDER_DATA";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr char kLeftIrisDepthTag[] = "LEFT_IRIS_DEPTH_MM";
constexpr char kRightIrisDepthTag[] = "RIGHT_IRIS_DEPTH_MM";
constexpr char kOvalLabel[] = "OVAL";
constexpr float kFontHeightScale = 1.5f;
constexpr int kNumIrisLandmarksPerEye = 5;
// TODO: Source.
constexpr float kIrisSizeInMM = 11.8;
inline void SetColor(RenderAnnotation* annotation, const Color& color) {
annotation->mutable_color()->set_r(color.r());
annotation->mutable_color()->set_g(color.g());
annotation->mutable_color()->set_b(color.b());
}
inline float GetDepth(float x0, float y0, float x1, float y1) {
return std::sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
}
inline float GetLandmarkDepth(const NormalizedLandmark& ld0,
const NormalizedLandmark& ld1,
const std::pair<int, int>& image_size) {
return GetDepth(ld0.x() * image_size.first, ld0.y() * image_size.second,
ld1.x() * image_size.first, ld1.y() * image_size.second);
}
float CalculateIrisDiameter(const NormalizedLandmarkList& landmarks,
const std::pair<int, int>& image_size) {
const float dist_vert = GetLandmarkDepth(landmarks.landmark(1),
landmarks.landmark(2), image_size);
const float dist_hori = GetLandmarkDepth(landmarks.landmark(3),
landmarks.landmark(4), image_size);
return (dist_hori + dist_vert) / 2.0f;
}
float CalculateDepth(const NormalizedLandmark& center, float focal_length,
float iris_size, float img_w, float img_h) {
std::pair<float, float> origin{img_w / 2.f, img_h / 2.f};
const auto y = GetDepth(origin.first, origin.second, center.x() * img_w,
center.y() * img_h);
const auto x = std::sqrt(focal_length * focal_length + y * y);
const auto depth = kIrisSizeInMM * x / iris_size;
return depth;
}
} // namespace
// Converts iris landmarks to render data and, when iris-to-camera depth
// estimates are supplied, renders the depth as part of the render data on the
// frame.
//
// Usage example:
// node {
// calculator: "IrisToRenderDataCalculator"
// input_stream: "IRIS:iris_landmarks"
// input_stream: "IMAGE_SIZE:image_size"
// # Note: Only one of FOCAL_LENGTH or IMAGE_FILE_PROPERTIES is necessary
// # to get focal length in pixels. Sending focal length in pixels to
// # this calculator is optional.
// input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
// # OR
// input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
// output_stream: "RENDER_DATA:iris_render_data"
// output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
// output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
// node_options: {
// [type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
// color { r: 255 g: 255 b: 255 }
// thickness: 2.0
// font_height_px: 50
// horizontal_offset_px: 200
// vertical_offset_px: 200
// location: TOP_LEFT
// }
// }
// }
class IrisToRenderDataCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kIrisTag).Set<NormalizedLandmarkList>();
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
if (cc->Inputs().HasTag(kLeftIrisDepthTag)) {
cc->Inputs().Tag(kLeftIrisDepthTag).Set<float>();
}
if (cc->Inputs().HasTag(kRightIrisDepthTag)) {
cc->Inputs().Tag(kRightIrisDepthTag).Set<float>();
}
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
void RenderIris(const NormalizedLandmarkList& iris_landmarks,
const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size, float iris_size,
RenderData* render_data);
void GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
void GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
void AddTextRenderData(const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size,
const std::vector<std::string>& lines,
RenderData* render_data);
static RenderAnnotation* AddOvalRenderData(
const IrisToRenderDataCalculatorOptions& options,
RenderData* render_data);
static RenderAnnotation* AddPointRenderData(
const IrisToRenderDataCalculatorOptions& options,
RenderData* render_data);
};
REGISTER_CALCULATOR(IrisToRenderDataCalculator);
absl::Status IrisToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
return absl::OkStatus();
}
absl::Status IrisToRenderDataCalculator::Process(CalculatorContext* cc) {
// Only process if there are input landmarks.
if (cc->Inputs().Tag(kIrisTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& options =
cc->Options<::mediapipe::IrisToRenderDataCalculatorOptions>();
const auto& iris_landmarks =
cc->Inputs().Tag(kIrisTag).Get<NormalizedLandmarkList>();
RET_CHECK_EQ(iris_landmarks.landmark_size(), kNumIrisLandmarksPerEye * 2)
<< "Wrong number of iris landmarks";
std::pair<int, int> image_size;
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
image_size = cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
auto render_data = absl::make_unique<RenderData>();
auto left_iris = absl::make_unique<NormalizedLandmarkList>();
auto right_iris = absl::make_unique<NormalizedLandmarkList>();
GetLeftIris(iris_landmarks, left_iris.get());
GetRightIris(iris_landmarks, right_iris.get());
const auto left_iris_size = CalculateIrisDiameter(*left_iris, image_size);
const auto right_iris_size = CalculateIrisDiameter(*right_iris, image_size);
RenderIris(*left_iris, options, image_size, left_iris_size,
render_data.get());
RenderIris(*right_iris, options, image_size, right_iris_size,
render_data.get());
std::vector<std::string> lines;
std::string line;
if (cc->Inputs().HasTag(kLeftIrisDepthTag) &&
!cc->Inputs().Tag(kLeftIrisDepthTag).IsEmpty()) {
const float left_iris_depth =
cc->Inputs().Tag(kLeftIrisDepthTag).Get<float>();
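// Depth arrives in millimetres; dividing by 10 displays whole centimetres.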
if (!std::isinf(left_iris_depth)) {
line = "Left : ";
absl::StrAppend(&line, ":", std::round(left_iris_depth / 10), " cm");
lines.emplace_back(line);
}
}
if (cc->Inputs().HasTag(kRightIrisDepthTag) &&
!cc->Inputs().Tag(kRightIrisDepthTag).IsEmpty()) {
const float right_iris_depth =
cc->Inputs().Tag(kRightIrisDepthTag).Get<float>();
if (!std::isinf(right_iris_depth)) {
line = "Right : ";
absl::StrAppend(&line, ":", std::round(right_iris_depth / 10), " cm");
lines.emplace_back(line);
}
}
AddTextRenderData(options, image_size, lines, render_data.get());
cc->Outputs()
.Tag(kRenderDataTag)
.Add(render_data.release(), cc->InputTimestamp());
return absl::OkStatus();
}
void IrisToRenderDataCalculator::AddTextRenderData(
const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size,
const std::vector<std::string>& lines, RenderData* render_data) {
int label_baseline_px = options.vertical_offset_px();
float label_height_px =
std::ceil(options.font_height_px() * kFontHeightScale);
if (options.location() == IrisToRenderDataCalculatorOptions::TOP_LEFT) {
label_baseline_px += label_height_px;
} else if (options.location() ==
IrisToRenderDataCalculatorOptions::BOTTOM_LEFT) {
label_baseline_px += image_size.second - label_height_px * lines.size();
}
const auto label_left_px = options.horizontal_offset_px();
for (int i = 0; i < lines.size(); ++i) {
auto* label_annotation = render_data->add_render_annotations();
label_annotation->set_thickness(5);
label_annotation->mutable_color()->set_r(255);
label_annotation->mutable_color()->set_g(0);
label_annotation->mutable_color()->set_b(0);
//
auto* text = label_annotation->mutable_text();
text->set_display_text(lines[i]);
text->set_font_height(options.font_height_px());
text->set_left(label_left_px);
text->set_baseline(label_baseline_px + i * label_height_px);
text->set_font_face(options.font_face());
}
}
void IrisToRenderDataCalculator::RenderIris(
const NormalizedLandmarkList& iris_landmarks,
const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size, float iris_size,
RenderData* render_data) {
auto* oval_data_render = AddOvalRenderData(options, render_data);
auto* oval_data = oval_data_render->mutable_oval();
const float iris_radius = iris_size / 2.f;
const auto& iris_center = iris_landmarks.landmark(0);
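// The oval's half-axes are the pixel radius re-normalized by image width and
// height respectively, so the drawn iris stays circular on screen regardless
// of the image's aspect ratio.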
oval_data->mutable_rectangle()->set_top(iris_center.y() -
iris_radius / image_size.second);
oval_data->mutable_rectangle()->set_bottom(iris_center.y() +
iris_radius / image_size.second);
oval_data->mutable_rectangle()->set_left(iris_center.x() -
iris_radius / image_size.first);
oval_data->mutable_rectangle()->set_right(iris_center.x() +
iris_radius / image_size.first);
oval_data->mutable_rectangle()->set_normalized(true);
for (int i = 0; i < iris_landmarks.landmark_size(); ++i) {
const NormalizedLandmark& landmark = iris_landmarks.landmark(i);
auto* landmark_data_render = AddPointRenderData(options, render_data);
auto* landmark_data = landmark_data_render->mutable_point();
landmark_data->set_normalized(true);
landmark_data->set_x(landmark.x());
landmark_data->set_y(landmark.y());
}
}
void IrisToRenderDataCalculator::GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(0);
*iris->add_landmark() = lds.landmark(2);
*iris->add_landmark() = lds.landmark(4);
*iris->add_landmark() = lds.landmark(3);
*iris->add_landmark() = lds.landmark(1);
}
void IrisToRenderDataCalculator::GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(5);
*iris->add_landmark() = lds.landmark(7);
*iris->add_landmark() = lds.landmark(9);
*iris->add_landmark() = lds.landmark(6);
*iris->add_landmark() = lds.landmark(8);
}
RenderAnnotation* IrisToRenderDataCalculator::AddOvalRenderData(
const IrisToRenderDataCalculatorOptions& options, RenderData* render_data) {
auto* oval_data_annotation = render_data->add_render_annotations();
oval_data_annotation->set_scene_tag(kOvalLabel);
SetColor(oval_data_annotation, options.oval_color());
oval_data_annotation->set_thickness(options.oval_thickness());
return oval_data_annotation;
}
RenderAnnotation* IrisToRenderDataCalculator::AddPointRenderData(
const IrisToRenderDataCalculatorOptions& options, RenderData* render_data) {
auto* landmark_data_annotation = render_data->add_render_annotations();
SetColor(landmark_data_annotation, options.landmark_color());
landmark_data_annotation->set_thickness(options.landmark_thickness());
return landmark_data_annotation;
}
} // namespace mediapipe

View File

@@ -0,0 +1,62 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";
message IrisToRenderDataCalculatorOptions {
extend CalculatorOptions {
optional IrisToRenderDataCalculatorOptions ext = 289530040;
}
// Color of the oval.
optional Color oval_color = 1;
// Color of the landmarks.
optional Color landmark_color = 9;
// Thickness of the drawing of landmarks and iris oval.
optional double oval_thickness = 2 [default = 1.0];
optional double landmark_thickness = 10 [default = 1.0];
// The font height in absolute pixels.
optional int32 font_height_px = 3 [default = 50];
// The offset of the starting text in horizontal direction in absolute pixels.
optional int32 horizontal_offset_px = 7 [default = 0];
// The offset of the starting text in vertical direction in absolute pixels.
optional int32 vertical_offset_px = 8 [default = 0];
// Specifies the font for the text. Font must be one of the following from
// OpenCV:
// cv::FONT_HERSHEY_SIMPLEX (0)
// cv::FONT_HERSHEY_PLAIN (1)
// cv::FONT_HERSHEY_DUPLEX (2)
// cv::FONT_HERSHEY_COMPLEX (3)
// cv::FONT_HERSHEY_TRIPLEX (4)
// cv::FONT_HERSHEY_COMPLEX_SMALL (5)
// cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6)
// cv::FONT_HERSHEY_SCRIPT_COMPLEX (7)
optional int32 font_face = 5 [default = 0];
// Label location.
enum Location {
TOP_LEFT = 0;
BOTTOM_LEFT = 1;
}
optional Location location = 6 [default = TOP_LEFT];
}

View File

@@ -0,0 +1,268 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {
constexpr char kFaceLandmarksTag[] = "FACE_LANDMARKS";
constexpr char kNewEyeLandmarksTag[] = "NEW_EYE_LANDMARKS";
constexpr char kUpdatedFaceLandmarksTag[] = "UPDATED_FACE_LANDMARKS";
constexpr int kNumFaceLandmarks = 468;
// 71 landmarks for the left eye and 71 landmarks for the right eye.
constexpr int kNumEyeLandmarks = 142;
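// Maps each refined eye landmark (0..141, left eye first) to its index in the
// 468-point face mesh; Process() overwrites face landmark
// kEyeLandmarkIndicesInFaceLandmarks[i] with new_eye_landmarks.landmark(i).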
constexpr int kEyeLandmarkIndicesInFaceLandmarks[] = {
// Left eye
// eye lower contour
33,
7,
163,
144,
145,
153,
154,
155,
133,
// eye upper contour (excluding corners)
246,
161,
160,
159,
158,
157,
173,
// halo x2 lower contour
130,
25,
110,
24,
23,
22,
26,
112,
243,
// halo x2 upper contour (excluding corners)
247,
30,
29,
27,
28,
56,
190,
// halo x3 lower contour
226,
31,
228,
229,
230,
231,
232,
233,
244,
// halo x3 upper contour (excluding corners)
113,
225,
224,
223,
222,
221,
189,
// halo x4 upper contour (no lower because of mesh structure)
// or eyebrow inner contour
35,
124,
46,
53,
52,
65,
// halo x5 lower contour
143,
111,
117,
118,
119,
120,
121,
128,
245,
// halo x5 upper contour (excluding corners)
// or eyebrow outer contour
156,
70,
63,
105,
66,
107,
55,
193,
// Right eye
// eye lower contour
263,
249,
390,
373,
374,
380,
381,
382,
362,
// eye upper contour (excluding corners)
466,
388,
387,
386,
385,
384,
398,
// halo x2 lower contour
359,
255,
339,
254,
253,
252,
256,
341,
463,
// halo x2 upper contour (excluding corners)
467,
260,
259,
257,
258,
286,
414,
// halo x3 lower contour
446,
261,
448,
449,
450,
451,
452,
453,
464,
// halo x3 upper contour (excluding corners)
342,
445,
444,
443,
442,
441,
413,
// halo x4 upper contour (no lower because of mesh structure)
// or eyebrow inner contour
265,
353,
276,
283,
282,
295,
// halo x5 lower contour
372,
340,
346,
347,
348,
349,
350,
357,
465,
// halo x5 upper contour (excluding corners)
// or eyebrow outer contour
383,
300,
293,
334,
296,
336,
285,
417,
};
} // namespace
// Update face landmarks with new (e.g., refined) values. Currently only updates
// landmarks around the eyes.
//
// Usage example:
// node {
// calculator: "UpdateFaceLandmarksCalculator"
// input_stream: "NEW_EYE_LANDMARKS:new_eye_landmarks"
// input_stream: "FACE_LANDMARKS:face_landmarks"
// output_stream: "UPDATED_FACE_LANDMARKS:refine_face_landmarks"
// }
//
class UpdateFaceLandmarksCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kFaceLandmarksTag).Set<NormalizedLandmarkList>();
cc->Inputs().Tag(kNewEyeLandmarksTag).Set<NormalizedLandmarkList>();
cc->Outputs().Tag(kUpdatedFaceLandmarksTag).Set<NormalizedLandmarkList>();
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(UpdateFaceLandmarksCalculator);
absl::Status UpdateFaceLandmarksCalculator::Process(CalculatorContext* cc) {
if (cc->Inputs().Tag(kFaceLandmarksTag).IsEmpty() ||
cc->Inputs().Tag(kNewEyeLandmarksTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& face_landmarks =
cc->Inputs().Tag(kFaceLandmarksTag).Get<NormalizedLandmarkList>();
const auto& new_eye_landmarks =
cc->Inputs().Tag(kNewEyeLandmarksTag).Get<NormalizedLandmarkList>();
RET_CHECK_EQ(face_landmarks.landmark_size(), kNumFaceLandmarks)
<< "Wrong number of face landmarks";
RET_CHECK_EQ(new_eye_landmarks.landmark_size(), kNumEyeLandmarks)
<< "Wrong number of face landmarks";
auto refined_face_landmarks =
absl::make_unique<NormalizedLandmarkList>(face_landmarks);
for (int i = 0; i < kNumEyeLandmarks; ++i) {
const auto& refined_ld = new_eye_landmarks.landmark(i);
const int id = kEyeLandmarkIndicesInFaceLandmarks[i];
refined_face_landmarks->mutable_landmark(id)->set_x(refined_ld.x());
refined_face_landmarks->mutable_landmark(id)->set_y(refined_ld.y());
refined_face_landmarks->mutable_landmark(id)->set_z(refined_ld.z());
refined_face_landmarks->mutable_landmark(id)->set_visibility(
refined_ld.visibility());
}
cc->Outputs()
.Tag(kUpdatedFaceLandmarksTag)
.Add(refined_face_landmarks.release(), cc->InputTimestamp());
return absl::OkStatus();
}
} // namespace mediapipe

View File

@@ -0,0 +1,159 @@
# MediaPipe graph that performs iris distance computation on desktop with
# TensorFlow Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/iris_tracking:iris_depth_from_image_desktop.
# Raw image bytes. (std::string)
input_stream: "input_image_bytes"
# Image with all the detections rendered. (ImageFrame)
output_stream: "output_image"
# Estimated depth in mm from the camera to the left iris of the face (if any) in
# the image. (float)
output_stream: "left_iris_depth_mm"
# Estimated depth in mm from the camera to the right iris of the face (if any)
# in the image. (float)
output_stream: "right_iris_depth_mm"
# Computes the focal length in pixels based on EXIF information stored in the
# image file. The output is an ImageFileProperties object containing relevant
# image EXIF information along with focal length in pixels.
node {
calculator: "ImageFilePropertiesCalculator"
input_stream: "input_image_bytes"
output_side_packet: "image_file_properties"
}
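# The image_file_properties side packet carries the EXIF-derived focal length
# (in pixels) that the IrisAndDepthRendererCpu subgraph below consumes to turn
# iris size in pixels into a metric depth estimate.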
# Converts a raw string with encoded image bytes into an ImageFrame object
# via OpenCV so that it can be processed by downstream calculators.
node {
calculator: "OpenCvEncodedImageToImageFrameCalculator"
input_stream: "input_image_bytes"
output_stream: "input_image"
}
# Defines how many faces to detect. Iris tracking currently only handles one
# face (left and right eye), and therefore this should always be set to 1.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_image"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Gets the very first and only face from "multi_face_landmarks" vector.
node {
calculator: "SplitNormalizedLandmarkListVectorCalculator"
input_stream: "multi_face_landmarks"
output_stream: "face_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets the very first and only face rect from "face_rects_from_landmarks"
# vector.
node {
calculator: "SplitNormalizedRectVectorCalculator"
input_stream: "face_rects_from_landmarks"
output_stream: "face_rect"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets two landmarks which define left eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "left_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 33 end: 34 }
ranges: { begin: 133 end: 134 }
combine_outputs: true
}
}
}
# Gets two landmarks which define right eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "right_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 362 end: 363 }
ranges: { begin: 263 end: 264 }
combine_outputs: true
}
}
}
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
calculator: "IrisLandmarkLeftAndRightCpu"
input_stream: "IMAGE:input_image"
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
}
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_eye_contour_landmarks"
input_stream: "right_eye_contour_landmarks"
output_stream: "refined_eye_landmarks"
}
node {
calculator: "UpdateFaceLandmarksCalculator"
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
}
# Renders annotations and overlays them on top of the input images.
node {
calculator: "IrisAndDepthRendererCpu"
input_stream: "IMAGE:input_image"
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:face_rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_stream: "DETECTIONS:face_detections"
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_image"
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
}

View File

@@ -0,0 +1,142 @@
# MediaPipe graph that performs iris tracking on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/iris_tracking:iris_tracking_cpu.
# CPU image. (ImageFrame)
input_stream: "input_video"
# CPU image. (ImageFrame)
output_stream: "output_video"
# Face landmarks with iris. (NormalizedLandmarkList)
output_stream: "face_landmarks_with_iris"
# Defines how many faces to detect. Iris tracking currently only handles one
# face (left and right eye), and therefore this should always be set to 1.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Gets the very first and only face from "multi_face_landmarks" vector.
node {
calculator: "SplitNormalizedLandmarkListVectorCalculator"
input_stream: "multi_face_landmarks"
output_stream: "face_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets the very first and only face rect from "face_rects_from_landmarks"
# vector.
node {
calculator: "SplitNormalizedRectVectorCalculator"
input_stream: "face_rects_from_landmarks"
output_stream: "face_rect"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets two landmarks which define left eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "left_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 33 end: 34 }
ranges: { begin: 133 end: 134 }
combine_outputs: true
}
}
}
# Gets two landmarks which define right eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "right_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 362 end: 363 }
ranges: { begin: 263 end: 264 }
combine_outputs: true
}
}
}
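Each range above selects landmarks by index (treating begin as inclusive and end as exclusive is an assumption based on the field names), and combine_outputs merges the selections into a single two-landmark list. A small illustrative sketch in Rust:

// Mimics SplitNormalizedLandmarkListCalculator with ranges {33,34} and
// {133,134} plus combine_outputs: true. The [begin, end) interpretation of a
// range is an assumption inferred from the begin/end naming in the options.
#[derive(Clone, Debug)]
struct Landmark {
    x: f32,
    y: f32,
}

fn select_ranges(landmarks: &[Landmark], ranges: &[(usize, usize)]) -> Vec<Landmark> {
    ranges
        .iter()
        .flat_map(|&(begin, end)| landmarks[begin..end].iter().cloned())
        .collect()
}

fn main() {
    // 468 dummy points stand in for the face-mesh landmark list here.
    let face_landmarks: Vec<Landmark> = (0..468)
        .map(|i| Landmark { x: i as f32, y: 0.0 })
        .collect();
    let left_eye_boundary = select_ranges(&face_landmarks, &[(33, 34), (133, 134)]);
    assert_eq!(left_eye_boundary.len(), 2); // landmarks 33 and 133 only
}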
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
calculator: "IrisLandmarkLeftAndRightCpu"
input_stream: "IMAGE:input_video"
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
}
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_eye_contour_landmarks"
input_stream: "right_eye_contour_landmarks"
output_stream: "refined_eye_landmarks"
}
node {
calculator: "UpdateFaceLandmarksCalculator"
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
}
# Renders annotations and overlays them on top of the input images.
node {
calculator: "IrisRendererCpu"
input_stream: "IMAGE:input_video"
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:face_rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_video"
}
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "updated_face_landmarks"
input_stream: "iris_landmarks"
output_stream: "face_landmarks_with_iris"
}

View File

@ -0,0 +1,153 @@
# MediaPipe graph that performs iris tracking on desktop with TensorFlow Lite
# on CPU.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Defines how many faces to detect. Iris tracking currently only handles one
# face (left and right eye), and therefore this should always be set to 1.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Gets the very first and only face from "multi_face_landmarks" vector.
node {
calculator: "SplitNormalizedLandmarkListVectorCalculator"
input_stream: "multi_face_landmarks"
output_stream: "face_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets the very first and only face rect from "face_rects_from_landmarks"
# vector.
node {
calculator: "SplitNormalizedRectVectorCalculator"
input_stream: "face_rects_from_landmarks"
output_stream: "face_rect"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets two landmarks which define left eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "left_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 33 end: 34 }
ranges: { begin: 133 end: 134 }
combine_outputs: true
}
}
}
# Gets two landmarks which define right eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "right_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 362 end: 363 }
ranges: { begin: 263 end: 264 }
combine_outputs: true
}
}
}
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
calculator: "IrisLandmarkLeftAndRightCpu"
input_stream: "IMAGE:input_video"
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
}
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_eye_contour_landmarks"
input_stream: "right_eye_contour_landmarks"
output_stream: "refined_eye_landmarks"
}
node {
calculator: "UpdateFaceLandmarksCalculator"
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
}
# Renders annotations and overlays them on top of the input images.
node {
calculator: "IrisRendererCpu"
input_stream: "IMAGE:input_video"
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:face_rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,163 @@
# MediaPipe graph that performs iris tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/iristrackinggpu.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# GPU buffer. (GpuBuffer)
output_stream: "output_video"
# Face landmarks with iris. (NormalizedLandmarkList)
output_stream: "face_landmarks_with_iris"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, both unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
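The same back-pressure idea, stripped of MediaPipe specifics, amounts to keeping only the newest frame while the worker is busy. A hedged Rust sketch of that pattern (not the calculator's implementation):

use std::sync::{Arc, Mutex};

// A one-slot "mailbox" that mirrors the FlowLimiterCalculator idea: the
// producer always overwrites the slot with the newest frame, and the consumer
// takes whatever is current once it finishes the previous frame, so at most
// one frame is in flight and stale frames are simply dropped.
struct LatestFrame<T>(Arc<Mutex<Option<T>>>);

impl<T> LatestFrame<T> {
    fn new() -> Self {
        LatestFrame(Arc::new(Mutex::new(None)))
    }
    fn publish(&self, frame: T) {
        // overwrite, dropping any frame that was never consumed
        *self.0.lock().unwrap() = Some(frame);
    }
    fn take(&self) -> Option<T> {
        self.0.lock().unwrap().take()
    }
}

fn main() {
    let slot = LatestFrame::new();
    for frame_id in 0..5 {
        slot.publish(frame_id); // camera side: frames 0..4 arrive while busy
    }
    // worker side: only the newest frame (4) is left to process
    assert_eq!(slot.take(), Some(4));
    assert_eq!(slot.take(), None);
}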
# Defines how many faces to detect. Iris tracking currently only handles one
# face (left and right eye), and therefore this should always be set to 1.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Gets the very first and only face from "multi_face_landmarks" vector.
node {
calculator: "SplitNormalizedLandmarkListVectorCalculator"
input_stream: "multi_face_landmarks"
output_stream: "face_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets the very first and only face rect from "face_rects_from_landmarks"
# vector.
node {
calculator: "SplitNormalizedRectVectorCalculator"
input_stream: "face_rects_from_landmarks"
output_stream: "face_rect"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Gets two landmarks which define left eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "left_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 33 end: 34 }
ranges: { begin: 133 end: 134 }
combine_outputs: true
}
}
}
# Gets two landmarks which define right eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "right_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 362 end: 363 }
ranges: { begin: 263 end: 264 }
combine_outputs: true
}
}
}
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
calculator: "IrisLandmarkLeftAndRightGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
}
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_eye_contour_landmarks"
input_stream: "right_eye_contour_landmarks"
output_stream: "refined_eye_landmarks"
}
node {
calculator: "UpdateFaceLandmarksCalculator"
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
}
# Renders annotations and overlays them on top of the input images.
node {
calculator: "IrisAndDepthRendererGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:face_rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_stream: "DETECTIONS:face_detections"
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_video"
}
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "updated_face_landmarks"
input_stream: "iris_landmarks"
output_stream: "face_landmarks_with_iris"
}

View File

@ -0,0 +1,67 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "renderer_calculators",
deps = [
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
"//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator",
"//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator",
],
)
mediapipe_simple_subgraph(
name = "iris_and_depth_renderer_gpu",
graph = "iris_and_depth_renderer_gpu.pbtxt",
register_as = "IrisAndDepthRendererGpu",
deps = [
":renderer_calculators",
"//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator",
],
)
mediapipe_simple_subgraph(
name = "iris_renderer_cpu",
graph = "iris_renderer_cpu.pbtxt",
register_as = "IrisRendererCpu",
deps = [
":renderer_calculators",
],
)
mediapipe_simple_subgraph(
name = "iris_and_depth_renderer_cpu",
graph = "iris_and_depth_renderer_cpu.pbtxt",
register_as = "IrisAndDepthRendererCpu",
deps = [
":renderer_calculators",
"//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator",
],
)

View File

@ -0,0 +1,267 @@
# MediaPipe iris tracking rendering subgraph.
type: "IrisAndDepthRendererCpu"
input_stream: "IMAGE:input_image"
input_stream: "DETECTIONS:detections"
input_stream: "FACE_LANDMARKS:face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_image"
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "all_left_eye_contour_landmarks"
output_stream: "left_eye_contour_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 15 }
}
}
}
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "all_right_eye_contour_landmarks"
output_stream: "right_eye_contour_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 15 }
}
}
}
# Concatenate iris landmarks from both eyes.
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_iris_landmarks"
input_stream: "right_iris_landmarks"
output_stream: "iris_landmarks"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:face_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 150 g: 0 b: 0 }
connection_color { r: 0 g: 150 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_image"
output_stream: "SIZE:image_size"
}
# Maps detection label IDs to the corresponding label text ("Face").
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "detections"
output_stream: "labeled_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label: "Face"
}
}
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:labeled_detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 4
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 12
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 0
landmark_connections: 9
landmark_connections: 8
landmark_connections: 14
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 255 g: 0 b: 0 }
visualize_landmark_depth: false
thickness: 1.0
}
}
}
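The flat landmark_connections list above is consumed as consecutive index pairs, each pair being one drawn segment of the eye contour. A small Rust sketch that just regroups the list to make the segments explicit:

// Regroups the landmark_connections values into (from, to) index pairs, the
// same 15 eye-contour segments configured in the node above.
fn connection_pairs(flat: &[u32]) -> Vec<(u32, u32)> {
    flat.chunks_exact(2).map(|c| (c[0], c[1])).collect()
}

fn main() {
    let flat = [
        0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, // first eyelid chain
        9, 10, 10, 11, 11, 12, 12, 13, 13, 14, // second eyelid chain
        0, 9, 8, 14, // joins the chain endpoints
    ];
    for (a, b) in connection_pairs(&flat) {
        println!("draw segment {} -> {}", a, b);
    }
}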
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 4
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 12
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 0
landmark_connections: 9
landmark_connections: 8
landmark_connections: 14
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 255 g: 0 b: 0 }
visualize_landmark_depth: false
thickness: 1.0
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:rect"
output_stream: "RENDER_DATA:rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
output_stream: "RENDER_DATA:right_eye_rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
output_stream: "RENDER_DATA:left_eye_rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "IrisToDepthCalculator"
input_stream: "IRIS:iris_landmarks"
input_stream: "IMAGE_SIZE:image_size"
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
}
node {
calculator: "IrisToRenderDataCalculator"
input_stream: "IRIS:iris_landmarks"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
input_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
output_stream: "RENDER_DATA:iris_render_data"
node_options: {
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
oval_color { r: 0 g: 0 b: 255 }
landmark_color { r: 0 g: 255 b: 0 }
oval_thickness: 2.0
landmark_thickness: 1.0
font_height_px: 50
horizontal_offset_px: 200
vertical_offset_px: 200
location: TOP_LEFT
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_image"
input_stream: "detection_render_data"
input_stream: "face_landmarks_render_data"
input_stream: "right_eye_contour_landmarks_render_data"
input_stream: "left_eye_contour_landmarks_render_data"
input_stream: "iris_render_data"
output_stream: "IMAGE:output_image"
}

View File

@ -0,0 +1,270 @@
# MediaPipe iris tracking rendering subgraph.
type: "IrisAndDepthRendererGpu"
input_stream: "IMAGE:input_image"
input_stream: "DETECTIONS:detections"
input_stream: "FACE_LANDMARKS:face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_image"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "all_left_eye_contour_landmarks"
output_stream: "left_eye_contour_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 15 }
}
}
}
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "all_right_eye_contour_landmarks"
output_stream: "right_eye_contour_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 15 }
}
}
}
# Concatenate iris landmarks from both eyes.
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_iris_landmarks"
input_stream: "right_iris_landmarks"
output_stream: "iris_landmarks"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:face_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 150 g: 0 b: 0 }
connection_color { r: 0 g: 150 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_image"
output_stream: "SIZE:image_size"
}
# Maps detection label IDs to the corresponding label text ("Face").
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "detections"
output_stream: "labeled_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label: "Face"
}
}
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:labeled_detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 4
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 12
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 0
landmark_connections: 9
landmark_connections: 8
landmark_connections: 14
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 255 g: 0 b: 0 }
visualize_landmark_depth: false
thickness: 2.0
}
}
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 4
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 12
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 0
landmark_connections: 9
landmark_connections: 8
landmark_connections: 14
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 255 g: 0 b: 0 }
visualize_landmark_depth: false
thickness: 2.0
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:rect"
output_stream: "RENDER_DATA:rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
output_stream: "RENDER_DATA:right_eye_rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
output_stream: "RENDER_DATA:left_eye_rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "IrisToDepthCalculator"
input_stream: "IRIS:iris_landmarks"
input_stream: "IMAGE_SIZE:image_size"
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
}
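The depth outputs above follow from relating the focal length (in pixels) to the iris size in the image via the pinhole-camera model. A hedged sketch; the 11.7 mm figure is the commonly cited average human iris diameter and is an assumption here, not a value taken from this graph:

// Pinhole-camera sketch of how an iris depth in millimetres can be derived
// from the FOCAL_LENGTH side packet and the measured iris span in pixels.
// The 11.7 mm diameter is an assumed population average, not defined here.
const ASSUMED_IRIS_DIAMETER_MM: f32 = 11.7;

fn iris_depth_mm(focal_length_px: f32, iris_diameter_px: f32) -> f32 {
    // similar triangles: depth / focal_length = real_size / pixel_size
    focal_length_px * ASSUMED_IRIS_DIAMETER_MM / iris_diameter_px
}

fn main() {
    // e.g. a 1000 px focal length and a 26 px iris span gives 450 mm.
    println!("{:.0} mm", iris_depth_mm(1000.0, 26.0));
}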
node {
calculator: "IrisToRenderDataCalculator"
input_stream: "IRIS:iris_landmarks"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
input_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
output_stream: "RENDER_DATA:iris_render_data"
node_options: {
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
oval_color { r: 0 g: 0 b: 255 }
landmark_color { r: 0 g: 255 b: 0 }
oval_thickness: 4.0
landmark_thickness: 2.0
font_height_px: 50
horizontal_offset_px: 200
vertical_offset_px: 200
location: TOP_LEFT
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:input_image"
input_stream: "detection_render_data"
input_stream: "face_landmarks_render_data"
input_stream: "right_eye_contour_landmarks_render_data"
input_stream: "left_eye_contour_landmarks_render_data"
input_stream: "iris_render_data"
output_stream: "IMAGE_GPU:output_image"
node_options: {
[type.googleapis.com/mediapipe.AnnotationOverlayCalculatorOptions] {
gpu_scale_factor: 0.5
}
}
}

View File

@ -0,0 +1,254 @@
# MediaPipe iris tracking rendering subgraph.
type: "IrisRendererCpu"
input_stream: "IMAGE:input_image"
input_stream: "DETECTIONS:detections"
input_stream: "FACE_LANDMARKS:face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_image"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "all_left_eye_contour_landmarks"
output_stream: "left_eye_contour_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 15 }
}
}
}
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "all_right_eye_contour_landmarks"
output_stream: "right_eye_contour_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 15 }
}
}
}
# Concatenate iris landmarks from both eyes.
node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_iris_landmarks"
input_stream: "right_iris_landmarks"
output_stream: "iris_landmarks"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:face_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 150 g: 0 b: 0 }
connection_color { r: 0 g: 150 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_image"
output_stream: "SIZE:image_size"
}
# Maps detection label IDs to the corresponding label text ("Face").
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "detections"
output_stream: "labeled_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label: "Face"
}
}
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:labeled_detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 4
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 12
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 0
landmark_connections: 9
landmark_connections: 8
landmark_connections: 14
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 255 g: 0 b: 0 }
visualize_landmark_depth: false
thickness: 1.0
}
}
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 4
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 12
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 0
landmark_connections: 9
landmark_connections: 8
landmark_connections: 14
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 255 g: 0 b: 0 }
visualize_landmark_depth: false
thickness: 1.0
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:rect"
output_stream: "RENDER_DATA:rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
output_stream: "RENDER_DATA:right_eye_rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
output_stream: "RENDER_DATA:left_eye_rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
node {
calculator: "IrisToRenderDataCalculator"
input_stream: "IRIS:iris_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "RENDER_DATA:iris_render_data"
node_options: {
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
oval_color { r: 0 g: 0 b: 255 }
landmark_color { r: 0 g: 255 b: 0 }
oval_thickness: 4.0
landmark_thickness: 2.0
font_height_px: 50
horizontal_offset_px: 200
vertical_offset_px: 200
location: TOP_LEFT
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_image"
input_stream: "detection_render_data"
input_stream: "face_landmarks_render_data"
input_stream: "right_eye_contour_landmarks_render_data"
input_stream: "left_eye_contour_landmarks_render_data"
input_stream: "iris_render_data"
output_stream: "IMAGE:output_image"
}

View File

@ -0,0 +1,47 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "clipped_images_from_file_at_24fps_calculators",
deps = [
"//mediapipe/calculators/core:packet_resampler_calculator",
"//mediapipe/calculators/image:opencv_image_encoder_calculator",
"//mediapipe/calculators/image:scale_image_calculator",
"//mediapipe/calculators/tensorflow:pack_media_sequence_calculator",
"//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator",
"//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
],
)
cc_library(
name = "tvl1_flow_and_rgb_from_file_calculators",
deps = [
"//mediapipe/calculators/core:packet_inner_join_calculator",
"//mediapipe/calculators/core:packet_resampler_calculator",
"//mediapipe/calculators/core:sequence_shift_calculator",
"//mediapipe/calculators/image:opencv_image_encoder_calculator",
"//mediapipe/calculators/image:scale_image_calculator",
"//mediapipe/calculators/tensorflow:pack_media_sequence_calculator",
"//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator",
"//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator",
"//mediapipe/calculators/video:flow_to_image_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:tvl1_optical_flow_calculator",
],
)

View File

@ -0,0 +1,78 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Convert the string input into a decoded SequenceExample.
node {
calculator: "StringToSequenceExampleCalculator"
input_side_packet: "STRING:input_sequence_example"
output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
}
# Unpack the data path and clip timing from the SequenceExample.
node {
calculator: "UnpackMediaSequenceCalculator"
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
output_side_packet: "DATA_PATH:input_video_path"
output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
node_options: {
[type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
base_packet_resampler_options: {
frame_rate: 24.0
base_timestamp: 0
}
}
}
}
# Decode the entire video.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:decoded_frames"
}
# Extract the subset of frames we want to keep.
node {
calculator: "PacketResamplerCalculator"
input_stream: "decoded_frames"
output_stream: "sampled_frames"
input_side_packet: "OPTIONS:packet_resampler_options"
}
# Encode the images to store in the SequenceExample.
node {
calculator: "OpenCvImageEncoderCalculator"
input_stream: "sampled_frames"
output_stream: "encoded_frames"
node_options: {
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
quality: 80
}
}
}
# Store the images in the SequenceExample.
node {
calculator: "PackMediaSequenceCalculator"
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
input_stream: "IMAGE:encoded_frames"
}
# Serialize the SequenceExample to a string for storage.
node {
calculator: "StringToSequenceExampleCalculator"
input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
output_side_packet: "STRING:output_sequence_example"
}

View File

@ -0,0 +1,153 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Convert the string input into a decoded SequenceExample.
node {
calculator: "StringToSequenceExampleCalculator"
input_side_packet: "STRING:input_sequence_example"
output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
}
# Unpack the data path and clip timing from the SequenceExample.
node {
calculator: "UnpackMediaSequenceCalculator"
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
output_side_packet: "DATA_PATH:input_video_path"
output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
node_options: {
[type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
base_packet_resampler_options: {
frame_rate: 25.0
base_timestamp: 0
}
}
}
}
# Decode the entire video.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:decoded_frames"
}
# Extract the subset of frames we want to keep.
node {
calculator: "PacketResamplerCalculator"
input_stream: "decoded_frames"
output_stream: "sampled_frames"
input_side_packet: "OPTIONS:packet_resampler_options"
}
# Fit the images into the target size.
node: {
calculator: "ScaleImageCalculator"
input_stream: "sampled_frames"
output_stream: "scaled_frames"
node_options: {
[type.googleapis.com/mediapipe.ScaleImageCalculatorOptions]: {
target_height: 256
preserve_aspect_ratio: true
}
}
}
# Shift the timestamps of packets along a stream.
# With a packet_offset of -1, the first packet will be dropped, the second will
# be output with the timestamp of the first, the third with the timestamp of
# the second, and so on.
node: {
calculator: "SequenceShiftCalculator"
input_stream: "scaled_frames"
output_stream: "shifted_scaled_frames"
node_options: {
[type.googleapis.com/mediapipe.SequenceShiftCalculatorOptions]: {
packet_offset: -1
}
}
}
# Join the original input stream and the one that is shifted by one packet.
node: {
calculator: "PacketInnerJoinCalculator"
input_stream: "scaled_frames"
input_stream: "shifted_scaled_frames"
output_stream: "first_frames"
output_stream: "second_frames"
}
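In other words, joining the stream with its one-packet-shifted copy yields consecutive-frame pairs for the optical-flow node below. A small illustrative sketch of that pairing, outside MediaPipe:

// Illustrates the combined effect of SequenceShiftCalculator (packet_offset: -1)
// and PacketInnerJoinCalculator: frame i is paired with frame i+1, and the last
// frame has no partner, so one pair fewer than the number of frames is emitted.
fn pair_consecutive<T: Clone>(frames: &[T]) -> Vec<(T, T)> {
    frames
        .windows(2)
        .map(|w| (w[0].clone(), w[1].clone()))
        .collect()
}

fn main() {
    let frames = vec!["f0", "f1", "f2", "f3"];
    for (first, second) in pair_consecutive(&frames) {
        println!("FIRST_FRAME={} SECOND_FRAME={}", first, second);
    }
    // prints (f0,f1), (f1,f2), (f2,f3)
}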
# Compute the forward optical flow.
node {
calculator: "Tvl1OpticalFlowCalculator"
input_stream: "FIRST_FRAME:first_frames"
input_stream: "SECOND_FRAME:second_frames"
output_stream: "FORWARD_FLOW:forward_flow"
max_in_flight: 32
}
# Convert an optical flow field to an image frame with 2 channels (v_x and v_y),
# each channel quantized to 0-255.
node: {
calculator: "FlowToImageCalculator"
input_stream: "forward_flow"
output_stream: "flow_frames"
node_options: {
[type.googleapis.com/mediapipe.FlowToImageCalculatorOptions]: {
min_value: -20.0
max_value: 20.0
}
}
}
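A minimal sketch of the quantization described above; the exact clamping and rounding behaviour is an assumption rather than the calculator's documented contract:

// Maps a flow component in [-20.0, 20.0] (min_value/max_value above) onto a
// 0-255 channel value, clamping values outside that range.
fn quantize_flow(v: f32, min_value: f32, max_value: f32) -> u8 {
    let clamped = v.max(min_value).min(max_value);
    let normalized = (clamped - min_value) / (max_value - min_value);
    (normalized * 255.0).round() as u8
}

fn main() {
    for &v in &[-25.0f32, -20.0, 0.0, 20.0] {
        println!("{:>6.1} -> {}", v, quantize_flow(v, -20.0, 20.0));
    }
    // -25.0 and -20.0 map to 0, 0.0 maps to 128, 20.0 maps to 255
}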
# Encode the optical flow images to store in the SequenceExample.
node {
calculator: "OpenCvImageEncoderCalculator"
input_stream: "flow_frames"
output_stream: "encoded_flow_frames"
node_options: {
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
quality: 100
}
}
}
# Encode the rgb images to store in the SequenceExample.
node {
calculator: "OpenCvImageEncoderCalculator"
input_stream: "scaled_frames"
output_stream: "encoded_frames"
node_options: {
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
quality: 100
}
}
}
# Store the images in the SequenceExample.
node {
calculator: "PackMediaSequenceCalculator"
input_stream: "IMAGE:encoded_frames"
input_stream: "FORWARD_FLOW_ENCODED:encoded_flow_frames"
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
}
# Serialize the SequenceExample to a string for storage.
node {
calculator: "StringToSequenceExampleCalculator"
input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
output_side_packet: "STRING:output_sequence_example"
}
num_threads: 32

View File

@ -0,0 +1,94 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
],
)
cc_library(
name = "desktop_tensorflow_calculators",
deps = [
"//mediapipe/calculators/tensorflow:image_frame_to_tensor_calculator",
"//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator",
"//mediapipe/calculators/tensorflow:object_detection_tensors_to_detections_calculator",
"//mediapipe/calculators/tensorflow:tensor_squeeze_dimensions_calculator",
"//mediapipe/calculators/tensorflow:tensorflow_inference_calculator",
"//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
],
)
cc_library(
name = "desktop_tflite_calculators",
deps = [
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
],
)
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
mediapipe_binary_graph(
name = "mobile_cpu_binary_graph",
graph = "object_detection_mobile_cpu.pbtxt",
output_name = "mobile_cpu.binarypb",
deps = [":mobile_calculators"],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph",
graph = "object_detection_mobile_gpu.pbtxt",
output_name = "mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,174 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/desktop/object_detection:object_detection_cpu.
# Images on CPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, both unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:throttled_input_video"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on CPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "TENSORS:image_tensor"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:image_tensor"
output_stream: "TENSORS:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
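As a sanity check on the options above and on the num_boxes value used by the decoder below, the anchor count they imply can be recomputed by hand. A minimal Rust sketch, assuming the usual SSD layout in which the lowest layer gets 3 anchors per cell and every other layer gets 6 (verify against the calculator's implementation):

// Recomputes the expected anchor count for the SsdAnchorsCalculator options
// above: 320x320 input, strides 16..512, reduce_boxes_in_lowest_layer: true.
fn main() {
    let input = 320u32;
    let strides = [16u32, 32, 64, 128, 256, 512];
    let total: u32 = strides
        .iter()
        .enumerate()
        .map(|(layer, &stride)| {
            let cells = (input + stride - 1) / stride; // ceil(320 / stride)
            let anchors_per_cell = if layer == 0 { 3 } else { 6 };
            cells * cells * anchors_per_cell
        })
        .sum();
    // 20*20*3 + 10*10*6 + 5*5*6 + 3*3*6 + 2*2*6 + 1*1*6 = 2034, matching
    // num_boxes in the TfLiteTensorsToDetectionsCalculator node below.
    println!("total anchors: {}", total);
    assert_eq!(total, 2034);
}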
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
sigmoid_score: true
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
max_num_detections: 3
overlap_type: INTERSECTION_OVER_UNION
return_empty_detections: true
}
}
}
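A minimal sketch of the intersection-over-union test implied by overlap_type and min_suppression_threshold above (illustrative only, not the calculator's code):

// Boxes are (xmin, ymin, xmax, ymax) in normalized coordinates; a candidate is
// suppressed when its IoU with an already-kept box exceeds 0.4
// (min_suppression_threshold).
fn iou(a: (f32, f32, f32, f32), b: (f32, f32, f32, f32)) -> f32 {
    let ix = (a.2.min(b.2) - a.0.max(b.0)).max(0.0);
    let iy = (a.3.min(b.3) - a.1.max(b.1)).max(0.0);
    let inter = ix * iy;
    let union = (a.2 - a.0) * (a.3 - a.1) + (b.2 - b.0) * (b.3 - b.1) - inter;
    if union > 0.0 { inter / union } else { 0.0 }
}

fn main() {
    let kept = (0.10, 0.10, 0.50, 0.50);
    let candidate = (0.15, 0.15, 0.55, 0.55);
    let suppressed = iou(kept, candidate) > 0.4;
    println!("IoU = {:.3}, suppressed = {}", iou(kept, candidate), suppressed);
}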
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,130 @@
# MediaPipe graph that performs object detection on desktop with TensorFlow
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/object_detection:object_detection_tensorflow.
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Converts the input image into an image tensor as a tensorflow::Tensor.
node {
calculator: "ImageFrameToTensorCalculator"
input_stream: "input_video"
output_stream: "image_tensor"
}
# Generates a single side packet containing a TensorFlow session from a saved
# model. The directory path that contains the saved model is specified in the
# saved_model_path option, and the name of the saved model file has to be
# "saved_model.pb".
node {
calculator: "TensorFlowSessionFromSavedModelCalculator"
output_side_packet: "SESSION:object_detection_session"
node_options: {
[type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: {
saved_model_path: "mediapipe/models/object_detection_saved_model"
}
}
}
# Runs a TensorFlow session (specified as an input side packet) that takes an
# image tensor and outputs multiple tensors that describe the objects detected
# in the image. The batch_size option is set to 1 to disable batching entirely.
# Note that the particular TensorFlow model used in this session handles image
# scaling internally before the object-detection inference, and therefore no
# additional calculator for image transformation is needed in this MediaPipe
# graph.
node: {
calculator: "TensorFlowInferenceCalculator"
input_side_packet: "SESSION:object_detection_session"
input_stream: "INPUTS:image_tensor"
output_stream: "DETECTION_BOXES:detection_boxes_tensor"
output_stream: "DETECTION_CLASSES:detection_classes_tensor"
output_stream: "DETECTION_SCORES:detection_scores_tensor"
output_stream: "NUM_DETECTIONS:num_detections_tensor"
node_options: {
[type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: {
batch_size: 1
}
}
}
# Decodes the detection tensors from the TensorFlow model into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "ObjectDetectionTensorsToDetectionsCalculator"
input_stream: "BOXES:detection_boxes_tensor"
input_stream: "SCORES:detection_scores_tensor"
input_stream: "CLASSES:detection_classes_tensor"
input_stream: "NUM_DETECTIONS:num_detections_tensor"
output_stream: "DETECTIONS:detections"
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
min_score_threshold: 0.6
max_num_detections: 10
overlap_type: INTERSECTION_OVER_UNION
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}
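The NonMaxSuppressionCalculator above keeps only a handful of boxes by greedily suppressing detections whose intersection-over-union with an already-kept box exceeds min_suppression_threshold. A minimal standalone sketch of that policy, assuming the 0.4 IoU threshold and overlap type from the options above (not the calculator's actual implementation):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Det { float xmin, ymin, xmax, ymax, score; };

// Intersection-over-union of two axis-aligned boxes.
float IoU(const Det& a, const Det& b) {
  const float ix = std::max(0.f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
  const float iy = std::max(0.f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
  const float inter = ix * iy;
  const float area_a = (a.xmax - a.xmin) * (a.ymax - a.ymin);
  const float area_b = (b.xmax - b.xmin) * (b.ymax - b.ymin);
  return inter / (area_a + area_b - inter + 1e-6f);
}

// Greedy NMS: keep the highest-scoring box, drop everything that overlaps it
// by more than the threshold, repeat until the output cap is reached.
std::vector<Det> Nms(std::vector<Det> dets, float iou_thresh, size_t max_out) {
  std::sort(dets.begin(), dets.end(),
            [](const Det& a, const Det& b) { return a.score > b.score; });
  std::vector<Det> kept;
  for (const Det& d : dets) {
    bool suppressed = false;
    for (const Det& k : kept)
      if (IoU(d, k) > iou_thresh) { suppressed = true; break; }
    if (!suppressed) kept.push_back(d);
    if (kept.size() >= max_out) break;
  }
  return kept;
}

int main() {
  std::vector<Det> dets = {{0.0f, 0.0f, 1.0f, 1.0f, 0.9f},
                           {0.05f, 0.0f, 1.0f, 1.0f, 0.8f},  // overlaps the first
                           {2.0f, 2.0f, 3.0f, 3.0f, 0.7f}};
  const std::vector<Det> kept = Nms(dets, 0.4f, 10);
  std::printf("kept %zu of %zu detections\n", kept.size(), dets.size());
}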

View File

@ -0,0 +1,180 @@
# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/object_detection:object_detection_tflite.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:input_video"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on CPU into an image tensor as a
# TfLiteTensor. The zero_center option is set to true to normalize the
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "TENSORS:image_tensor"
node_options: {
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
zero_center: true
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:image_tensor"
output_stream: "TENSORS:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
min_score_threshold: 0.6
max_num_detections: 5
overlap_type: INTERSECTION_OVER_UNION
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}
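The SsdAnchorsCalculator options above fully determine the anchor grid: layer scales are interpolated between min_scale and max_scale, each stride defines a grid of cell centers (offset by half a cell), and every aspect ratio contributes one anchor per cell. The sketch below shows that construction for a single layer; the interpolation formula is the standard SSD one and an assumption about this calculator's internals, and it omits details such as reduce_boxes_in_lowest_layer and extra interpolated scales.

#include <cmath>
#include <vector>

struct Anchor { float x_center, y_center, w, h; };  // normalized to [0, 1]

// One anchor per aspect ratio on every cell of the stride-defined grid of a
// given layer, with the layer scale linearly interpolated between
// min_scale (0.2) and max_scale (0.95) as in the options above.
std::vector<Anchor> AnchorsForLayer(int layer, int num_layers, int stride,
                                    int input_size,
                                    const std::vector<float>& aspect_ratios) {
  const float min_scale = 0.2f, max_scale = 0.95f;
  const float scale =
      min_scale + (max_scale - min_scale) * layer / (num_layers - 1.f);
  const int cells = input_size / stride;  // e.g. 320 / 16 = 20 cells per side
  std::vector<Anchor> anchors;
  for (int y = 0; y < cells; ++y) {
    for (int x = 0; x < cells; ++x) {
      const float cx = (x + 0.5f) * stride / input_size;  // anchor_offset_x 0.5
      const float cy = (y + 0.5f) * stride / input_size;  // anchor_offset_y 0.5
      for (const float ar : aspect_ratios) {
        anchors.push_back(
            {cx, cy, scale * std::sqrt(ar), scale / std::sqrt(ar)});
      }
    }
  }
  return anchors;
}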

View File

@ -0,0 +1,193 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectioncpu and
# mediapipe/examples/ios/objectdetectioncpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Transfers the input image from GPU to CPU memory for the purpose of
# demonstrating a CPU-based pipeline. Note that the input image on GPU has the
# origin defined at the bottom-left corner (OpenGL convention). As a result,
# the transferred image on CPU also shares the same representation.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "input_video"
output_stream: "input_video_cpu"
}
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video_cpu"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video_cpu"
}
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:throttled_input_video_cpu"
output_stream: "IMAGE:transformed_input_video_cpu"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on CPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "TENSORS:image_tensor"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:image_tensor"
output_stream: "TENSORS:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
sigmoid_score: true
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
max_num_detections: 3
overlap_type: INTERSECTION_OVER_UNION
return_empty_detections: true
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video_cpu"
input_stream: "render_data"
output_stream: "IMAGE:output_video_cpu"
}
# Transfers the annotated image from CPU back to GPU memory, to be sent out of
# the graph.
node: {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "output_video_cpu"
output_stream: "output_video"
}
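The FlowLimiterCalculator comment above describes classic back-pressure: at most one frame may be in flight between the limiter and TfLiteTensorsToDetectionsCalculator, frames arriving in the meantime are dropped, and the FINISHED back edge re-opens the gate. A toy standalone sketch of that policy (plain C++, not MediaPipe's actual scheduler or calculator code):

#include <cstdio>
#include <optional>

// Toy back-pressure gate: admit a frame only when nothing is in flight, drop
// frames that arrive while the pipeline is busy, and re-open the gate when
// the finished signal (the FINISHED:detections back edge) comes back.
class FlowLimiter {
 public:
  std::optional<int> Admit(int frame_id) {
    if (in_flight_) { ++dropped_; return std::nullopt; }
    in_flight_ = true;
    return frame_id;
  }
  void Finished() { in_flight_ = false; }
  int dropped() const { return dropped_; }

 private:
  bool in_flight_ = false;
  int dropped_ = 0;
};

int main() {
  FlowLimiter limiter;
  limiter.Admit(0);    // frame 0 enters the detection path
  limiter.Admit(1);    // dropped: frame 0 is still being processed
  limiter.Finished();  // detections for frame 0 arrived on the back edge
  limiter.Admit(2);    // admitted
  std::printf("dropped %d frame(s)\n", limiter.dropped());
}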

View File

@ -0,0 +1,175 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectiongpu and
# mediapipe/examples/ios/objectdetectiongpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS_GPU:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS_GPU:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
sigmoid_score: true
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
max_num_detections: 3
overlap_type: INTERSECTION_OVER_UNION
return_empty_detections: true
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE_GPU:output_video"
}
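The ImageTransformationCalculator comment above points out that the default STRETCH scale mode maps the whole input to the whole 320x320 output, so the aspect ratio is not preserved and objects may be deformed. A tiny nearest-neighbour sketch of that coordinate mapping for a single-channel image (illustrative only; the calculator itself runs on the GPU):

#include <cstdint>
#include <vector>

// STRETCH-style resize: every output pixel samples the input at the same
// relative position, with x and y scaled independently, so a non-square
// input is squeezed into dst_w x dst_h and shapes are deformed accordingly.
std::vector<std::uint8_t> StretchResize(const std::vector<std::uint8_t>& src,
                                        int src_w, int src_h,
                                        int dst_w, int dst_h) {
  std::vector<std::uint8_t> dst(dst_w * dst_h);
  for (int y = 0; y < dst_h; ++y) {
    for (int x = 0; x < dst_w; ++x) {
      const int sx = x * src_w / dst_w;  // independent horizontal scaling
      const int sy = y * src_h / dst_h;  // independent vertical scaling
      dst[y * dst_w + x] = src[sy * src_w + sx];
    }
  }
  return dst;
}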

View File

@ -0,0 +1,80 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
exports_files(glob([
"*.pbtxt",
]))
cc_library(
name = "mobile_calculators",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_cropping_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
"//mediapipe/modules/objectron:objectron_gpu",
],
)
cc_library(
name = "mobile_calculators_1stage",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/core:packet_resampler_calculator",
"//mediapipe/calculators/image:image_cropping_calculator",
"//mediapipe/gpu:gl_scaler_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
"//mediapipe/modules/objectron:objectron_detection_1stage_gpu",
"//mediapipe/modules/objectron:objectron_tracking_1stage_gpu",
],
)
cc_library(
name = "desktop_cpu_calculators",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/graphs/object_detection_3d/subgraphs:renderer_cpu",
"//mediapipe/modules/objectron:objectron_cpu",
],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph",
graph = "object_occlusion_tracking.pbtxt",
output_name = "mobile_gpu_binary_graph.binarypb",
visibility = ["//visibility:public"],
deps = [":mobile_calculators"],
)
mediapipe_binary_graph(
name = "mobile_gpu_1stage_binary_graph",
graph = "object_occlusion_tracking_1stage.pbtxt",
output_name = "mobile_gpu_1stage_binary_graph.binarypb",
visibility = ["//visibility:public"],
deps = [":mobile_calculators_1stage"],
)

View File

@ -0,0 +1,113 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_proto_library(
name = "gl_animation_overlay_calculator_proto",
srcs = ["gl_animation_overlay_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_proto_library(
name = "annotations_to_model_matrices_calculator_proto",
srcs = ["annotations_to_model_matrices_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_proto_library(
name = "model_matrix_proto",
srcs = ["model_matrix.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_proto_library(
name = "annotations_to_render_data_calculator_proto",
srcs = ["annotations_to_render_data_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util:color_proto",
],
)
cc_library(
name = "gl_animation_overlay_calculator",
srcs = ["gl_animation_overlay_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":gl_animation_overlay_calculator_cc_proto",
":model_matrix_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:shader_util",
"//mediapipe/modules/objectron/calculators:camera_parameters_cc_proto",
"//mediapipe/util/android:asset_manager_util",
],
alwayslink = 1,
)
cc_library(
name = "annotations_to_model_matrices_calculator",
srcs = ["annotations_to_model_matrices_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotations_to_model_matrices_calculator_cc_proto",
":model_matrix_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/modules/objectron/calculators:annotation_cc_proto",
"//mediapipe/modules/objectron/calculators:box",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@eigen_archive//:eigen3",
],
alwayslink = 1,
)
cc_library(
name = "annotations_to_render_data_calculator",
srcs = ["annotations_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotations_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/modules/objectron/calculators:annotation_cc_proto",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)

View File

@ -0,0 +1,215 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "Eigen/Core"
#include "Eigen/Dense"
#include "Eigen/Geometry"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_model_matrices_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
#include "mediapipe/modules/objectron/calculators/box.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe {
namespace {
constexpr char kAnnotationTag[] = "ANNOTATIONS";
constexpr char kModelMatricesTag[] = "MODEL_MATRICES";
using Matrix3fRM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
} // namespace
// Converts the box prediction from Objectron Model to the Model matrices
// to be rendered.
//
// Input:
// ANNOTATIONS - Frame annotations with lifted 3D points, the points are in
// Objectron coordinate system.
// Output:
// MODEL_MATRICES - Result ModelMatrices, in OpenGL coordinate system.
//
// Usage example:
// node {
// calculator: "AnnotationsToModelMatricesCalculator"
// input_stream: "ANNOTATIONS:objects"
// output_stream: "MODEL_MATRICES:model_matrices"
//}
class AnnotationsToModelMatricesCalculator : public CalculatorBase {
public:
AnnotationsToModelMatricesCalculator() {}
~AnnotationsToModelMatricesCalculator() override {}
AnnotationsToModelMatricesCalculator(
const AnnotationsToModelMatricesCalculator&) = delete;
AnnotationsToModelMatricesCalculator& operator=(
const AnnotationsToModelMatricesCalculator&) = delete;
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
absl::Status GetModelMatricesForAnnotations(
const FrameAnnotation& annotations,
TimedModelMatrixProtoList* model_matrix_list);
AnnotationsToModelMatricesCalculatorOptions options_;
Eigen::Vector3f model_scale_;
Matrix4fRM model_transformation_;
};
REGISTER_CALCULATOR(AnnotationsToModelMatricesCalculator);
absl::Status AnnotationsToModelMatricesCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
if (cc->Inputs().HasTag(kAnnotationTag)) {
cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
}
if (cc->Outputs().HasTag(kModelMatricesTag)) {
cc->Outputs().Tag(kModelMatricesTag).Set<TimedModelMatrixProtoList>();
}
if (cc->InputSidePackets().HasTag("MODEL_SCALE")) {
cc->InputSidePackets().Tag("MODEL_SCALE").Set<float[]>();
}
if (cc->InputSidePackets().HasTag("MODEL_TRANSFORMATION")) {
cc->InputSidePackets().Tag("MODEL_TRANSFORMATION").Set<float[]>();
}
return absl::OkStatus();
}
absl::Status AnnotationsToModelMatricesCalculator::Open(CalculatorContext* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag));
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<AnnotationsToModelMatricesCalculatorOptions>();
if (cc->InputSidePackets().HasTag("MODEL_SCALE")) {
model_scale_ = Eigen::Map<const Eigen::Vector3f>(
cc->InputSidePackets().Tag("MODEL_SCALE").Get<float[]>());
} else if (options_.model_scale_size() == 3) {
model_scale_ =
Eigen::Map<const Eigen::Vector3f>(options_.model_scale().data());
} else {
model_scale_.setOnes();
}
if (cc->InputSidePackets().HasTag("MODEL_TRANSFORMATION")) {
model_transformation_ = Eigen::Map<const Matrix4fRM>(
cc->InputSidePackets().Tag("MODEL_TRANSFORMATION").Get<float[]>());
} else if (options_.model_transformation_size() == 16) {
model_transformation_ =
Eigen::Map<const Matrix4fRM>(options_.model_transformation().data());
} else {
model_transformation_.setIdentity();
}
return absl::OkStatus();
}
absl::Status AnnotationsToModelMatricesCalculator::Process(
CalculatorContext* cc) {
auto model_matrices = std::make_unique<TimedModelMatrixProtoList>();
const FrameAnnotation& annotations =
cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
if (!GetModelMatricesForAnnotations(annotations, model_matrices.get()).ok()) {
return absl::InvalidArgumentError("Error in GetModelMatricesForBoxes");
}
cc->Outputs()
.Tag(kModelMatricesTag)
.Add(model_matrices.release(), cc->InputTimestamp());
return absl::OkStatus();
}
absl::Status
AnnotationsToModelMatricesCalculator::GetModelMatricesForAnnotations(
const FrameAnnotation& annotations,
TimedModelMatrixProtoList* model_matrix_list) {
if (model_matrix_list == nullptr) {
return absl::InvalidArgumentError("model_matrix_list is nullptr");
}
model_matrix_list->clear_model_matrix();
for (const auto& object : annotations.annotations()) {
TimedModelMatrixProto* model_matrix = model_matrix_list->add_model_matrix();
model_matrix->set_id(object.object_id());
// Get object rotation, translation and scale.
const auto object_rotation =
Eigen::Map<const Matrix3fRM>(object.rotation().data());
const auto object_translation =
Eigen::Map<const Eigen::Vector3f>(object.translation().data());
const auto object_scale =
Eigen::Map<const Eigen::Vector3f>(object.scale().data());
// Compose object transformation matrix.
Matrix4fRM object_transformation;
object_transformation.setIdentity();
object_transformation.topLeftCorner<3, 3>() = object_rotation;
object_transformation.topRightCorner<3, 1>() = object_translation;
Matrix4fRM model_view;
Matrix4fRM objectron_model;
// The reference view is
//
// ref << 0., 0., 1., 0.,
// -1., 0., 0., 0.,
// 0., -1., 0., 0.,
// 0., 0., 0., 1.;
// We have objectron_model * model = model_view, to get objectron_model:
// objectron_model = model_view * model^-1
// clang-format off
objectron_model << 1.0, 0.0, 0.0, 0.0,
0.0, -1., 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
0.0, 0.0, 0.0, 1.0;
// clang-format on
// Re-scale the CAD model to the scale of the estimated bounding box.
const Eigen::Vector3f scale = model_scale_.cwiseProduct(object_scale);
const Matrix4fRM model =
model_transformation_.array().colwise() * scale.homogeneous().array();
// Finally compute the model_view matrix.
model_view = objectron_model * object_transformation * model;
for (int i = 0; i < model_view.rows(); ++i) {
for (int j = 0; j < model_view.cols(); ++j) {
model_matrix->add_matrix_entries(model_view(i, j));
}
}
}
return absl::OkStatus();
}
} // namespace mediapipe
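For readers skimming Process() above, the matrix algebra reduces to: scale the CAD-space model transform row-wise by (sx, sy, sz, 1), wrap the predicted rotation and translation into a homogeneous [R|t] matrix, and pre-multiply by a Y-axis flip to move from the Objectron coordinate system into the renderer's. A standalone Eigen sketch of that composition (same math, simplified names; not a drop-in replacement for the calculator):

#include <Eigen/Dense>
#include <iostream>

using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;

// model_view = flip_y * [R|t] * (diag(sx, sy, sz, 1) * model_transformation),
// mirroring GetModelMatricesForAnnotations() above.
Matrix4fRM ComposeModelView(const Eigen::Matrix3f& rotation,
                            const Eigen::Vector3f& translation,
                            const Eigen::Vector3f& scale,
                            const Matrix4fRM& model_transformation) {
  Matrix4fRM object_transformation = Matrix4fRM::Identity();
  object_transformation.topLeftCorner<3, 3>() = rotation;
  object_transformation.topRightCorner<3, 1>() = translation;

  Matrix4fRM model = model_transformation;
  for (int i = 0; i < 3; ++i) model.row(i) *= scale[i];  // row-wise re-scaling

  Matrix4fRM flip_y = Matrix4fRM::Identity();
  flip_y(1, 1) = -1.f;  // Objectron -> renderer axis flip, as in Process()

  return flip_y * object_transformation * model;
}

int main() {
  const Matrix4fRM mv =
      ComposeModelView(Eigen::Matrix3f::Identity(),
                       Eigen::Vector3f(0.f, 0.f, -1.f),
                       Eigen::Vector3f(1.f, 1.f, 1.f), Matrix4fRM::Identity());
  std::cout << mv << "\n";  // entries are emitted row by row into the proto
}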

View File

@ -0,0 +1,33 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message AnnotationsToModelMatricesCalculatorOptions {
extend CalculatorOptions {
optional AnnotationsToModelMatricesCalculatorOptions ext = 290166283;
}
// Vector of size 3 indicating the scale vector [x, y, z]. We will re-scale
// the model size with this vector. (Defaults to [1., 1., 1.])
repeated float model_scale = 1;
// 4x4 Row major matrix denoting the transformation from the model to the
// Deep Pursuit 3D coordinate system (where front is +z, and up is +y).
repeated float model_transformation = 2;
}

View File

@ -0,0 +1,271 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_render_data_calculator.pb.h"
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
namespace mediapipe {
namespace {
constexpr char kAnnotationTag[] = "ANNOTATIONS";
constexpr char kRenderDataTag[] = "RENDER_DATA";
constexpr char kKeypointLabel[] = "KEYPOINT";
constexpr int kMaxLandmarkThickness = 18;
inline void SetColor(RenderAnnotation* annotation, const Color& color) {
annotation->mutable_color()->set_r(color.r());
annotation->mutable_color()->set_g(color.g());
annotation->mutable_color()->set_b(color.b());
}
// Remap x from range [lo hi] to range [0 1] then multiply by scale.
inline float Remap(float x, float lo, float hi, float scale) {
return (x - lo) / (hi - lo + 1e-6) * scale;
}
inline void GetMinMaxZ(const FrameAnnotation& annotations, float* z_min,
float* z_max) {
*z_min = std::numeric_limits<float>::max();
*z_max = std::numeric_limits<float>::min();
// Use a global depth scale for all the objects in the scene
for (const auto& object : annotations.annotations()) {
for (const auto& keypoint : object.keypoints()) {
*z_min = std::min(keypoint.point_2d().depth(), *z_min);
*z_max = std::max(keypoint.point_2d().depth(), *z_max);
}
}
}
void SetColorSizeValueFromZ(float z, float z_min, float z_max,
RenderAnnotation* render_annotation) {
const int color_value = 255 - static_cast<int>(Remap(z, z_min, z_max, 255));
::mediapipe::Color color;
color.set_r(color_value);
color.set_g(color_value);
color.set_b(color_value);
SetColor(render_annotation, color);
const int thickness = static_cast<int>((1.f - Remap(z, z_min, z_max, 1)) *
kMaxLandmarkThickness);
render_annotation->set_thickness(thickness);
}
} // namespace
// A calculator that converts FrameAnnotation proto to RenderData proto for
// visualization. The input should be the FrameAnnotation proto buffer. It is
// also possible to specify the connections between landmarks.
//
// Example config:
// node {
// calculator: "AnnotationsToRenderDataCalculator"
// input_stream: "ANNOTATIONS:annotations"
// output_stream: "RENDER_DATA:render_data"
// options {
// [AnnotationsToRenderDataCalculator.ext] {
// landmark_connections: [0, 1, 1, 2]
// landmark_color { r: 0 g: 255 b: 0 }
// connection_color { r: 0 g: 255 b: 0 }
// thickness: 4.0
// }
// }
// }
class AnnotationsToRenderDataCalculator : public CalculatorBase {
public:
AnnotationsToRenderDataCalculator() {}
~AnnotationsToRenderDataCalculator() override {}
AnnotationsToRenderDataCalculator(const AnnotationsToRenderDataCalculator&) =
delete;
AnnotationsToRenderDataCalculator& operator=(
const AnnotationsToRenderDataCalculator&) = delete;
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
static void SetRenderAnnotationColorThickness(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderAnnotation* render_annotation);
static RenderAnnotation* AddPointRenderData(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderData* render_data);
// Add a command to draw a line in the rendering queue. The line is drawn from
// (start_x, start_y) to (end_x, end_y). The input x,y can either be in pixel
// or normalized coordinate [0, 1] as indicated by the normalized flag.
static void AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
RenderData* render_data);
// Same as above function. Instead of using color data to render the line, it
// re-colors the line according to the two depth value. gray_val1 is the color
// of the starting point and gray_val2 is the color of the ending point. The
// line is colored using gradient color from gray_val1 to gray_val2. The
// gray_val ranges from [0 to 255] for black to white.
static void AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
int gray_val1, int gray_val2, RenderData* render_data);
AnnotationsToRenderDataCalculatorOptions options_;
};
REGISTER_CALCULATOR(AnnotationsToRenderDataCalculator);
absl::Status AnnotationsToRenderDataCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
if (cc->Inputs().HasTag(kAnnotationTag)) {
cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
}
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
return absl::OkStatus();
}
absl::Status AnnotationsToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<AnnotationsToRenderDataCalculatorOptions>();
return absl::OkStatus();
}
absl::Status AnnotationsToRenderDataCalculator::Process(CalculatorContext* cc) {
auto render_data = absl::make_unique<RenderData>();
bool visualize_depth = options_.visualize_landmark_depth();
float z_min = 0.f;
float z_max = 0.f;
if (cc->Inputs().HasTag(kAnnotationTag)) {
const auto& annotations =
cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
<< "Number of entries in landmark connections must be a multiple of 2";
if (visualize_depth) {
GetMinMaxZ(annotations, &z_min, &z_max);
// Only change rendering if there are actually z values other than 0.
visualize_depth &= ((z_max - z_min) > 1e-3);
}
for (const auto& object : annotations.annotations()) {
for (const auto& keypoint : object.keypoints()) {
auto* keypoint_data_render =
AddPointRenderData(options_, render_data.get());
auto* point = keypoint_data_render->mutable_point();
if (visualize_depth) {
SetColorSizeValueFromZ(keypoint.point_2d().depth(), z_min, z_max,
keypoint_data_render);
}
point->set_normalized(true);
point->set_x(keypoint.point_2d().x());
point->set_y(keypoint.point_2d().y());
}
// Add edges
for (int i = 0; i < options_.landmark_connections_size(); i += 2) {
const auto& ld0 =
object.keypoints(options_.landmark_connections(i)).point_2d();
const auto& ld1 =
object.keypoints(options_.landmark_connections(i + 1)).point_2d();
const bool normalized = true;
if (visualize_depth) {
const int gray_val1 =
255 - static_cast<int>(Remap(ld0.depth(), z_min, z_max, 255));
const int gray_val2 =
255 - static_cast<int>(Remap(ld1.depth(), z_min, z_max, 255));
AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
options_, normalized, gray_val1, gray_val2,
render_data.get());
} else {
AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
options_, normalized, render_data.get());
}
}
}
}
cc->Outputs()
.Tag(kRenderDataTag)
.Add(render_data.release(), cc->InputTimestamp());
return absl::OkStatus();
}
void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
int gray_val1, int gray_val2, RenderData* render_data) {
auto* connection_annotation = render_data->add_render_annotations();
RenderAnnotation::GradientLine* line =
connection_annotation->mutable_gradient_line();
line->set_x_start(start_x);
line->set_y_start(start_y);
line->set_x_end(end_x);
line->set_y_end(end_y);
line->set_normalized(normalized);
line->mutable_color1()->set_r(gray_val1);
line->mutable_color1()->set_g(gray_val1);
line->mutable_color1()->set_b(gray_val1);
line->mutable_color2()->set_r(gray_val2);
line->mutable_color2()->set_g(gray_val2);
line->mutable_color2()->set_b(gray_val2);
connection_annotation->set_thickness(options.thickness());
}
void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
RenderData* render_data) {
auto* connection_annotation = render_data->add_render_annotations();
RenderAnnotation::Line* line = connection_annotation->mutable_line();
line->set_x_start(start_x);
line->set_y_start(start_y);
line->set_x_end(end_x);
line->set_y_end(end_y);
line->set_normalized(normalized);
SetColor(connection_annotation, options.connection_color());
connection_annotation->set_thickness(options.thickness());
}
RenderAnnotation* AnnotationsToRenderDataCalculator::AddPointRenderData(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderData* render_data) {
auto* landmark_data_annotation = render_data->add_render_annotations();
landmark_data_annotation->set_scene_tag(kKeypointLabel);
SetRenderAnnotationColorThickness(options, landmark_data_annotation);
return landmark_data_annotation;
}
void AnnotationsToRenderDataCalculator::SetRenderAnnotationColorThickness(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderAnnotation* render_annotation) {
SetColor(render_annotation, options.landmark_color());
render_annotation->set_thickness(options.thickness());
}
} // namespace mediapipe
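The depth-aware rendering above boils down to Remap(): a keypoint's z value is remapped over the scene's [z_min, z_max] range, then turned into a gray level (nearer is brighter) and a thickness (nearer is thicker). A tiny standalone sketch with concrete numbers, using the same formulas as SetColorSizeValueFromZ(); the example depth range is made up for illustration:

#include <cstdio>
#include <initializer_list>

// Same remapping as above: x in [lo, hi] -> [0, scale].
float Remap(float x, float lo, float hi, float scale) {
  return (x - lo) / (hi - lo + 1e-6f) * scale;
}

int main() {
  const float z_min = 0.5f, z_max = 2.0f;  // per-frame depth range (example)
  const int kMaxThickness = 18;            // kMaxLandmarkThickness above
  for (const float z : {0.5f, 1.25f, 2.0f}) {
    const int gray = 255 - static_cast<int>(Remap(z, z_min, z_max, 255));
    const int thickness =
        static_cast<int>((1.f - Remap(z, z_min, z_max, 1)) * kMaxThickness);
    std::printf("z=%.2f -> gray %3d, thickness %2d\n", z, gray, thickness);
  }
}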

View File

@ -0,0 +1,43 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";
message AnnotationsToRenderDataCalculatorOptions {
extend CalculatorOptions {
optional AnnotationsToRenderDataCalculatorOptions ext = 267644238;
}
// Specifies the landmarks to be connected in the drawing. For example, the
// landmark_connections value of [0, 1, 1, 2] specifies two connections: one
// that connects landmarks with index 0 and 1, and another that connects
// landmarks with index 1 and 2.
repeated int32 landmark_connections = 1;
// Color of the landmarks.
optional Color landmark_color = 2;
// Color of the connections.
optional Color connection_color = 3;
// Thickness of the drawing of landmarks and connections.
optional double thickness = 4 [default = 1.0];
// Change color and size of rendered landmarks based on its z value.
optional bool visualize_landmark_depth = 5 [default = true];
}

View File

@ -0,0 +1,947 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(__ANDROID__)
#include "mediapipe/util/android/asset_manager_util.h"
#else
#include <fstream>
#include <iostream>
#endif
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/shader_util.h"
#include "mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/modules/objectron/calculators/camera_parameters.pb.h"
namespace mediapipe {
namespace {
#if defined(GL_DEBUG)
#define GLCHECK(command) \
command; \
if (int err = glGetError()) LOG(ERROR) << "GL error detected: " << err;
#else
#define GLCHECK(command) command
#endif
// For ease of use, we prefer ImageFrame on Android and GpuBuffer otherwise.
#if defined(__ANDROID__)
typedef ImageFrame AssetTextureFormat;
#else
typedef GpuBuffer AssetTextureFormat;
#endif
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, ATTRIB_NORMAL, NUM_ATTRIBUTES };
static const int kNumMatrixEntries = 16;
// Hard-coded MVP Matrix for testing.
static const float kModelMatrix[] = {0.83704215, -0.36174262, 0.41049102, 0.0,
0.06146407, 0.8076706, 0.5864218, 0.0,
-0.54367524, -0.4656292, 0.69828844, 0.0,
0.0, 0.0, -98.64117, 1.0};
// Loads a texture from an input side packet, and streams in an animation file
// from a filename given in another input side packet, and renders the animation
// over the screen according to the input timestamp and desired animation FPS.
//
// Inputs:
// VIDEO (GpuBuffer, optional):
// If provided, the input buffer will be assumed to be unique, and will be
// consumed by this calculator and rendered to directly. The output video
// buffer will then be the released reference to the input video buffer.
// MODEL_MATRICES (TimedModelMatrixProtoList, optional):
// If provided, will set the model matrices for the objects to be rendered
// during future rendering calls.
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
// Texture to use with animation file. Texture is REQUIRED to be passed into
// the calculator, but can be passed in as a Side Packet OR Input Stream.
//
// Input side packets:
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
// Texture to use with animation file. Texture is REQUIRED to be passed into
// the calculator, but can be passed in as a Side Packet OR Input Stream.
// ANIMATION_ASSET (String, required):
// Path of animation file to load and render. The file format expects an
// arbitrary number of animation frames, concatenated directly together,
// with each animation frame looking like:
// HEADER
// VERTICES
// TEXTURE_COORDS
// INDICES
// The header consists of 3 int32 lengths, the sizes of the vertex data,
// the texcoord data, and the index data, respectively. Let us call those
// N1, N2, and N3. Then we expect N1 float32's for vertex information
// (x1,y1,z1,x2,y2,z2,etc.), followed by N2 float32's for texcoord
// information (u1,v1,u2,v2,u3,v3,etc.), followed by N3 shorts/int16's
// for triangle indices (a1,b1,c1,a2,b2,c2,etc.).
// CAMERA_PARAMETERS_PROTO_STRING (String, optional):
// Serialized proto std::string of CameraParametersProto. We need this to
// get the right aspect ratio and field of view.
// Options:
// aspect_ratio: the ratio between the rendered image width and height.
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
// is provided.
// vertical_fov_degrees: vertical field of view in degrees.
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
// is provided.
// z_clipping_plane_near: near plane value for z-clipping.
// z_clipping_plane_far: far plane value for z-clipping.
// animation_speed_fps: speed at which to cycle through animation frames (in
// frames per second).
//
// Outputs:
// OUTPUT, or index 0 (GpuBuffer):
// Frames filled with the given texture.
// Simple helper-struct for containing the parsed geometry data from a 3D
// animation frame for rendering.
struct TriangleMesh {
int index_count = 0; // Needed for glDrawElements rendering call
std::unique_ptr<float[]> normals = nullptr;
std::unique_ptr<float[]> vertices = nullptr;
std::unique_ptr<float[]> texture_coords = nullptr;
std::unique_ptr<int16[]> triangle_indices = nullptr;
};
typedef std::unique_ptr<float[]> ModelMatrix;
} // namespace
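The ANIMATION_ASSET comment above pins down the file layout precisely: each frame is a header of three int32 lengths (N1, N2, N3) followed by N1 float vertex values, N2 float texture coordinates and N3 int16 triangle indices, with frames concatenated back to back. The fragment below is an illustrative desktop-style reader for one such frame using std::ifstream; it is a sketch of the described format, not the calculator's LoadAnimation/LoadAnimationAndroid implementation.

#include <cstdint>
#include <fstream>
#include <vector>

struct FrameData {
  std::vector<float> vertices;        // x1,y1,z1,x2,y2,z2,...
  std::vector<float> texture_coords;  // u1,v1,u2,v2,...
  std::vector<int16_t> indices;       // a1,b1,c1,a2,b2,c2,...
};

// Reads one animation frame in the layout documented above. Returns false on
// EOF or a short read, which also ends a frame-streaming loop naturally.
bool ReadFrame(std::ifstream& in, FrameData* frame) {
  int32_t lengths[3];  // N1 (vertices), N2 (texcoords), N3 (indices)
  if (!in.read(reinterpret_cast<char*>(lengths), sizeof(lengths))) return false;
  frame->vertices.resize(lengths[0]);
  frame->texture_coords.resize(lengths[1]);
  frame->indices.resize(lengths[2]);
  return in.read(reinterpret_cast<char*>(frame->vertices.data()),
                 lengths[0] * sizeof(float)) &&
         in.read(reinterpret_cast<char*>(frame->texture_coords.data()),
                 lengths[1] * sizeof(float)) &&
         in.read(reinterpret_cast<char*>(frame->indices.data()),
                 lengths[2] * sizeof(int16_t));
}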
class GlAnimationOverlayCalculator : public CalculatorBase {
public:
GlAnimationOverlayCalculator() {}
~GlAnimationOverlayCalculator();
static absl::Status GetContract(CalculatorContract *cc);
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
private:
bool has_video_stream_ = false;
bool has_model_matrix_stream_ = false;
bool has_mask_model_matrix_stream_ = false;
bool has_occlusion_mask_ = false;
GlCalculatorHelper helper_;
bool initialized_ = false;
GlTexture texture_;
GlTexture mask_texture_;
GLuint renderbuffer_ = 0;
bool depth_buffer_created_ = false;
GLuint program_ = 0;
GLint texture_uniform_ = -1;
GLint perspective_matrix_uniform_ = -1;
GLint model_matrix_uniform_ = -1;
std::vector<TriangleMesh> triangle_meshes_;
std::vector<TriangleMesh> mask_meshes_;
Timestamp animation_start_time_;
int frame_count_ = 0;
float animation_speed_fps_;
std::vector<ModelMatrix> current_model_matrices_;
std::vector<ModelMatrix> current_mask_model_matrices_;
// Perspective matrix for rendering, to be applied to all model matrices
// prior to passing through to the shader as a MVP matrix. Initialized during
// first image packet read.
float perspective_matrix_[kNumMatrixEntries];
void ComputeAspectRatioAndFovFromCameraParameters(
const CameraParametersProto &camera_parameters, float *aspect_ratio,
float *vertical_fov_degrees);
int GetAnimationFrameIndex(Timestamp timestamp);
absl::Status GlSetup();
absl::Status GlBind(const TriangleMesh &triangle_mesh,
const GlTexture &texture);
absl::Status GlRender(const TriangleMesh &triangle_mesh,
const float *model_matrix);
void InitializePerspectiveMatrix(float aspect_ratio,
float vertical_fov_degrees, float z_near,
float z_far);
void LoadModelMatrices(const TimedModelMatrixProtoList &model_matrices,
std::vector<ModelMatrix> *current_model_matrices);
void CalculateTriangleMeshNormals(int normals_len,
TriangleMesh *triangle_mesh);
void Normalize3f(float input[3]);
#if !defined(__ANDROID__)
// Asset loading routine for all non-Android platforms.
bool LoadAnimation(const std::string &filename);
#else
// Asset loading for all Android platforms.
bool LoadAnimationAndroid(const std::string &filename,
std::vector<TriangleMesh> *mesh);
bool ReadBytesFromAsset(AAsset *asset, void *buffer, int num_bytes_to_read);
#endif
};
REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
// static
absl::Status GlAnimationOverlayCalculator::GetContract(CalculatorContract *cc) {
MP_RETURN_IF_ERROR(
GlCalculatorHelper::SetupInputSidePackets(&(cc->InputSidePackets())));
if (cc->Inputs().HasTag("VIDEO")) {
// Currently used only for size and timestamp.
cc->Inputs().Tag("VIDEO").Set<GpuBuffer>();
}
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0).Set<GpuBuffer>();
if (cc->Inputs().HasTag("MODEL_MATRICES")) {
cc->Inputs().Tag("MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
}
if (cc->Inputs().HasTag("MASK_MODEL_MATRICES")) {
cc->Inputs().Tag("MASK_MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
}
// Must have texture as Input Stream or Side Packet
if (cc->InputSidePackets().HasTag("TEXTURE")) {
cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
} else {
cc->Inputs().Tag("TEXTURE").Set<AssetTextureFormat>();
}
cc->InputSidePackets().Tag("ANIMATION_ASSET").Set<std::string>();
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
cc->InputSidePackets()
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
.Set<std::string>();
}
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
cc->InputSidePackets().Tag("MASK_TEXTURE").Set<AssetTextureFormat>();
}
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
cc->InputSidePackets().Tag("MASK_ASSET").Set<std::string>();
}
return absl::OkStatus();
}
void GlAnimationOverlayCalculator::CalculateTriangleMeshNormals(
int normals_len, TriangleMesh *triangle_mesh) {
// Set triangle_mesh normals for shader usage
triangle_mesh->normals.reset(new float[normals_len]);
// Used for storing the vertex normals prior to averaging
std::vector<float> vertex_normals_sum(normals_len, 0.0f);
// Compute every triangle surface normal and store them for averaging
for (int idx = 0; idx < triangle_mesh->index_count; idx += 3) {
int v_idx[3];
v_idx[0] = triangle_mesh->triangle_indices.get()[idx];
v_idx[1] = triangle_mesh->triangle_indices.get()[idx + 1];
v_idx[2] = triangle_mesh->triangle_indices.get()[idx + 2];
// (V1) vertex X,Y,Z indices in triangle_mesh.vertices
const float v1x = triangle_mesh->vertices[v_idx[0] * 3];
const float v1y = triangle_mesh->vertices[v_idx[0] * 3 + 1];
const float v1z = triangle_mesh->vertices[v_idx[0] * 3 + 2];
// (V2) vertex X,Y,Z indices in triangle_mesh.vertices
const float v2x = triangle_mesh->vertices[v_idx[1] * 3];
const float v2y = triangle_mesh->vertices[v_idx[1] * 3 + 1];
const float v2z = triangle_mesh->vertices[v_idx[1] * 3 + 2];
// (V3) vertex X,Y,Z indices in triangle_mesh.vertices
const float v3x = triangle_mesh->vertices[v_idx[2] * 3];
const float v3y = triangle_mesh->vertices[v_idx[2] * 3 + 1];
const float v3z = triangle_mesh->vertices[v_idx[2] * 3 + 2];
// Calculate normals from vertices
// V2 - V1
const float ax = v2x - v1x;
const float ay = v2y - v1y;
const float az = v2z - v1z;
// V3 - V1
const float bx = v3x - v1x;
const float by = v3y - v1y;
const float bz = v3z - v1z;
// Calculate cross product
const float normal_x = ay * bz - az * by;
const float normal_y = az * bx - ax * bz;
const float normal_z = ax * by - ay * bx;
// The normals calculated above must be normalized if we wish to prevent
// triangles with a larger surface area from dominating the normal
// calculations, however, none of our current models require this
// normalization.
// Add connected normal to each associated vertex
// It is also necessary to increment each vertex denominator for averaging
for (int i = 0; i < 3; i++) {
vertex_normals_sum[v_idx[i] * 3] += normal_x;
vertex_normals_sum[v_idx[i] * 3 + 1] += normal_y;
vertex_normals_sum[v_idx[i] * 3 + 2] += normal_z;
}
}
// Combine all triangle normals connected to each vertex by adding the X,Y,Z
// value of each adjacent triangle surface normal to every vertex and then
// averaging the combined value.
for (int idx = 0; idx < normals_len; idx += 3) {
float normal[3];
normal[0] = vertex_normals_sum[idx];
normal[1] = vertex_normals_sum[idx + 1];
normal[2] = vertex_normals_sum[idx + 2];
Normalize3f(normal);
triangle_mesh->normals.get()[idx] = normal[0];
triangle_mesh->normals.get()[idx + 1] = normal[1];
triangle_mesh->normals.get()[idx + 2] = normal[2];
}
}
void GlAnimationOverlayCalculator::Normalize3f(float input[3]) {
float product = 0.0;
product += input[0] * input[0];
product += input[1] * input[1];
product += input[2] * input[2];
float magnitude = sqrt(product);
input[0] /= magnitude;
input[1] /= magnitude;
input[2] /= magnitude;
}
// Helper function for initializing our perspective matrix.
void GlAnimationOverlayCalculator::InitializePerspectiveMatrix(
float aspect_ratio, float fov_degrees, float z_near, float z_far) {
// Standard perspective projection matrix calculations.
const float f = 1.0f / std::tan(fov_degrees * M_PI / 360.0f);
for (int i = 0; i < kNumMatrixEntries; i++) {
perspective_matrix_[i] = 0;
}
const float denom = 1.0f / (z_near - z_far);
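// Non-zero entries (column-major): [0] = f / aspect, [5] = f,
// [10] = (near + far) / (near - far), [11] = -1, [14] = 2 * far * near / (near - far).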
perspective_matrix_[0] = f / aspect_ratio;
perspective_matrix_[5] = f;
perspective_matrix_[10] = (z_near + z_far) * denom;
perspective_matrix_[11] = -1.0f;
perspective_matrix_[14] = 2.0f * z_far * z_near * denom;
}
#if defined(__ANDROID__)
// Helper function for reading in a specified number of bytes from an Android
// asset. Returns true if all bytes were successfully read into the buffer.
bool GlAnimationOverlayCalculator::ReadBytesFromAsset(AAsset *asset,
void *buffer,
int num_bytes_to_read) {
// Most file systems use block sizes of 4KB or 8KB; ideally we'd choose a
// small multiple of the block size for best input streaming performance, so
// we go for a reasonably safe buffer size of 8KB = 8*1024 bytes.
static const int kMaxChunkSize = 8192;
int bytes_left = num_bytes_to_read;
int bytes_read = 1; // any value > 0 here just to start looping.
// Treat as uint8_t array so we can deal in single byte arithmetic easily.
uint8_t *currBufferIndex = reinterpret_cast<uint8_t *>(buffer);
while (bytes_read > 0 && bytes_left > 0) {
bytes_read = AAsset_read(asset, (void *)currBufferIndex,
std::min(bytes_left, kMaxChunkSize));
bytes_left -= bytes_read;
currBufferIndex += bytes_read;
}
// At least log any I/O errors encountered.
if (bytes_read < 0) {
LOG(ERROR) << "Error reading from AAsset: " << bytes_read;
return false;
}
if (bytes_left > 0) {
// Reached EOF before reading in specified number of bytes.
LOG(WARNING) << "Reached EOF before reading in specified number of bytes.";
return false;
}
return true;
}
// The below asset streaming code is Android-only, making use of the platform
// JNI helper classes AAssetManager and AAsset.
bool GlAnimationOverlayCalculator::LoadAnimationAndroid(
const std::string &filename, std::vector<TriangleMesh> *meshes) {
mediapipe::AssetManager *mediapipe_asset_manager =
Singleton<mediapipe::AssetManager>::get();
AAssetManager *asset_manager = mediapipe_asset_manager->GetAssetManager();
if (!asset_manager) {
LOG(ERROR) << "Failed to access Android asset manager.";
return false;
}
// Open the asset file for streaming reads.
AAsset *asset = AAssetManager_open(asset_manager, filename.c_str(),
AASSET_MODE_STREAMING);
if (!asset) {
LOG(ERROR) << "Failed to open animation asset: " << filename;
return false;
}
// Stream in animation frames for as long as more data is available.
frame_count_ = 0;
int32 lengths[3];
while (ReadBytesFromAsset(asset, (void *)lengths, sizeof(lengths[0]) * 3)) {
// About to start reading the next animation frame. Stream it in here.
// Each frame stores first the object counts of its three arrays
// (vertices, texture coordinates, triangle indices, respectively), and
// then stores each of those arrays as a byte dump, in order.
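// Concretely: int32 vertex-float count, int32 tex-coord-float count, int32 index count,
// then float[vertex count], float[tex-coord count], int16[index count].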
meshes->emplace_back();
TriangleMesh &triangle_mesh = meshes->back();
// Try to read in vertices (4-byte floats)
triangle_mesh.vertices.reset(new float[lengths[0]]);
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.vertices.get(),
sizeof(float) * lengths[0])) {
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
return false;
}
// Try to read in texture coordinates (4-byte floats)
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.texture_coords.get(),
sizeof(float) * lengths[1])) {
LOG(ERROR) << "Failed to read tex-coords for frame " << frame_count_;
return false;
}
// Try to read in indices (2-byte shorts)
triangle_mesh.index_count = lengths[2];
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.triangle_indices.get(),
sizeof(int16) * lengths[2])) {
LOG(ERROR) << "Failed to read indices for frame " << frame_count_;
return false;
}
// Set the normals for this triangle_mesh
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
frame_count_++;
}
AAsset_close(asset);
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
if (meshes->empty()) {
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
return false;
}
return true;
}
#else // defined(__ANDROID__)
bool GlAnimationOverlayCalculator::LoadAnimation(const std::string &filename) {
std::ifstream infile(filename.c_str(), std::ifstream::binary);
if (!infile) {
LOG(ERROR) << "Error opening asset with filename: " << filename;
return false;
}
frame_count_ = 0;
int32 lengths[3];
while (true) {
// See if we have more initial size counts to read in.
infile.read((char *)(lengths), sizeof(lengths[0]) * 3);
if (!infile) {
// No more frames to read. Close out.
infile.close();
break;
}
triangle_meshes_.emplace_back();
TriangleMesh &triangle_mesh = triangle_meshes_.back();
// Try to read in vertices (4-byte floats).
triangle_mesh.vertices.reset(new float[lengths[0]]);
infile.read((char *)(triangle_mesh.vertices.get()),
sizeof(float) * lengths[0]);
if (!infile) {
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
return false;
}
// Try to read in texture coordinates (4-byte floats)
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
infile.read((char *)(triangle_mesh.texture_coords.get()),
sizeof(float) * lengths[1]);
if (!infile) {
LOG(ERROR) << "Failed to read texture coordinates for frame "
<< frame_count_;
return false;
}
// Try to read in the triangle indices (2-byte shorts)
triangle_mesh.index_count = lengths[2];
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
infile.read((char *)(triangle_mesh.triangle_indices.get()),
sizeof(int16) * lengths[2]);
if (!infile) {
LOG(ERROR) << "Failed to read triangle indices for frame "
<< frame_count_;
return false;
}
// Set the normals for this triangle_mesh
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
frame_count_++;
}
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
if (triangle_meshes_.empty()) {
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
return false;
}
return true;
}
#endif
void GlAnimationOverlayCalculator::ComputeAspectRatioAndFovFromCameraParameters(
const CameraParametersProto &camera_parameters, float *aspect_ratio,
float *vertical_fov_degrees) {
CHECK(aspect_ratio != nullptr);
CHECK(vertical_fov_degrees != nullptr);
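// The proto's portrait dimensions are assumed to be expressed in focal-length units,
// so atan(portrait_height / 2) yields half the vertical field of view (converted to
// degrees below).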
*aspect_ratio =
camera_parameters.portrait_width() / camera_parameters.portrait_height();
*vertical_fov_degrees =
std::atan(camera_parameters.portrait_height() * 0.5f) * 2 * 180 / M_PI;
}
absl::Status GlAnimationOverlayCalculator::Open(CalculatorContext *cc) {
cc->SetOffset(TimestampDiff(0));
MP_RETURN_IF_ERROR(helper_.Open(cc));
const auto &options = cc->Options<GlAnimationOverlayCalculatorOptions>();
animation_speed_fps_ = options.animation_speed_fps();
// Construct projection matrix using input side packets or option
float aspect_ratio;
float vertical_fov_degrees;
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
const std::string &camera_parameters_proto_string =
cc->InputSidePackets()
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
.Get<std::string>();
CameraParametersProto camera_parameters_proto;
camera_parameters_proto.ParseFromString(camera_parameters_proto_string);
ComputeAspectRatioAndFovFromCameraParameters(
camera_parameters_proto, &aspect_ratio, &vertical_fov_degrees);
} else {
aspect_ratio = options.aspect_ratio();
vertical_fov_degrees = options.vertical_fov_degrees();
}
// Use the computed aspect ratio and field of view when constructing the projection matrix.
InitializePerspectiveMatrix(aspect_ratio, vertical_fov_degrees,
options.z_clipping_plane_near(),
options.z_clipping_plane_far());
// See what streams we have.
has_video_stream_ = cc->Inputs().HasTag("VIDEO");
has_model_matrix_stream_ = cc->Inputs().HasTag("MODEL_MATRICES");
has_mask_model_matrix_stream_ = cc->Inputs().HasTag("MASK_MODEL_MATRICES");
// Try to load in the animation asset in a platform-specific manner.
const std::string &asset_name =
cc->InputSidePackets().Tag("ANIMATION_ASSET").Get<std::string>();
bool loaded_animation = false;
#if defined(__ANDROID__)
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
has_occlusion_mask_ = true;
const std::string &mask_asset_name =
cc->InputSidePackets().Tag("MASK_ASSET").Get<std::string>();
loaded_animation = LoadAnimationAndroid(mask_asset_name, &mask_meshes_);
if (!loaded_animation) {
LOG(ERROR) << "Failed to load mask asset.";
return absl::UnknownError("Failed to load mask asset.");
}
}
loaded_animation = LoadAnimationAndroid(asset_name, &triangle_meshes_);
#else
loaded_animation = LoadAnimation(asset_name);
#endif
if (!loaded_animation) {
LOG(ERROR) << "Failed to load animation asset.";
return absl::UnknownError("Failed to load animation asset.");
}
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
const auto &mask_texture =
cc->InputSidePackets().Tag("MASK_TEXTURE").Get<AssetTextureFormat>();
mask_texture_ = helper_.CreateSourceTexture(mask_texture);
}
// Load in all static texture data if it exists
if (cc->InputSidePackets().HasTag("TEXTURE")) {
const auto &input_texture =
cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
texture_ = helper_.CreateSourceTexture(input_texture);
}
VLOG(2) << "Input texture size: " << texture_.width() << ", "
<< texture_.height() << std::endl;
return absl::OkStatus();
});
}
int GlAnimationOverlayCalculator::GetAnimationFrameIndex(Timestamp timestamp) {
double seconds_delta = timestamp.Seconds() - animation_start_time_.Seconds();
int64_t frame_index =
static_cast<int64_t>(seconds_delta * animation_speed_fps_);
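// Wrap around so the animation loops once all frames have been shown.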
frame_index %= frame_count_;
return static_cast<int>(frame_index);
}
void GlAnimationOverlayCalculator::LoadModelMatrices(
const TimedModelMatrixProtoList &model_matrices,
std::vector<ModelMatrix> *current_model_matrices) {
current_model_matrices->clear();
for (int i = 0; i < model_matrices.model_matrix_size(); ++i) {
const auto &model_matrix = model_matrices.model_matrix(i);
CHECK(model_matrix.matrix_entries_size() == kNumMatrixEntries)
<< "Invalid Model Matrix";
current_model_matrices->emplace_back();
ModelMatrix &new_matrix = current_model_matrices->back();
new_matrix.reset(new float[kNumMatrixEntries]);
for (int j = 0; j < kNumMatrixEntries; j++) {
// Model matrices are streamed in using ROW-MAJOR format, but we want
// COLUMN-MAJOR for rendering, so we transpose here.
int col = j % 4;
int row = j / 4;
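// e.g. row-major entry j = 1 (row 0, column 1) lands at column-major index 4.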
new_matrix[row + col * 4] = model_matrix.matrix_entries(j);
}
}
}
absl::Status GlAnimationOverlayCalculator::Process(CalculatorContext *cc) {
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
if (!initialized_) {
MP_RETURN_IF_ERROR(GlSetup());
initialized_ = true;
animation_start_time_ = cc->InputTimestamp();
}
// Process model matrices, if any are being streamed in, and update our
// list.
current_model_matrices_.clear();
if (has_model_matrix_stream_ &&
!cc->Inputs().Tag("MODEL_MATRICES").IsEmpty()) {
const TimedModelMatrixProtoList &model_matrices =
cc->Inputs().Tag("MODEL_MATRICES").Get<TimedModelMatrixProtoList>();
LoadModelMatrices(model_matrices, &current_model_matrices_);
}
current_mask_model_matrices_.clear();
if (has_mask_model_matrix_stream_ &&
!cc->Inputs().Tag("MASK_MODEL_MATRICES").IsEmpty()) {
const TimedModelMatrixProtoList &model_matrices =
cc->Inputs()
.Tag("MASK_MODEL_MATRICES")
.Get<TimedModelMatrixProtoList>();
LoadModelMatrices(model_matrices, &current_mask_model_matrices_);
}
// Arbitrary default width and height for output destination texture, in the
// event that we don't have a valid and unique input buffer to overlay.
int width = 640;
int height = 480;
GlTexture dst;
std::unique_ptr<GpuBuffer> input_frame(nullptr);
if (has_video_stream_ && !(cc->Inputs().Tag("VIDEO").IsEmpty())) {
auto result = cc->Inputs().Tag("VIDEO").Value().Consume<GpuBuffer>();
if (result.ok()) {
input_frame = std::move(result).value();
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
input_frame->GetGlTextureBufferSharedPtr()->Reuse();
#endif
width = input_frame->width();
height = input_frame->height();
dst = helper_.CreateSourceTexture(*input_frame);
} else {
LOG(ERROR) << "Unable to consume input video frame for overlay!";
LOG(ERROR) << "Status returned was: " << result.status();
dst = helper_.CreateDestinationTexture(width, height);
}
} else if (!has_video_stream_) {
dst = helper_.CreateDestinationTexture(width, height);
} else {
// We have an input video stream, but not for this frame. Don't render!
return absl::OkStatus();
}
helper_.BindFramebuffer(dst);
if (!depth_buffer_created_) {
// Create our private depth buffer.
GLCHECK(glGenRenderbuffers(1, &renderbuffer_));
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
width, height));
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
depth_buffer_created_ = true;
}
// Re-bind our depth renderbuffer to our FBO depth attachment here.
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_));
GLenum status = GLCHECK(glCheckFramebufferStatus(GL_FRAMEBUFFER));
if (status != GL_FRAMEBUFFER_COMPLETE) {
LOG(ERROR) << "Incomplete framebuffer with status: " << status;
}
GLCHECK(glClear(GL_DEPTH_BUFFER_BIT));
if (has_occlusion_mask_) {
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
const TriangleMesh &mask_frame = mask_meshes_.front();
MP_RETURN_IF_ERROR(GlBind(mask_frame, mask_texture_));
// Draw objects using our latest model matrix stream packet.
for (const ModelMatrix &model_matrix : current_mask_model_matrices_) {
MP_RETURN_IF_ERROR(GlRender(mask_frame, model_matrix.get()));
}
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
int frame_index = GetAnimationFrameIndex(cc->InputTimestamp());
const TriangleMesh &current_frame = triangle_meshes_[frame_index];
// Load dynamic texture if it exists
if (cc->Inputs().HasTag("TEXTURE")) {
const auto &input_texture =
cc->Inputs().Tag("TEXTURE").Get<AssetTextureFormat>();
texture_ = helper_.CreateSourceTexture(input_texture);
}
MP_RETURN_IF_ERROR(GlBind(current_frame, texture_));
if (has_model_matrix_stream_) {
// Draw objects using our latest model matrix stream packet.
for (const ModelMatrix &model_matrix : current_model_matrices_) {
MP_RETURN_IF_ERROR(GlRender(current_frame, model_matrix.get()));
}
} else {
// Just draw one object to a static model matrix.
MP_RETURN_IF_ERROR(GlRender(current_frame, kModelMatrix));
}
// Disable vertex attributes
GLCHECK(glDisableVertexAttribArray(ATTRIB_VERTEX));
GLCHECK(glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
GLCHECK(glDisableVertexAttribArray(ATTRIB_NORMAL));
// Disable depth test
GLCHECK(glDisable(GL_DEPTH_TEST));
// Unbind texture
GLCHECK(glActiveTexture(GL_TEXTURE1));
GLCHECK(glBindTexture(texture_.target(), 0));
// Unbind depth buffer
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
GLCHECK(glFlush());
auto output = dst.GetFrame<GpuBuffer>();
dst.Release();
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0)
.Add(output.release(), cc->InputTimestamp());
GLCHECK(glFrontFace(GL_CCW));
return absl::OkStatus();
});
}
absl::Status GlAnimationOverlayCalculator::GlSetup() {
// Load vertex and fragment shaders
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
ATTRIB_NORMAL,
};
const GLchar *attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
"normal",
};
const GLchar *vert_src = R"(
// Perspective projection matrix for rendering / clipping
uniform mat4 perspectiveMatrix;
// Matrix defining the currently rendered object model
uniform mat4 modelMatrix;
// vertex position in 3D space
attribute vec4 position;
attribute vec3 normal;
// texture coordinate for each vertex in normalized texture space (0..1)
attribute mediump vec4 texture_coordinate;
// texture coordinate for fragment shader (will be interpolated)
varying mediump vec2 sampleCoordinate;
varying mediump vec3 vNormal;
void main() {
sampleCoordinate = texture_coordinate.xy;
mat4 mvpMatrix = perspectiveMatrix * modelMatrix;
gl_Position = mvpMatrix * position;
// TODO: Pass in rotation submatrix with no scaling or transforms to prevent
// breaking vNormal in case of model matrix having non-uniform scaling
vec4 tmpNormal = mvpMatrix * vec4(normal, 1.0);
vec4 transformedZero = mvpMatrix * vec4(0.0, 0.0, 0.0, 1.0);
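// Subtracting the transformed origin removes the translation component, so only the
// linear (rotation/scale) part of the matrix affects the normal.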
tmpNormal = tmpNormal - transformedZero;
vNormal = normalize(tmpNormal.xyz);
}
)";
const GLchar *frag_src = R"(
precision mediump float;
varying vec2 sampleCoordinate; // texture coordinate (0..1)
varying vec3 vNormal;
uniform sampler2D texture; // texture to shade with
const float kPi = 3.14159265359;
// Define ambient lighting factor that is applied to our texture in order to
// generate ambient lighting of the scene on the object. Range is [0.0-1.0],
// with the factor being proportional to the brightness of the lighting in the
// scene being applied to the object
const float kAmbientLighting = 0.75;
// Define RGB values for light source
const vec3 kLightColor = vec3(0.25);
// Exponent for directional lighting that governs diffusion of surface light
const float kExponent = 1.0;
// Define direction of lighting effect source
const vec3 lightDir = vec3(0.0, -1.0, -0.6);
// Hard-coded view direction
const vec3 viewDir = vec3(0.0, 0.0, -1.0);
// DirectionalLighting procedure imported from Lullaby @ https://github.com/google/lullaby
// Calculate and return the color (diffuse and specular together) reflected by
// a directional light.
vec3 GetDirectionalLight(vec3 pos, vec3 normal, vec3 viewDir, vec3 lightDir, vec3 lightColor, float exponent) {
// Intensity of the diffuse light. Saturate to keep within the 0-1 range.
float normal_dot_light_dir = dot(-normal, -lightDir);
float intensity = clamp(normal_dot_light_dir, 0.0, 1.0);
// Calculate the diffuse light
vec3 diffuse = intensity * lightColor;
// http://www.rorydriscoll.com/2009/01/25/energy-conservation-in-games/
float kEnergyConservation = (2.0 + exponent) / (2.0 * kPi);
vec3 reflect_dir = reflect(lightDir, -normal);
// Intensity of the specular light
float view_dot_reflect = dot(-viewDir, reflect_dir);
// Use an epsilon for pow because pow(x,y) is undefined if x < 0 or x == 0
// and y <= 0 (GLSL Spec 8.2)
const float kEpsilon = 1e-5;
intensity = kEnergyConservation * pow(clamp(view_dot_reflect, kEpsilon, 1.0),
exponent);
// Specular color:
vec3 specular = intensity * lightColor;
return diffuse + specular;
}
void main() {
// Sample the texture, retrieving an rgba pixel value
vec4 pixel = texture2D(texture, sampleCoordinate);
// If the alpha (background) value is nearly transparent, discard the
// pixel; this allows rendering GIFs with transparent backgrounds.
// TODO: Add a toggle to perform pixel alpha discarding for transparent
// GIFs (to prevent interference with the Objectron system).
if (pixel.a < 0.2) discard;
// Generate directional lighting effect
vec3 lighting = GetDirectionalLight(gl_FragCoord.xyz, vNormal, viewDir, lightDir, kLightColor, kExponent);
// Apply both ambient and directional lighting to our texture
gl_FragColor = vec4((vec3(kAmbientLighting) + lighting) * pixel.rgb, 1.0);
}
)";
// Shader program
GLCHECK(GlhCreateProgram(vert_src, frag_src, NUM_ATTRIBUTES,
(const GLchar **)&attr_name[0], attr_location,
&program_));
RET_CHECK(program_) << "Problem initializing the program.";
texture_uniform_ = GLCHECK(glGetUniformLocation(program_, "texture"));
perspective_matrix_uniform_ =
GLCHECK(glGetUniformLocation(program_, "perspectiveMatrix"));
model_matrix_uniform_ =
GLCHECK(glGetUniformLocation(program_, "modelMatrix"));
return absl::OkStatus();
}
absl::Status GlAnimationOverlayCalculator::GlBind(
const TriangleMesh &triangle_mesh, const GlTexture &texture) {
GLCHECK(glUseProgram(program_));
// Disable backface culling to allow occlusion effects.
// Some options for solid arbitrary 3D geometry rendering
GLCHECK(glEnable(GL_BLEND));
GLCHECK(glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA));
GLCHECK(glEnable(GL_DEPTH_TEST));
GLCHECK(glFrontFace(GL_CW));
GLCHECK(glDepthMask(GL_TRUE));
GLCHECK(glDepthFunc(GL_LESS));
// Set up vertex attribute arrays for the mesh data before issuing draw calls.
GLCHECK(glVertexAttribPointer(ATTRIB_VERTEX, 3, GL_FLOAT, 0, 0,
triangle_mesh.vertices.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_VERTEX));
GLCHECK(glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
triangle_mesh.texture_coords.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
GLCHECK(glVertexAttribPointer(ATTRIB_NORMAL, 3, GL_FLOAT, 0, 0,
triangle_mesh.normals.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_NORMAL));
GLCHECK(glActiveTexture(GL_TEXTURE1));
GLCHECK(glBindTexture(texture.target(), texture.name()));
// We previously bound it to GL_TEXTURE1
GLCHECK(glUniform1i(texture_uniform_, 1));
GLCHECK(glUniformMatrix4fv(perspective_matrix_uniform_, 1, GL_FALSE,
perspective_matrix_));
return absl::OkStatus();
}
absl::Status GlAnimationOverlayCalculator::GlRender(
const TriangleMesh &triangle_mesh, const float *model_matrix) {
GLCHECK(glUniformMatrix4fv(model_matrix_uniform_, 1, GL_FALSE, model_matrix));
GLCHECK(glDrawElements(GL_TRIANGLES, triangle_mesh.index_count,
GL_UNSIGNED_SHORT,
triangle_mesh.triangle_indices.get()));
return absl::OkStatus();
}
GlAnimationOverlayCalculator::~GlAnimationOverlayCalculator() {
helper_.RunInGlContext([this] {
if (program_) {
GLCHECK(glDeleteProgram(program_));
program_ = 0;
}
if (depth_buffer_created_) {
GLCHECK(glDeleteRenderbuffers(1, &renderbuffer_));
renderbuffer_ = 0;
}
if (texture_.width() > 0) {
texture_.Release();
}
if (mask_texture_.width() > 0) {
mask_texture_.Release();
}
});
}
} // namespace mediapipe

View File

@ -0,0 +1,41 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message GlAnimationOverlayCalculatorOptions {
extend CalculatorOptions {
optional GlAnimationOverlayCalculatorOptions ext = 174760573;
}
// Default aspect ratio of rendering target width over height.
// This specific value is for 3:4 view. Do not change this default value.
optional float aspect_ratio = 1 [default = 0.75];
// Default vertical field of view in degrees. This specific default value
// is arbitrary. Do not change this default value. If you want to use
// a different vertical_fov_degrees, set it in the options.
optional float vertical_fov_degrees = 2 [default = 70.0];
// Perspective projection matrix z-clipping near plane value.
optional float z_clipping_plane_near = 3 [default = 0.1];
// Perspective projection matrix z-clipping far plane value.
optional float z_clipping_plane_far = 4 [default = 1000.0];
// Speed at which to play the animation (in frames per second).
optional float animation_speed_fps = 5 [default = 25.0];
}

View File

@ -0,0 +1,48 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
message TimedModelMatrixProto {
// 4x4 model matrix stored in ROW major order.
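// e.g. matrix_entries[0..3] hold the first row and matrix_entries[4..7] the second.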
repeated float matrix_entries = 1 [packed = true];
// Timestamp of this model matrix in milliseconds.
optional int64 time_msec = 2 [default = 0];
// Unique per object id
optional int32 id = 3 [default = -1];
}
message TimedModelMatrixProtoList {
repeated TimedModelMatrixProto model_matrix = 1;
}
// For convenience, when the desired information or transformation can be
// encoded into vectors (e.g. when the matrix represents a scale or Euler-angle-
// based rotation operation.)
message TimedVectorProto {
// The vector values themselves.
repeated float vector_entries = 1 [packed = true];
// Timestamp of this vector in milliseconds.
optional int64 time_msec = 2 [default = 0];
// Unique per object id
optional int32 id = 3 [default = -1];
}
message TimedVectorProtoList {
repeated TimedVectorProto vector_list = 1;
}

View File

@ -0,0 +1,33 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
java_library(
name = "obj_parser_lib",
srcs = [
"ObjParserMain.java",
"SimpleObjParser.java",
],
javacopts = ["-Xep:DefaultPackage:OFF"],
)
java_binary(
name = "ObjParser",
javacopts = ["-Xep:DefaultPackage:OFF"],
main_class = "ObjParserMain",
runtime_deps = [
":obj_parser_lib",
],
)

View File

@ -0,0 +1,205 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Arrays;
/**
* Class for running desktop-side parsing/packing routines on .obj AR assets. Usage: ObjParser
* --input_dir=[INPUT_DIRECTORY] --output_dir=[OUTPUT_DIRECTORY], where INPUT_DIRECTORY is the
* folder with asset .obj files to process, and OUTPUT_DIRECTORY is the folder where the processed
* asset .uuu file should be placed.
*
* <p>NOTE: Directories are assumed to be absolute paths.
*/
public final class ObjParserMain {
// Simple FileFilter implementation to let us walk over only our .obj files in a particular
// directory.
private static final class ObjFileFilter implements FileFilter {
ObjFileFilter() {
// Nothing to do here.
}
@Override
public boolean accept(File file) {
return file.getName().endsWith(".obj");
}
}
// File extension for binary output files; tagged onto end of initial file extension.
private static final String BINARY_FILE_EXT = ".uuu";
private static final String INPUT_DIR_FLAG = "--input_dir=";
private static final String OUTPUT_DIR_FLAG = "--output_dir=";
private static final float DEFAULT_VERTEX_SCALE_FACTOR = 30.0f;
private static final double NS_TO_SECONDS = 1e9;
public final PrintWriter writer;
public ObjParserMain() {
super();
this.writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(System.out, UTF_8)));
}
// Simple overridable logging function.
protected void logString(String infoLog) {
writer.println(infoLog);
}
/*
* Main program logic: parse command-line arguments and perform actions.
*/
public void run(String inDirectory, String outDirectory) {
if (inDirectory.isEmpty()) {
logString("Error: Must provide input directory with " + INPUT_DIR_FLAG);
return;
}
if (outDirectory.isEmpty()) {
logString("Error: Must provide output directory with " + OUTPUT_DIR_FLAG);
return;
}
File dirAsFile = new File(inDirectory);
ObjFileFilter objFileFilter = new ObjFileFilter();
File[] objFiles = dirAsFile.listFiles(objFileFilter);
FileOutputStream outputStream = null;
logString("Parsing directory: " + inDirectory);
// We need frames processed in correct order.
Arrays.sort(objFiles);
for (File objFile : objFiles) {
String fileName = objFile.getAbsolutePath();
// Just take the file name of the first processed frame.
if (outputStream == null) {
String outputFileName = outDirectory + objFile.getName() + BINARY_FILE_EXT;
try {
// Create new file here, if we can.
outputStream = new FileOutputStream(outputFileName);
logString("Created outfile: " + outputFileName);
} catch (Exception e) {
logString("Error creating outfile: " + e.toString());
e.printStackTrace(writer);
return;
}
}
// Process each file into the stream.
logString("Processing file: " + fileName);
processFile(fileName, outputStream);
}
// Finally close the stream out.
try {
if (outputStream != null) {
outputStream.close();
}
} catch (Exception e) {
logString("Error trying to close output stream: " + e.toString());
e.printStackTrace(writer);
}
}
/*
* Entrypoint for command-line executable.
*/
public static void main(String[] args) {
// Parse flags
String inDirectory = "";
String outDirectory = "";
for (int i = 0; i < args.length; i++) {
if (args[i].startsWith(INPUT_DIR_FLAG)) {
inDirectory = args[i].substring(INPUT_DIR_FLAG.length());
// Make sure this will be treated as a directory
if (!inDirectory.endsWith("/")) {
inDirectory += "/";
}
}
if (args[i].startsWith(OUTPUT_DIR_FLAG)) {
outDirectory = args[i].substring(OUTPUT_DIR_FLAG.length());
// Make sure this will be treated as a directory
if (!outDirectory.endsWith("/")) {
outDirectory += "/";
}
}
}
ObjParserMain parser = new ObjParserMain();
parser.run(inDirectory, outDirectory);
parser.writer.flush();
}
/*
* Internal helper function to parse a .obj from an infile name and stream the resulting data
* directly out in binary-dump format to outputStream.
*/
private void processFile(String infileName, OutputStream outputStream) {
long start = System.nanoTime();
// First we parse the obj.
SimpleObjParser objParser = new SimpleObjParser(infileName, DEFAULT_VERTEX_SCALE_FACTOR);
if (!objParser.parse()) {
logString("Error parsing .obj file before processing");
return;
}
final float[] vertices = objParser.getVertices();
final float[] textureCoords = objParser.getTextureCoords();
final ArrayList<Short> triangleList = objParser.getTriangles();
// Overall byte count to stream: 12 for the 3 list-length ints, then 4 for each vertex and
// texCoord float, and finally 2 for each triangle index short.
final int bbSize =
12 + 4 * vertices.length + 4 * textureCoords.length + 2 * triangleList.size();
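// This layout mirrors what the on-device loader reads back: three int32 counts followed by
// the float vertex data, float texture-coordinate data, and int16 triangle indices.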
// Ensure ByteBuffer is native order, just like we want to read it in, but is NOT direct, so
// we can call .array() on it.
ByteBuffer bb = ByteBuffer.allocate(bbSize);
bb.order(ByteOrder.nativeOrder());
bb.putInt(vertices.length);
bb.putInt(textureCoords.length);
bb.putInt(triangleList.size());
logString(String.format("Writing... Vertices: %d, TextureCoords: %d, Indices: %d.%n",
vertices.length, textureCoords.length, triangleList.size()));
for (float vertex : vertices) {
bb.putFloat(vertex);
}
for (float textureCoord : textureCoords) {
bb.putFloat(textureCoord);
}
for (Short vertexIndex : triangleList) {
bb.putShort(vertexIndex.shortValue());
}
bb.position(0);
try {
outputStream.write(bb.array(), 0, bbSize);
logString(String.format("Processing successful! Took %.4f seconds.%n",
(System.nanoTime() - start) / NS_TO_SECONDS));
} catch (Exception e) {
logString("Error writing during processing: " + e.toString());
e.printStackTrace(writer);
}
}
}

View File

@ -0,0 +1,386 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
* Class for parsing a single .obj file into openGL-usable pieces.
*
* <p>Usage:
*
* <p>SimpleObjParser objParser = new SimpleObjParser("animations/cow/cow320.obj", .015f);
*
* <p>if (objParser.parse()) { ... }
*/
public class SimpleObjParser {
private static class ShortPair {
private final Short first;
private final Short second;
public ShortPair(Short newFirst, Short newSecond) {
first = newFirst;
second = newSecond;
}
public Short getFirst() {
return first;
}
public Short getSecond() {
return second;
}
}
private static final String TAG = SimpleObjParser.class.getSimpleName();
private static final boolean DEBUG = false;
private static final int INVALID_INDEX = -1;
private static final int POSITIONS_COORDS_PER_VERTEX = 3;
private static final int TEXTURE_COORDS_PER_VERTEX = 2;
private final String fileName;
// Since .obj doesn't tie together texture coordinates and vertex
// coordinates, but OpenGL does, we need to keep a map of all such pairings that occur in
// our face list.
private final HashMap<ShortPair, Short> vertexTexCoordMap;
// Internal (de-coupled) unique vertices and texture coordinates
private ArrayList<Float> vertices;
private ArrayList<Float> textureCoords;
// Data we expose to openGL for rendering
private float[] finalizedVertices;
private float[] finalizedTextureCoords;
private ArrayList<Short> finalizedTriangles;
// So we only display warnings about dropped w-coordinates once
private boolean vertexCoordIgnoredWarning;
private boolean textureCoordIgnoredWarning;
private boolean startedProcessingFaces;
private int numPrimitiveVertices;
private int numPrimitiveTextureCoords;
private int numPrimitiveFaces;
// For scratchwork, so we don't have to keep reallocating
private float[] tempCoords;
// We scale all our position coordinates uniformly by this factor
private float objectUniformScaleFactor;
public SimpleObjParser(String objFile, float scaleFactor) {
objectUniformScaleFactor = scaleFactor;
fileName = objFile;
vertices = new ArrayList<Float>();
textureCoords = new ArrayList<Float>();
vertexTexCoordMap = new HashMap<ShortPair, Short>();
finalizedTriangles = new ArrayList<Short>();
tempCoords = new float[Math.max(POSITIONS_COORDS_PER_VERTEX, TEXTURE_COORDS_PER_VERTEX)];
numPrimitiveFaces = 0;
vertexCoordIgnoredWarning = false;
textureCoordIgnoredWarning = false;
startedProcessingFaces = false;
}
// Simple helper wrapper function
private void debugLogString(String message) {
if (DEBUG) {
System.out.println(message);
}
}
private void parseVertex(String[] linePieces) {
// Note: Traditionally xyzw is acceptable as a format, with w defaulting to 1.0, but for now
// we only parse xyz.
if (linePieces.length < POSITIONS_COORDS_PER_VERTEX + 1
|| linePieces.length > POSITIONS_COORDS_PER_VERTEX + 2) {
System.out.println("Malformed vertex coordinate specification, assuming xyz format only.");
return;
} else if (linePieces.length == POSITIONS_COORDS_PER_VERTEX + 2 && !vertexCoordIgnoredWarning) {
System.out.println(
"Only x, y, and z parsed for vertex coordinates; w coordinates will be ignored.");
vertexCoordIgnoredWarning = true;
}
boolean success = true;
try {
for (int i = 1; i < POSITIONS_COORDS_PER_VERTEX + 1; i++) {
tempCoords[i - 1] = Float.parseFloat(linePieces[i]);
}
} catch (NumberFormatException e) {
success = false;
System.out.println("Malformed vertex coordinate error: " + e.toString());
}
if (success) {
for (int i = 0; i < POSITIONS_COORDS_PER_VERTEX; i++) {
vertices.add(Float.valueOf(tempCoords[i] * objectUniformScaleFactor));
}
}
}
private void parseTextureCoordinate(String[] linePieces) {
// Similar to vertices, uvw is acceptable as a format, with w defaulting to 0.0, but for now we
// only parse uv.
if (linePieces.length < TEXTURE_COORDS_PER_VERTEX + 1
|| linePieces.length > TEXTURE_COORDS_PER_VERTEX + 2) {
System.out.println("Malformed texture coordinate specification, assuming uv format only.");
return;
} else if (linePieces.length == (TEXTURE_COORDS_PER_VERTEX + 2)
&& !textureCoordIgnoredWarning) {
debugLogString("Only u and v parsed for texture coordinates; w coordinates will be ignored.");
textureCoordIgnoredWarning = true;
}
boolean success = true;
try {
for (int i = 1; i < TEXTURE_COORDS_PER_VERTEX + 1; i++) {
tempCoords[i - 1] = Float.parseFloat(linePieces[i]);
}
} catch (NumberFormatException e) {
success = false;
System.out.println("Malformed texture coordinate error: " + e.toString());
}
if (success) {
// .obj files treat (0,0) as top-left, compared to bottom-left for openGL. So invert "v"
// texture coordinate only here.
textureCoords.add(Float.valueOf(tempCoords[0]));
textureCoords.add(Float.valueOf(1.0f - tempCoords[1]));
}
}
// Will return INVALID_INDEX if error occurs, and otherwise will return finalized (combined)
// index, adding and hashing new combinations as it sees them.
private short parseAndProcessCombinedVertexCoord(String coordString) {
String[] coords = coordString.split("/");
try {
// Parse vertex and texture indices; 1-indexed from front if positive and from end of list if
// negative.
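// e.g. "3/1" references the 3rd vertex paired with the 1st texture coordinate, while
// "-1/-1" references the last entry of each list.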
short vertexIndex = Short.parseShort(coords[0]);
short textureIndex = Short.parseShort(coords[1]);
if (vertexIndex > 0) {
vertexIndex--;
} else {
vertexIndex = (short) (vertexIndex + numPrimitiveVertices);
}
if (textureIndex > 0) {
textureIndex--;
} else {
textureIndex = (short) (textureIndex + numPrimitiveTextureCoords);
}
// Combine indices and look up in pair map.
ShortPair indexPair = new ShortPair(Short.valueOf(vertexIndex), Short.valueOf(textureIndex));
Short combinedIndex = vertexTexCoordMap.get(indexPair);
if (combinedIndex == null) {
short numIndexPairs = (short) vertexTexCoordMap.size();
vertexTexCoordMap.put(indexPair, numIndexPairs);
return numIndexPairs;
} else {
return combinedIndex.shortValue();
}
} catch (NumberFormatException e) {
// Failure to parse coordinates as shorts
return INVALID_INDEX;
}
}
// Note: it is assumed that the face list occurs AFTER the vertex and texture coordinate lists
// in the .obj file format.
private void parseFace(String[] linePieces) {
if (linePieces.length < 4) {
System.out.println("Malformed face index list: there must be at least 3 indices per face");
return;
}
short[] faceIndices = new short[linePieces.length - 1];
boolean success = true;
for (int i = 1; i < linePieces.length; i++) {
short faceIndex = parseAndProcessCombinedVertexCoord(linePieces[i]);
if (faceIndex < 0) {
System.out.println(faceIndex);
System.out.println("Malformed face index: " + linePieces[i]);
success = false;
break;
}
faceIndices[i - 1] = faceIndex;
}
if (success) {
numPrimitiveFaces++;
// Manually triangulate the face under the assumption that the points are coplanar, the poly
// is convex, and the points are listed in either clockwise or anti-clockwise orientation.
for (int i = 1; i < faceIndices.length - 1; i++) {
// We use a triangle fan here, so first point is part of all triangles
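// e.g. a quad with indices (i0, i1, i2, i3) becomes triangles (i0, i1, i2) and (i0, i2, i3).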
finalizedTriangles.add(faceIndices[0]);
finalizedTriangles.add(faceIndices[i]);
finalizedTriangles.add(faceIndices[i + 1]);
}
}
}
// Iterate over map and reconstruct proper vertex/texture coordinate pairings.
private boolean constructFinalCoordinatesFromMap() {
final int numIndexPairs = vertexTexCoordMap.size();
// XYZ vertices and UV texture coordinates
finalizedVertices = new float[POSITIONS_COORDS_PER_VERTEX * numIndexPairs];
finalizedTextureCoords = new float[TEXTURE_COORDS_PER_VERTEX * numIndexPairs];
try {
for (Map.Entry<ShortPair, Short> entry : vertexTexCoordMap.entrySet()) {
ShortPair indexPair = entry.getKey();
short rawVertexIndex = indexPair.getFirst().shortValue();
short rawTexCoordIndex = indexPair.getSecond().shortValue();
short finalIndex = entry.getValue().shortValue();
for (int i = 0; i < POSITIONS_COORDS_PER_VERTEX; i++) {
finalizedVertices[POSITIONS_COORDS_PER_VERTEX * finalIndex + i]
= vertices.get(rawVertexIndex * POSITIONS_COORDS_PER_VERTEX + i);
}
for (int i = 0; i < TEXTURE_COORDS_PER_VERTEX; i++) {
finalizedTextureCoords[TEXTURE_COORDS_PER_VERTEX * finalIndex + i]
= textureCoords.get(rawTexCoordIndex * TEXTURE_COORDS_PER_VERTEX + i);
}
}
} catch (NumberFormatException e) {
System.out.println("Malformed index in vertex/texture coordinate mapping.");
return false;
}
return true;
}
/**
* Returns the vertex position coordinate list (x1, y1, z1, x2, y2, z2, ...) after a successful
* call to parse().
*/
public float[] getVertices() {
return finalizedVertices;
}
/**
* Returns the vertex texture coordinate list (u1, v1, u2, v2, ...) after a successful call to
* parse().
*/
public float[] getTextureCoords() {
return finalizedTextureCoords;
}
/**
* Returns the list of indices (a1, b1, c1, a2, b2, c2, ...) after a successful call to parse().
* Each (a, b, c) triplet specifies a triangle to be rendered, with a, b, and c Short objects used
* to index into the coordinates returned by getVertices() and getTextureCoords().<p></p>
* For example, a Short index representing 5 should be used to index into vertices[15],
* vertices[16], and vertices[17], as well as textureCoords[10] and textureCoords[11].
*/
public ArrayList<Short> getTriangles() {
return finalizedTriangles;
}
/**
* Attempts to locate and read the specified .obj file, and parse it accordingly. None of the
* getter functions in this class will return valid results until a value of true is returned
* from this function.
* @return true on success.
*/
public boolean parse() {
boolean success = true;
BufferedReader reader = null;
try {
reader = Files.newBufferedReader(Paths.get(fileName), UTF_8);
String line;
while ((line = reader.readLine()) != null) {
// Skip over lines with no characters
if (line.length() < 1) {
continue;
}
// Ignore comment lines entirely
if (line.charAt(0) == '#') {
continue;
}
// Split into pieces based on whitespace, and process according to first command piece
String[] linePieces = line.split(" +");
switch (linePieces[0]) {
case "v":
// Add vertex
if (startedProcessingFaces) {
throw new IOException("Vertices must all be declared before faces in obj files.");
}
parseVertex(linePieces);
break;
case "vt":
// Add texture coordinate
if (startedProcessingFaces) {
throw new IOException(
"Texture coordinates must all be declared before faces in obj files.");
}
parseTextureCoordinate(linePieces);
break;
case "f":
// Vertex and texture coordinate lists should be locked into place by now
if (!startedProcessingFaces) {
startedProcessingFaces = true;
numPrimitiveVertices = vertices.size() / POSITIONS_COORDS_PER_VERTEX;
numPrimitiveTextureCoords = textureCoords.size() / TEXTURE_COORDS_PER_VERTEX;
}
// Add face
parseFace(linePieces);
break;
default:
// Unknown or unused directive: ignoring
// Note: We do not yet process vertex normals or curves, so we ignore {vp, vn, s}
// Note: We assume only a single object, so we ignore {g, o}
// Note: We also assume a single texture, which we process independently, so we ignore
// {mtllib, usemtl}
break;
}
}
// If we made it all the way through, then we have a vertex-to-tex-coord pair mapping, so
// construct our final vertex and texture coordinate lists now.
success = constructFinalCoordinatesFromMap();
} catch (IOException e) {
success = false;
System.out.println("Failure to parse obj file: " + e.toString());
} finally {
try {
if (reader != null) {
reader.close();
}
} catch (IOException e) {
System.out.println("Couldn't close reader");
}
}
if (success) {
debugLogString("Successfully parsed " + numPrimitiveVertices + " vertices and "
+ numPrimitiveTextureCoords + " texture coordinates into " + vertexTexCoordMap.size()
+ " combined vertices and " + numPrimitiveFaces + " faces, represented as a mesh of "
+ finalizedTriangles.size() / 3 + " triangles.");
}
return success;
}
}

Some files were not shown because too many files have changed in this diff.