code fill
This commit is contained in:
parent d109328198
commit 5578aa50e8
1 .gitignore vendored
@@ -8,3 +8,4 @@ Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk
/refs/
15 Cargo.toml Normal file
@@ -0,0 +1,15 @@
[package]
name = "ux-mediapipe"
version = "0.1.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
name = "mediapipe"

[dependencies]
cgmath = "0.18.0"
libc = "0.2.0"
opencv = {version = "0.63.0", default-features = false, features = ["videoio", "highgui", "imgproc"]}
protobuf = "2.23.0"
@@ -1,2 +1,4 @@
# ux-media
# ux-mediapipe
Rust and mediapipe

bazel build --define MEDIAPIPE_DISABLE_GPU=1 mediapipe:mediagraph
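The bazel command above produces the native `mediagraph` library that the Rust crate binds to. A minimal sketch of using the crate after that step follows; it only uses identifiers that already appear in examples/hello.rs in this commit, and it assumes the bazel output is already discoverable by the linker (linkage setup is not covered by this diff).

```rust
// Minimal sketch, assuming the bazel-built `mediagraph` library is already
// visible to the linker (not configured in this commit).
use mediapipe::*;

fn main() {
    // Types taken from examples/hello.rs; constructing them exercises the
    // bindings without opening a camera.
    let _mesh = FaceMesh::default();
    let _detector = face_mesh::FaceMeshDetector::default();
    println!("mediapipe bindings loaded");
}
```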
48 examples/hand_tracking_desktop_live_gpu.txt Normal file
@@ -0,0 +1,48 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"

# Generates a side packet containing the max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:palm_detections"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
  input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
  output_stream: "IMAGE:output_video"
}
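On the Rust side, this graph's two-hand setup (`int_value: 2`) is consumed through `hands::HandDetector`, as in examples/hello.rs below. A condensed sketch of that call (frame capture, BGR-to-RGB conversion, and window handling trimmed):

```rust
// Condensed from examples/hello.rs; assumes `rgb_frame` already holds an RGB
// frame (capture and color conversion omitted).
use mediapipe::*;
use opencv::prelude::*;

fn track_hands(detector: &mut hands::HandDetector, rgb_frame: &Mat) -> (Hand, Hand) {
    let mut left = Hand::default();
    let mut right = Hand::default();
    // Runs the multi-hand tracking graph and fills both hands' landmarks.
    detector.process(rgb_frame, &mut left, &mut right);
    (left, right)
}
```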
212 examples/hello.rs Normal file
|
@@ -0,0 +1,212 @@
|
|||
#![allow(unused_variables)]
|
||||
#![allow(dead_code)]
|
||||
|
||||
use mediapipe::*;
|
||||
|
||||
mod examples {
|
||||
use super::*;
|
||||
use opencv::prelude::*;
|
||||
use opencv::{highgui, imgproc, videoio, Result};
|
||||
|
||||
pub fn corner_rectangle() -> Result<()> {
|
||||
let window = "video capture";
|
||||
|
||||
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
|
||||
|
||||
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
|
||||
if !cap.is_opened()? {
|
||||
panic!("Unable to open default cam")
|
||||
}
|
||||
|
||||
let detector = hands::HandDetector::default();
|
||||
|
||||
loop {
|
||||
let mut frame = Mat::default();
|
||||
cap.read(&mut frame)?;
|
||||
let size = frame.size()?;
|
||||
if size.width > 0 {
|
||||
highgui::imshow(window, &mut frame)?
|
||||
}
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// pub fn face_detection() -> Result<()> {
|
||||
// let window = "video capture";
|
||||
|
||||
// highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
|
||||
|
||||
// let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
|
||||
// if !cap.is_opened()? {
|
||||
// panic!("Unable to open default cam")
|
||||
// }
|
||||
|
||||
// let detector = mediapipe::face_detection::FaceDetector::default();
|
||||
|
||||
// loop {
|
||||
// let mut frame = Mat::default();
|
||||
// cap.read(&mut frame)?;
|
||||
// let size = frame.size()?;
|
||||
// if size.width > 0 {
|
||||
// highgui::imshow(window, &mut frame)?
|
||||
// }
|
||||
// let key = highgui::wait_key(10)?;
|
||||
// if key > 0 && key != 255 {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
pub fn face_mesh() -> Result<()> {
|
||||
let window = "video capture";
|
||||
|
||||
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
|
||||
|
||||
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
|
||||
if !cap.is_opened()? {
|
||||
panic!("Unable to open default cam")
|
||||
}
|
||||
|
||||
cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
|
||||
cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
|
||||
cap.set(videoio::CAP_PROP_FPS, 30.0)?;
|
||||
|
||||
let mut mesh = FaceMesh::default();
|
||||
let mut detector = face_mesh::FaceMeshDetector::default();
|
||||
|
||||
let mut raw_frame = Mat::default();
|
||||
let mut rgb_frame = Mat::default();
|
||||
let mut flip_frame = Mat::default();
|
||||
loop {
|
||||
cap.read(&mut raw_frame)?;
|
||||
|
||||
let size = raw_frame.size()?;
|
||||
if size.width > 0 && !raw_frame.empty() {
|
||||
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
|
||||
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
|
||||
|
||||
detector.process(&flip_frame, &mut mesh);
|
||||
|
||||
highgui::imshow(window, &mut flip_frame)?;
|
||||
println!(
|
||||
"LANDMARK: {} {} {}",
|
||||
mesh.data[0].x, mesh.data[0].y, mesh.data[0].z
|
||||
);
|
||||
} else {
|
||||
println!("WARN: Skip empty frame");
|
||||
}
|
||||
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn hand_tracking() -> Result<()> {
|
||||
let window = "video capture";
|
||||
|
||||
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
|
||||
|
||||
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
|
||||
if !cap.is_opened()? {
|
||||
panic!("Unable to open default cam")
|
||||
}
|
||||
|
||||
cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
|
||||
cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
|
||||
cap.set(videoio::CAP_PROP_FPS, 30.0)?;
|
||||
|
||||
let mut left = Hand::default();
|
||||
let mut right = Hand::default();
|
||||
let mut detector = hands::HandDetector::default();
|
||||
|
||||
let mut raw_frame = Mat::default();
|
||||
let mut rgb_frame = Mat::default();
|
||||
let mut flip_frame = Mat::default();
|
||||
loop {
|
||||
cap.read(&mut raw_frame)?;
|
||||
|
||||
let size = raw_frame.size()?;
|
||||
if size.width > 0 && !raw_frame.empty() {
|
||||
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
|
||||
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
|
||||
|
||||
detector.process(&flip_frame, &mut left, &mut right);
|
||||
|
||||
highgui::imshow(window, &mut flip_frame)?;
|
||||
println!(
|
||||
"LANDMARK: {} {} {}",
|
||||
left.data[0].x, left.data[0].y, left.data[0].z
|
||||
);
|
||||
} else {
|
||||
println!("WARN: Skip empty frame");
|
||||
}
|
||||
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn pose_estimation() -> Result<()> {
|
||||
let window = "video capture";
|
||||
|
||||
highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
|
||||
|
||||
let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
|
||||
if !cap.is_opened()? {
|
||||
panic!("Unable to open default cam")
|
||||
}
|
||||
|
||||
cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
|
||||
cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
|
||||
cap.set(videoio::CAP_PROP_FPS, 30.0)?;
|
||||
|
||||
let mut pose = Pose::default();
|
||||
let mut detector = pose::PoseDetector::default();
|
||||
|
||||
let mut raw_frame = Mat::default();
|
||||
let mut rgb_frame = Mat::default();
|
||||
let mut flip_frame = Mat::default();
|
||||
loop {
|
||||
cap.read(&mut raw_frame)?;
|
||||
|
||||
let size = raw_frame.size()?;
|
||||
if size.width > 0 && !raw_frame.empty() {
|
||||
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
|
||||
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
|
||||
|
||||
detector.process(&rgb_frame, &mut pose);
|
||||
|
||||
highgui::imshow(window, &mut rgb_frame)?;
|
||||
println!(
|
||||
"LANDMARK: {} {} {}",
|
||||
pose.data[0].x, pose.data[0].y, pose.data[0].z
|
||||
);
|
||||
} else {
|
||||
println!("WARN: Skip empty frame");
|
||||
}
|
||||
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// examples::pose_estimation().unwrap()
|
||||
// examples::hand_tracking().unwrap()
|
||||
examples::face_mesh().unwrap()
|
||||
}
|
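With the Cargo.toml added above, this file is a standard Cargo example, so it can presumably be built and run with `cargo run --example hello` once the native mediagraph library from the README's bazel step is available at link and run time; switch between the pose, hand-tracking, and face-mesh demos by changing which call in `main` is uncommented.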
36 mediapipe/graphs/edge_detection/BUILD Normal file
|
@@ -0,0 +1,36 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:luminance_calculator",
|
||||
"//mediapipe/calculators/image:sobel_edges_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_gpu_binary_graph",
|
||||
graph = "edge_detection_mobile_gpu.pbtxt",
|
||||
output_name = "mobile_gpu.binarypb",
|
||||
)
|
|
@@ -0,0 +1,22 @@
|
|||
# MediaPipe graph that performs GPU Sobel edge detection on a live video stream.
|
||||
# Used in the examples in
|
||||
# mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:helloworld
|
||||
# and mediapipe/examples/ios/helloworld.
|
||||
|
||||
# Images coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Converts RGB images into luminance images, still stored in RGB format.
|
||||
node: {
|
||||
calculator: "LuminanceCalculator"
|
||||
input_stream: "input_video"
|
||||
output_stream: "luma_video"
|
||||
}
|
||||
|
||||
# Applies the Sobel filter to luminance images stored in RGB format.
|
||||
node: {
|
||||
calculator: "SobelEdgesCalculator"
|
||||
input_stream: "luma_video"
|
||||
output_stream: "output_video"
|
||||
}
|
95 mediapipe/graphs/face_detection/BUILD Normal file
|
@@ -0,0 +1,95 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
|
||||
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_live_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_live_gpu_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "face_detection_mobile_cpu_binary_graph",
|
||||
graph = "face_detection_mobile_cpu.pbtxt",
|
||||
output_name = "face_detection_mobile_cpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "face_detection_mobile_gpu_binary_graph",
|
||||
graph = "face_detection_mobile_gpu.pbtxt",
|
||||
output_name = "face_detection_mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "face_detection_full_range_mobile_gpu_deps",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_full_range_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "face_detection_full_range_mobile_gpu_binary_graph",
|
||||
graph = "face_detection_full_range_mobile_gpu.pbtxt",
|
||||
output_name = "face_detection_full_range_mobile_gpu.binarypb",
|
||||
deps = [":face_detection_full_range_mobile_gpu_deps"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "face_detection_full_range_desktop_live_deps",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_full_range_cpu",
|
||||
],
|
||||
)
|
|
@@ -0,0 +1,58 @@
|
|||
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
|
||||
|
||||
# CPU buffer. (ImageFrame)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (ImageFrame)
|
||||
output_stream: "output_video"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "face_detections"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Subgraph that detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
|
@@ -0,0 +1,60 @@
|
|||
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
|
||||
# Used in the examples in
|
||||
# mediapipe/examples/desktop/face_detection:face_detection_cpu.
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
|
||||
# generating the corresponding detections before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:detections"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionFullRangeCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
|
@@ -0,0 +1,60 @@
|
|||
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
|
||||
# Used in the examples in
|
||||
# mediapipe/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
|
||||
# mediapipe/examples/ios/facedetectiongpu.
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
|
||||
# generating the corresponding detections before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionFullRangeGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE_GPU:output_video"
|
||||
}
|
|
@@ -0,0 +1,76 @@
|
|||
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "face_detections"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Transfers the input image from GPU to CPU memory for the purpose of
|
||||
# demonstrating a CPU-based pipeline. Note that the input image on GPU has the
|
||||
# origin defined at the bottom-left corner (OpenGL convention). As a result,
|
||||
# the transferred image on CPU also shares the same representation.
|
||||
node: {
|
||||
calculator: "GpuBufferToImageFrameCalculator"
|
||||
input_stream: "throttled_input_video"
|
||||
output_stream: "input_video_cpu"
|
||||
}
|
||||
|
||||
# Subgraph that detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCpu"
|
||||
input_stream: "IMAGE:input_video_cpu"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_video_cpu"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video_cpu"
|
||||
}
|
||||
|
||||
# Transfers the annotated image from CPU back to GPU memory, to be sent out of
|
||||
# the graph.
|
||||
node: {
|
||||
calculator: "ImageFrameToGpuBufferCalculator"
|
||||
input_stream: "output_video_cpu"
|
||||
output_stream: "output_video"
|
||||
}
|
|
@@ -0,0 +1,58 @@
|
|||
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "face_detections"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Subgraph that detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE_GPU:output_video"
|
||||
}
|
44 mediapipe/graphs/face_effect/BUILD Normal file
|
@@ -0,0 +1,44 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "face_effect_gpu_deps",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:immediate_mux_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/framework/tool:switch_container",
|
||||
"//mediapipe/graphs/face_effect/subgraphs:single_face_geometry_from_detection_gpu",
|
||||
"//mediapipe/graphs/face_effect/subgraphs:single_face_geometry_from_landmarks_gpu",
|
||||
"//mediapipe/modules/face_geometry:effect_renderer_calculator",
|
||||
"//mediapipe/modules/face_geometry:env_generator_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "face_effect_gpu_binary_graph",
|
||||
graph = "face_effect_gpu.pbtxt",
|
||||
output_name = "face_effect_gpu.binarypb",
|
||||
deps = [":face_effect_gpu_deps"],
|
||||
)
|
47 mediapipe/graphs/face_effect/data/BUILD Normal file
|
@@ -0,0 +1,47 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
encode_binary_proto(
|
||||
name = "axis",
|
||||
input = "axis.pbtxt",
|
||||
message_type = "mediapipe.face_geometry.Mesh3d",
|
||||
output = "axis.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_proto",
|
||||
],
|
||||
)
|
||||
|
||||
encode_binary_proto(
|
||||
name = "glasses",
|
||||
input = "glasses.pbtxt",
|
||||
message_type = "mediapipe.face_geometry.Mesh3d",
|
||||
output = "glasses.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_proto",
|
||||
],
|
||||
)
|
||||
|
||||
# `.pngblob` is used instead of `.png` to prevent iOS build from preprocessing the image.
|
||||
# OpenCV is unable to read a PNG file preprocessed by the iOS build.
|
||||
exports_files([
|
||||
"axis.pngblob",
|
||||
"facepaint.pngblob",
|
||||
"glasses.pngblob",
|
||||
])
|
320 mediapipe/graphs/face_effect/data/axis.pbtxt Normal file
|
@@ -0,0 +1,320 @@
|
|||
vertex_type: VERTEX_PT
|
||||
primitive_type: TRIANGLE
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 11.500000
|
||||
vertex_buffer: 0.873006
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 11.500000
|
||||
vertex_buffer: 0.928502
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 11.500000
|
||||
vertex_buffer: 0.928502
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 11.500000
|
||||
vertex_buffer: 0.873006
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.928502
|
||||
vertex_buffer: 0.500000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.873006
|
||||
vertex_buffer: 0.500000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.928502
|
||||
vertex_buffer: 0.250000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.873006
|
||||
vertex_buffer: 0.250000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 11.500000
|
||||
vertex_buffer: 0.928502
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 11.500000
|
||||
vertex_buffer: 0.873006
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.983999
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.983999
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.817509
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.500000
|
||||
vertex_buffer: 0.817509
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.069341
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.123429
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.123429
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.069341
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.123419
|
||||
vertex_buffer: 0.499992
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.069341
|
||||
vertex_buffer: 0.500000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.123429
|
||||
vertex_buffer: 0.250000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.069341
|
||||
vertex_buffer: 0.250000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.123429
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.069341
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.177516
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.177516
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.015254
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.015254
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.472252
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.527748
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.527748
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.472252
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.527748
|
||||
vertex_buffer: 0.500000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.472252
|
||||
vertex_buffer: 0.500000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.527748
|
||||
vertex_buffer: 0.250000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.472252
|
||||
vertex_buffer: 0.250000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.527748
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.472252
|
||||
vertex_buffer: 0.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.583245
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: 0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.583245
|
||||
vertex_buffer: 0.750000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.400000
|
||||
vertex_buffer: 0.416755
|
||||
vertex_buffer: 1.000000
|
||||
vertex_buffer: -0.100000
|
||||
vertex_buffer: 3.000000
|
||||
vertex_buffer: 8.600000
|
||||
vertex_buffer: 0.416755
|
||||
vertex_buffer: 0.750000
|
||||
index_buffer: 0
|
||||
index_buffer: 1
|
||||
index_buffer: 2
|
||||
index_buffer: 0
|
||||
index_buffer: 2
|
||||
index_buffer: 3
|
||||
index_buffer: 3
|
||||
index_buffer: 2
|
||||
index_buffer: 4
|
||||
index_buffer: 3
|
||||
index_buffer: 4
|
||||
index_buffer: 5
|
||||
index_buffer: 5
|
||||
index_buffer: 4
|
||||
index_buffer: 6
|
||||
index_buffer: 5
|
||||
index_buffer: 6
|
||||
index_buffer: 7
|
||||
index_buffer: 7
|
||||
index_buffer: 6
|
||||
index_buffer: 8
|
||||
index_buffer: 7
|
||||
index_buffer: 8
|
||||
index_buffer: 9
|
||||
index_buffer: 1
|
||||
index_buffer: 10
|
||||
index_buffer: 11
|
||||
index_buffer: 1
|
||||
index_buffer: 11
|
||||
index_buffer: 2
|
||||
index_buffer: 12
|
||||
index_buffer: 0
|
||||
index_buffer: 3
|
||||
index_buffer: 12
|
||||
index_buffer: 3
|
||||
index_buffer: 13
|
||||
index_buffer: 14
|
||||
index_buffer: 15
|
||||
index_buffer: 16
|
||||
index_buffer: 14
|
||||
index_buffer: 16
|
||||
index_buffer: 17
|
||||
index_buffer: 17
|
||||
index_buffer: 16
|
||||
index_buffer: 18
|
||||
index_buffer: 17
|
||||
index_buffer: 18
|
||||
index_buffer: 19
|
||||
index_buffer: 19
|
||||
index_buffer: 18
|
||||
index_buffer: 20
|
||||
index_buffer: 19
|
||||
index_buffer: 20
|
||||
index_buffer: 21
|
||||
index_buffer: 21
|
||||
index_buffer: 20
|
||||
index_buffer: 22
|
||||
index_buffer: 21
|
||||
index_buffer: 22
|
||||
index_buffer: 23
|
||||
index_buffer: 15
|
||||
index_buffer: 24
|
||||
index_buffer: 25
|
||||
index_buffer: 15
|
||||
index_buffer: 25
|
||||
index_buffer: 16
|
||||
index_buffer: 26
|
||||
index_buffer: 14
|
||||
index_buffer: 17
|
||||
index_buffer: 26
|
||||
index_buffer: 17
|
||||
index_buffer: 27
|
||||
index_buffer: 28
|
||||
index_buffer: 29
|
||||
index_buffer: 30
|
||||
index_buffer: 28
|
||||
index_buffer: 30
|
||||
index_buffer: 31
|
||||
index_buffer: 31
|
||||
index_buffer: 30
|
||||
index_buffer: 32
|
||||
index_buffer: 31
|
||||
index_buffer: 32
|
||||
index_buffer: 33
|
||||
index_buffer: 33
|
||||
index_buffer: 32
|
||||
index_buffer: 34
|
||||
index_buffer: 33
|
||||
index_buffer: 34
|
||||
index_buffer: 35
|
||||
index_buffer: 35
|
||||
index_buffer: 34
|
||||
index_buffer: 36
|
||||
index_buffer: 35
|
||||
index_buffer: 36
|
||||
index_buffer: 37
|
||||
index_buffer: 29
|
||||
index_buffer: 38
|
||||
index_buffer: 39
|
||||
index_buffer: 29
|
||||
index_buffer: 39
|
||||
index_buffer: 30
|
||||
index_buffer: 40
|
||||
index_buffer: 28
|
||||
index_buffer: 31
|
||||
index_buffer: 40
|
||||
index_buffer: 31
|
||||
index_buffer: 41
|
BIN mediapipe/graphs/face_effect/data/axis.pngblob Normal file
Binary file not shown. (Size: 492 B)
BIN mediapipe/graphs/face_effect/data/facepaint.pngblob Normal file
Binary file not shown. (Size: 593 KiB)
27815 mediapipe/graphs/face_effect/data/glasses.pbtxt Normal file
File diff suppressed because it is too large.
BIN mediapipe/graphs/face_effect/data/glasses.pngblob Normal file
Binary file not shown. (Size: 293 KiB)
130 mediapipe/graphs/face_effect/face_effect_gpu.pbtxt Normal file
|
@@ -0,0 +1,130 @@
|
|||
# MediaPipe graph that applies a face effect to the input video stream.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# An integer that indicates which effect is selected. (int)
|
||||
#
|
||||
# If `selected_effect_id` is `0`, the Axis effect is selected.
|
||||
# If `selected_effect_id` is `1`, the Facepaint effect is selected.
|
||||
# If `selected_effect_id` is `2`, the Glasses effect is selected.
|
||||
#
|
||||
# No other values are allowed for `selected_effect_id`.
|
||||
input_stream: "selected_effect_id"
|
||||
|
||||
# Indicates whether to use the face detection as the input source. (bool)
|
||||
#
|
||||
# If `true`, the face detection pipeline will be used to produce landmarks.
|
||||
# If `false`, the face landmark pipeline will be used to produce landmarks.
|
||||
input_side_packet: "use_face_detection_input_source"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
|
||||
# A list of geometry data for a single detected face.
|
||||
#
|
||||
# NOTE: there will not be an output packet in this stream for this particular
|
||||
# timestamp if no faces are detected.
|
||||
#
|
||||
# (std::vector<face_geometry::FaceGeometry>)
|
||||
output_stream: "multi_face_geometry"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Generates an environment that describes the current virtual scene.
|
||||
node {
|
||||
calculator: "FaceGeometryEnvGeneratorCalculator"
|
||||
output_side_packet: "ENVIRONMENT:environment"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FaceGeometryEnvGeneratorCalculatorOptions] {
|
||||
environment: {
|
||||
origin_point_location: TOP_LEFT_CORNER
|
||||
perspective_camera: {
|
||||
vertical_fov_degrees: 63.0 # 63 degrees
|
||||
near: 1.0 # 1cm
|
||||
far: 10000.0 # 100m
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Computes the face geometry for a single face. The input source is defined
|
||||
# through `use_face_detection_input_source`.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "ENABLE:use_face_detection_input_source"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SwitchContainerOptions] {
|
||||
contained_node: {
|
||||
calculator: "SingleFaceGeometryFromLandmarksGpu"
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "SingleFaceGeometryFromDetectionGpu"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Renders the selected effect based on `selected_effect_id`.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_stream: "SELECT:selected_effect_id"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
input_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
output_stream: "IMAGE_GPU:output_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SwitchContainerOptions] {
|
||||
contained_node: {
|
||||
calculator: "FaceGeometryEffectRendererCalculator"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
|
||||
effect_texture_path: "mediapipe/graphs/face_effect/data/axis.pngblob"
|
||||
effect_mesh_3d_path: "mediapipe/graphs/face_effect/data/axis.binarypb"
|
||||
}
|
||||
}
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "FaceGeometryEffectRendererCalculator"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
|
||||
effect_texture_path: "mediapipe/graphs/face_effect/data/facepaint.pngblob"
|
||||
}
|
||||
}
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "FaceGeometryEffectRendererCalculator"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FaceGeometryEffectRendererCalculatorOptions] {
|
||||
effect_texture_path: "mediapipe/graphs/face_effect/data/glasses.pngblob"
|
||||
effect_mesh_3d_path: "mediapipe/graphs/face_effect/data/glasses.binarypb"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
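The `selected_effect_id` contract documented at the top of this graph (0 = Axis, 1 = Facepaint, 2 = Glasses) could be mirrored with a small Rust helper when feeding that input stream; the enum below is purely illustrative and is not part of the ux-mediapipe API in this commit.

```rust
// Hypothetical helper mirroring the `selected_effect_id` values documented in
// face_effect_gpu.pbtxt above; not part of the crate's API in this commit.
#[derive(Clone, Copy, Debug)]
pub enum FaceEffect {
    Axis = 0,
    Facepaint = 1,
    Glasses = 2,
}

impl FaceEffect {
    /// Value to send on the graph's `selected_effect_id` input stream.
    pub fn selected_effect_id(self) -> i32 {
        self as i32
    }
}
```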
61 mediapipe/graphs/face_effect/subgraphs/BUILD Normal file
|
@@ -0,0 +1,61 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmarks_smoothing",
|
||||
graph = "face_landmarks_smoothing.pbtxt",
|
||||
register_as = "FaceLandmarksSmoothing",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:landmarks_smoothing_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "single_face_geometry_from_detection_gpu",
|
||||
graph = "single_face_geometry_from_detection_gpu.pbtxt",
|
||||
register_as = "SingleFaceGeometryFromDetectionGpu",
|
||||
deps = [
|
||||
":face_landmarks_smoothing",
|
||||
"//mediapipe/calculators/core:concatenate_detection_vector_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
|
||||
"//mediapipe/modules/face_geometry:face_geometry_from_detection",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "single_face_geometry_from_landmarks_gpu",
|
||||
graph = "single_face_geometry_from_landmarks_gpu.pbtxt",
|
||||
register_as = "SingleFaceGeometryFromLandmarksGpu",
|
||||
deps = [
|
||||
":face_landmarks_smoothing",
|
||||
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_smoothing_calculator",
|
||||
"//mediapipe/modules/face_geometry:face_geometry_from_landmarks",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
|
||||
],
|
||||
)
|
|
@@ -0,0 +1,24 @@
|
|||
# MediaPipe subgraph that smoothes face landmarks.
|
||||
|
||||
type: "FaceLandmarksSmoothing"
|
||||
|
||||
input_stream: "NORM_LANDMARKS:landmarks"
|
||||
input_stream: "IMAGE_SIZE:input_image_size"
|
||||
output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks"
|
||||
|
||||
# Applies smoothing to a face landmark list. The filter options were handpicked
|
||||
# to achieve better visual results.
|
||||
node {
|
||||
calculator: "LandmarksSmoothingCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks"
|
||||
input_stream: "IMAGE_SIZE:input_image_size"
|
||||
output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksSmoothingCalculatorOptions] {
|
||||
velocity_filter: {
|
||||
window_size: 5
|
||||
velocity_scale: 20.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,91 @@
|
|||
# MediaPipe subgraph that extracts geometry from a single face using the face
|
||||
# detection pipeline on an input GPU image. The face landmarks are also
|
||||
# "smoothed" to achieve better visual results.
|
||||
|
||||
type: "SingleFaceGeometryFromDetectionGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:input_image"
|
||||
|
||||
# Environment that describes the current virtual scene.
|
||||
# (face_geometry::Environment)
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
|
||||
# A list of geometry data for a single detected face. The size of this
|
||||
# collection is at most 1 because of the single-face use in this graph.
|
||||
# (std::vector<face_geometry::FaceGeometry>)
|
||||
#
|
||||
# NOTE: if no face is detected at a particular timestamp, there will not be an
|
||||
# output packet in the `MULTI_FACE_GEOMETRY` stream for this timestamp. However,
|
||||
# the MediaPipe framework will internally inform the downstream calculators of
|
||||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks using the face
|
||||
# detection pipeline.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeGpu"
|
||||
input_stream: "IMAGE:input_image"
|
||||
output_stream: "DETECTIONS:multi_face_detection"
|
||||
}
|
||||
|
||||
# Extracts the first face detection associated with the most prominent face from
|
||||
# a collection.
|
||||
node {
|
||||
calculator: "SplitDetectionVectorCalculator"
|
||||
input_stream: "multi_face_detection"
|
||||
output_stream: "face_detection"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Extracts face detection keypoints as normalized landmarks.
|
||||
node {
|
||||
calculator: "DetectionToLandmarksCalculator"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
||||
# Extracts the input image frame dimensions as a separate packet.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
output_stream: "SIZE:input_image_size"
|
||||
}
|
||||
|
||||
# Applies smoothing to the face landmarks previously extracted from the face
|
||||
# detection keypoints.
|
||||
node {
|
||||
calculator: "FaceLandmarksSmoothing"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:input_image_size"
|
||||
output_stream: "NORM_FILTERED_LANDMARKS:smoothed_face_landmarks"
|
||||
}
|
||||
|
||||
# Converts smoothed face landmarks back into the detection format.
|
||||
node {
|
||||
calculator: "LandmarksToDetectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:smoothed_face_landmarks"
|
||||
output_stream: "DETECTION:smoothed_face_detection"
|
||||
}
|
||||
|
||||
# Puts the smoothed single face detection back into a collection to simplify
|
||||
# passing the result into the `FaceGeometryFromDetection` subgraph.
|
||||
node {
|
||||
calculator: "ConcatenateDetectionVectorCalculator"
|
||||
input_stream: "smoothed_face_detection"
|
||||
output_stream: "multi_smoothed_face_detection"
|
||||
}
|
||||
|
||||
# Computes face geometry from the single face detection.
|
||||
node {
|
||||
calculator: "FaceGeometryFromDetection"
|
||||
input_stream: "MULTI_FACE_DETECTION:multi_smoothed_face_detection"
|
||||
input_stream: "IMAGE_SIZE:input_image_size"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
}
|
|
@@ -0,0 +1,89 @@
|
|||
# MediaPipe subgraph that extracts geometry from a single face using the face
|
||||
# landmark pipeline on an input GPU image. The face landmarks are also
|
||||
# "smoothed" to achieve better visual results.
|
||||
|
||||
type: "SingleFaceGeometryFromLandmarksGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:input_image"
|
||||
|
||||
# Environment that describes the current virtual scene.
|
||||
# (face_geometry::Environment)
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
|
||||
# A list of geometry data for a single detected face. The size of this
|
||||
# collection is at most 1 because of the single-face use in this graph.
|
||||
# (std::vector<face_geometry::FaceGeometry>)
|
||||
#
|
||||
# NOTE: if no face is detected at a particular timestamp, there will not be an
|
||||
# output packet in the `MULTI_FACE_GEOMETRY` stream for this timestamp. However,
|
||||
# the MediaPipe framework will internally inform the downstream calculators of
|
||||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
|
||||
# Creates a packet to inform the `FaceLandmarkFrontGpu` subgraph to detect at
|
||||
# most 1 face.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks using the face
|
||||
# landmark pipeline.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
}
|
||||
|
||||
# Extracts a single set of face landmarks associated with the most prominent
|
||||
# face detected from a collection.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "multi_face_landmarks"
|
||||
output_stream: "face_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Extracts the input image frame dimensions as a separate packet.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
output_stream: "SIZE:input_image_size"
|
||||
}
|
||||
|
||||
# Applies smoothing to the single set of face landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarksSmoothing"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:input_image_size"
|
||||
output_stream: "NORM_FILTERED_LANDMARKS:smoothed_face_landmarks"
|
||||
}
|
||||
|
||||
# Puts the single set of smoothed landmarks back into a collection to simplify
|
||||
# passing the result into the `FaceGeometryFromLandmarks` subgraph.
|
||||
node {
|
||||
calculator: "ConcatenateLandmarListVectorCalculator"
|
||||
input_stream: "smoothed_face_landmarks"
|
||||
output_stream: "multi_smoothed_face_landmarks"
|
||||
}
|
||||
|
||||
# Computes face geometry from face landmarks for a single face.
|
||||
node {
|
||||
calculator: "FaceGeometryFromLandmarks"
|
||||
input_stream: "MULTI_FACE_LANDMARKS:multi_smoothed_face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:input_image_size"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
}
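A minimal C++ sketch (not part of this commit) of how a downstream calculator can cope with the absent MULTI_FACE_GEOMETRY packets described in the NOTE above. Only the stream tag and the face_geometry::FaceGeometry type are taken from this subgraph; the surrounding calculator and the proto include path are assumptions.

#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"  // assumed path

// Inside a hypothetical consumer calculator's Process():
absl::Status Process(mediapipe::CalculatorContext* cc) {
  if (cc->Inputs().Tag("MULTI_FACE_GEOMETRY").IsEmpty()) {
    // No face was detected at this timestamp; skip the work and return.
    return absl::OkStatus();
  }
  const auto& multi_face_geometry =
      cc->Inputs()
          .Tag("MULTI_FACE_GEOMETRY")
          .Get<std::vector<mediapipe::face_geometry::FaceGeometry>>();
  // At most one entry here because of the single-face setup above.
  (void)multi_face_geometry;
  return absl::OkStatus();
}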
|
69
mediapipe/graphs/face_mesh/BUILD
Normal file
|
@@ -0,0 +1,69 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "desktop_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_live_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_live_gpu_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "face_mesh_mobile_gpu_binary_graph",
|
||||
graph = "face_mesh_mobile.pbtxt",
|
||||
output_name = "face_mesh_mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
37
mediapipe/graphs/face_mesh/calculators/BUILD
Normal file
|
@@ -0,0 +1,37 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "face_landmarks_to_render_data_calculator",
|
||||
srcs = ["face_landmarks_to_render_data_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:location_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/util:color_cc_proto",
|
||||
"//mediapipe/util:render_data_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
|
@@ -0,0 +1,104 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h"
|
||||
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/location_data.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/util/color.pb.h"
|
||||
#include "mediapipe/util/render_data.pb.h"
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kNumFaceLandmarkConnections = 132;
|
||||
// Pairs of landmark indices to be rendered with connections.
|
||||
constexpr int kFaceLandmarkConnections[] = {
|
||||
// Lips.
|
||||
61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321,
|
||||
321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267,
|
||||
269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14,
|
||||
14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81,
|
||||
81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308,
|
||||
// Left eye.
|
||||
33, 7, 7, 163, 163, 144, 144, 145, 145, 153, 153, 154, 154, 155, 155, 133,
|
||||
33, 246, 246, 161, 161, 160, 160, 159, 159, 158, 158, 157, 157, 173, 173,
|
||||
133,
|
||||
// Left eyebrow.
|
||||
46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107,
|
||||
// Left iris.
|
||||
474, 475, 475, 476, 476, 477, 477, 474,
|
||||
// Right eye.
|
||||
263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382,
|
||||
362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398,
|
||||
398, 362,
|
||||
// Right eyebrow.
|
||||
276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296,
|
||||
336,
|
||||
// Right iris.
|
||||
469, 470, 470, 471, 471, 472, 472, 469,
|
||||
// Face oval.
|
||||
10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
|
||||
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
|
||||
378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
|
||||
136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
|
||||
21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
|
||||
|
||||
} // namespace
|
||||
|
||||
// A calculator that converts face landmarks to RenderData proto for
|
||||
// visualization. Ignores landmark_connections specified in
|
||||
// LandmarksToRenderDataCalculatorOptions, if any, and always uses a fixed set
|
||||
// of landmark connections specific to face landmark (defined in
|
||||
// kFaceLandmarkConnections[] above).
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "FaceLandmarksToRenderDataCalculator"
|
||||
// input_stream: "NORM_LANDMARKS:landmarks"
|
||||
// output_stream: "RENDER_DATA:render_data"
|
||||
// options {
|
||||
// [LandmarksToRenderDataCalculatorOptions.ext] {
|
||||
// landmark_color { r: 0 g: 255 b: 0 }
|
||||
// connection_color { r: 0 g: 255 b: 0 }
|
||||
// thickness: 4.0
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class FaceLandmarksToRenderDataCalculator
|
||||
: public LandmarksToRenderDataCalculator {
|
||||
public:
|
||||
absl::Status Open(CalculatorContext* cc) override;
|
||||
};
|
||||
REGISTER_CALCULATOR(FaceLandmarksToRenderDataCalculator);
|
||||
|
||||
absl::Status FaceLandmarksToRenderDataCalculator::Open(CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
options_ = cc->Options<mediapipe::LandmarksToRenderDataCalculatorOptions>();
|
||||
|
||||
for (int i = 0; i < kNumFaceLandmarkConnections; ++i) {
|
||||
landmark_connections_.push_back(kFaceLandmarkConnections[i * 2]);
|
||||
landmark_connections_.push_back(kFaceLandmarkConnections[i * 2 + 1]);
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace mediapipe
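One note on the Open() override above: kNumFaceLandmarkConnections counts index pairs, not array elements, and the loop flattens kFaceLandmarkConnections into (start, end) pairs. A compile-time sanity check one could place next to the table (not present in this commit) is:

// Hypothetical check: the table must hold exactly 2 * kNumFaceLandmarkConnections
// landmark indices, i.e. one (start, end) pair per connection.
static_assert(sizeof(kFaceLandmarkConnections) / sizeof(kFaceLandmarkConnections[0]) ==
                  2 * kNumFaceLandmarkConnections,
              "kFaceLandmarkConnections must list landmark index pairs");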
|
70
mediapipe/graphs/face_mesh/face_mesh_desktop.pbtxt
Normal file
|
@@ -0,0 +1,70 @@
|
|||
# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
|
||||
# Path to the input video file. (string)
|
||||
input_side_packet: "input_video_path"
|
||||
# Path to the output video file. (string)
|
||||
input_side_packet: "output_video_path"
|
||||
|
||||
# max_queue_size limits the number of packets enqueued on any input stream
|
||||
# by throttling inputs to the graph. This makes the graph only process one
|
||||
# frame at a time.
|
||||
max_queue_size: 1
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:0:num_faces"
|
||||
output_side_packet: "PACKET:1:with_attention"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
packet { bool_value: true }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input video.
|
||||
node {
|
||||
calculator: "FaceRendererCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
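For context, a hedged C++ sketch of how a file-based graph like the one above is typically driven from a desktop binary. The two side packet names come from this file; the function name and the file paths are placeholders.

#include <map>
#include <string>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

absl::Status RunFaceMeshDesktop(const std::string& graph_config_contents) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          graph_config_contents);
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  // Side packets declared by the graph above.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets["input_video_path"] =
      mediapipe::MakePacket<std::string>("/path/to/input.mp4");   // placeholder
  side_packets["output_video_path"] =
      mediapipe::MakePacket<std::string>("/path/to/output.mp4");  // placeholder
  MP_RETURN_IF_ERROR(graph.StartRun(side_packets));
  return graph.WaitUntilDone();
}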
|
66
mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt
Normal file
|
@@ -0,0 +1,66 @@
|
|||
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
|
||||
|
||||
# Input image. (ImageFrame)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (ImageFrame)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/processed faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
output_stream: "multi_face_landmarks"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:0:num_faces"
|
||||
output_side_packet: "PACKET:1:with_attention"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
packet { bool_value: true }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input image.
|
||||
node {
|
||||
calculator: "FaceRendererCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
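A similar hedged C++ sketch for the live variant above: frames are pushed into "input_video" and the per-face landmark lists are read back from "multi_face_landmarks". The stream names and packet types come from this file; the helper function, the frame source and the timestamping are illustrative only.

#include <cstdint>
#include <memory>
#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/status.h"

// Assumes the graph is already running and `landmarks_poller` was obtained via
// graph.AddOutputStreamPoller("multi_face_landmarks") before StartRun.
absl::Status StreamOneFrame(mediapipe::CalculatorGraph& graph,
                            std::unique_ptr<mediapipe::ImageFrame> frame,
                            int64_t frame_timestamp_us,
                            mediapipe::OutputStreamPoller& landmarks_poller) {
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "input_video", mediapipe::Adopt(frame.release())
                         .At(mediapipe::Timestamp(frame_timestamp_us))));
  mediapipe::Packet packet;
  if (landmarks_poller.Next(&packet)) {
    const auto& faces =
        packet.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
    (void)faces;  // One NormalizedLandmarkList per detected face.
  }
  return absl::OkStatus();
}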
|
66
mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt
Normal file
|
@@ -0,0 +1,66 @@
|
|||
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
|
||||
|
||||
# Input image. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/processed faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
output_stream: "multi_face_landmarks"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:0:num_faces"
|
||||
output_side_packet: "PACKET:1:with_attention"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
packet { bool_value: true }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input image.
|
||||
node {
|
||||
calculator: "FaceRendererGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
67
mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt
Normal file
|
@@ -0,0 +1,67 @@
|
|||
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Max number of faces to detect/process. (int)
|
||||
input_side_packet: "num_faces"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/processed faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
output_stream: "multi_face_landmarks"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:with_attention"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { bool_value: true }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input image.
|
||||
node {
|
||||
calculator: "FaceRendererGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
52
mediapipe/graphs/face_mesh/subgraphs/BUILD
Normal file
|
@@ -0,0 +1,52 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "renderer_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||
"//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_renderer_gpu",
|
||||
graph = "face_renderer_gpu.pbtxt",
|
||||
register_as = "FaceRendererGpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_renderer_cpu",
|
||||
graph = "face_renderer_cpu.pbtxt",
|
||||
register_as = "FaceRendererCpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
],
|
||||
)
|
96
mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt
Normal file
|
@@ -0,0 +1,96 @@
|
|||
# MediaPipe face mesh rendering subgraph.
|
||||
|
||||
type: "FaceRendererCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_image"
|
||||
# Collection of detected/predicted faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:rects"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
input_stream: "DETECTIONS:detections"
|
||||
|
||||
# CPU image with rendered data. (ImageFrame)
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:detections_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
|
||||
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||
# timestamp for downstream calculators to inform them that all elements in the
|
||||
# vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITERABLE:multi_face_landmarks"
|
||||
output_stream: "ITEM:face_landmarks"
|
||||
output_stream: "BATCH_END:landmark_timestamp"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA:landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 255 b: 0 }
|
||||
thickness: 2
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Collects a RenderData object for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopRenderDataCalculator"
|
||||
input_stream: "ITEM:landmarks_render_data"
|
||||
input_stream: "BATCH_END:landmark_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks_render_data"
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECTS:rects"
|
||||
output_stream: "RENDER_DATA:rects_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "detections_render_data"
|
||||
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
|
||||
input_stream: "rects_render_data"
|
||||
output_stream: "IMAGE:output_image"
|
||||
}
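The BeginLoop/EndLoop pair above maps the per-face landmark-to-RenderData conversion over the input vector. Conceptually (a sketch only, with a hypothetical helper, not MediaPipe API) it behaves like:

// Per input timestamp: convert each face's landmarks, re-collect the results.
std::vector<mediapipe::RenderData> multi_face_landmarks_render_data;
for (const mediapipe::NormalizedLandmarkList& face_landmarks :
     multi_face_landmarks) {
  multi_face_landmarks_render_data.push_back(
      FaceLandmarksToRenderData(face_landmarks));  // hypothetical helper
}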
|
96
mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt
Normal file
|
@@ -0,0 +1,96 @@
|
|||
# MediaPipe face mesh rendering subgraph.
|
||||
|
||||
type: "FaceRendererGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:input_image"
|
||||
# Collection of detected/predicted faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:rects"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
input_stream: "DETECTIONS:detections"
|
||||
|
||||
# GPU image with rendered data. (GpuBuffer)
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:detections_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
|
||||
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||
# timestamp for downstream calculators to inform them that all elements in the
|
||||
# vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITERABLE:multi_face_landmarks"
|
||||
output_stream: "ITEM:face_landmarks"
|
||||
output_stream: "BATCH_END:end_timestamp"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA:landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 255 b: 0 }
|
||||
thickness: 2
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Collects a RenderData object for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopRenderDataCalculator"
|
||||
input_stream: "ITEM:landmarks_render_data"
|
||||
input_stream: "BATCH_END:end_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks_render_data"
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECTS:rects"
|
||||
output_stream: "RENDER_DATA:rects_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
input_stream: "detections_render_data"
|
||||
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
|
||||
input_stream: "rects_render_data"
|
||||
output_stream: "IMAGE_GPU:output_image"
|
||||
}
|
61
mediapipe/graphs/hair_segmentation/BUILD
Normal file
|
@@ -0,0 +1,61 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/image:recolor_calculator",
|
||||
"//mediapipe/calculators/image:set_alpha_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
|
||||
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
|
||||
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/image:recolor_calculator",
|
||||
"//mediapipe/calculators/image:set_alpha_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_gpu_binary_graph",
|
||||
graph = "hair_segmentation_mobile_gpu.pbtxt",
|
||||
output_name = "mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
|
@@ -0,0 +1,152 @@
|
|||
# MediaPipe graph that performs hair segmentation with TensorFlow Lite on CPU.
|
||||
# Used in the example in
|
||||
# mediapipe/examples/desktop/hair_segmentation:hair_segmentation_cpu
|
||||
|
||||
# Images on CPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish
|
||||
# generating the corresponding hair mask before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:hair_mask"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Transforms the input image on CPU to a 512x512 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the hair
|
||||
# segmentation model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "IMAGE:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 512
|
||||
output_height: 512
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Caches a mask fed back from the previous round of hair segmentation, and upon
|
||||
# the arrival of the next input image sends out the cached mask with the
|
||||
# timestamp replaced by that of the input image, essentially generating a packet
|
||||
# that carries the previous mask. Note that upon the arrival of the very first
|
||||
# input image, an empty packet is sent out to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:throttled_input_video"
|
||||
input_stream: "LOOP:hair_mask"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:previous_hair_mask"
|
||||
}
|
||||
|
||||
# Embeds the hair mask generated from the previous round of hair segmentation
|
||||
# as the alpha channel of the current input image.
|
||||
node {
|
||||
calculator: "SetAlphaCalculator"
|
||||
input_stream: "IMAGE:transformed_input_video"
|
||||
input_stream: "ALPHA:previous_hair_mask"
|
||||
output_stream: "IMAGE:mask_embedded_input_video"
|
||||
}
|
||||
|
||||
# Converts the transformed input image on CPU into an image tensor stored in
|
||||
# TfLiteTensor. The zero_center option is set to false to normalize the
|
||||
# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the
|
||||
# max_num_channels option set to 4, all 4 RGBA channels are contained in the
|
||||
# image tensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE:mask_embedded_input_video"
|
||||
output_stream: "TENSORS:image_tensor"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
|
||||
zero_center: false
|
||||
max_num_channels: 4
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a TensorFlow Lite op resolver that
|
||||
# supports custom ops needed by the model used in this graph.
|
||||
node {
|
||||
calculator: "TfLiteCustomOpResolverCalculator"
|
||||
output_side_packet: "op_resolver"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
|
||||
use_gpu: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# tensor representing the hair segmentation, which has the same width and height
|
||||
# as the input image tensor.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS:image_tensor"
|
||||
output_stream: "TENSORS:segmentation_tensor"
|
||||
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/hair_segmentation.tflite"
|
||||
use_gpu: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the segmentation tensor generated by the TensorFlow Lite model into a
|
||||
# mask of values in [0, 255], stored in a CPU buffer. It also
|
||||
# takes the mask generated previously as another input to improve the temporal
|
||||
# consistency.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToSegmentationCalculator"
|
||||
input_stream: "TENSORS:segmentation_tensor"
|
||||
input_stream: "PREV_MASK:previous_hair_mask"
|
||||
output_stream: "MASK:hair_mask"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
|
||||
tensor_width: 512
|
||||
tensor_height: 512
|
||||
tensor_channels: 2
|
||||
combine_with_previous_ratio: 0.9
|
||||
output_layer_index: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Colors the hair segmentation with the color specified in the option.
|
||||
node {
|
||||
calculator: "RecolorCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "MASK:hair_mask"
|
||||
output_stream: "IMAGE:output_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RecolorCalculatorOptions] {
|
||||
color { r: 0 g: 0 b: 255 }
|
||||
mask_channel: RED
|
||||
}
|
||||
}
|
||||
}
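The zero_center and max_num_channels options above control how 8-bit RGBA pixels are mapped into the float image tensor. Roughly, per channel (a simplified sketch of the assumed behavior, not the calculator's exact code):

#include <cstdint>

float ToTensorValue(uint8_t v, bool zero_center) {
  return zero_center ? (v - 127.5f) / 127.5f  // [-1.f, 1.f]
                     : v / 255.0f;            // [0.f, 1.f], as configured above
}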
|
|
@@ -0,0 +1,152 @@
|
|||
# MediaPipe graph that performs hair segmentation with TensorFlow Lite on GPU.
|
||||
# Used in the example in
|
||||
# mediapipe/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu.
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish
|
||||
# generating the corresponding hair mask before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:hair_mask"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Transforms the input image on GPU to a 512x512 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the hair
|
||||
# segmentation model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
output_stream: "IMAGE_GPU:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 512
|
||||
output_height: 512
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Caches a mask fed back from the previous round of hair segmentation, and upon
|
||||
# the arrival of the next input image sends out the cached mask with the
|
||||
# timestamp replaced by that of the input image, essentially generating a packet
|
||||
# that carries the previous mask. Note that upon the arrival of the very first
|
||||
# input image, an empty packet is sent out to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:throttled_input_video"
|
||||
input_stream: "LOOP:hair_mask"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:previous_hair_mask"
|
||||
}
|
||||
|
||||
# Embeds the hair mask generated from the previous round of hair segmentation
|
||||
# as the alpha channel of the current input image.
|
||||
node {
|
||||
calculator: "SetAlphaCalculator"
|
||||
input_stream: "IMAGE_GPU:transformed_input_video"
|
||||
input_stream: "ALPHA_GPU:previous_hair_mask"
|
||||
output_stream: "IMAGE_GPU:mask_embedded_input_video"
|
||||
}
|
||||
|
||||
# Converts the transformed input image on GPU into an image tensor stored in
|
||||
# tflite::gpu::GlBuffer. The zero_center option is set to false to normalize the
|
||||
# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the
|
||||
# max_num_channels option set to 4, all 4 RGBA channels are contained in the
|
||||
# image tensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE_GPU:mask_embedded_input_video"
|
||||
output_stream: "TENSORS_GPU:image_tensor"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
|
||||
zero_center: false
|
||||
max_num_channels: 4
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a TensorFlow Lite op resolver that
|
||||
# supports custom ops needed by the model used in this graph.
|
||||
node {
|
||||
calculator: "TfLiteCustomOpResolverCalculator"
|
||||
output_side_packet: "op_resolver"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
|
||||
use_gpu: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# tensor representing the hair segmentation, which has the same width and height
|
||||
# as the input image tensor.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS_GPU:image_tensor"
|
||||
output_stream: "TENSORS_GPU:segmentation_tensor"
|
||||
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/hair_segmentation.tflite"
|
||||
use_gpu: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the segmentation tensor generated by the TensorFlow Lite model into a
|
||||
# mask of values in [0.f, 1.f], stored in the R channel of a GPU buffer. It also
|
||||
# takes the mask generated previously as another input to improve the temporal
|
||||
# consistency.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToSegmentationCalculator"
|
||||
input_stream: "TENSORS_GPU:segmentation_tensor"
|
||||
input_stream: "PREV_MASK_GPU:previous_hair_mask"
|
||||
output_stream: "MASK_GPU:hair_mask"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
|
||||
tensor_width: 512
|
||||
tensor_height: 512
|
||||
tensor_channels: 2
|
||||
combine_with_previous_ratio: 0.9
|
||||
output_layer_index: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Colors the hair segmentation with the color specified in the option.
|
||||
node {
|
||||
calculator: "RecolorCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
input_stream: "MASK_GPU:hair_mask"
|
||||
output_stream: "IMAGE_GPU:output_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RecolorCalculatorOptions] {
|
||||
color { r: 0 g: 0 b: 255 }
|
||||
mask_channel: RED
|
||||
}
|
||||
}
|
||||
}
|
91
mediapipe/graphs/hand_tracking/BUILD
Normal file
|
@@ -0,0 +1,91 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
exports_files(glob([
|
||||
"*.pbtxt",
|
||||
]))
|
||||
|
||||
cc_library(
|
||||
name = "desktop_offline_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:immediate_mux_calculator",
|
||||
"//mediapipe/calculators/core:packet_inner_join_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_tflite_calculators",
|
||||
deps = [
|
||||
":desktop_offline_calculators",
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:merge_calculator",
|
||||
"//mediapipe/graphs/hand_tracking/subgraphs:hand_renderer_cpu",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "hand_tracking_desktop_live_binary_graph",
|
||||
graph = "hand_tracking_desktop_live.pbtxt",
|
||||
output_name = "hand_tracking_desktop_live.binarypb",
|
||||
deps = [":desktop_tflite_calculators"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/hand_tracking/subgraphs:hand_renderer_gpu",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "hand_tracking_mobile_gpu_binary_graph",
|
||||
graph = "hand_tracking_mobile.pbtxt",
|
||||
output_name = "hand_tracking_mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "detection_mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "hand_detection_mobile_gpu_binary_graph",
|
||||
graph = "hand_detection_mobile.pbtxt",
|
||||
output_name = "hand_detection_mobile_gpu.binarypb",
|
||||
deps = [":detection_mobile_calculators"],
|
||||
)
|
17
mediapipe/graphs/hand_tracking/calculators/BUILD
Normal file
|
@@ -0,0 +1,17 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
61
mediapipe/graphs/hand_tracking/hand_detection_desktop.pbtxt
Normal file
|
@@ -0,0 +1,61 @@
|
|||
# MediaPipe graph that performs hand detection on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
# Used in the example in
|
||||
# mediapipe/examples/desktop/hand_tracking:hand_detection_tflite.
|
||||
|
||||
# max_queue_size limits the number of packets enqueued on any input stream
|
||||
# by throttling inputs to the graph. This makes the graph only process one
|
||||
# frame at a time.
|
||||
max_queue_size: 1
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "DETECTIONS:output_detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the original image coming into
|
||||
# the graph.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,39 @@
|
|||
# MediaPipe graph that performs hand detection on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
# Used in the example in
|
||||
# mediapipe/examples/desktop/hand_tracking:hand_detection_cpu.
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "input_video"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
output_stream: "output_video"
|
||||
|
||||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "DETECTIONS:output_detections"
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the original image coming into
|
||||
# the graph.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
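The DetectionsToRenderDataCalculatorOptions in the two graphs above only configure a colour and a stroke width. For readers coming from the Rust side of this repo, here is a minimal sketch of the equivalent overlay drawn by hand with the opencv crate (same green, thickness-4 style). The bounding-box values are invented for illustration; this is not how the graph itself renders.

// Sketch only: draws one detection box in the style the graph's
// DetectionsToRenderDataCalculator is configured with (green, thickness 4).
// The rectangle coordinates below are made up for illustration.
use opencv::{core, imgproc, Result};

fn draw_palm_box(frame: &mut core::Mat) -> Result<()> {
    let bbox = core::Rect::new(120, 80, 200, 200); // hypothetical detection
    let green = core::Scalar::new(0.0, 255.0, 0.0, 0.0); // BGR order
    imgproc::rectangle(frame, bbox, green, 4, imgproc::LINE_8, 0)?;
    Ok(())
}

fn main() -> Result<()> {
    // Blank 640x480 BGR frame standing in for a decoded video frame.
    let mut frame =
        core::Mat::new_rows_cols_with_default(480, 640, core::CV_8UC3, core::Scalar::all(0.0))?;
    draw_palm_box(&mut frame)
}
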
59
mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt
Normal file

@@ -0,0 +1,59 @@
# MediaPipe graph that performs hand detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handdetectiongpu and
# mediapipe/examples/ios/handdetectiongpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# GPU image. (GpuBuffer)
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for PalmDetectionGpu
# downstream in the graph to finish its tasks before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images in PalmDetectionGpu to 1. This prevents the nodes in
# PalmDetectionGpu from queuing up incoming images and data excessively, which
# leads to increased latency and memory usage, unwanted in real-time mobile
# applications. It also eliminates unnecessary computation, e.g., the output
# produced by a node in the subgraph may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects palms.
node {
  calculator: "PalmDetectionGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:palm_detections"
}

# Converts detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:palm_detections"
  output_stream: "RENDER_DATA:detection_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:throttled_input_video"
  input_stream: "detection_render_data"
  output_stream: "IMAGE_GPU:output_video"
}
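The FlowLimiterCalculator comment above describes classic back-pressure: at most one frame is in flight, and frames that arrive while the pipeline is busy are dropped rather than queued. A rough Rust sketch of that scheduling behaviour, using a bounded channel; this is a conceptual analogy, not MediaPipe's implementation.

// Conceptual sketch of the flow-limiting behaviour described above: keep at
// most one frame in flight and drop frames that arrive while the (slow)
// consumer is still busy. Not MediaPipe code.
use std::sync::mpsc::{sync_channel, TrySendError};
use std::thread;
use std::time::Duration;

fn main() {
    // Capacity 1 plays the role of "at most one in-flight image".
    let (tx, rx) = sync_channel::<u32>(1);

    let consumer = thread::spawn(move || {
        for frame in rx {
            // Pretend detection + rendering takes a while.
            thread::sleep(Duration::from_millis(30));
            println!("processed frame {}", frame);
        }
    });

    for frame in 0..100u32 {
        thread::sleep(Duration::from_millis(10)); // camera delivers faster than we process
        if let Err(TrySendError::Full(dropped)) = tx.try_send(frame) {
            // Equivalent to the graph dropping images while waiting on FINISHED.
            println!("dropped frame {}", dropped);
        }
    }
    drop(tx);
    consumer.join().unwrap();
}
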
68
mediapipe/graphs/hand_tracking/hand_tracking_desktop.pbtxt
Normal file

@@ -0,0 +1,68 @@
# MediaPipe graph that performs hand tracking on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_tracking_tflite.

# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1

# Decodes an input video file into images and a video header.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:input_video"
  output_stream: "VIDEO_PRESTREAM:input_video_header"
}

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingCpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:multi_palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:multi_hand_rects"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:multi_palm_rects"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_cpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:multi_palm_detections"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:multi_palm_rects"
  input_stream: "NORM_RECTS:1:multi_hand_rects"
  output_stream: "IMAGE:output_video"
}

# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:output_video"
  input_stream: "VIDEO_PRESTREAM:input_video_header"
  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
  node_options: {
    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}
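Streams and side packets in these graphs are addressed as "TAG:name", and occasionally "TAG:index:name" as in "NORM_RECTS:0:multi_palm_rects". A small, self-contained Rust helper for splitting that notation can be handy when mirroring the graph wiring on the Rust side; the function name and shape are mine, not part of any MediaPipe API.

// Splits a MediaPipe stream identifier of the form "name", "TAG:name" or
// "TAG:index:name" into its parts. Illustrative helper only.
fn split_stream_id(id: &str) -> (Option<&str>, Option<usize>, &str) {
    let parts: Vec<&str> = id.split(':').collect();
    match parts.as_slice() {
        &[name] => (None, None, name),
        &[tag, name] => (Some(tag), None, name),
        &[tag, index, name] => (Some(tag), index.parse().ok(), name),
        _ => (None, None, id),
    }
}

fn main() {
    assert_eq!(
        split_stream_id("NORM_RECTS:0:multi_palm_rects"),
        (Some("NORM_RECTS"), Some(0), "multi_palm_rects")
    );
    assert_eq!(
        split_stream_id("LANDMARKS:landmarks"),
        (Some("LANDMARKS"), None, "landmarks")
    );
    assert_eq!(split_stream_id("input_video"), (None, None, "input_video"));
}
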
@@ -0,0 +1,46 @@
# MediaPipe graph that performs hand tracking on desktop with TensorFlow
# Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu.

# CPU image. (ImageFrame)
input_stream: "input_video"

# CPU image. (ImageFrame)
output_stream: "output_video"

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingCpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:multi_palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:multi_hand_rects"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:multi_palm_rects"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_cpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:multi_palm_detections"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:multi_palm_rects"
  input_stream: "NORM_RECTS:1:multi_hand_rects"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,48 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"

# Generates side packet containing max number of hands to detect/track.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_hands"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 2 }
    }
  }
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
  input_stream: "IMAGE:input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:input_video"
  input_stream: "DETECTIONS:palm_detections"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
  input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
  output_stream: "IMAGE:output_video"
}
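The "hand_landmarks" stream above carries normalized landmarks, with each coordinate in [0, 1] relative to the image. When consuming those from Rust, a small conversion helper is usually all that is needed to get pixel positions; the NormLandmark struct below is a stand-in for whatever landmark type the bindings expose, not an existing API.

// Illustrative only: converts a normalized landmark (x, y in [0, 1]) to pixel
// coordinates for a given frame size. `NormLandmark` is a stand-in type.
#[derive(Debug, Clone, Copy)]
struct NormLandmark {
    x: f32,
    y: f32,
}

fn to_pixels(lm: NormLandmark, width: u32, height: u32) -> (i32, i32) {
    let px = (lm.x * width as f32).round() as i32;
    let py = (lm.y * height as f32).round() as i32;
    // Clamp in case the model predicts slightly outside the frame.
    (px.clamp(0, width as i32 - 1), py.clamp(0, height as i32 - 1))
}

fn main() {
    let wrist = NormLandmark { x: 0.52, y: 0.81 };
    assert_eq!(to_pixels(wrist, 640, 480), (333, 389));
}
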
65
mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
Normal file

@@ -0,0 +1,65 @@
# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu.

# GPU image. (GpuBuffer)
input_stream: "input_video"

# Max number of hands to detect/process. (int)
input_side_packet: "num_hands"

# Model complexity (0 or 1). (int)
input_side_packet: "model_complexity"

# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "hand_landmarks"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
  input_stream: "IMAGE:throttled_input_video"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
  output_stream: "PALM_DETECTIONS:palm_detections"
  output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects_from_landmarks"
  output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see hand_renderer_gpu.pbtxt).
node {
  calculator: "HandRendererSubgraph"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "DETECTIONS:palm_detections"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HANDEDNESS:handedness"
  input_stream: "NORM_RECTS:0:hand_rects_from_palm_detections"
  input_stream: "NORM_RECTS:1:hand_rects_from_landmarks"
  output_stream: "IMAGE:output_video"
}
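Unlike the desktop variants, this graph leaves "num_hands" and "model_complexity" as input side packets for the host application to supply. On the Rust side that naturally maps to a small options struct passed at graph setup; the struct below is purely illustrative and not an existing API of these bindings.

// Illustrative only: a typed holder for the two side packets this graph
// expects. Not part of the mediapipe bindings.
#[derive(Debug, Clone, Copy)]
struct HandTrackingOptions {
    // Maps to the "num_hands" input side packet. (int)
    num_hands: i32,
    // Maps to the "model_complexity" input side packet, 0 or 1. (int)
    model_complexity: i32,
}

impl Default for HandTrackingOptions {
    fn default() -> Self {
        HandTrackingOptions {
            num_hands: 2,        // matches the int_value the desktop graphs hard-code
            model_complexity: 1, // arbitrary choice for this sketch
        }
    }
}

fn main() {
    let opts = HandTrackingOptions::default();
    println!(
        "num_hands={} model_complexity={}",
        opts.num_hands, opts.model_complexity
    );
}
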
58
mediapipe/graphs/hand_tracking/subgraphs/BUILD
Normal file

@@ -0,0 +1,58 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_simple_subgraph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_simple_subgraph(
    name = "hand_renderer_cpu",
    graph = "hand_renderer_cpu.pbtxt",
    register_as = "HandRendererSubgraph",
    deps = [
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:collection_has_min_size_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:labels_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
        "//mediapipe/calculators/util:rect_to_render_data_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_renderer_gpu",
    graph = "hand_renderer_gpu.pbtxt",
    register_as = "HandRendererSubgraph",
    deps = [
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:collection_has_min_size_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:labels_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
        "//mediapipe/calculators/util:rect_to_render_data_calculator",
    ],
)
209
mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_cpu.pbtxt
Normal file

@@ -0,0 +1,209 @@
# MediaPipe graph to render hand landmarks and some related debug information.
|
||||
|
||||
type: "HandRendererSubgraph"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_image"
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (std::vector<ClassificationList>)
|
||||
input_stream: "HANDEDNESS:multi_handedness"
|
||||
# Regions of interest calculated based on palm detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:0:multi_palm_rects"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:1:multi_hand_rects"
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
input_stream: "DETECTIONS:palm_detections"
|
||||
|
||||
# Updated CPU image. (ImageFrame)
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:palm_detections"
|
||||
output_stream: "RENDER_DATA:detection_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECTS:multi_hand_rects"
|
||||
output_stream: "RENDER_DATA:multi_hand_rects_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECTS:multi_palm_rects"
|
||||
output_stream: "RENDER_DATA:multi_palm_rects_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 125 g: 0 b: 122 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Outputs each element of multi_palm_landmarks at a fake timestamp for the rest
|
||||
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||
# timestamp for downstream calculators to inform them that all elements in the
|
||||
# vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITERABLE:multi_hand_landmarks"
|
||||
output_stream: "ITEM:single_hand_landmarks"
|
||||
output_stream: "BATCH_END:landmark_timestamp"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "RENDER_DATA:single_hand_landmark_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 0
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 5
|
||||
landmark_connections: 9
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 9
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 14
|
||||
landmark_connections: 15
|
||||
landmark_connections: 15
|
||||
landmark_connections: 16
|
||||
landmark_connections: 13
|
||||
landmark_connections: 17
|
||||
landmark_connections: 0
|
||||
landmark_connections: 17
|
||||
landmark_connections: 17
|
||||
landmark_connections: 18
|
||||
landmark_connections: 18
|
||||
landmark_connections: 19
|
||||
landmark_connections: 19
|
||||
landmark_connections: 20
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 255 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Collects a RenderData object for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopRenderDataCalculator"
|
||||
input_stream: "ITEM:single_hand_landmark_render_data"
|
||||
input_stream: "BATCH_END:landmark_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_landmarks_render_data"
|
||||
}
|
||||
|
||||
# Don't render handedness if more than one handedness is reported.
|
||||
node {
|
||||
calculator: "ClassificationListVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:multi_handedness"
|
||||
output_stream: "disallow_handedness_rendering"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
|
||||
min_size: 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "multi_handedness"
|
||||
input_stream: "DISALLOW:disallow_handedness_rendering"
|
||||
output_stream: "allowed_multi_handedness"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.GateCalculatorOptions] {
|
||||
empty_packets_as_allow: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "SplitClassificationListVectorCalculator"
|
||||
input_stream: "allowed_multi_handedness"
|
||||
output_stream: "handedness"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts classification to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LabelsToRenderDataCalculator"
|
||||
input_stream: "CLASSIFICATIONS:handedness"
|
||||
output_stream: "RENDER_DATA:handedness_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: {
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 10.0
|
||||
font_height_px: 50
|
||||
horizontal_offset_px: 30
|
||||
vertical_offset_px: 50
|
||||
|
||||
max_num_labels: 1
|
||||
location: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images. Consumes
|
||||
# a vector of RenderData objects and draws each of them on the input frame.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "detection_render_data"
|
||||
input_stream: "multi_hand_rects_render_data"
|
||||
input_stream: "multi_palm_rects_render_data"
|
||||
input_stream: "handedness_render_data"
|
||||
input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
|
||||
output_stream: "IMAGE:output_image"
|
||||
}
|
209
mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_gpu.pbtxt
Normal file

@@ -0,0 +1,209 @@
# MediaPipe graph to render hand landmarks and some related debug information.
|
||||
|
||||
type: "HandRendererSubgraph"
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "IMAGE:input_image"
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (std::vector<ClassificationList>)
|
||||
input_stream: "HANDEDNESS:multi_handedness"
|
||||
# Regions of interest calculated based on palm detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:0:multi_palm_rects"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:1:multi_hand_rects"
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
input_stream: "DETECTIONS:palm_detections"
|
||||
|
||||
# Updated GPU buffer. (GpuBuffer)
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:palm_detections"
|
||||
output_stream: "RENDER_DATA:detection_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECTS:multi_hand_rects"
|
||||
output_stream: "RENDER_DATA:multi_hand_rects_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECTS:multi_palm_rects"
|
||||
output_stream: "RENDER_DATA:multi_palm_rects_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 125 g: 0 b: 122 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Outputs each element of multi_palm_landmarks at a fake timestamp for the rest
|
||||
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||
# timestamp for downstream calculators to inform them that all elements in the
|
||||
# vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITERABLE:multi_hand_landmarks"
|
||||
output_stream: "ITEM:single_hand_landmarks"
|
||||
output_stream: "BATCH_END:landmark_timestamp"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "RENDER_DATA:single_hand_landmark_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 0
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 5
|
||||
landmark_connections: 9
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 9
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 14
|
||||
landmark_connections: 15
|
||||
landmark_connections: 15
|
||||
landmark_connections: 16
|
||||
landmark_connections: 13
|
||||
landmark_connections: 17
|
||||
landmark_connections: 0
|
||||
landmark_connections: 17
|
||||
landmark_connections: 17
|
||||
landmark_connections: 18
|
||||
landmark_connections: 18
|
||||
landmark_connections: 19
|
||||
landmark_connections: 19
|
||||
landmark_connections: 20
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 255 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Collects a RenderData object for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopRenderDataCalculator"
|
||||
input_stream: "ITEM:single_hand_landmark_render_data"
|
||||
input_stream: "BATCH_END:landmark_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_landmarks_render_data"
|
||||
}
|
||||
|
||||
# Don't render handedness if more than one handedness is reported.
|
||||
node {
|
||||
calculator: "ClassificationListVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:multi_handedness"
|
||||
output_stream: "disallow_handedness_rendering"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
|
||||
min_size: 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "multi_handedness"
|
||||
input_stream: "DISALLOW:disallow_handedness_rendering"
|
||||
output_stream: "allowed_multi_handedness"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.GateCalculatorOptions] {
|
||||
empty_packets_as_allow: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "SplitClassificationListVectorCalculator"
|
||||
input_stream: "allowed_multi_handedness"
|
||||
output_stream: "handedness"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts classification to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LabelsToRenderDataCalculator"
|
||||
input_stream: "CLASSIFICATIONS:handedness"
|
||||
output_stream: "RENDER_DATA:handedness_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: {
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 10.0
|
||||
font_height_px: 50
|
||||
horizontal_offset_px: 30
|
||||
vertical_offset_px: 50
|
||||
|
||||
max_num_labels: 1
|
||||
location: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images. Consumes
|
||||
# a vector of RenderData objects and draws each of them on the input frame.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
input_stream: "detection_render_data"
|
||||
input_stream: "multi_hand_rects_render_data"
|
||||
input_stream: "multi_palm_rects_render_data"
|
||||
input_stream: "handedness_render_data"
|
||||
input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
|
||||
output_stream: "IMAGE_GPU:output_image"
|
||||
}
|
70
mediapipe/graphs/holistic_tracking/BUILD
Normal file

@@ -0,0 +1,70 @@
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "holistic_tracking_to_render_data",
|
||||
graph = "holistic_tracking_to_render_data.pbtxt",
|
||||
register_as = "HolisticTrackingToRenderData",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
|
||||
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||
"//mediapipe/calculators/core:merge_calculator",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:rect_to_render_scale_calculator",
|
||||
"//mediapipe/modules/holistic_landmark:hand_wrist_for_pose",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "holistic_tracking_gpu_deps",
|
||||
deps = [
|
||||
":holistic_tracking_to_render_data",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/modules/holistic_landmark:holistic_landmark_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "holistic_tracking_gpu",
|
||||
graph = "holistic_tracking_gpu.pbtxt",
|
||||
output_name = "holistic_tracking_gpu.binarypb",
|
||||
deps = [":holistic_tracking_gpu_deps"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "holistic_tracking_cpu_graph_deps",
|
||||
deps = [
|
||||
":holistic_tracking_to_render_data",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
|
||||
],
|
||||
)
|
|
@@ -0,0 +1,75 @@
|
|||
# Tracks and renders pose + hands + face landmarks.
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "input_video"
|
||||
|
||||
# CPU image with rendered results. (ImageFrame)
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
# Timeout is disabled (set to 0) as first frame processing can take more
|
||||
# than 1 second.
|
||||
in_flight_timeout: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "HolisticLandmarkCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
output_stream: "POSE_ROI:pose_roi"
|
||||
output_stream: "POSE_DETECTION:pose_detection"
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
}
|
||||
|
||||
# Gets image size.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Converts pose, hands and face landmarks to a render data vector.
|
||||
node {
|
||||
calculator: "HolisticTrackingToRenderData"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
input_stream: "POSE_ROI:pose_roi"
|
||||
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA_VECTOR:render_data_vector"
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "VECTOR:render_data_vector"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
|
@@ -0,0 +1,75 @@
|
|||
# Tracks and renders pose + hands + face landmarks.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# GPU image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most part of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
# Timeout is disabled (set to 0) as first frame processing can take more
|
||||
# than 1 second.
|
||||
in_flight_timeout: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "HolisticLandmarkGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
output_stream: "POSE_ROI:pose_roi"
|
||||
output_stream: "POSE_DETECTION:pose_detection"
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
}
|
||||
|
||||
# Gets image size.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Converts pose, hands and face landmarks to a render data vector.
|
||||
node {
|
||||
calculator: "HolisticTrackingToRenderData"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
input_stream: "POSE_ROI:pose_roi"
|
||||
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA_VECTOR:render_data_vector"
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
input_stream: "VECTOR:render_data_vector"
|
||||
output_stream: "IMAGE_GPU:output_video"
|
||||
}
|
|
@@ -0,0 +1,757 @@
|
|||
# Converts pose + hands + face landmarks to a render data vector.
|
||||
|
||||
type: "HolisticTrackingToRenderData"
|
||||
|
||||
# Image size. (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
# Pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "POSE_LANDMARKS:landmarks"
|
||||
# Region of interest calculated based on pose landmarks. (NormalizedRect)
|
||||
input_stream: "POSE_ROI:roi"
|
||||
# Left hand landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
# Right hand landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
# Render data vector. (std::vector<RenderData>)
|
||||
output_stream: "RENDER_DATA_VECTOR:render_data_vector"
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# ------------------ Calculates scale for render objects -------------------- #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Calculates rendering scale based on the pose bounding box.
|
||||
node {
|
||||
calculator: "RectToRenderScaleCalculator"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "RENDER_SCALE:render_scale"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderScaleCalculatorOptions] {
|
||||
multiplier: 0.0008
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# --------------- Combines pose and hands into pose skeleton ---------------- #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Gets pose landmarks before wrists.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks"
|
||||
output_stream: "landmarks_before_wrist"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 11 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets pose left wrist landmark.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks"
|
||||
output_stream: "landmarks_left_wrist"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 15 end: 16 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets pose right wrist landmark.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks"
|
||||
output_stream: "landmarks_right_wrist"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 16 end: 17 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets pose landmarks after wrists.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks"
|
||||
output_stream: "landmarks_after_wrist"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 23 end: 33 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets left hand wrist landmark.
|
||||
node {
|
||||
calculator: "HandWristForPose"
|
||||
input_stream: "HAND_LANDMARKS:left_hand_landmarks"
|
||||
output_stream: "WRIST_LANDMARK:left_hand_wrist_landmark"
|
||||
}
|
||||
|
||||
# Gets left hand wrist landmark, or keeps the pose wrist landmark if the hand was not
|
||||
# predicted.
|
||||
node {
|
||||
calculator: "MergeCalculator"
|
||||
input_stream: "left_hand_wrist_landmark"
|
||||
input_stream: "landmarks_left_wrist"
|
||||
output_stream: "merged_left_hand_wrist_landmark"
|
||||
}
|
||||
|
||||
# Gets right hand wrist landmark.
|
||||
node {
|
||||
calculator: "HandWristForPose"
|
||||
input_stream: "HAND_LANDMARKS:right_hand_landmarks"
|
||||
output_stream: "WRIST_LANDMARK:right_hand_wrist_landmark"
|
||||
}
|
||||
|
||||
# Gets right hand wrist landmark, or keeps the pose wrist landmark if the hand was not
|
||||
# predicted.
|
||||
node {
|
||||
calculator: "MergeCalculator"
|
||||
input_stream: "right_hand_wrist_landmark"
|
||||
input_stream: "landmarks_right_wrist"
|
||||
output_stream: "merged_right_hand_wrist_landmark"
|
||||
}
|
||||
|
||||
# Combines pose landmarks all together.
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_before_wrist"
|
||||
input_stream: "merged_left_hand_wrist_landmark"
|
||||
input_stream: "merged_right_hand_wrist_landmark"
|
||||
input_stream: "landmarks_after_wrist"
|
||||
output_stream: "landmarks_merged"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConcatenateVectorCalculatorOptions] {
|
||||
only_emit_if_all_present: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Takes left pose landmarks.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_merged"
|
||||
output_stream: "landmarks_left_side"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 4 end: 5 }
|
||||
ranges: { begin: 6 end: 7 }
|
||||
ranges: { begin: 8 end: 9 }
|
||||
ranges: { begin: 10 end: 11 }
|
||||
ranges: { begin: 12 end: 13 }
|
||||
ranges: { begin: 14 end: 15 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Takes right pose landmarks.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_merged"
|
||||
output_stream: "landmarks_right_side"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
ranges: { begin: 5 end: 6 }
|
||||
ranges: { begin: 7 end: 8 }
|
||||
ranges: { begin: 9 end: 10 }
|
||||
ranges: { begin: 11 end: 12 }
|
||||
ranges: { begin: 13 end: 14 }
|
||||
ranges: { begin: 15 end: 16 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# ---------------------------------- Pose ----------------------------------- #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Converts pose connections to white lines.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks_merged"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 0
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 4
|
||||
landmark_connections: 1
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 5
|
||||
landmark_connections: 0
|
||||
landmark_connections: 6
|
||||
landmark_connections: 1
|
||||
landmark_connections: 7
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 6
|
||||
landmark_connections: 8
|
||||
landmark_connections: 7
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 10
|
||||
landmark_connections: 9
|
||||
landmark_connections: 11
|
||||
landmark_connections: 10
|
||||
landmark_connections: 12
|
||||
landmark_connections: 11
|
||||
landmark_connections: 13
|
||||
landmark_connections: 12
|
||||
landmark_connections: 14
|
||||
landmark_connections: 13
|
||||
landmark_connections: 15
|
||||
landmark_connections: 10
|
||||
landmark_connections: 14
|
||||
landmark_connections: 11
|
||||
landmark_connections: 15
|
||||
|
||||
landmark_color { r: 255 g: 255 b: 255 }
|
||||
connection_color { r: 255 g: 255 b: 255 }
|
||||
thickness: 3.0
|
||||
visualize_landmark_depth: false
|
||||
utilize_visibility: true
|
||||
visibility_threshold: 0.1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts pose joints to big white circles.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks_merged"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:landmarks_background_joints_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 255 g: 255 b: 255 }
|
||||
connection_color { r: 255 g: 255 b: 255 }
|
||||
thickness: 5.0
|
||||
visualize_landmark_depth: false
|
||||
utilize_visibility: true
|
||||
visibility_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts pose left side joints to orange circles (inside white ones).
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks_left_side"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:landmarks_left_joints_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 255 g: 138 b: 0 }
|
||||
connection_color { r: 255 g: 138 b: 0 }
|
||||
thickness: 3.0
|
||||
visualize_landmark_depth: false
|
||||
utilize_visibility: true
|
||||
visibility_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts pose right side joints to cyan circles (inside white ones).
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks_right_side"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:landmarks_right_joints_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 0 g: 217 b: 231 }
|
||||
connection_color { r: 0 g: 217 b: 231 }
|
||||
thickness: 3.0
|
||||
visualize_landmark_depth: false
|
||||
utilize_visibility: true
|
||||
visibility_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# ------------------------------- Left hand --------------------------------- #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Converts left hand connections to white lines.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:left_hand_landmarks"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:left_hand_landmarks_connections_rd"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 0
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 5
|
||||
landmark_connections: 9
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 9
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 14
|
||||
landmark_connections: 15
|
||||
landmark_connections: 15
|
||||
landmark_connections: 16
|
||||
landmark_connections: 13
|
||||
landmark_connections: 17
|
||||
landmark_connections: 0
|
||||
landmark_connections: 17
|
||||
landmark_connections: 17
|
||||
landmark_connections: 18
|
||||
landmark_connections: 18
|
||||
landmark_connections: 19
|
||||
landmark_connections: 19
|
||||
landmark_connections: 20
|
||||
landmark_color { r: 255 g: 255 b: 255 }
|
||||
connection_color { r: 255 g: 255 b: 255 }
|
||||
thickness: 4.0
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts left hand landmarks to colored joints.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:left_hand_landmarks"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:left_hand_landmarks_joints_rd"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 255 g: 138 b: 0 }
|
||||
connection_color { r: 255 g: 138 b: 0 }
|
||||
thickness: 3.0
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# -------------------------------- Right hand ------------------------------- #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Converts right hand connections to white lines.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:right_hand_landmarks"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:right_hand_landmarks_connections_rd"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 0
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 5
|
||||
landmark_connections: 9
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 9
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 14
|
||||
landmark_connections: 15
|
||||
landmark_connections: 15
|
||||
landmark_connections: 16
|
||||
landmark_connections: 13
|
||||
landmark_connections: 17
|
||||
landmark_connections: 0
|
||||
landmark_connections: 17
|
||||
landmark_connections: 17
|
||||
landmark_connections: 18
|
||||
landmark_connections: 18
|
||||
landmark_connections: 19
|
||||
landmark_connections: 19
|
||||
landmark_connections: 20
|
||||
landmark_color { r: 255 g: 255 b: 255 }
|
||||
connection_color { r: 255 g: 255 b: 255 }
|
||||
thickness: 4.0
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts right hand landmarks to colored joints.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:right_hand_landmarks"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:right_hand_landmarks_joints_rd"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 0 g: 217 b: 231 }
|
||||
connection_color { r: 0 g: 217 b: 231 }
|
||||
thickness: 3.0
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# ---------------------------------- Face ----------------------------------- #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Converts face connections to white lines.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:face_landmarks_connections_rd"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
# Lips.
|
||||
landmark_connections: 61
|
||||
landmark_connections: 146
|
||||
landmark_connections: 146
|
||||
landmark_connections: 91
|
||||
landmark_connections: 91
|
||||
landmark_connections: 181
|
||||
landmark_connections: 181
|
||||
landmark_connections: 84
|
||||
landmark_connections: 84
|
||||
landmark_connections: 17
|
||||
landmark_connections: 17
|
||||
landmark_connections: 314
|
||||
landmark_connections: 314
|
||||
landmark_connections: 405
|
||||
landmark_connections: 405
|
||||
landmark_connections: 321
|
||||
landmark_connections: 321
|
||||
landmark_connections: 375
|
||||
landmark_connections: 375
|
||||
landmark_connections: 291
|
||||
landmark_connections: 61
|
||||
landmark_connections: 185
|
||||
landmark_connections: 185
|
||||
landmark_connections: 40
|
||||
landmark_connections: 40
|
||||
landmark_connections: 39
|
||||
landmark_connections: 39
|
||||
landmark_connections: 37
|
||||
landmark_connections: 37
|
||||
landmark_connections: 0
|
||||
landmark_connections: 0
|
||||
landmark_connections: 267
|
||||
landmark_connections: 267
|
||||
landmark_connections: 269
|
||||
landmark_connections: 269
|
||||
landmark_connections: 270
|
||||
landmark_connections: 270
|
||||
landmark_connections: 409
|
||||
landmark_connections: 409
|
||||
landmark_connections: 291
|
||||
landmark_connections: 78
|
||||
landmark_connections: 95
|
||||
landmark_connections: 95
|
||||
landmark_connections: 88
|
||||
landmark_connections: 88
|
||||
landmark_connections: 178
|
||||
landmark_connections: 178
|
||||
landmark_connections: 87
|
||||
landmark_connections: 87
|
||||
landmark_connections: 14
|
||||
landmark_connections: 14
|
||||
landmark_connections: 317
|
||||
landmark_connections: 317
|
||||
landmark_connections: 402
|
||||
landmark_connections: 402
|
||||
landmark_connections: 318
|
||||
landmark_connections: 318
|
||||
landmark_connections: 324
|
||||
landmark_connections: 324
|
||||
landmark_connections: 308
|
||||
landmark_connections: 78
|
||||
landmark_connections: 191
|
||||
landmark_connections: 191
|
||||
landmark_connections: 80
|
||||
landmark_connections: 80
|
||||
landmark_connections: 81
|
||||
landmark_connections: 81
|
||||
landmark_connections: 82
|
||||
landmark_connections: 82
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 312
|
||||
landmark_connections: 312
|
||||
landmark_connections: 311
|
||||
landmark_connections: 311
|
||||
landmark_connections: 310
|
||||
landmark_connections: 310
|
||||
landmark_connections: 415
|
||||
landmark_connections: 415
|
||||
landmark_connections: 308
|
||||
# Left eye.
|
||||
landmark_connections: 33
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 163
|
||||
landmark_connections: 163
|
||||
landmark_connections: 144
|
||||
landmark_connections: 144
|
||||
landmark_connections: 145
|
||||
landmark_connections: 145
|
||||
landmark_connections: 153
|
||||
landmark_connections: 153
|
||||
landmark_connections: 154
|
||||
landmark_connections: 154
|
||||
landmark_connections: 155
|
||||
landmark_connections: 155
|
||||
landmark_connections: 133
|
||||
landmark_connections: 33
|
||||
landmark_connections: 246
|
||||
landmark_connections: 246
|
||||
landmark_connections: 161
|
||||
landmark_connections: 161
|
||||
landmark_connections: 160
|
||||
landmark_connections: 160
|
||||
landmark_connections: 159
|
||||
landmark_connections: 159
|
||||
landmark_connections: 158
|
||||
landmark_connections: 158
|
||||
landmark_connections: 157
|
||||
landmark_connections: 157
|
||||
landmark_connections: 173
|
||||
landmark_connections: 173
|
||||
landmark_connections: 133
|
||||
# Left eyebrow.
|
||||
landmark_connections: 46
|
||||
landmark_connections: 53
|
||||
landmark_connections: 53
|
||||
landmark_connections: 52
|
||||
landmark_connections: 52
|
||||
landmark_connections: 65
|
||||
landmark_connections: 65
|
||||
landmark_connections: 55
|
||||
landmark_connections: 70
|
||||
landmark_connections: 63
|
||||
landmark_connections: 63
|
||||
landmark_connections: 105
|
||||
landmark_connections: 105
|
||||
landmark_connections: 66
|
||||
landmark_connections: 66
|
||||
landmark_connections: 107
|
||||
# Right eye.
|
||||
landmark_connections: 263
|
||||
landmark_connections: 249
|
||||
landmark_connections: 249
|
||||
landmark_connections: 390
|
||||
landmark_connections: 390
|
||||
landmark_connections: 373
|
||||
landmark_connections: 373
|
||||
landmark_connections: 374
|
||||
landmark_connections: 374
|
||||
landmark_connections: 380
|
||||
landmark_connections: 380
|
||||
landmark_connections: 381
|
||||
landmark_connections: 381
|
||||
landmark_connections: 382
|
||||
landmark_connections: 382
|
||||
landmark_connections: 362
|
||||
landmark_connections: 263
|
||||
landmark_connections: 466
|
||||
landmark_connections: 466
|
||||
landmark_connections: 388
|
||||
landmark_connections: 388
|
||||
landmark_connections: 387
|
||||
landmark_connections: 387
|
||||
landmark_connections: 386
|
||||
landmark_connections: 386
|
||||
landmark_connections: 385
|
||||
landmark_connections: 385
|
||||
landmark_connections: 384
|
||||
landmark_connections: 384
|
||||
landmark_connections: 398
|
||||
landmark_connections: 398
|
||||
landmark_connections: 362
|
||||
# Right eyebrow.
|
||||
landmark_connections: 276
|
||||
landmark_connections: 283
|
||||
landmark_connections: 283
|
||||
landmark_connections: 282
|
||||
landmark_connections: 282
|
||||
landmark_connections: 295
|
||||
landmark_connections: 295
|
||||
landmark_connections: 285
|
||||
landmark_connections: 300
|
||||
landmark_connections: 293
|
||||
landmark_connections: 293
|
||||
landmark_connections: 334
|
||||
landmark_connections: 334
|
||||
landmark_connections: 296
|
||||
landmark_connections: 296
|
||||
landmark_connections: 336
|
||||
# Face oval.
|
||||
landmark_connections: 10
|
||||
landmark_connections: 338
|
||||
landmark_connections: 338
|
||||
landmark_connections: 297
|
||||
landmark_connections: 297
|
||||
landmark_connections: 332
|
||||
landmark_connections: 332
|
||||
landmark_connections: 284
|
||||
landmark_connections: 284
|
||||
landmark_connections: 251
|
||||
landmark_connections: 251
|
||||
landmark_connections: 389
|
||||
landmark_connections: 389
|
||||
landmark_connections: 356
|
||||
landmark_connections: 356
|
||||
landmark_connections: 454
|
||||
landmark_connections: 454
|
||||
landmark_connections: 323
|
||||
landmark_connections: 323
|
||||
landmark_connections: 361
|
||||
landmark_connections: 361
|
||||
landmark_connections: 288
|
||||
landmark_connections: 288
|
||||
landmark_connections: 397
|
||||
landmark_connections: 397
|
||||
landmark_connections: 365
|
||||
landmark_connections: 365
|
||||
landmark_connections: 379
|
||||
landmark_connections: 379
|
||||
landmark_connections: 378
|
||||
landmark_connections: 378
|
||||
landmark_connections: 400
|
||||
landmark_connections: 400
|
||||
landmark_connections: 377
|
||||
landmark_connections: 377
|
||||
landmark_connections: 152
|
||||
landmark_connections: 152
|
||||
landmark_connections: 148
|
||||
landmark_connections: 148
|
||||
landmark_connections: 176
|
||||
landmark_connections: 176
|
||||
landmark_connections: 149
|
||||
landmark_connections: 149
|
||||
landmark_connections: 150
|
||||
landmark_connections: 150
|
||||
landmark_connections: 136
|
||||
landmark_connections: 136
|
||||
landmark_connections: 172
|
||||
landmark_connections: 172
|
||||
landmark_connections: 58
|
||||
landmark_connections: 58
|
||||
landmark_connections: 132
|
||||
landmark_connections: 132
|
||||
landmark_connections: 93
|
||||
landmark_connections: 93
|
||||
landmark_connections: 234
|
||||
landmark_connections: 234
|
||||
landmark_connections: 127
|
||||
landmark_connections: 127
|
||||
landmark_connections: 162
|
||||
landmark_connections: 162
|
||||
landmark_connections: 21
|
||||
landmark_connections: 21
|
||||
landmark_connections: 54
|
||||
landmark_connections: 54
|
||||
landmark_connections: 103
|
||||
landmark_connections: 103
|
||||
landmark_connections: 67
|
||||
landmark_connections: 67
|
||||
landmark_connections: 109
|
||||
landmark_connections: 109
|
||||
landmark_connections: 10
|
||||
landmark_color { r: 255 g: 255 b: 255 }
|
||||
connection_color { r: 255 g: 255 b: 255 }
|
||||
thickness: 0.5
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts face joints to cyan circles.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
input_stream: "RENDER_SCALE:render_scale"
|
||||
output_stream: "RENDER_DATA:face_landmarks_joints_rd"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 0 g: 217 b: 231 }
|
||||
connection_color { r: 0 g: 217 b: 231 }
|
||||
thickness: 0.5
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Concatenates all render data.
|
||||
node {
|
||||
calculator: "ConcatenateRenderDataVectorCalculator"
|
||||
input_stream: "landmarks_render_data"
|
||||
input_stream: "landmarks_background_joints_render_data"
|
||||
input_stream: "landmarks_left_joints_render_data"
|
||||
input_stream: "landmarks_right_joints_render_data"
|
||||
|
||||
# Left hand.
|
||||
input_stream: "left_hand_landmarks_connections_rd"
|
||||
input_stream: "left_hand_landmarks_joints_rd"
|
||||
|
||||
# Right hand.
|
||||
input_stream: "right_hand_landmarks_connections_rd"
|
||||
input_stream: "right_hand_landmarks_joints_rd"
|
||||
|
||||
# Face.
|
||||
input_stream: "face_landmarks_connections_rd"
|
||||
input_stream: "face_landmarks_joints_rd"
|
||||
|
||||
output_stream: "render_data_vector"
|
||||
}
39 mediapipe/graphs/instant_motion_tracking/BUILD Normal file
@ -0,0 +1,39 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "instant_motion_tracking_deps",
    deps = [
        "//mediapipe/graphs/instant_motion_tracking/calculators:matrices_manager_calculator",
        "//mediapipe/graphs/instant_motion_tracking/calculators:sticker_manager_calculator",
        "//mediapipe/graphs/instant_motion_tracking/subgraphs:region_tracking",
        "//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
    ],
)

mediapipe_binary_graph(
    name = "instant_motion_tracking_binary_graph",
    graph = "instant_motion_tracking.pbtxt",
    output_name = "instant_motion_tracking.binarypb",
    deps = [":instant_motion_tracking_deps"],
)
84 mediapipe/graphs/instant_motion_tracking/calculators/BUILD Normal file
@ -0,0 +1,84 @@
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
proto_library(
|
||||
name = "sticker_buffer_proto",
|
||||
srcs = [
|
||||
"sticker_buffer.proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "sticker_buffer_cc_proto",
|
||||
srcs = [
|
||||
"sticker_buffer.proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":sticker_buffer_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "sticker_manager_calculator",
|
||||
srcs = ["sticker_manager_calculator.cc"],
|
||||
hdrs = ["transformations.h"],
|
||||
deps = [
|
||||
":sticker_buffer_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:timestamp",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "matrices_manager_calculator",
|
||||
srcs = ["matrices_manager_calculator.cc"],
|
||||
hdrs = ["transformations.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:timestamp",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/graphs/object_detection_3d/calculators:model_matrix_cc_proto",
|
||||
"//mediapipe/modules/objectron/calculators:box",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tracked_anchor_manager_calculator",
|
||||
srcs = ["tracked_anchor_manager_calculator.cc"],
|
||||
hdrs = ["transformations.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/util/tracking:box_tracker_cc_proto",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
393 mediapipe/graphs/instant_motion_tracking/calculators/matrices_manager_calculator.cc Normal file
@ -0,0 +1,393 @@
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "Eigen/Dense"
|
||||
#include "Eigen/Geometry"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
|
||||
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
|
||||
#include "mediapipe/modules/objectron/calculators/box.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
using Matrix4fCM = Eigen::Matrix<float, 4, 4, Eigen::ColMajor>;
|
||||
using Vector3f = Eigen::Vector3f;
|
||||
using Matrix3f = Eigen::Matrix3f;
|
||||
using DiagonalMatrix3f = Eigen::DiagonalMatrix<float, 3>;
|
||||
constexpr char kAnchorsTag[] = "ANCHORS";
|
||||
constexpr char kIMUMatrixTag[] = "IMU_ROTATION";
|
||||
constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
|
||||
constexpr char kUserScalingsTag[] = "USER_SCALINGS";
|
||||
constexpr char kRendersTag[] = "RENDER_DATA";
|
||||
constexpr char kGifAspectRatioTag[] = "GIF_ASPECT_RATIO";
|
||||
constexpr char kFOVSidePacketTag[] = "FOV";
|
||||
constexpr char kAspectRatioSidePacketTag[] = "ASPECT_RATIO";
|
||||
// initial Z value (-10 is center point in visual range for OpenGL render)
|
||||
constexpr float kInitialZ = -10.0f;
|
||||
} // namespace
|
||||
|
||||
// Intermediary for rotation and translation data to model matrix usable by
|
||||
// gl_animation_overlay_calculator. For information on the construction of
|
||||
// OpenGL objects and transformations (including a breakdown of model matrices),
|
||||
// please visit: https://open.gl/transformations
|
||||
//
|
||||
// Input Side Packets:
|
||||
// FOV - Vertical field of view for device [REQUIRED - Defines perspective
|
||||
// matrix] ASPECT_RATIO - Aspect ratio of device [REQUIRED - Defines
|
||||
// perspective matrix]
|
||||
//
|
||||
// Input streams:
|
||||
// ANCHORS - Anchor data with x,y,z coordinates (x,y are in [0.0-1.0] range for
|
||||
// position on the device screen, while z is the scaling factor that changes
|
||||
// in proportion to the distance from the tracked region) [REQUIRED]
|
||||
// IMU_ROTATION - float[9] of row-major device rotation matrix [REQUIRED]
|
||||
// USER_ROTATIONS - UserRotations with corresponding radians of rotation
|
||||
// [REQUIRED]
|
||||
// USER_SCALINGS - UserScalings with corresponding scale factor [REQUIRED]
|
||||
// GIF_ASPECT_RATIO - Aspect ratio of GIF image used to dynamically scale
|
||||
// GIF asset defined as width / height [OPTIONAL]
|
||||
// Output:
|
||||
// MATRICES - TimedModelMatrixProtoList of each object type to render
|
||||
// [REQUIRED]
|
||||
//
|
||||
// Example config:
|
||||
// node{
|
||||
// calculator: "MatricesManagerCalculator"
|
||||
// input_stream: "ANCHORS:tracked_scaled_anchor_data"
|
||||
// input_stream: "IMU_ROTATION:imu_rotation_matrix"
|
||||
// input_stream: "USER_ROTATIONS:user_rotation_data"
|
||||
// input_stream: "USER_SCALINGS:user_scaling_data"
|
||||
// input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
|
||||
// output_stream: "MATRICES:0:first_render_matrices"
|
||||
// output_stream: "MATRICES:1:second_render_matrices" [unbounded input size]
|
||||
// input_side_packet: "FOV:vertical_fov_radians"
|
||||
// input_side_packet: "ASPECT_RATIO:aspect_ratio"
|
||||
// }
|
||||
|
||||
class MatricesManagerCalculator : public CalculatorBase {
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc);
|
||||
absl::Status Open(CalculatorContext* cc) override;
|
||||
absl::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
// Device properties that will be preset by side packets
|
||||
float vertical_fov_radians_ = 0.0f;
|
||||
float aspect_ratio_ = 0.0f;
|
||||
float gif_aspect_ratio_ = 1.0f;
|
||||
|
||||
const Matrix3f GenerateUserRotationMatrix(const float rotation_radians) const;
|
||||
const Matrix4fCM GenerateEigenModelMatrix(
|
||||
const Vector3f& translation_vector,
|
||||
const Matrix3f& rotation_submatrix) const;
|
||||
const Vector3f GenerateAnchorVector(const Anchor& tracked_anchor) const;
|
||||
DiagonalMatrix3f GetDefaultRenderScaleDiagonal(
|
||||
const int render_id, const float user_scale_factor,
|
||||
const float gif_aspect_ratio) const;
|
||||
|
||||
// Returns a user scaling increment associated with the sticker_id
|
||||
// TODO: Adjust lookup function if total number of stickers is uncapped to
|
||||
// improve performance
|
||||
const float GetUserScaler(const std::vector<UserScaling>& scalings,
|
||||
const int sticker_id) const {
|
||||
for (const UserScaling& user_scaling : scalings) {
|
||||
if (user_scaling.sticker_id == sticker_id) {
|
||||
return user_scaling.scale_factor;
|
||||
}
|
||||
}
|
||||
LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
|
||||
<< ", returning 1.0f scaling";
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
// Returns a user rotation in radians associated with the sticker_id
|
||||
const float GetUserRotation(const std::vector<UserRotation>& rotations,
|
||||
const int sticker_id) {
|
||||
for (const UserRotation& rotation : rotations) {
|
||||
if (rotation.sticker_id == sticker_id) {
|
||||
return rotation.rotation_radians;
|
||||
}
|
||||
}
|
||||
LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
|
||||
<< ", returning 0.0f rotation";
|
||||
return 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_CALCULATOR(MatricesManagerCalculator);
|
||||
|
||||
absl::Status MatricesManagerCalculator::GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
|
||||
cc->Inputs().HasTag(kIMUMatrixTag) &&
|
||||
cc->Inputs().HasTag(kUserRotationsTag) &&
|
||||
cc->Inputs().HasTag(kUserScalingsTag) &&
|
||||
cc->InputSidePackets().HasTag(kFOVSidePacketTag) &&
|
||||
cc->InputSidePackets().HasTag(kAspectRatioSidePacketTag));
|
||||
|
||||
cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
|
||||
cc->Inputs().Tag(kIMUMatrixTag).Set<float[]>();
|
||||
cc->Inputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
|
||||
cc->Inputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
|
||||
cc->Inputs().Tag(kRendersTag).Set<std::vector<int>>();
|
||||
if (cc->Inputs().HasTag(kGifAspectRatioTag)) {
|
||||
cc->Inputs().Tag(kGifAspectRatioTag).Set<float>();
|
||||
}
|
||||
|
||||
for (CollectionItemId id = cc->Outputs().BeginId("MATRICES");
|
||||
id < cc->Outputs().EndId("MATRICES"); ++id) {
|
||||
cc->Outputs().Get(id).Set<mediapipe::TimedModelMatrixProtoList>();
|
||||
}
|
||||
cc->InputSidePackets().Tag(kFOVSidePacketTag).Set<float>();
|
||||
cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Set<float>();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status MatricesManagerCalculator::Open(CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
// Set device properties from side packets
|
||||
vertical_fov_radians_ =
|
||||
cc->InputSidePackets().Tag(kFOVSidePacketTag).Get<float>();
|
||||
aspect_ratio_ =
|
||||
cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Get<float>();
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status MatricesManagerCalculator::Process(CalculatorContext* cc) {
|
||||
// Define each object's model matrices
|
||||
auto asset_matrices_gif =
|
||||
std::make_unique<mediapipe::TimedModelMatrixProtoList>();
|
||||
auto asset_matrices_1 =
|
||||
std::make_unique<mediapipe::TimedModelMatrixProtoList>();
|
||||
// Clear all model matrices
|
||||
asset_matrices_gif->clear_model_matrix();
|
||||
asset_matrices_1->clear_model_matrix();
|
||||
|
||||
const std::vector<UserRotation> user_rotation_data =
|
||||
cc->Inputs().Tag(kUserRotationsTag).Get<std::vector<UserRotation>>();
|
||||
|
||||
const std::vector<UserScaling> user_scaling_data =
|
||||
cc->Inputs().Tag(kUserScalingsTag).Get<std::vector<UserScaling>>();
|
||||
|
||||
const std::vector<int> render_data =
|
||||
cc->Inputs().Tag(kRendersTag).Get<std::vector<int>>();
|
||||
|
||||
const std::vector<Anchor> anchor_data =
|
||||
cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
|
||||
if (cc->Inputs().HasTag(kGifAspectRatioTag) &&
|
||||
!cc->Inputs().Tag(kGifAspectRatioTag).IsEmpty()) {
|
||||
gif_aspect_ratio_ = cc->Inputs().Tag(kGifAspectRatioTag).Get<float>();
|
||||
}
|
||||
|
||||
// Device IMU rotation submatrix
|
||||
const auto imu_matrix = cc->Inputs().Tag(kIMUMatrixTag).Get<float[]>();
|
||||
Matrix3f imu_rotation_submatrix;
|
||||
int idx = 0;
|
||||
for (int x = 0; x < 3; ++x) {
|
||||
for (int y = 0; y < 3; ++y) {
|
||||
// Input matrix is row-major matrix, it must be reformatted to
|
||||
// column-major via transpose procedure
|
||||
imu_rotation_submatrix(y, x) = imu_matrix[idx++];
|
||||
}
|
||||
}
|
||||
|
||||
int render_idx = 0;
|
||||
for (const Anchor& anchor : anchor_data) {
|
||||
const int id = anchor.sticker_id;
|
||||
mediapipe::TimedModelMatrixProto* model_matrix;
|
||||
// Add model matrix to matrices list for defined object render ID
|
||||
if (render_data[render_idx] == 0) { // GIF
|
||||
model_matrix = asset_matrices_gif->add_model_matrix();
|
||||
} else { // Asset 3D
|
||||
if (render_data[render_idx] != 1) {
|
||||
LOG(ERROR) << "render id: " << render_data[render_idx]
|
||||
<< " is not supported. Fall back to using render_id = 1.";
|
||||
}
|
||||
model_matrix = asset_matrices_1->add_model_matrix();
|
||||
}
|
||||
|
||||
model_matrix->set_id(id);
|
||||
|
||||
// The user transformation data associated with this sticker must be defined
|
||||
const float user_rotation_radians = GetUserRotation(user_rotation_data, id);
|
||||
const float user_scale_factor = GetUserScaler(user_scaling_data, id);
|
||||
|
||||
// A vector representative of a user's sticker rotation transformation can
|
||||
// be created
|
||||
const Matrix3f user_rotation_submatrix =
|
||||
GenerateUserRotationMatrix(user_rotation_radians);
|
||||
// Next, the diagonal representative of the combined scaling data
|
||||
const DiagonalMatrix3f scaling_diagonal = GetDefaultRenderScaleDiagonal(
|
||||
render_data[render_idx], user_scale_factor, gif_aspect_ratio_);
|
||||
// Increment to next render id from vector
|
||||
render_idx++;
|
||||
|
||||
// The user transformation data can be concatenated into a final rotation
|
||||
// submatrix with the device IMU rotational data
|
||||
const Matrix3f user_transformed_rotation_submatrix =
|
||||
imu_rotation_submatrix * user_rotation_submatrix * scaling_diagonal;
|
||||
|
||||
// A vector representative of the translation of the object in OpenGL
|
||||
// coordinate space must be generated
|
||||
const Vector3f translation_vector = GenerateAnchorVector(anchor);
|
||||
|
||||
// Concatenate all model matrix data
|
||||
const Matrix4fCM final_model_matrix = GenerateEigenModelMatrix(
|
||||
translation_vector, user_transformed_rotation_submatrix);
|
||||
|
||||
// The generated model matrix must be mapped to TimedModelMatrixProto
|
||||
// (col-wise)
|
||||
for (int x = 0; x < final_model_matrix.rows(); ++x) {
|
||||
for (int y = 0; y < final_model_matrix.cols(); ++y) {
|
||||
model_matrix->add_matrix_entries(final_model_matrix(x, y));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Output all individual render matrices
|
||||
// TODO: Perform depth ordering with gl_animation_overlay_calculator to render
|
||||
// objects in order by depth to allow occlusion.
|
||||
cc->Outputs()
|
||||
.Get(cc->Outputs().GetId("MATRICES", 0))
|
||||
.Add(asset_matrices_gif.release(), cc->InputTimestamp());
|
||||
cc->Outputs()
|
||||
.Get(cc->Outputs().GetId("MATRICES", 1))
|
||||
.Add(asset_matrices_1.release(), cc->InputTimestamp());
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Using a specified rotation value in radians, generate a rotation matrix for
|
||||
// use with base rotation submatrix
|
||||
const Matrix3f MatricesManagerCalculator::GenerateUserRotationMatrix(
|
||||
const float rotation_radians) const {
|
||||
Eigen::Matrix3f user_rotation_submatrix;
|
||||
user_rotation_submatrix =
|
||||
// The rotation in radians must be inverted to rotate the object
|
||||
// with the direction of finger movement from the user (system dependent)
|
||||
Eigen::AngleAxisf(-rotation_radians, Eigen::Vector3f::UnitY()) *
|
||||
Eigen::AngleAxisf(0.0f, Eigen::Vector3f::UnitZ()) *
|
||||
// Model orientations all assume z-axis is up, but we need y-axis upwards,
|
||||
// therefore, a +(M_PI * 0.5f) transformation must be applied
|
||||
// TODO: Bring default rotations, translations, and scalings into
|
||||
// independent sticker configuration
|
||||
Eigen::AngleAxisf(M_PI * 0.5f, Eigen::Vector3f::UnitX());
|
||||
// Matrix must be transposed due to the method of submatrix generation in
|
||||
// Eigen
|
||||
return user_rotation_submatrix.transpose();
|
||||
}
|
||||
|
||||
// TODO: Investigate possible differences in warping of tracking speed across
|
||||
// screen Using the sticker anchor data, a translation vector can be generated
|
||||
// in OpenGL coordinate space
|
||||
const Vector3f MatricesManagerCalculator::GenerateAnchorVector(
|
||||
const Anchor& tracked_anchor) const {
|
||||
// Using an initial z-value in OpenGL space, generate a new base z-axis value
|
||||
// to mimic scaling by distance.
|
||||
const float z = kInitialZ * tracked_anchor.z;
|
||||
|
||||
// Using triangle geometry, the minimum for a y-coordinate that will appear in
|
||||
// the view field for the given z value above can be found.
|
||||
const float y_half_range = z * (tan(vertical_fov_radians_ * 0.5f));
|
||||
|
||||
// The aspect ratio of the device and y_minimum calculated above can be used
|
||||
// to find the minimum value for x that will appear in the view field of the
|
||||
// device screen.
|
||||
const float x_half_range = y_half_range * aspect_ratio_;
|
||||
|
||||
// Given the minimum bounds of the screen in OpenGL space, the tracked anchor
|
||||
// coordinates can be converted to OpenGL coordinate space.
|
||||
//
|
||||
// (i.e: X and Y will be converted from [0.0-1.0] space to [x_minimum,
|
||||
// -x_minimum] space and [y_minimum, -y_minimum] space respectively)
|
||||
const float x = (-2.0f * tracked_anchor.x * x_half_range) + x_half_range;
|
||||
const float y = (-2.0f * tracked_anchor.y * y_half_range) + y_half_range;
|
||||
|
||||
// A translation transformation vector can be generated via Eigen
|
||||
const Vector3f t_vector(x, y, z);
|
||||
return t_vector;
|
||||
}
|
||||
|
||||
// Generates a model matrix via Eigen with appropriate transformations
|
||||
const Matrix4fCM MatricesManagerCalculator::GenerateEigenModelMatrix(
|
||||
const Vector3f& translation_vector,
|
||||
const Matrix3f& rotation_submatrix) const {
|
||||
// Define basic empty model matrix
|
||||
Matrix4fCM mvp_matrix;
|
||||
|
||||
// Set the translation vector
|
||||
mvp_matrix.topRightCorner<3, 1>() = translation_vector;
|
||||
|
||||
// Set the rotation submatrix
|
||||
mvp_matrix.topLeftCorner<3, 3>() = rotation_submatrix;
|
||||
|
||||
// Set trailing 1.0 required by OpenGL to define coordinate space
|
||||
mvp_matrix(3, 3) = 1.0f;
|
||||
|
||||
return mvp_matrix;
|
||||
}
|
||||
|
||||
// This returns a scaling matrix to alter the projection matrix for
|
||||
// the specified render id in order to ensure all objects render at a similar
|
||||
// size in the view screen upon initial placement
|
||||
DiagonalMatrix3f MatricesManagerCalculator::GetDefaultRenderScaleDiagonal(
|
||||
const int render_id, const float user_scale_factor,
|
||||
const float gif_aspect_ratio) const {
|
||||
float scale_preset = 1.0f;
|
||||
float x_scalar = 1.0f;
|
||||
float y_scalar = 1.0f;
|
||||
|
||||
switch (render_id) {
|
||||
case 0: { // GIF
|
||||
// 160 is the scaling preset to make the GIF asset appear relatively
|
||||
// similar in size to all other assets
|
||||
scale_preset = 160.0f;
|
||||
if (gif_aspect_ratio >= 1.0f) {
|
||||
// GIF is wider horizontally (scale on x-axis)
|
||||
x_scalar = gif_aspect_ratio;
|
||||
} else {
|
||||
// GIF is wider vertically (scale on y-axis)
|
||||
y_scalar = 1.0f / gif_aspect_ratio;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1: { // 3D asset
|
||||
// 5 is the scaling preset to make the 3D asset appear relatively
|
||||
// similar in size to all other assets
|
||||
scale_preset = 5.0f;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
LOG(INFO) << "Unsupported render_id: " << render_id
|
||||
<< ", returning default render_scale";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DiagonalMatrix3f scaling(scale_preset * user_scale_factor * x_scalar,
|
||||
scale_preset * user_scale_factor * y_scalar,
|
||||
scale_preset * user_scale_factor);
|
||||
return scaling;
|
||||
}
|
||||
} // namespace mediapipe
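
As a rough illustration of the anchor-to-OpenGL conversion performed by GenerateAnchorVector above, the following standalone sketch applies the same triangle-geometry math; the helper and struct names are illustrative and not part of the calculator.

#include <cmath>

struct GlTranslation { float x, y, z; };

// Sketch of MatricesManagerCalculator::GenerateAnchorVector: anchor x/y are
// normalized [0.0-1.0] screen coordinates, anchor z scales the depth.
GlTranslation AnchorToOpenGlSpace(float anchor_x, float anchor_y, float anchor_z,
                                  float vertical_fov_radians, float aspect_ratio) {
  const float kInitialZ = -10.0f;        // center of the OpenGL visual range
  const float z = kInitialZ * anchor_z;  // depth mimics scaling by distance
  const float y_half_range = z * std::tan(vertical_fov_radians * 0.5f);
  const float x_half_range = y_half_range * aspect_ratio;
  // Map [0.0-1.0] to [half_range, -half_range] on each axis.
  const float x = (-2.0f * anchor_x * x_half_range) + x_half_range;
  const float y = (-2.0f * anchor_y * y_half_range) + y_half_range;
  return {x, y, z};
}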
33 mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.proto Normal file
@ -0,0 +1,33 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

option java_package = "com.google.mediapipe.graphs.instantmotiontracking";
option java_outer_classname = "StickerBufferProto";

message Sticker {
  optional int32 id = 1;
  optional float x = 2;
  optional float y = 3;
  optional float rotation = 4;
  optional float scale = 5;
  optional int32 render_id = 6;
}

message StickerRoll {
  repeated Sticker sticker = 1;
}
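
The serialized form consumed on the PROTO stream is produced application-side. A minimal sketch of how a caller might pack one sticker is shown below; the helper name and the field values are illustrative, not part of the source.

#include <string>

#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"

// Illustrative only: builds the serialized StickerRoll string that
// StickerManagerCalculator (next file) parses from its PROTO input stream.
std::string MakeStickerProtoString() {
  mediapipe::StickerRoll roll;
  mediapipe::Sticker* sticker = roll.add_sticker();
  sticker->set_id(1);
  sticker->set_x(0.5f);         // normalized [0.0-1.0] screen coordinates
  sticker->set_y(0.5f);
  sticker->set_rotation(0.0f);  // radians of user rotation
  sticker->set_scale(1.0f);     // user scale factor
  sticker->set_render_id(1);    // 0 = GIF, 1 = 3D asset
  return roll.SerializeAsString();
}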
150 mediapipe/graphs/instant_motion_tracking/calculators/sticker_manager_calculator.cc Normal file
@ -0,0 +1,150 @@
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"
|
||||
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
constexpr char kProtoDataString[] = "PROTO";
|
||||
constexpr char kAnchorsTag[] = "ANCHORS";
|
||||
constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
|
||||
constexpr char kUserScalingsTag[] = "USER_SCALINGS";
|
||||
constexpr char kRenderDescriptorsTag[] = "RENDER_DATA";
|
||||
|
||||
// This calculator takes in the sticker protobuffer data and parses each
|
||||
// individual sticker object into anchors, user rotations and scalings, in
|
||||
// addition to basic render data represented in integer form.
|
||||
//
|
||||
// Input:
|
||||
// PROTO - String of sticker data in appropriate protobuf format [REQUIRED]
|
||||
// Output:
|
||||
// ANCHORS - Anchors with initial normalized X,Y coordinates [REQUIRED]
|
||||
// USER_ROTATIONS - UserRotations with radians of rotation from user [REQUIRED]
|
||||
// USER_SCALINGS - UserScalings with increment of scaling from user [REQUIRED]
|
||||
// RENDER_DATA - Descriptors of which objects/animations to render for stickers
|
||||
// [REQUIRED]
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "StickerManagerCalculator"
|
||||
// input_stream: "PROTO:sticker_proto_string"
|
||||
// output_stream: "ANCHORS:initial_anchor_data"
|
||||
// output_stream: "USER_ROTATIONS:user_rotation_data"
|
||||
// output_stream: "USER_SCALINGS:user_scaling_data"
|
||||
// output_stream: "RENDER_DATA:sticker_render_data"
|
||||
// }
|
||||
|
||||
class StickerManagerCalculator : public CalculatorBase {
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kProtoDataString));
|
||||
RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
|
||||
cc->Outputs().HasTag(kUserRotationsTag) &&
|
||||
cc->Outputs().HasTag(kUserScalingsTag) &&
|
||||
cc->Outputs().HasTag(kRenderDescriptorsTag));
|
||||
|
||||
cc->Inputs().Tag(kProtoDataString).Set<std::string>();
|
||||
cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
|
||||
cc->Outputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
|
||||
cc->Outputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
|
||||
cc->Outputs().Tag(kRenderDescriptorsTag).Set<std::vector<int>>();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override {
|
||||
std::string sticker_proto_string =
|
||||
cc->Inputs().Tag(kProtoDataString).Get<std::string>();
|
||||
|
||||
std::vector<Anchor> initial_anchor_data;
|
||||
std::vector<UserRotation> user_rotation_data;
|
||||
std::vector<UserScaling> user_scaling_data;
|
||||
std::vector<int> render_data;
|
||||
|
||||
::mediapipe::StickerRoll sticker_roll;
|
||||
bool parse_success = sticker_roll.ParseFromString(sticker_proto_string);
|
||||
|
||||
// Ensure parsing was a success
|
||||
RET_CHECK(parse_success) << "Error parsing sticker protobuf data";
|
||||
|
||||
for (int i = 0; i < sticker_roll.sticker().size(); ++i) {
|
||||
// Declare empty structures for sticker data
|
||||
Anchor initial_anchor;
|
||||
UserRotation user_rotation;
|
||||
UserScaling user_scaling;
|
||||
// Get individual Sticker object as defined by Protobuffer
|
||||
::mediapipe::Sticker sticker = sticker_roll.sticker(i);
|
||||
// Set individual data structure ids to associate with this sticker
|
||||
initial_anchor.sticker_id = sticker.id();
|
||||
user_rotation.sticker_id = sticker.id();
|
||||
user_scaling.sticker_id = sticker.id();
|
||||
initial_anchor.x = sticker.x();
|
||||
initial_anchor.y = sticker.y();
|
||||
initial_anchor.z = 1.0f; // default to 1.0 in normalized 3d space
|
||||
user_rotation.rotation_radians = sticker.rotation();
|
||||
user_scaling.scale_factor = sticker.scale();
|
||||
const int render_id = sticker.render_id();
|
||||
// Set all vector data with sticker attributes
|
||||
initial_anchor_data.emplace_back(initial_anchor);
|
||||
user_rotation_data.emplace_back(user_rotation);
|
||||
user_scaling_data.emplace_back(user_scaling);
|
||||
render_data.emplace_back(render_id);
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kAnchorsTag)) {
|
||||
cc->Outputs()
|
||||
.Tag(kAnchorsTag)
|
||||
.AddPacket(MakePacket<std::vector<Anchor>>(initial_anchor_data)
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
if (cc->Outputs().HasTag(kUserRotationsTag)) {
|
||||
cc->Outputs()
|
||||
.Tag(kUserRotationsTag)
|
||||
.AddPacket(MakePacket<std::vector<UserRotation>>(user_rotation_data)
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
if (cc->Outputs().HasTag(kUserScalingsTag)) {
|
||||
cc->Outputs()
|
||||
.Tag(kUserScalingsTag)
|
||||
.AddPacket(MakePacket<std::vector<UserScaling>>(user_scaling_data)
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
if (cc->Outputs().HasTag(kRenderDescriptorsTag)) {
|
||||
cc->Outputs()
|
||||
.Tag(kRenderDescriptorsTag)
|
||||
.AddPacket(MakePacket<std::vector<int>>(render_data)
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Close(CalculatorContext* cc) override {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_CALCULATOR(StickerManagerCalculator);
|
||||
} // namespace mediapipe
210 mediapipe/graphs/instant_motion_tracking/calculators/tracked_anchor_manager_calculator.cc Normal file
@ -0,0 +1,210 @@
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
|
||||
#include "mediapipe/util/tracking/box_tracker.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
constexpr char kSentinelTag[] = "SENTINEL";
|
||||
constexpr char kAnchorsTag[] = "ANCHORS";
|
||||
constexpr char kBoxesInputTag[] = "BOXES";
|
||||
constexpr char kBoxesOutputTag[] = "START_POS";
|
||||
constexpr char kCancelTag[] = "CANCEL_ID";
|
||||
// TODO: Find optimal Height/Width (0.1-0.3)
|
||||
constexpr float kBoxEdgeSize =
|
||||
0.2f; // Used to establish tracking box dimensions
|
||||
constexpr float kUsToMs =
|
||||
1000.0f; // Used to convert from microseconds to millis
|
||||
|
||||
// This calculator manages the regions being tracked for each individual sticker
|
||||
// and adjusts the regions being tracked if a change is detected in a sticker's
|
||||
// initial anchor placement. Regions being tracked that have no associated
|
||||
// sticker will be automatically removed upon the next iteration of the graph to
|
||||
// optimize performance and remove all sticker artifacts
|
||||
//
|
||||
// Input:
|
||||
// SENTINEL - ID of sticker which has an anchor that must be reset (-1 when no
|
||||
// anchor must be reset) [REQUIRED]
|
||||
// ANCHORS - Initial anchor data (tracks changes and where to re/position)
|
||||
// [REQUIRED] BOXES - Used in cycle, boxes being tracked meant to update
|
||||
// positions [OPTIONAL
|
||||
// - provided by subgraph]
|
||||
// Output:
|
||||
// START_POS - Positions of boxes being tracked (can be overwritten with ID)
|
||||
// [REQUIRED] CANCEL_ID - Single integer ID of tracking box to remove from
|
||||
// tracker subgraph [OPTIONAL] ANCHORS - Updated set of anchors with tracked
|
||||
// and normalized X,Y,Z [REQUIRED]
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "TrackedAnchorManagerCalculator"
|
||||
// input_stream: "SENTINEL:sticker_sentinel"
|
||||
// input_stream: "ANCHORS:initial_anchor_data"
|
||||
// input_stream: "BOXES:boxes"
|
||||
// input_stream_info: {
|
||||
// tag_index: 'BOXES'
|
||||
// back_edge: true
|
||||
// }
|
||||
// output_stream: "START_POS:start_pos"
|
||||
// output_stream: "CANCEL_ID:cancel_object_id"
|
||||
// output_stream: "ANCHORS:tracked_scaled_anchor_data"
|
||||
// }
|
||||
|
||||
class TrackedAnchorManagerCalculator : public CalculatorBase {
|
||||
private:
|
||||
// Previous graph iteration anchor data
|
||||
std::vector<Anchor> previous_anchor_data_;
|
||||
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
|
||||
cc->Inputs().HasTag(kSentinelTag));
|
||||
RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
|
||||
cc->Outputs().HasTag(kBoxesOutputTag));
|
||||
|
||||
cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
|
||||
cc->Inputs().Tag(kSentinelTag).Set<int>();
|
||||
|
||||
if (cc->Inputs().HasTag(kBoxesInputTag)) {
|
||||
cc->Inputs().Tag(kBoxesInputTag).Set<mediapipe::TimedBoxProtoList>();
|
||||
}
|
||||
|
||||
cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
|
||||
cc->Outputs().Tag(kBoxesOutputTag).Set<mediapipe::TimedBoxProtoList>();
|
||||
|
||||
if (cc->Outputs().HasTag(kCancelTag)) {
|
||||
cc->Outputs().Tag(kCancelTag).Set<int>();
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override { return absl::OkStatus(); }
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override;
|
||||
};
|
||||
REGISTER_CALCULATOR(TrackedAnchorManagerCalculator);
|
||||
|
||||
absl::Status TrackedAnchorManagerCalculator::Process(CalculatorContext* cc) {
|
||||
mediapipe::Timestamp timestamp = cc->InputTimestamp();
|
||||
const int sticker_sentinel = cc->Inputs().Tag(kSentinelTag).Get<int>();
|
||||
std::vector<Anchor> current_anchor_data =
|
||||
cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
|
||||
auto pos_boxes = absl::make_unique<mediapipe::TimedBoxProtoList>();
|
||||
std::vector<Anchor> tracked_scaled_anchor_data;
|
||||
|
||||
// Delete any boxes being tracked without an associated anchor
|
||||
for (const mediapipe::TimedBoxProto& box :
|
||||
cc->Inputs()
|
||||
.Tag(kBoxesInputTag)
|
||||
.Get<mediapipe::TimedBoxProtoList>()
|
||||
.box()) {
|
||||
bool anchor_exists = false;
|
||||
for (Anchor anchor : current_anchor_data) {
|
||||
if (box.id() == anchor.sticker_id) {
|
||||
anchor_exists = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!anchor_exists) {
|
||||
cc->Outputs()
|
||||
.Tag(kCancelTag)
|
||||
.AddPacket(MakePacket<int>(box.id()).At(timestamp++));
|
||||
}
|
||||
}
|
||||
|
||||
// Perform tracking or updating for each anchor position
|
||||
for (const Anchor& anchor : current_anchor_data) {
|
||||
Anchor output_anchor = anchor;
|
||||
// Check if anchor position is being reset by user in this graph iteration
|
||||
if (sticker_sentinel == anchor.sticker_id) {
|
||||
// Delete associated tracking box
|
||||
// TODO: BoxTrackingSubgraph should accept vector to avoid breaking
|
||||
// timestamp rules
|
||||
cc->Outputs()
|
||||
.Tag(kCancelTag)
|
||||
.AddPacket(MakePacket<int>(anchor.sticker_id).At(timestamp++));
|
||||
// Add a tracking box
|
||||
mediapipe::TimedBoxProto* box = pos_boxes->add_box();
|
||||
box->set_left(anchor.x - kBoxEdgeSize * 0.5f);
|
||||
box->set_right(anchor.x + kBoxEdgeSize * 0.5f);
|
||||
box->set_top(anchor.y - kBoxEdgeSize * 0.5f);
|
||||
box->set_bottom(anchor.y + kBoxEdgeSize * 0.5f);
|
||||
box->set_id(anchor.sticker_id);
|
||||
box->set_time_msec((timestamp++).Microseconds() / kUsToMs);
|
||||
// Default value for normalized z (scale factor)
|
||||
output_anchor.z = 1.0f;
|
||||
} else {
|
||||
// Anchor position was not reset by user
|
||||
// Attempt to update anchor position from tracking subgraph
|
||||
// (TimedBoxProto)
|
||||
bool updated_from_tracker = false;
|
||||
const mediapipe::TimedBoxProtoList box_list =
|
||||
cc->Inputs().Tag(kBoxesInputTag).Get<mediapipe::TimedBoxProtoList>();
|
||||
for (const auto& box : box_list.box()) {
|
||||
if (box.id() == anchor.sticker_id) {
|
||||
// Get center x normalized coordinate [0.0-1.0]
|
||||
output_anchor.x = (box.left() + box.right()) * 0.5f;
|
||||
// Get center y normalized coordinate [0.0-1.0]
|
||||
output_anchor.y = (box.top() + box.bottom()) * 0.5f;
|
||||
// Get center z coordinate [z starts at normalized 1.0 and scales
|
||||
// inversely with box-width]
|
||||
// TODO: Look into issues with uniform scaling on x-axis and y-axis
|
||||
output_anchor.z = kBoxEdgeSize / (box.right() - box.left());
|
||||
updated_from_tracker = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If anchor position was not updated from tracker, create new tracking
|
||||
// box at last recorded anchor coordinates. This will allow all current
|
||||
// stickers to be tracked at approximately last location even if
|
||||
// re-acquisitioning in the BoxTrackingSubgraph encounters errors
|
||||
if (!updated_from_tracker) {
|
||||
for (const Anchor& prev_anchor : previous_anchor_data_) {
|
||||
if (anchor.sticker_id == prev_anchor.sticker_id) {
|
||||
mediapipe::TimedBoxProto* box = pos_boxes->add_box();
|
||||
box->set_left(prev_anchor.x - kBoxEdgeSize * 0.5f);
|
||||
box->set_right(prev_anchor.x + kBoxEdgeSize * 0.5f);
|
||||
box->set_top(prev_anchor.y - kBoxEdgeSize * 0.5f);
|
||||
box->set_bottom(prev_anchor.y + kBoxEdgeSize * 0.5f);
|
||||
box->set_id(prev_anchor.sticker_id);
|
||||
box->set_time_msec(cc->InputTimestamp().Microseconds() / kUsToMs);
|
||||
output_anchor = prev_anchor;
|
||||
// Default value for normalized z (scale factor)
|
||||
output_anchor.z = 1.0f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
tracked_scaled_anchor_data.emplace_back(output_anchor);
|
||||
}
|
||||
// Set anchor data for next iteration
|
||||
previous_anchor_data_ = tracked_scaled_anchor_data;
|
||||
|
||||
cc->Outputs()
|
||||
.Tag(kAnchorsTag)
|
||||
.AddPacket(MakePacket<std::vector<Anchor>>(tracked_scaled_anchor_data)
|
||||
.At(cc->InputTimestamp()));
|
||||
cc->Outputs()
|
||||
.Tag(kBoxesOutputTag)
|
||||
.Add(pos_boxes.release(), cc->InputTimestamp());
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
} // namespace mediapipe
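
The anchor/box bookkeeping above reduces to a small amount of arithmetic. The sketch below uses hypothetical free functions, mirroring the constant and proto fields used in the calculator, to show the round trip between an anchor and its tracking box.

#include "mediapipe/util/tracking/box_tracker.pb.h"

constexpr float kSketchBoxEdgeSize = 0.2f;  // same value as kBoxEdgeSize above

// A tracking box is centered on the anchor with a fixed edge size.
mediapipe::TimedBoxProto BoxForAnchor(float x, float y, int sticker_id) {
  mediapipe::TimedBoxProto box;
  box.set_left(x - kSketchBoxEdgeSize * 0.5f);
  box.set_right(x + kSketchBoxEdgeSize * 0.5f);
  box.set_top(y - kSketchBoxEdgeSize * 0.5f);
  box.set_bottom(y + kSketchBoxEdgeSize * 0.5f);
  box.set_id(sticker_id);
  return box;
}

// The updated anchor is recovered from a tracked box: x/y from the center,
// z from the inverse of the box width (normalized scale starts at 1.0).
float AnchorXFromBox(const mediapipe::TimedBoxProto& box) {
  return (box.left() + box.right()) * 0.5f;
}
float AnchorYFromBox(const mediapipe::TimedBoxProto& box) {
  return (box.top() + box.bottom()) * 0.5f;
}
float AnchorZFromBox(const mediapipe::TimedBoxProto& box) {
  return kSketchBoxEdgeSize / (box.right() - box.left());
}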
42 mediapipe/graphs/instant_motion_tracking/calculators/transformations.h Normal file
@ -0,0 +1,42 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_
#define MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_

namespace mediapipe {

// Radians by which to rotate the object (Provided by UI input)
struct UserRotation {
  float rotation_radians;
  int sticker_id;
};

// Scaling factor provided by the UI application end
struct UserScaling {
  float scale_factor;
  int sticker_id;
};

// The normalized anchor coordinates of a sticker
struct Anchor {
  float x;  // [0.0-1.0]
  float y;  // [0.0-1.0]
  float z;  // Centered around 1.0 [current_scale = z * initial_scale]
  int sticker_id;
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_
80 mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt Normal file
@ -0,0 +1,80 @@
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# MediaPipe graph that performs region tracking and 3d object (AR sticker) rendering.
|
||||
|
||||
# Images in/out of graph with sticker data and IMU information from device
|
||||
input_stream: "input_video"
|
||||
input_stream: "sticker_sentinel"
|
||||
input_stream: "sticker_proto_string"
|
||||
input_stream: "imu_rotation_matrix"
|
||||
input_stream: "gif_texture"
|
||||
input_stream: "gif_aspect_ratio"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Converts sticker data into user data (rotations/scalings), render data, and
|
||||
# initial anchors.
|
||||
node {
|
||||
calculator: "StickerManagerCalculator"
|
||||
input_stream: "PROTO:sticker_proto_string"
|
||||
output_stream: "ANCHORS:initial_anchor_data"
|
||||
output_stream: "USER_ROTATIONS:user_rotation_data"
|
||||
output_stream: "USER_SCALINGS:user_scaling_data"
|
||||
output_stream: "RENDER_DATA:sticker_render_data"
|
||||
}
|
||||
|
||||
# Uses box tracking in order to create 'anchors' for associated 3d stickers.
|
||||
node {
|
||||
calculator: "RegionTrackingSubgraph"
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "SENTINEL:sticker_sentinel"
|
||||
input_stream: "ANCHORS:initial_anchor_data"
|
||||
output_stream: "ANCHORS:tracked_anchor_data"
|
||||
}
|
||||
|
||||
# Concatenates all transformations to generate model matrices for the OpenGL
|
||||
# animation overlay calculator.
|
||||
node {
|
||||
calculator: "MatricesManagerCalculator"
|
||||
input_stream: "ANCHORS:tracked_anchor_data"
|
||||
input_stream: "IMU_ROTATION:imu_rotation_matrix"
|
||||
input_stream: "USER_ROTATIONS:user_rotation_data"
|
||||
input_stream: "USER_SCALINGS:user_scaling_data"
|
||||
input_stream: "RENDER_DATA:sticker_render_data"
|
||||
input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
|
||||
output_stream: "MATRICES:0:gif_matrices"
|
||||
output_stream: "MATRICES:1:asset_3d_matrices"
|
||||
input_side_packet: "FOV:vertical_fov_radians"
|
||||
input_side_packet: "ASPECT_RATIO:aspect_ratio"
|
||||
}
|
||||
|
||||
# Renders the final 3d stickers and overlays them on input image.
|
||||
node {
|
||||
calculator: "GlAnimationOverlayCalculator"
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "MODEL_MATRICES:gif_matrices"
|
||||
input_stream: "TEXTURE:gif_texture"
|
||||
input_side_packet: "ANIMATION_ASSET:gif_asset_name"
|
||||
output_stream: "asset_gif_rendered"
|
||||
}
|
||||
|
||||
# Renders the final 3d stickers and overlays them on top of the input image.
|
||||
node {
|
||||
calculator: "GlAnimationOverlayCalculator"
|
||||
input_stream: "VIDEO:asset_gif_rendered"
|
||||
input_stream: "MODEL_MATRICES:asset_3d_matrices"
|
||||
input_side_packet: "TEXTURE:texture_3d"
|
||||
input_side_packet: "ANIMATION_ASSET:asset_3d"
|
||||
output_stream: "output_video"
|
||||
}
32 mediapipe/graphs/instant_motion_tracking/subgraphs/BUILD Normal file
@ -0,0 +1,32 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_simple_subgraph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_simple_subgraph(
    name = "region_tracking",
    graph = "region_tracking.pbtxt",
    register_as = "RegionTrackingSubgraph",
    deps = [
        "//mediapipe/graphs/instant_motion_tracking/calculators:tracked_anchor_manager_calculator",
        "//mediapipe/graphs/tracking/subgraphs:box_tracking_gpu",
    ],
)
47 mediapipe/graphs/instant_motion_tracking/subgraphs/region_tracking.pbtxt Normal file
@ -0,0 +1,47 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MediaPipe graph that performs region tracking on initial anchor positions
# provided by the application

# Images in/out of graph with tracked and scaled normalized anchor data
type: "RegionTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "SENTINEL:sticker_sentinel"
input_stream: "ANCHORS:initial_anchor_data"
output_stream: "ANCHORS:tracked_scaled_anchor_data"

# Manages the anchors and tracking if user changes/adds/deletes anchors
node {
  calculator: "TrackedAnchorManagerCalculator"
  input_stream: "SENTINEL:sticker_sentinel"
  input_stream: "ANCHORS:initial_anchor_data"
  input_stream: "BOXES:boxes"
  input_stream_info: {
    tag_index: 'BOXES'
    back_edge: true
  }
  output_stream: "START_POS:start_pos"
  output_stream: "CANCEL_ID:cancel_object_id"
  output_stream: "ANCHORS:tracked_scaled_anchor_data"
}

# Subgraph performs anchor placement and tracking
node {
  calculator: "BoxTrackingSubgraphGpu"
  input_stream: "VIDEO:input_video"
  input_stream: "BOXES:start_pos"
  input_stream: "CANCEL_ID:cancel_object_id"
  output_stream: "BOXES:boxes"
}
|
86
mediapipe/graphs/iris_tracking/BUILD
Normal file
@ -0,0 +1,86 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
name = "iris_depth_cpu_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_file_properties_calculator",
"//mediapipe/calculators/image:opencv_encoded_image_to_image_frame_calculator",
"//mediapipe/calculators/image:opencv_image_encoder_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
],
)

cc_library(
name = "iris_tracking_cpu_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
],
)

cc_library(
name = "iris_tracking_cpu_video_input_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu",
],
)

cc_library(
name = "iris_tracking_gpu_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator",
"//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
"//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_gpu",
],
)

mediapipe_binary_graph(
name = "iris_tracking_gpu_binary_graph",
graph = "iris_tracking_gpu.pbtxt",
output_name = "iris_tracking_gpu.binarypb",
deps = [":iris_tracking_gpu_deps"],
)
107
mediapipe/graphs/iris_tracking/calculators/BUILD
Normal file
@ -0,0 +1,107 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

licenses(["notice"])

proto_library(
name = "iris_to_render_data_calculator_proto",
srcs = ["iris_to_render_data_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util:color_proto",
"//mediapipe/util:render_data_proto",
],
)

mediapipe_cc_proto_library(
name = "iris_to_render_data_calculator_cc_proto",
srcs = ["iris_to_render_data_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":iris_to_render_data_calculator_proto"],
)

cc_library(
name = "iris_to_render_data_calculator",
srcs = ["iris_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":iris_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)

proto_library(
name = "iris_to_depth_calculator_proto",
srcs = ["iris_to_depth_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)

mediapipe_cc_proto_library(
name = "iris_to_depth_calculator_cc_proto",
srcs = ["iris_to_depth_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":iris_to_depth_calculator_proto"],
)

cc_library(
name = "iris_to_depth_calculator",
srcs = ["iris_to_depth_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":iris_to_depth_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_file_properties_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)

cc_library(
name = "update_face_landmarks_calculator",
srcs = ["update_face_landmarks_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_file_properties_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
@ -0,0 +1,245 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <memory>

#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_file_properties.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/iris_tracking/calculators/iris_to_depth_calculator.pb.h"

namespace mediapipe {

namespace {

constexpr char kIrisTag[] = "IRIS";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr char kFocalLengthPixelTag[] = "FOCAL_LENGTH";
constexpr char kImageFilePropertiesTag[] = "IMAGE_FILE_PROPERTIES";
constexpr char kLeftIrisDepthTag[] = "LEFT_IRIS_DEPTH_MM";
constexpr char kRightIrisDepthTag[] = "RIGHT_IRIS_DEPTH_MM";
constexpr int kNumIrisLandmarksPerEye = 5;
constexpr float kDepthWeightUpdate = 0.1;
// Average fixed iris size across human beings.
constexpr float kIrisSizeInMM = 11.8;

inline float GetDepth(float x0, float y0, float x1, float y1) {
return std::sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
}

inline float GetLandmarkDepth(const NormalizedLandmark& ld0,
const NormalizedLandmark& ld1,
const std::pair<int, int>& image_size) {
return GetDepth(ld0.x() * image_size.first, ld0.y() * image_size.second,
ld1.x() * image_size.first, ld1.y() * image_size.second);
}

float CalculateIrisDiameter(const NormalizedLandmarkList& landmarks,
const std::pair<int, int>& image_size) {
const float dist_vert = GetLandmarkDepth(landmarks.landmark(1),
landmarks.landmark(2), image_size);
const float dist_hori = GetLandmarkDepth(landmarks.landmark(3),
landmarks.landmark(4), image_size);
return (dist_hori + dist_vert) / 2.0f;
}

float CalculateDepth(const NormalizedLandmark& center, float focal_length,
float iris_size, float img_w, float img_h) {
std::pair<float, float> origin{img_w / 2.f, img_h / 2.f};
const auto y = GetDepth(origin.first, origin.second, center.x() * img_w,
center.y() * img_h);
const auto x = std::sqrt(focal_length * focal_length + y * y);
const auto depth = kIrisSizeInMM * x / iris_size;
return depth;
}

} // namespace

// Estimates depth from iris to camera given focal length and image size.
//
// Usage example:
// node {
// calculator: "IrisToDepthCalculator"
// # A NormalizedLandmarkList contains landmarks for both irises.
// input_stream: "IRIS:iris_landmarks"
// input_stream: "IMAGE_SIZE:image_size"
// # Note: Only one of FOCAL_LENGTH or IMAGE_FILE_PROPERTIES is necessary
// # to get focal length in pixels. Sending focal length in pixels to
// # this calculator is optional.
// input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
// # OR
// input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
// output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
// output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
// }
class IrisToDepthCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kIrisTag).Set<NormalizedLandmarkList>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();

// Only one of kFocalLengthPixelTag or kImageFilePropertiesTag must exist
// if they are present.
RET_CHECK(!(cc->InputSidePackets().HasTag(kFocalLengthPixelTag) &&
cc->InputSidePackets().HasTag(kImageFilePropertiesTag)));
if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
cc->InputSidePackets().Tag(kFocalLengthPixelTag).SetAny();
}
if (cc->InputSidePackets().HasTag(kImageFilePropertiesTag)) {
cc->InputSidePackets()
.Tag(kImageFilePropertiesTag)
.Set<ImageFileProperties>();
}
if (cc->Outputs().HasTag(kLeftIrisDepthTag)) {
cc->Outputs().Tag(kLeftIrisDepthTag).Set<float>();
}
if (cc->Outputs().HasTag(kRightIrisDepthTag)) {
cc->Outputs().Tag(kRightIrisDepthTag).Set<float>();
}
return absl::OkStatus();
}

absl::Status Open(CalculatorContext* cc) override;

absl::Status Process(CalculatorContext* cc) override;

private:
float focal_length_pixels_ = -1.f;
// TODO: Consolidate the logic when switching to input stream for
// focal length.
bool compute_depth_from_iris_ = false;
float smoothed_left_depth_mm_ = -1.f;
float smoothed_right_depth_mm_ = -1.f;

void GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
void GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
::mediapipe::IrisToDepthCalculatorOptions options_;
};
REGISTER_CALCULATOR(IrisToDepthCalculator);

absl::Status IrisToDepthCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
#if defined(__APPLE__)
focal_length_pixels_ = *cc->InputSidePackets()
.Tag(kFocalLengthPixelTag)
.Get<std::unique_ptr<float>>();
#else
focal_length_pixels_ =
cc->InputSidePackets().Tag(kFocalLengthPixelTag).Get<float>();
#endif
compute_depth_from_iris_ = true;
} else if (cc->InputSidePackets().HasTag(kImageFilePropertiesTag)) {
const auto& properties = cc->InputSidePackets()
.Tag(kImageFilePropertiesTag)
.Get<ImageFileProperties>();
focal_length_pixels_ = properties.focal_length_pixels();
compute_depth_from_iris_ = true;
}

options_ = cc->Options<::mediapipe::IrisToDepthCalculatorOptions>();
return absl::OkStatus();
}

absl::Status IrisToDepthCalculator::Process(CalculatorContext* cc) {
// Only process if there's input landmarks.
if (cc->Inputs().Tag(kIrisTag).IsEmpty()) {
return absl::OkStatus();
}

const auto& iris_landmarks =
cc->Inputs().Tag(kIrisTag).Get<NormalizedLandmarkList>();
RET_CHECK_EQ(iris_landmarks.landmark_size(), kNumIrisLandmarksPerEye * 2)
<< "Wrong number of iris landmarks";

std::pair<int, int> image_size;
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
image_size = cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();

auto left_iris = absl::make_unique<NormalizedLandmarkList>();
auto right_iris = absl::make_unique<NormalizedLandmarkList>();
GetLeftIris(iris_landmarks, left_iris.get());
GetRightIris(iris_landmarks, right_iris.get());

const auto left_iris_size = CalculateIrisDiameter(*left_iris, image_size);
const auto right_iris_size = CalculateIrisDiameter(*right_iris, image_size);

#if defined(__APPLE__)
if (cc->InputSidePackets().HasTag(kFocalLengthPixelTag)) {
focal_length_pixels_ = *cc->InputSidePackets()
.Tag(kFocalLengthPixelTag)
.Get<std::unique_ptr<float>>();
}
#endif

if (compute_depth_from_iris_ && focal_length_pixels_ > 0) {
const auto left_depth =
CalculateDepth(left_iris->landmark(0), focal_length_pixels_,
left_iris_size, image_size.first, image_size.second);
const auto right_depth =
CalculateDepth(right_iris->landmark(0), focal_length_pixels_,
right_iris_size, image_size.first, image_size.second);
smoothed_left_depth_mm_ =
smoothed_left_depth_mm_ < 0 || std::isinf(smoothed_left_depth_mm_)
? left_depth
: smoothed_left_depth_mm_ * (1 - kDepthWeightUpdate) +
left_depth * kDepthWeightUpdate;
smoothed_right_depth_mm_ =
smoothed_right_depth_mm_ < 0 || std::isinf(smoothed_right_depth_mm_)
? right_depth
: smoothed_right_depth_mm_ * (1 - kDepthWeightUpdate) +
right_depth * kDepthWeightUpdate;

if (cc->Outputs().HasTag(kLeftIrisDepthTag)) {
cc->Outputs()
.Tag(kLeftIrisDepthTag)
.AddPacket(MakePacket<float>(smoothed_left_depth_mm_)
.At(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kRightIrisDepthTag)) {
cc->Outputs()
.Tag(kRightIrisDepthTag)
.AddPacket(MakePacket<float>(smoothed_right_depth_mm_)
.At(cc->InputTimestamp()));
}
}
return absl::OkStatus();
}

void IrisToDepthCalculator::GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(options_.left_iris_center_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_top_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_bottom_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_left_index());
*iris->add_landmark() = lds.landmark(options_.left_iris_right_index());
}

void IrisToDepthCalculator::GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(options_.right_iris_center_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_top_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_bottom_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_left_index());
*iris->add_landmark() = lds.landmark(options_.right_iris_right_index());
}
} // namespace mediapipe
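For reference, the depth estimate in the calculator above reduces to a pinhole-camera relation plus an exponential moving average. A minimal Rust sketch of the same arithmetic (function names are illustrative and not part of this crate's API; the constants are the ones defined in the C++ above):

// Average human iris diameter (mm) and smoothing factor, as in the calculator above.
const IRIS_SIZE_MM: f32 = 11.8;
const DEPTH_WEIGHT_UPDATE: f32 = 0.1;

// Depth in mm from focal length (px), measured iris diameter (px) and the iris
// center's pixel offset from the image center.
fn iris_depth_mm(focal_length_px: f32, iris_diameter_px: f32, center_offset_px: f32) -> f32 {
    let ray = (focal_length_px * focal_length_px + center_offset_px * center_offset_px).sqrt();
    IRIS_SIZE_MM * ray / iris_diameter_px
}

// Exponential smoothing applied to LEFT/RIGHT_IRIS_DEPTH_MM between frames.
fn smooth_depth(previous_mm: f32, new_mm: f32) -> f32 {
    if previous_mm < 0.0 || previous_mm.is_infinite() {
        new_mm
    } else {
        previous_mm * (1.0 - DEPTH_WEIGHT_UPDATE) + new_mm * DEPTH_WEIGHT_UPDATE
    }
}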
@ -0,0 +1,39 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message IrisToDepthCalculatorOptions {
extend CalculatorOptions {
optional IrisToDepthCalculatorOptions ext = 303429002;
}

// Indices of corresponding left iris landmarks in input stream.
optional int32 left_iris_center_index = 1 [default = 0];
optional int32 left_iris_top_index = 2 [default = 2];
optional int32 left_iris_bottom_index = 3 [default = 4];
optional int32 left_iris_left_index = 4 [default = 3];
optional int32 left_iris_right_index = 5 [default = 1];

// Indices of corresponding right iris landmarks in input stream.
optional int32 right_iris_center_index = 6 [default = 5];
optional int32 right_iris_top_index = 7 [default = 7];
optional int32 right_iris_bottom_index = 8 [default = 9];
optional int32 right_iris_left_index = 9 [default = 6];
optional int32 right_iris_right_index = 10 [default = 8];
}
@ -0,0 +1,318 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <memory>

#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/iris_tracking/calculators/iris_to_render_data_calculator.pb.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"

namespace mediapipe {

namespace {

constexpr char kIrisTag[] = "IRIS";
constexpr char kRenderDataTag[] = "RENDER_DATA";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr char kLeftIrisDepthTag[] = "LEFT_IRIS_DEPTH_MM";
constexpr char kRightIrisDepthTag[] = "RIGHT_IRIS_DEPTH_MM";
constexpr char kOvalLabel[] = "OVAL";
constexpr float kFontHeightScale = 1.5f;
constexpr int kNumIrisLandmarksPerEye = 5;
// TODO: Source.
constexpr float kIrisSizeInMM = 11.8;

inline void SetColor(RenderAnnotation* annotation, const Color& color) {
annotation->mutable_color()->set_r(color.r());
annotation->mutable_color()->set_g(color.g());
annotation->mutable_color()->set_b(color.b());
}

inline float GetDepth(float x0, float y0, float x1, float y1) {
return std::sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1));
}

inline float GetLandmarkDepth(const NormalizedLandmark& ld0,
const NormalizedLandmark& ld1,
const std::pair<int, int>& image_size) {
return GetDepth(ld0.x() * image_size.first, ld0.y() * image_size.second,
ld1.x() * image_size.first, ld1.y() * image_size.second);
}

float CalculateIrisDiameter(const NormalizedLandmarkList& landmarks,
const std::pair<int, int>& image_size) {
const float dist_vert = GetLandmarkDepth(landmarks.landmark(1),
landmarks.landmark(2), image_size);
const float dist_hori = GetLandmarkDepth(landmarks.landmark(3),
landmarks.landmark(4), image_size);
return (dist_hori + dist_vert) / 2.0f;
}

float CalculateDepth(const NormalizedLandmark& center, float focal_length,
float iris_size, float img_w, float img_h) {
std::pair<float, float> origin{img_w / 2.f, img_h / 2.f};
const auto y = GetDepth(origin.first, origin.second, center.x() * img_w,
center.y() * img_h);
const auto x = std::sqrt(focal_length * focal_length + y * y);
const auto depth = kIrisSizeInMM * x / iris_size;
return depth;
}

} // namespace

// Converts iris landmarks to render data and estimates depth from the camera
// if focal length and image size are given. The depth will be rendered as part
// of the render data on the frame.
//
// Usage example:
// node {
// calculator: "IrisToRenderDataCalculator"
// input_stream: "IRIS:iris_landmarks"
// input_stream: "IMAGE_SIZE:image_size"
// # Note: Only one of FOCAL_LENGTH or IMAGE_FILE_PROPERTIES is necessary
// # to get focal length in pixels. Sending focal length in pixels to
// # this calculator is optional.
// input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
// # OR
// input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
// output_stream: "RENDER_DATA:iris_render_data"
// output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
// output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
// node_options: {
// [type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
// color { r: 255 g: 255 b: 255 }
// thickness: 2.0
// font_height_px: 50
// horizontal_offset_px: 200
// vertical_offset_px: 200
// location: TOP_LEFT
// }
// }
// }
class IrisToRenderDataCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kIrisTag).Set<NormalizedLandmarkList>();
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();

if (cc->Inputs().HasTag(kLeftIrisDepthTag)) {
cc->Inputs().Tag(kLeftIrisDepthTag).Set<float>();
}
if (cc->Inputs().HasTag(kRightIrisDepthTag)) {
cc->Inputs().Tag(kRightIrisDepthTag).Set<float>();
}
return absl::OkStatus();
}

absl::Status Open(CalculatorContext* cc) override;

absl::Status Process(CalculatorContext* cc) override;

private:
void RenderIris(const NormalizedLandmarkList& iris_landmarks,
const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size, float iris_size,
RenderData* render_data);
void GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);
void GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris);

void AddTextRenderData(const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size,
const std::vector<std::string>& lines,
RenderData* render_data);

static RenderAnnotation* AddOvalRenderData(
const IrisToRenderDataCalculatorOptions& options,
RenderData* render_data);
static RenderAnnotation* AddPointRenderData(
const IrisToRenderDataCalculatorOptions& options,
RenderData* render_data);
};
REGISTER_CALCULATOR(IrisToRenderDataCalculator);

absl::Status IrisToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
return absl::OkStatus();
}

absl::Status IrisToRenderDataCalculator::Process(CalculatorContext* cc) {
// Only process if there's input landmarks.
if (cc->Inputs().Tag(kIrisTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& options =
cc->Options<::mediapipe::IrisToRenderDataCalculatorOptions>();

const auto& iris_landmarks =
cc->Inputs().Tag(kIrisTag).Get<NormalizedLandmarkList>();
RET_CHECK_EQ(iris_landmarks.landmark_size(), kNumIrisLandmarksPerEye * 2)
<< "Wrong number of iris landmarks";

std::pair<int, int> image_size;
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
image_size = cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();

auto render_data = absl::make_unique<RenderData>();
auto left_iris = absl::make_unique<NormalizedLandmarkList>();
auto right_iris = absl::make_unique<NormalizedLandmarkList>();
GetLeftIris(iris_landmarks, left_iris.get());
GetRightIris(iris_landmarks, right_iris.get());

const auto left_iris_size = CalculateIrisDiameter(*left_iris, image_size);
const auto right_iris_size = CalculateIrisDiameter(*right_iris, image_size);
RenderIris(*left_iris, options, image_size, left_iris_size,
render_data.get());
RenderIris(*right_iris, options, image_size, right_iris_size,
render_data.get());

std::vector<std::string> lines;
std::string line;
if (cc->Inputs().HasTag(kLeftIrisDepthTag) &&
!cc->Inputs().Tag(kLeftIrisDepthTag).IsEmpty()) {
const float left_iris_depth =
cc->Inputs().Tag(kLeftIrisDepthTag).Get<float>();
if (!std::isinf(left_iris_depth)) {
line = "Left : ";
absl::StrAppend(&line, ":", std::round(left_iris_depth / 10), " cm");
lines.emplace_back(line);
}
}
if (cc->Inputs().HasTag(kRightIrisDepthTag) &&
!cc->Inputs().Tag(kRightIrisDepthTag).IsEmpty()) {
const float right_iris_depth =
cc->Inputs().Tag(kRightIrisDepthTag).Get<float>();
if (!std::isinf(right_iris_depth)) {
line = "Right : ";
absl::StrAppend(&line, ":", std::round(right_iris_depth / 10), " cm");
lines.emplace_back(line);
}
}
AddTextRenderData(options, image_size, lines, render_data.get());

cc->Outputs()
.Tag(kRenderDataTag)
.Add(render_data.release(), cc->InputTimestamp());
return absl::OkStatus();
}

void IrisToRenderDataCalculator::AddTextRenderData(
const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size,
const std::vector<std::string>& lines, RenderData* render_data) {
int label_baseline_px = options.vertical_offset_px();
float label_height_px =
std::ceil(options.font_height_px() * kFontHeightScale);
if (options.location() == IrisToRenderDataCalculatorOptions::TOP_LEFT) {
label_baseline_px += label_height_px;
} else if (options.location() ==
IrisToRenderDataCalculatorOptions::BOTTOM_LEFT) {
label_baseline_px += image_size.second - label_height_px * lines.size();
}
const auto label_left_px = options.horizontal_offset_px();
for (int i = 0; i < lines.size(); ++i) {
auto* label_annotation = render_data->add_render_annotations();
label_annotation->set_thickness(5);

label_annotation->mutable_color()->set_r(255);
label_annotation->mutable_color()->set_g(0);
label_annotation->mutable_color()->set_b(0);
//
auto* text = label_annotation->mutable_text();
text->set_display_text(lines[i]);
text->set_font_height(options.font_height_px());
text->set_left(label_left_px);
text->set_baseline(label_baseline_px + i * label_height_px);
text->set_font_face(options.font_face());
}
}

void IrisToRenderDataCalculator::RenderIris(
const NormalizedLandmarkList& iris_landmarks,
const IrisToRenderDataCalculatorOptions& options,
const std::pair<int, int>& image_size, float iris_size,
RenderData* render_data) {
auto* oval_data_render = AddOvalRenderData(options, render_data);
auto* oval_data = oval_data_render->mutable_oval();
const float iris_radius = iris_size / 2.f;
const auto& iris_center = iris_landmarks.landmark(0);

oval_data->mutable_rectangle()->set_top(iris_center.y() -
iris_radius / image_size.second);
oval_data->mutable_rectangle()->set_bottom(iris_center.y() +
iris_radius / image_size.second);
oval_data->mutable_rectangle()->set_left(iris_center.x() -
iris_radius / image_size.first);
oval_data->mutable_rectangle()->set_right(iris_center.x() +
iris_radius / image_size.first);
oval_data->mutable_rectangle()->set_normalized(true);

for (int i = 0; i < iris_landmarks.landmark_size(); ++i) {
const NormalizedLandmark& landmark = iris_landmarks.landmark(i);
auto* landmark_data_render = AddPointRenderData(options, render_data);
auto* landmark_data = landmark_data_render->mutable_point();
landmark_data->set_normalized(true);
landmark_data->set_x(landmark.x());
landmark_data->set_y(landmark.y());
}
}

void IrisToRenderDataCalculator::GetLeftIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(0);
*iris->add_landmark() = lds.landmark(2);
*iris->add_landmark() = lds.landmark(4);
*iris->add_landmark() = lds.landmark(3);
*iris->add_landmark() = lds.landmark(1);
}

void IrisToRenderDataCalculator::GetRightIris(const NormalizedLandmarkList& lds,
NormalizedLandmarkList* iris) {
// Center, top, bottom, left, right
*iris->add_landmark() = lds.landmark(5);
*iris->add_landmark() = lds.landmark(7);
*iris->add_landmark() = lds.landmark(9);
*iris->add_landmark() = lds.landmark(6);
*iris->add_landmark() = lds.landmark(8);
}

RenderAnnotation* IrisToRenderDataCalculator::AddOvalRenderData(
const IrisToRenderDataCalculatorOptions& options, RenderData* render_data) {
auto* oval_data_annotation = render_data->add_render_annotations();
oval_data_annotation->set_scene_tag(kOvalLabel);

SetColor(oval_data_annotation, options.oval_color());
oval_data_annotation->set_thickness(options.oval_thickness());
return oval_data_annotation;
}

RenderAnnotation* IrisToRenderDataCalculator::AddPointRenderData(
const IrisToRenderDataCalculatorOptions& options, RenderData* render_data) {
auto* landmark_data_annotation = render_data->add_render_annotations();
SetColor(landmark_data_annotation, options.landmark_color());
landmark_data_annotation->set_thickness(options.landmark_thickness());

return landmark_data_annotation;
}

} // namespace mediapipe
@ -0,0 +1,62 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";

message IrisToRenderDataCalculatorOptions {
extend CalculatorOptions {
optional IrisToRenderDataCalculatorOptions ext = 289530040;
}

// Color of the oval.
optional Color oval_color = 1;
// Color of the landmarks.
optional Color landmark_color = 9;

// Thickness of the drawing of landmarks and iris oval.
optional double oval_thickness = 2 [default = 1.0];
optional double landmark_thickness = 10 [default = 1.0];

// The font height in absolute pixels.
optional int32 font_height_px = 3 [default = 50];

// The offset of the starting text in horizontal direction in absolute pixels.
optional int32 horizontal_offset_px = 7 [default = 0];
// The offset of the starting text in vertical direction in absolute pixels.
optional int32 vertical_offset_px = 8 [default = 0];

// Specifies the font for the text. Font must be one of the following from
// OpenCV:
// cv::FONT_HERSHEY_SIMPLEX (0)
// cv::FONT_HERSHEY_PLAIN (1)
// cv::FONT_HERSHEY_DUPLEX (2)
// cv::FONT_HERSHEY_COMPLEX (3)
// cv::FONT_HERSHEY_TRIPLEX (4)
// cv::FONT_HERSHEY_COMPLEX_SMALL (5)
// cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6)
// cv::FONT_HERSHEY_SCRIPT_COMPLEX (7)
optional int32 font_face = 5 [default = 0];

// Label location.
enum Location {
TOP_LEFT = 0;
BOTTOM_LEFT = 1;
}
optional Location location = 6 [default = TOP_LEFT];
}
@ -0,0 +1,268 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <memory>

#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"

namespace mediapipe {

namespace {

constexpr char kFaceLandmarksTag[] = "FACE_LANDMARKS";
constexpr char kNewEyeLandmarksTag[] = "NEW_EYE_LANDMARKS";
constexpr char kUpdatedFaceLandmarksTag[] = "UPDATED_FACE_LANDMARKS";

constexpr int kNumFaceLandmarks = 468;
// 71 landmarks for left eye and 71 landmarks for right eye.
constexpr int kNumEyeLandmarks = 142;

constexpr int kEyeLandmarkIndicesInFaceLandmarks[] = {
// Left eye
// eye lower contour
33, 7, 163, 144, 145, 153, 154, 155, 133,
// eye upper contour (excluding corners)
246, 161, 160, 159, 158, 157, 173,
// halo x2 lower contour
130, 25, 110, 24, 23, 22, 26, 112, 243,
// halo x2 upper contour (excluding corners)
247, 30, 29, 27, 28, 56, 190,
// halo x3 lower contour
226, 31, 228, 229, 230, 231, 232, 233, 244,
// halo x3 upper contour (excluding corners)
113, 225, 224, 223, 222, 221, 189,
// halo x4 upper contour (no lower because of mesh structure)
// or eyebrow inner contour
35, 124, 46, 53, 52, 65,
// halo x5 lower contour
143, 111, 117, 118, 119, 120, 121, 128, 245,
// halo x5 upper contour (excluding corners)
// or eyebrow outer contour
156, 70, 63, 105, 66, 107, 55, 193,

// Right eye
// eye lower contour
263, 249, 390, 373, 374, 380, 381, 382, 362,
// eye upper contour (excluding corners)
466, 388, 387, 386, 385, 384, 398,
// halo x2 lower contour
359, 255, 339, 254, 253, 252, 256, 341, 463,
// halo x2 upper contour (excluding corners)
467, 260, 259, 257, 258, 286, 414,
// halo x3 lower contour
446, 261, 448, 449, 450, 451, 452, 453, 464,
// halo x3 upper contour (excluding corners)
342, 445, 444, 443, 442, 441, 413,
// halo x4 upper contour (no lower because of mesh structure)
// or eyebrow inner contour
265, 353, 276, 283, 282, 295,
// halo x5 lower contour
372, 340, 346, 347, 348, 349, 350, 357, 465,
// halo x5 upper contour (excluding corners)
// or eyebrow outer contour
383, 300, 293, 334, 296, 336, 285, 417,
};

} // namespace

// Update face landmarks with new (e.g., refined) values. Currently only updates
// landmarks around the eyes.
//
// Usage example:
// node {
// calculator: "UpdateFaceLandmarksCalculator"
// input_stream: "NEW_EYE_LANDMARKS:new_eye_landmarks"
// input_stream: "FACE_LANDMARKS:face_landmarks"
// output_stream: "UPDATED_FACE_LANDMARKS:refine_face_landmarks"
// }
//
class UpdateFaceLandmarksCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kFaceLandmarksTag).Set<NormalizedLandmarkList>();
cc->Inputs().Tag(kNewEyeLandmarksTag).Set<NormalizedLandmarkList>();

cc->Outputs().Tag(kUpdatedFaceLandmarksTag).Set<NormalizedLandmarkList>();

return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
return absl::OkStatus();
}

absl::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(UpdateFaceLandmarksCalculator);

absl::Status UpdateFaceLandmarksCalculator::Process(CalculatorContext* cc) {
if (cc->Inputs().Tag(kFaceLandmarksTag).IsEmpty() ||
cc->Inputs().Tag(kNewEyeLandmarksTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& face_landmarks =
cc->Inputs().Tag(kFaceLandmarksTag).Get<NormalizedLandmarkList>();
const auto& new_eye_landmarks =
cc->Inputs().Tag(kNewEyeLandmarksTag).Get<NormalizedLandmarkList>();

RET_CHECK_EQ(face_landmarks.landmark_size(), kNumFaceLandmarks)
<< "Wrong number of face landmarks";
RET_CHECK_EQ(new_eye_landmarks.landmark_size(), kNumEyeLandmarks)
<< "Wrong number of eye landmarks";

auto refined_face_landmarks =
absl::make_unique<NormalizedLandmarkList>(face_landmarks);
for (int i = 0; i < kNumEyeLandmarks; ++i) {
const auto& refined_ld = new_eye_landmarks.landmark(i);
const int id = kEyeLandmarkIndicesInFaceLandmarks[i];
refined_face_landmarks->mutable_landmark(id)->set_x(refined_ld.x());
refined_face_landmarks->mutable_landmark(id)->set_y(refined_ld.y());
refined_face_landmarks->mutable_landmark(id)->set_z(refined_ld.z());
refined_face_landmarks->mutable_landmark(id)->set_visibility(
refined_ld.visibility());
}
cc->Outputs()
.Tag(kUpdatedFaceLandmarksTag)
.Add(refined_face_landmarks.release(), cc->InputTimestamp());

return absl::OkStatus();
}

} // namespace mediapipe
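The calculator above is essentially an index remap: the 142 refined eye-contour points overwrite their slots in the 468-point face mesh. A minimal Rust sketch of that update (types and names are illustrative, not part of this crate's API; the index table corresponds to kEyeLandmarkIndicesInFaceLandmarks above):

// Overwrite the face-mesh entries listed in `eye_indices` with the refined eye landmarks.
// `face` has 468 entries; `eyes` and `eye_indices` have 142 entries each.
fn update_face_landmarks(face: &mut [[f32; 3]], eyes: &[[f32; 3]], eye_indices: &[usize]) {
    assert_eq!(face.len(), 468);
    assert_eq!(eyes.len(), eye_indices.len());
    for (refined, &idx) in eyes.iter().zip(eye_indices) {
        face[idx] = *refined;
    }
}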
159
mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt
Normal file
@ -0,0 +1,159 @@
# MediaPipe graph that performs iris distance computation on desktop with
# TensorFlow Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/iris_tracking:iris_depth_from_image_desktop.

# Raw image bytes. (std::string)
input_stream: "input_image_bytes"

# Image with all the detections rendered. (ImageFrame)
output_stream: "output_image"
# Estimated depth in mm from the camera to the left iris of the face (if any) in
# the image. (float)
output_stream: "left_iris_depth_mm"
# Estimated depth in mm from the camera to the right iris of the face (if any)
# in the image. (float)
output_stream: "right_iris_depth_mm"

# Computes the focal length in pixels based on EXIF information stored in the
# image file. The output is an ImageFileProperties object containing relevant
# image EXIF information along with focal length in pixels.
node {
calculator: "ImageFilePropertiesCalculator"
input_stream: "input_image_bytes"
output_side_packet: "image_file_properties"
}

# Converts a raw string with encoded image bytes into an ImageFrame object
# via OpenCV so that it can be processed by downstream calculators.
node {
calculator: "OpenCvEncodedImageToImageFrameCalculator"
input_stream: "input_image_bytes"
output_stream: "input_image"
}

# Defines how many faces to detect. Iris tracking currently only handles one
# face (left and right eye), and therefore this should always be set to 1.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}

# Detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_image"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}

# Gets the very first and only face from "multi_face_landmarks" vector.
node {
calculator: "SplitNormalizedLandmarkListVectorCalculator"
input_stream: "multi_face_landmarks"
output_stream: "face_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}

# Gets the very first and only face rect from "face_rects_from_landmarks"
# vector.
node {
calculator: "SplitNormalizedRectVectorCalculator"
input_stream: "face_rects_from_landmarks"
output_stream: "face_rect"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}

# Gets two landmarks which define left eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "left_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 33 end: 34 }
ranges: { begin: 133 end: 134 }
combine_outputs: true
}
}
}

# Gets two landmarks which define right eye boundary.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "face_landmarks"
output_stream: "right_eye_boundary_landmarks"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 362 end: 363 }
ranges: { begin: 263 end: 264 }
combine_outputs: true
}
}
}

# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
calculator: "IrisLandmarkLeftAndRightCpu"
input_stream: "IMAGE:input_image"
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
}

node {
calculator: "ConcatenateNormalizedLandmarkListCalculator"
input_stream: "left_eye_contour_landmarks"
input_stream: "right_eye_contour_landmarks"
output_stream: "refined_eye_landmarks"
}

node {
calculator: "UpdateFaceLandmarksCalculator"
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
input_stream: "FACE_LANDMARKS:face_landmarks"
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
}

# Renders annotations and overlays them on top of the input images.
node {
calculator: "IrisAndDepthRendererCpu"
input_stream: "IMAGE:input_image"
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
input_stream: "NORM_RECT:face_rect"
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
input_stream: "DETECTIONS:face_detections"
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
output_stream: "IRIS_LANDMARKS:iris_landmarks"
output_stream: "IMAGE:output_image"
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
}
142
mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt
Normal file
142
mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt
Normal file
|
@ -0,0 +1,142 @@
|
|||
# MediaPipe graph that performs iris tracking on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
# Used in the example in
|
||||
# mediapipie/examples/desktop/iris_tracking:iris_tracking_cpu.
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "input_video"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
output_stream: "output_video"
|
||||
# Face landmarks with iris. (NormalizedLandmarkList)
|
||||
output_stream: "face_landmarks_with_iris"
|
||||
|
||||
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||
# face (left and right eye), and therefore this should always be set to 1.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:0:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "multi_face_landmarks"
|
||||
output_stream: "face_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||
# vector.
|
||||
node {
|
||||
calculator: "SplitNormalizedRectVectorCalculator"
|
||||
input_stream: "face_rects_from_landmarks"
|
||||
output_stream: "face_rect"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets two landmarks which define left eye boundary.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "face_landmarks"
|
||||
output_stream: "left_eye_boundary_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 33 end: 34 }
|
||||
ranges: { begin: 133 end: 134 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets two landmarks which define right eye boundary.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "face_landmarks"
|
||||
output_stream: "right_eye_boundary_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 362 end: 363 }
|
||||
ranges: { begin: 263 end: 264 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||
node {
|
||||
calculator: "IrisLandmarkLeftAndRightCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "left_eye_contour_landmarks"
|
||||
input_stream: "right_eye_contour_landmarks"
|
||||
output_stream: "refined_eye_landmarks"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "UpdateFaceLandmarksCalculator"
|
||||
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||
}
|
||||
|
||||
# Renders annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "IrisRendererCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||
input_stream: "NORM_RECT:face_rect"
|
||||
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "updated_face_landmarks"
|
||||
input_stream: "iris_landmarks"
|
||||
output_stream: "face_landmarks_with_iris"
|
||||
}
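
The "face_landmarks_with_iris" stream above is the refined face mesh followed by the concatenated iris landmarks. A minimal Rust sketch of how a consumer might split that combined list back apart, assuming the usual 468 face-mesh points plus 5 iris points per eye (left first, then right) and a stand-in Landmark struct rather than whatever landmark type the bindings actually expose:

// Sketch only: the layout (468 + 5 + 5) and the Landmark struct are
// assumptions about the combined stream, not taken from this commit.
#[derive(Debug, Clone, Copy)]
struct Landmark {
    x: f32, // normalized [0, 1] image coordinates
    y: f32,
    z: f32,
}

struct FaceWithIris<'a> {
    face: &'a [Landmark],       // 468 face mesh points
    left_iris: &'a [Landmark],  // 5 points
    right_iris: &'a [Landmark], // 5 points
}

fn split_face_with_iris(all: &[Landmark]) -> Option<FaceWithIris<'_>> {
    const FACE: usize = 468;
    const IRIS: usize = 5;
    if all.len() != FACE + 2 * IRIS {
        return None; // unexpected layout; bail out rather than guess
    }
    Some(FaceWithIris {
        face: &all[..FACE],
        left_iris: &all[FACE..FACE + IRIS],
        right_iris: &all[FACE + IRIS..],
    })
}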
|
|
@ -0,0 +1,153 @@
|
|||
# MediaPipe graph that performs iris tracking on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
|
||||
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
|
||||
max_queue_size: 1
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||
# face (left and right eye), and therefore this should always be set to 1.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:0:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "multi_face_landmarks"
|
||||
output_stream: "face_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||
# vector.
|
||||
node {
|
||||
calculator: "SplitNormalizedRectVectorCalculator"
|
||||
input_stream: "face_rects_from_landmarks"
|
||||
output_stream: "face_rect"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets two landmarks which define left eye boundary.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "face_landmarks"
|
||||
output_stream: "left_eye_boundary_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 33 end: 34 }
|
||||
ranges: { begin: 133 end: 134 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets two landmarks which define right eye boundary.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "face_landmarks"
|
||||
output_stream: "right_eye_boundary_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 362 end: 363 }
|
||||
ranges: { begin: 263 end: 264 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||
node {
|
||||
calculator: "IrisLandmarkLeftAndRightCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "left_eye_contour_landmarks"
|
||||
input_stream: "right_eye_contour_landmarks"
|
||||
output_stream: "refined_eye_landmarks"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "UpdateFaceLandmarksCalculator"
|
||||
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||
}
|
||||
|
||||
# Renders annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "IrisRendererCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||
input_stream: "NORM_RECT:face_rect"
|
||||
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
|
163
mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt
Normal file
|
@ -0,0 +1,163 @@
|
|||
# MediaPipe graph that performs iris tracking with TensorFlow Lite on GPU.
|
||||
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/iristrackinggpu.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Face landmarks with iris. (NormalizedLandmarkList)
|
||||
output_stream: "face_landmarks_with_iris"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
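
The throttling described above amounts to keeping at most one frame in flight, with the FINISHED back edge signalling when the graph has caught up. A small Rust sketch of that idea (an illustration of the behavior, not the calculator's actual implementation):

use std::sync::atomic::{AtomicBool, Ordering};

// Keeps at most one frame in flight; new frames are dropped while busy.
struct FlowLimiter {
    in_flight: AtomicBool,
}

impl FlowLimiter {
    fn new() -> Self {
        Self { in_flight: AtomicBool::new(false) }
    }

    /// Called for every incoming frame; returns true if the frame should be
    /// forwarded downstream, false if it should be dropped.
    fn try_admit(&self) -> bool {
        self.in_flight
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .is_ok()
    }

    /// Called on the "FINISHED" back edge once downstream output is produced.
    fn finished(&self) {
        self.in_flight.store(false, Ordering::Release);
    }
}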
|
||||
|
||||
# Defines how many faces to detect. Iris tracking currently only handles one
|
||||
# face (left and right eye), and therefore this should always be set to 1.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Gets the very first and only face from "multi_face_landmarks" vector.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "multi_face_landmarks"
|
||||
output_stream: "face_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets the very first and only face rect from "face_rects_from_landmarks"
|
||||
# vector.
|
||||
node {
|
||||
calculator: "SplitNormalizedRectVectorCalculator"
|
||||
input_stream: "face_rects_from_landmarks"
|
||||
output_stream: "face_rect"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets two landmarks which define left eye boundary.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "face_landmarks"
|
||||
output_stream: "left_eye_boundary_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 33 end: 34 }
|
||||
ranges: { begin: 133 end: 134 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Gets two landmarks which define right eye boundary.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "face_landmarks"
|
||||
output_stream: "right_eye_boundary_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 362 end: 363 }
|
||||
ranges: { begin: 263 end: 264 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
|
||||
node {
|
||||
calculator: "IrisLandmarkLeftAndRightGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
|
||||
input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
|
||||
output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
|
||||
output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks"
|
||||
output_stream: "LEFT_EYE_ROI:left_eye_rect_from_landmarks"
|
||||
output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks"
|
||||
output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks"
|
||||
output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "left_eye_contour_landmarks"
|
||||
input_stream: "right_eye_contour_landmarks"
|
||||
output_stream: "refined_eye_landmarks"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "UpdateFaceLandmarksCalculator"
|
||||
input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
|
||||
}
|
||||
|
||||
# Renders annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "IrisAndDepthRendererGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "FACE_LANDMARKS:updated_face_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||
input_stream: "NORM_RECT:face_rect"
|
||||
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "updated_face_landmarks"
|
||||
input_stream: "iris_landmarks"
|
||||
output_stream: "face_landmarks_with_iris"
|
||||
}
|
67
mediapipe/graphs/iris_tracking/subgraphs/BUILD
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "renderer_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
|
||||
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||
"//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator",
|
||||
"//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "iris_and_depth_renderer_gpu",
|
||||
graph = "iris_and_depth_renderer_gpu.pbtxt",
|
||||
register_as = "IrisAndDepthRendererGpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
"//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "iris_renderer_cpu",
|
||||
graph = "iris_renderer_cpu.pbtxt",
|
||||
register_as = "IrisRendererCpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "iris_and_depth_renderer_cpu",
|
||||
graph = "iris_and_depth_renderer_cpu.pbtxt",
|
||||
register_as = "IrisAndDepthRendererCpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
"//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,267 @@
|
|||
# MediaPipe iris tracking rendering subgraph.
|
||||
|
||||
type: "IrisAndDepthRendererCpu"
|
||||
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||
input_stream: "NORM_RECT:rect"
|
||||
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
|
||||
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||
output_stream: "IMAGE:output_image"
|
||||
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "all_left_eye_contour_landmarks"
|
||||
output_stream: "left_eye_contour_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "all_right_eye_contour_landmarks"
|
||||
output_stream: "right_eye_contour_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Concatenate iris landmarks from both eyes.
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "left_iris_landmarks"
|
||||
input_stream: "right_iris_landmarks"
|
||||
output_stream: "iris_landmarks"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA:face_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 150 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 150 b: 0 }
|
||||
thickness: 2
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text ("Face").
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "labeled_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label: "Face"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:labeled_detections"
|
||||
output_stream: "RENDER_DATA:detection_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
|
||||
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 4
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 12
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 0
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 14
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 255 g: 0 b: 0 }
|
||||
visualize_landmark_depth: false
|
||||
thickness: 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
|
||||
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 4
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 12
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 0
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 14
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 255 g: 0 b: 0 }
|
||||
visualize_landmark_depth: false
|
||||
thickness: 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:rect"
|
||||
output_stream: "RENDER_DATA:rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
|
||||
output_stream: "RENDER_DATA:right_eye_rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
|
||||
output_stream: "RENDER_DATA:left_eye_rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "IrisToDepthCalculator"
|
||||
input_stream: "IRIS:iris_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties"
|
||||
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "IrisToRenderDataCalculator"
|
||||
input_stream: "IRIS:iris_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||
input_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||
output_stream: "RENDER_DATA:iris_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||
oval_color { r: 0 g: 0 b: 255 }
|
||||
landmark_color { r: 0 g: 255 b: 0 }
|
||||
oval_thickness: 2.0
|
||||
landmark_thickness: 1.0
|
||||
font_height_px: 50
|
||||
horizontal_offset_px: 200
|
||||
vertical_offset_px: 200
|
||||
location: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "detection_render_data"
|
||||
input_stream: "face_landmarks_render_data"
|
||||
input_stream: "right_eye_contour_landmarks_render_data"
|
||||
input_stream: "left_eye_contour_landmarks_render_data"
|
||||
input_stream: "iris_render_data"
|
||||
output_stream: "IMAGE:output_image"
|
||||
}
|
|
@ -0,0 +1,270 @@
|
|||
# MediaPipe iris tracking rendering subgraph.
|
||||
|
||||
type: "IrisAndDepthRendererGpu"
|
||||
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||
input_stream: "NORM_RECT:rect"
|
||||
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "all_left_eye_contour_landmarks"
|
||||
output_stream: "left_eye_contour_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "all_right_eye_contour_landmarks"
|
||||
output_stream: "right_eye_contour_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Concatenate iris landmarks from both eyes.
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "left_iris_landmarks"
|
||||
input_stream: "right_iris_landmarks"
|
||||
output_stream: "iris_landmarks"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA:face_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 150 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 150 b: 0 }
|
||||
thickness: 2
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text ("Face").
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "labeled_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label: "Face"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:labeled_detections"
|
||||
output_stream: "RENDER_DATA:detection_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
|
||||
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 4
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 12
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 0
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 14
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 255 g: 0 b: 0 }
|
||||
visualize_landmark_depth: false
|
||||
thickness: 2.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
|
||||
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 4
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 12
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 0
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 14
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 255 g: 0 b: 0 }
|
||||
visualize_landmark_depth: false
|
||||
thickness: 2.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:rect"
|
||||
output_stream: "RENDER_DATA:rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
|
||||
output_stream: "RENDER_DATA:right_eye_rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
|
||||
output_stream: "RENDER_DATA:left_eye_rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "IrisToDepthCalculator"
|
||||
input_stream: "IRIS:iris_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_side_packet: "FOCAL_LENGTH:focal_length_pixel"
|
||||
output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||
output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||
}
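
IrisToDepthCalculator turns the iris size in pixels plus the FOCAL_LENGTH side packet into a metric depth estimate via the pinhole camera model. A worked sketch, assuming the commonly cited ~11.7 mm average human iris diameter (an assumption, not defined anywhere in this graph):

// Back-of-the-envelope version of the depth estimate this node produces.
// Pinhole model: depth = focal_length_px * real_size_mm / size_in_px.
const AVG_IRIS_DIAMETER_MM: f32 = 11.7; // assumed average, not from this graph

fn iris_depth_mm(focal_length_px: f32, iris_diameter_px: f32) -> Option<f32> {
    if iris_diameter_px <= 0.0 {
        return None;
    }
    Some(focal_length_px * AVG_IRIS_DIAMETER_MM / iris_diameter_px)
}

fn main() {
    // Example: a 1000 px focal length and an iris spanning 24 px in the image
    // puts the eye roughly half a meter from the camera.
    if let Some(d) = iris_depth_mm(1000.0, 24.0) {
        println!("estimated depth: {:.0} mm", d); // ~488 mm
    }
}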
|
||||
|
||||
node {
|
||||
calculator: "IrisToRenderDataCalculator"
|
||||
input_stream: "IRIS:iris_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm"
|
||||
input_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm"
|
||||
output_stream: "RENDER_DATA:iris_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||
oval_color { r: 0 g: 0 b: 255 }
|
||||
landmark_color { r: 0 g: 255 b: 0 }
|
||||
oval_thickness: 4.0
|
||||
landmark_thickness: 2.0
|
||||
font_height_px: 50
|
||||
horizontal_offset_px: 200
|
||||
vertical_offset_px: 200
|
||||
location: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image"
|
||||
input_stream: "detection_render_data"
|
||||
input_stream: "face_landmarks_render_data"
|
||||
input_stream: "right_eye_contour_landmarks_render_data"
|
||||
input_stream: "left_eye_contour_landmarks_render_data"
|
||||
input_stream: "iris_render_data"
|
||||
output_stream: "IMAGE_GPU:output_image"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.AnnotationOverlayCalculatorOptions] {
|
||||
gpu_scale_factor: 0.5
|
||||
}
|
||||
}
|
||||
}
|
254
mediapipe/graphs/iris_tracking/subgraphs/iris_renderer_cpu.pbtxt
Normal file
|
@ -0,0 +1,254 @@
|
|||
# MediaPipe iris tracking rendering subgraph.
|
||||
|
||||
type: "IrisRendererCpu"
|
||||
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
input_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_LEFT:all_left_eye_contour_landmarks"
|
||||
input_stream: "EYE_LANDMARKS_RIGHT:all_right_eye_contour_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks"
|
||||
input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks"
|
||||
input_stream: "NORM_RECT:rect"
|
||||
input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks"
|
||||
input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks"
|
||||
output_stream: "IRIS_LANDMARKS:iris_landmarks"
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "all_left_eye_contour_landmarks"
|
||||
output_stream: "left_eye_contour_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "all_right_eye_contour_landmarks"
|
||||
output_stream: "right_eye_contour_landmarks"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 15 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Concatenate iris landmarks from both eyes.
|
||||
node {
|
||||
calculator: "ConcatenateNormalizedLandmarkListCalculator"
|
||||
input_stream: "left_iris_landmarks"
|
||||
input_stream: "right_iris_landmarks"
|
||||
output_stream: "iris_landmarks"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "FaceLandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA:face_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_color { r: 150 g: 0 b: 0 }
|
||||
connection_color { r: 0 g: 150 b: 0 }
|
||||
thickness: 2
|
||||
visualize_landmark_depth: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text ("Face").
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "labeled_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label: "Face"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:labeled_detections"
|
||||
output_stream: "RENDER_DATA:detection_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:left_eye_contour_landmarks"
|
||||
output_stream: "RENDER_DATA:left_eye_contour_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 4
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 12
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 0
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 14
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 255 g: 0 b: 0 }
|
||||
visualize_landmark_depth: false
|
||||
thickness: 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:right_eye_contour_landmarks"
|
||||
output_stream: "RENDER_DATA:right_eye_contour_landmarks_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
landmark_connections: 0
|
||||
landmark_connections: 1
|
||||
landmark_connections: 1
|
||||
landmark_connections: 2
|
||||
landmark_connections: 2
|
||||
landmark_connections: 3
|
||||
landmark_connections: 3
|
||||
landmark_connections: 4
|
||||
landmark_connections: 4
|
||||
landmark_connections: 5
|
||||
landmark_connections: 5
|
||||
landmark_connections: 6
|
||||
landmark_connections: 6
|
||||
landmark_connections: 7
|
||||
landmark_connections: 7
|
||||
landmark_connections: 8
|
||||
landmark_connections: 9
|
||||
landmark_connections: 10
|
||||
landmark_connections: 10
|
||||
landmark_connections: 11
|
||||
landmark_connections: 11
|
||||
landmark_connections: 12
|
||||
landmark_connections: 12
|
||||
landmark_connections: 13
|
||||
landmark_connections: 13
|
||||
landmark_connections: 14
|
||||
landmark_connections: 0
|
||||
landmark_connections: 9
|
||||
landmark_connections: 8
|
||||
landmark_connections: 14
|
||||
landmark_color { r: 255 g: 0 b: 0 }
|
||||
connection_color { r: 255 g: 0 b: 0 }
|
||||
visualize_landmark_depth: false
|
||||
thickness: 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts normalized rects to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:rect"
|
||||
output_stream: "RENDER_DATA:rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:right_eye_rect_from_landmarks"
|
||||
output_stream: "RENDER_DATA:right_eye_rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "RectToRenderDataCalculator"
|
||||
input_stream: "NORM_RECT:left_eye_rect_from_landmarks"
|
||||
output_stream: "RENDER_DATA:left_eye_rect_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
|
||||
filled: false
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
thickness: 4.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "IrisToRenderDataCalculator"
|
||||
input_stream: "IRIS:iris_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "RENDER_DATA:iris_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.IrisToRenderDataCalculatorOptions] {
|
||||
oval_color { r: 0 g: 0 b: 255 }
|
||||
landmark_color { r: 0 g: 255 b: 0 }
|
||||
oval_thickness: 4.0
|
||||
landmark_thickness: 2.0
|
||||
font_height_px: 50
|
||||
horizontal_offset_px: 200
|
||||
vertical_offset_px: 200
|
||||
location: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "detection_render_data"
|
||||
input_stream: "face_landmarks_render_data"
|
||||
input_stream: "right_eye_contour_landmarks_render_data"
|
||||
input_stream: "left_eye_contour_landmarks_render_data"
|
||||
input_stream: "iris_render_data"
|
||||
output_stream: "IMAGE:output_image"
|
||||
}
|
47
mediapipe/graphs/media_sequence/BUILD
Normal file
|
@ -0,0 +1,47 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "clipped_images_from_file_at_24fps_calculators",
    deps = [
        "//mediapipe/calculators/core:packet_resampler_calculator",
        "//mediapipe/calculators/image:opencv_image_encoder_calculator",
        "//mediapipe/calculators/image:scale_image_calculator",
        "//mediapipe/calculators/tensorflow:pack_media_sequence_calculator",
        "//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator",
        "//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
    ],
)

cc_library(
    name = "tvl1_flow_and_rgb_from_file_calculators",
    deps = [
        "//mediapipe/calculators/core:packet_inner_join_calculator",
        "//mediapipe/calculators/core:packet_resampler_calculator",
        "//mediapipe/calculators/core:sequence_shift_calculator",
        "//mediapipe/calculators/image:opencv_image_encoder_calculator",
        "//mediapipe/calculators/image:scale_image_calculator",
        "//mediapipe/calculators/tensorflow:pack_media_sequence_calculator",
        "//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator",
        "//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator",
        "//mediapipe/calculators/video:flow_to_image_calculator",
        "//mediapipe/calculators/video:opencv_video_decoder_calculator",
        "//mediapipe/calculators/video:tvl1_optical_flow_calculator",
    ],
)
|
@ -0,0 +1,78 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Convert the string input into a decoded SequenceExample.
|
||||
node {
|
||||
calculator: "StringToSequenceExampleCalculator"
|
||||
input_side_packet: "STRING:input_sequence_example"
|
||||
output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||
}
|
||||
|
||||
# Unpack the data path and clip timing from the SequenceExample.
|
||||
node {
|
||||
calculator: "UnpackMediaSequenceCalculator"
|
||||
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||
output_side_packet: "DATA_PATH:input_video_path"
|
||||
output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
|
||||
base_packet_resampler_options: {
|
||||
frame_rate: 24.0
|
||||
base_timestamp: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decode the entire video.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:decoded_frames"
|
||||
}
|
||||
|
||||
# Extract the subset of frames we want to keep.
|
||||
node {
|
||||
calculator: "PacketResamplerCalculator"
|
||||
input_stream: "decoded_frames"
|
||||
output_stream: "sampled_frames"
|
||||
input_side_packet: "OPTIONS:packet_resampler_options"
|
||||
}
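
The resampler picks frames so the output approximates the 24 fps requested in the RESAMPLER_OPTIONS unpacked above. A rough Rust sketch of that selection by nearest timestamp (the intent, not the calculator's exact algorithm):

// Returns the index of the decoded frame closest to each output tick at the
// target frame rate. Timestamps are in microseconds, as MediaPipe uses.
fn resample_indices(timestamps_us: &[i64], frame_rate: f64) -> Vec<usize> {
    let mut picked = Vec::new();
    if timestamps_us.is_empty() {
        return picked;
    }
    let period_us = (1_000_000.0 / frame_rate) as i64;
    let start = timestamps_us[0];
    let end = *timestamps_us.last().unwrap();
    let mut tick = start;
    while tick <= end {
        // Index of the input frame whose timestamp is closest to this tick.
        let best = timestamps_us
            .iter()
            .enumerate()
            .min_by_key(|(_, &t)| (t - tick).abs())
            .map(|(i, _)| i)
            .unwrap();
        picked.push(best);
        tick += period_us;
    }
    picked
}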
|
||||
|
||||
# Encode the images to store in the SequenceExample.
|
||||
node {
|
||||
calculator: "OpenCvImageEncoderCalculator"
|
||||
input_stream: "sampled_frames"
|
||||
output_stream: "encoded_frames"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
|
||||
quality: 80
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Store the images in the SequenceExample.
|
||||
node {
|
||||
calculator: "PackMediaSequenceCalculator"
|
||||
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||
output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
|
||||
input_stream: "IMAGE:encoded_frames"
|
||||
}
|
||||
|
||||
# Serialize the SequenceExample to a string for storage.
|
||||
node {
|
||||
calculator: "StringToSequenceExampleCalculator"
|
||||
input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
|
||||
output_side_packet: "STRING:output_sequence_example"
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Convert the string input into a decoded SequenceExample.
|
||||
node {
|
||||
calculator: "StringToSequenceExampleCalculator"
|
||||
input_side_packet: "STRING:input_sequence_example"
|
||||
output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||
}
|
||||
|
||||
# Unpack the data path and clip timing from the SequenceExample.
|
||||
node {
|
||||
calculator: "UnpackMediaSequenceCalculator"
|
||||
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||
output_side_packet: "DATA_PATH:input_video_path"
|
||||
output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
|
||||
base_packet_resampler_options: {
|
||||
frame_rate: 25.0
|
||||
base_timestamp: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decode the entire video.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:decoded_frames"
|
||||
}
|
||||
|
||||
# Extract the subset of frames we want to keep.
|
||||
node {
|
||||
calculator: "PacketResamplerCalculator"
|
||||
input_stream: "decoded_frames"
|
||||
output_stream: "sampled_frames"
|
||||
input_side_packet: "OPTIONS:packet_resampler_options"
|
||||
}
|
||||
|
||||
# Fit the images into the target size.
|
||||
node: {
|
||||
calculator: "ScaleImageCalculator"
|
||||
input_stream: "sampled_frames"
|
||||
output_stream: "scaled_frames"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ScaleImageCalculatorOptions]: {
|
||||
target_height: 256
|
||||
preserve_aspect_ratio: true
|
||||
}
|
||||
}
|
||||
}
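
With target_height: 256 and preserve_aspect_ratio: true, the node pins the height and scales the width by the same factor. A small sketch of the resulting dimensions (the even-width rounding is a guess at typical video-friendly behavior, not taken from this graph):

// Computes the output size for a fixed target height with preserved aspect
// ratio. Rounding up to an even width is an assumption, not from this graph.
fn scaled_size(width: u32, height: u32, target_height: u32) -> (u32, u32) {
    let scale = target_height as f64 / height as f64;
    let mut new_width = (width as f64 * scale).round() as u32;
    if new_width % 2 == 1 {
        new_width += 1; // keep dimensions even for video codecs (assumption)
    }
    (new_width, target_height)
}

// e.g. a 1920x1080 clip becomes 456x256 (1920 * 256/1080 is about 455.1,
// rounded up to the next even width).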
|
||||
|
||||
# Shifts the timestamps of packets along a stream.
|
||||
# With a packet_offset of -1, the first packet will be dropped, the second will
|
||||
# be output with the timestamp of the first, the third with the timestamp of
|
||||
# the second, and so on.
|
||||
node: {
|
||||
calculator: "SequenceShiftCalculator"
|
||||
input_stream: "scaled_frames"
|
||||
output_stream: "shifted_scaled_frames"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SequenceShiftCalculatorOptions]: {
|
||||
packet_offset: -1
|
||||
}
|
||||
}
|
||||
}
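
With packet_offset: -1, the stream is shifted so that frame i+1 carries frame i's timestamp; joined with the unshifted stream below, this yields consecutive frame pairs for optical flow. A sketch of the resulting pairing with plain Rust values standing in for packets:

// Effect of the -1 shift plus the inner join that follows: each frame is
// paired with its successor, and the last frame has no partner.
fn consecutive_pairs<T: Clone>(frames: &[T]) -> Vec<(T, T)> {
    frames
        .windows(2)
        .map(|w| (w[0].clone(), w[1].clone()))
        .collect()
}

// e.g. [f0, f1, f2, f3] -> [(f0, f1), (f1, f2), (f2, f3)]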
|
||||
|
||||
# Join the original input stream and the one that is shifted by one packet.
|
||||
node: {
|
||||
calculator: "PacketInnerJoinCalculator"
|
||||
input_stream: "scaled_frames"
|
||||
input_stream: "shifted_scaled_frames"
|
||||
output_stream: "first_frames"
|
||||
output_stream: "second_frames"
|
||||
}
|
||||
|
||||
# Compute the forward optical flow.
|
||||
node {
|
||||
calculator: "Tvl1OpticalFlowCalculator"
|
||||
input_stream: "FIRST_FRAME:first_frames"
|
||||
input_stream: "SECOND_FRAME:second_frames"
|
||||
output_stream: "FORWARD_FLOW:forward_flow"
|
||||
max_in_flight: 32
|
||||
}
|
||||
|
||||
# Converts an optical flow field into an image frame with 2 channels (v_x and
# v_y); each channel is quantized to 0-255.
|
||||
node: {
|
||||
calculator: "FlowToImageCalculator"
|
||||
input_stream: "forward_flow"
|
||||
output_stream: "flow_frames"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FlowToImageCalculatorOptions]: {
|
||||
min_value: -20.0
|
||||
max_value: 20.0
|
||||
}
|
||||
}
|
||||
}
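
Per the comment above, each flow component is clamped to [min_value, max_value] and quantized to a byte. A sketch of that per-value mapping:

// Clamp a single flow component to [min_value, max_value] and scale it
// linearly onto 0..=255. This mirrors the quantization the comment describes,
// not the calculator's source.
fn quantize_flow(v: f32, min_value: f32, max_value: f32) -> u8 {
    let clamped = v.clamp(min_value, max_value);
    let normalized = (clamped - min_value) / (max_value - min_value); // 0..1
    (normalized * 255.0).round() as u8
}

// quantize_flow(0.0, -20.0, 20.0) == 128 (zero motion maps to mid-gray)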
|
||||
|
||||
# Encode the optical flow images to store in the SequenceExample.
|
||||
node {
|
||||
calculator: "OpenCvImageEncoderCalculator"
|
||||
input_stream: "flow_frames"
|
||||
output_stream: "encoded_flow_frames"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
|
||||
quality: 100
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Encode the rgb images to store in the SequenceExample.
|
||||
node {
|
||||
calculator: "OpenCvImageEncoderCalculator"
|
||||
input_stream: "scaled_frames"
|
||||
output_stream: "encoded_frames"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvImageEncoderCalculatorOptions]: {
|
||||
quality: 100
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Store the images in the SequenceExample.
|
||||
node {
|
||||
calculator: "PackMediaSequenceCalculator"
|
||||
input_stream: "IMAGE:encoded_frames"
|
||||
input_stream: "FORWARD_FLOW_ENCODED:encoded_flow_frames"
|
||||
input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
|
||||
output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
|
||||
}
|
||||
|
||||
# Serialize the SequenceExample to a string for storage.
|
||||
node {
|
||||
calculator: "StringToSequenceExampleCalculator"
|
||||
input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
|
||||
output_side_packet: "STRING:output_sequence_example"
|
||||
}
|
||||
|
||||
num_threads: 32
|
94
mediapipe/graphs/object_detection/BUILD
Normal file
|
@ -0,0 +1,94 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
|
||||
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_tensorflow_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/tensorflow:image_frame_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator",
|
||||
"//mediapipe/calculators/tensorflow:object_detection_tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/tensorflow:tensor_squeeze_dimensions_calculator",
|
||||
"//mediapipe/calculators/tensorflow:tensorflow_inference_calculator",
|
||||
"//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_tflite_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_cpu_binary_graph",
|
||||
graph = "object_detection_mobile_cpu.pbtxt",
|
||||
output_name = "mobile_cpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_gpu_binary_graph",
|
||||
graph = "object_detection_mobile_gpu.pbtxt",
|
||||
output_name = "mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
@ -0,0 +1,174 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on CPU.
|
||||
# Used in the examples in
|
||||
# mediapipe/examples/desktop/object_detection:object_detection_cpu.
|
||||
|
||||
# Images on CPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
|
||||
# generating the corresponding detections before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:detections"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "IMAGE:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 320
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on CPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE:transformed_input_video"
|
||||
output_stream: "TENSORS:image_tensor"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS:image_tensor"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 320
|
||||
input_size_width: 320
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 91
|
||||
num_boxes: 2034
|
||||
num_coords: 4
|
||||
ignore_classes: 0
|
||||
sigmoid_score: true
|
||||
apply_exponential_on_box_size: true
|
||||
x_scale: 10.0
|
||||
y_scale: 10.0
|
||||
h_scale: 5.0
|
||||
w_scale: 5.0
|
||||
min_score_thresh: 0.6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
max_num_detections: 3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
return_empty_detections: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
@ -0,0 +1,130 @@
# MediaPipe graph that performs object detection on desktop with TensorFlow
|
||||
# on CPU.
|
||||
# Used in the example in
|
||||
# mediapipe/examples/desktop/object_detection:object_detection_tensorflow.
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Converts the input image into an image tensor as a tensorflow::Tensor.
|
||||
node {
|
||||
calculator: "ImageFrameToTensorCalculator"
|
||||
input_stream: "input_video"
|
||||
output_stream: "image_tensor"
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a TensorFlow session from a saved
|
||||
# model. The directory path that contains the saved model is specified in the
|
||||
# saved_model_path option, and the name of the saved model file has to be
|
||||
# "saved_model.pb".
|
||||
node {
|
||||
calculator: "TensorFlowSessionFromSavedModelCalculator"
|
||||
output_side_packet: "SESSION:object_detection_session"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: {
|
||||
saved_model_path: "mediapipe/models/object_detection_saved_model"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow session (specified as an input side packet) that takes an
|
||||
# image tensor and outputs multiple tensors that describe the objects detected
|
||||
# in the image. The batch_size option is set to 1 to disable batching entirely.
|
||||
# Note that the particular TensorFlow model used in this session handles image
|
||||
# scaling internally before the object-detection inference, and therefore no
|
||||
# additional calculator for image transformation is needed in this MediaPipe
|
||||
# graph.
|
||||
node: {
|
||||
calculator: "TensorFlowInferenceCalculator"
|
||||
input_side_packet: "SESSION:object_detection_session"
|
||||
input_stream: "INPUTS:image_tensor"
|
||||
output_stream: "DETECTION_BOXES:detection_boxes_tensor"
|
||||
output_stream: "DETECTION_CLASSES:detection_classes_tensor"
|
||||
output_stream: "DETECTION_SCORES:detection_scores_tensor"
|
||||
output_stream: "NUM_DETECTIONS:num_detections_tensor"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: {
|
||||
batch_size: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors from the TensorFlow model into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "ObjectDetectionTensorsToDetectionsCalculator"
|
||||
input_stream: "BOXES:detection_boxes_tensor"
|
||||
input_stream: "SCORES:detection_scores_tensor"
|
||||
input_stream: "CLASSES:detection_classes_tensor"
|
||||
input_stream: "NUM_DETECTIONS:num_detections_tensor"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
min_score_threshold: 0.6
|
||||
max_num_detections: 10
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
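The OpenCvVideoEncoderCalculator above hands the annotated frames to OpenCV using the "avc1" FOURCC and an mp4 container. The sketch below exercises the same encoder settings directly through the opencv crate this repository already depends on; the camera index, frame rate, clip length and output path are placeholders, and generated signatures can differ slightly between opencv crate versions.

```rust
use opencv::{core, prelude::*, videoio, Result};

/// Pack four ASCII characters into an OpenCV FOURCC code (little-endian),
/// matching the "avc1" codec string in the graph options.
fn fourcc(code: &[u8; 4]) -> i32 {
    (code[0] as i32)
        | ((code[1] as i32) << 8)
        | ((code[2] as i32) << 16)
        | ((code[3] as i32) << 24)
}

fn main() -> Result<()> {
    // Capture from the default camera.
    let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;

    // Grab one frame first so the writer can be opened with the real size.
    let mut frame = core::Mat::default();
    cap.read(&mut frame)?;
    let size = frame.size()?;

    // 30 fps is a placeholder; the graph takes the rate from the video header.
    let mut writer =
        videoio::VideoWriter::new("out.mp4", fourcc(b"avc1"), 30.0, size, true)?;

    // Write a short clip of roughly three seconds.
    for _ in 0..90 {
        if !cap.read(&mut frame)? {
            break;
        }
        if frame.size()?.width > 0 {
            writer.write(&frame)?;
        }
    }
    Ok(())
}
```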
@ -0,0 +1,180 @@
# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
# Used in the example in
|
||||
# mediapipe/examples/desktop/object_detection:object_detection_tflite.
|
||||
|
||||
# max_queue_size limits the number of packets enqueued on any input stream
|
||||
# by throttling inputs to the graph. This makes the graph only process one
|
||||
# frame at a time.
|
||||
max_queue_size: 1
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "IMAGE:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 320
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on CPU into an image tensor as a
|
||||
# TfLiteTensor. The zero_center option is set to true to normalize the
|
||||
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE:transformed_input_video"
|
||||
output_stream: "TENSORS:image_tensor"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
|
||||
zero_center: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS:image_tensor"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 320
|
||||
input_size_width: 320
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 91
|
||||
num_boxes: 2034
|
||||
num_coords: 4
|
||||
ignore_classes: 0
|
||||
apply_exponential_on_box_size: true
|
||||
|
||||
x_scale: 10.0
|
||||
y_scale: 10.0
|
||||
h_scale: 5.0
|
||||
w_scale: 5.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
min_score_threshold: 0.6
|
||||
max_num_detections: 5
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
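The NonMaxSuppressionCalculator options in this graph keep at most 5 detections whose score is at least 0.6 and suppress any box that overlaps a higher-scoring one by more than 0.4 IoU. Here is a compact, dependency-free Rust sketch of that greedy IoU suppression, intended only to illustrate the thresholds; the calculator itself supports more overlap types and behaviors than shown.

```rust
/// Axis-aligned box with a detection score, in normalized coordinates.
#[derive(Clone, Copy)]
struct Detection {
    x_min: f32,
    y_min: f32,
    x_max: f32,
    y_max: f32,
    score: f32,
}

/// Intersection-over-union of two boxes (the overlap_type selected above).
fn iou(a: &Detection, b: &Detection) -> f32 {
    let ix = (a.x_max.min(b.x_max) - a.x_min.max(b.x_min)).max(0.0);
    let iy = (a.y_max.min(b.y_max) - a.y_min.max(b.y_min)).max(0.0);
    let inter = ix * iy;
    let area_a = (a.x_max - a.x_min) * (a.y_max - a.y_min);
    let area_b = (b.x_max - b.x_min) * (b.y_max - b.y_min);
    inter / (area_a + area_b - inter + 1e-6)
}

/// Greedy NMS with the thresholds used by the graph above: drop low-score
/// boxes, suppress overlaps above 0.4 IoU, keep at most 5 detections.
fn non_max_suppression(mut dets: Vec<Detection>) -> Vec<Detection> {
    const MIN_SCORE: f32 = 0.6;
    const MIN_SUPPRESSION_IOU: f32 = 0.4;
    const MAX_DETECTIONS: usize = 5;

    dets.retain(|d| d.score >= MIN_SCORE);
    dets.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());

    let mut kept: Vec<Detection> = Vec::new();
    for d in dets {
        if kept.len() == MAX_DETECTIONS {
            break;
        }
        if kept.iter().all(|k| iou(k, &d) < MIN_SUPPRESSION_IOU) {
            kept.push(d);
        }
    }
    kept
}

fn main() {
    let dets = vec![
        Detection { x_min: 0.10, y_min: 0.1, x_max: 0.40, y_max: 0.4, score: 0.90 },
        Detection { x_min: 0.12, y_min: 0.1, x_max: 0.42, y_max: 0.4, score: 0.70 },
        Detection { x_min: 0.60, y_min: 0.6, x_max: 0.80, y_max: 0.8, score: 0.65 },
    ];
    println!("kept {} detections", non_max_suppression(dets).len());
}
```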
@ -0,0 +1,193 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on CPU.
|
||||
# Used in the examples in
|
||||
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectioncpu and
|
||||
# mediapipe/examples/ios/objectdetectioncpu.
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Transfers the input image from GPU to CPU memory for the purpose of
|
||||
# demonstrating a CPU-based pipeline. Note that the input image on GPU has the
|
||||
# origin defined at the bottom-left corner (OpenGL convention). As a result,
|
||||
# the transferred image on CPU also shares the same representation.
|
||||
node: {
|
||||
calculator: "GpuBufferToImageFrameCalculator"
|
||||
input_stream: "input_video"
|
||||
output_stream: "input_video_cpu"
|
||||
}
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
|
||||
# generating the corresponding detections before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessary computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video_cpu"
|
||||
input_stream: "FINISHED:detections"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video_cpu"
|
||||
}
|
||||
|
||||
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video_cpu"
|
||||
output_stream: "IMAGE:transformed_input_video_cpu"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 320
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on CPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE:transformed_input_video_cpu"
|
||||
output_stream: "TENSORS:image_tensor"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS:image_tensor"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 320
|
||||
input_size_width: 320
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 91
|
||||
num_boxes: 2034
|
||||
num_coords: 4
|
||||
ignore_classes: 0
|
||||
sigmoid_score: true
|
||||
apply_exponential_on_box_size: true
|
||||
x_scale: 10.0
|
||||
y_scale: 10.0
|
||||
h_scale: 5.0
|
||||
w_scale: 5.0
|
||||
min_score_thresh: 0.6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
max_num_detections: 3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
return_empty_detections: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE:throttled_input_video_cpu"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE:output_video_cpu"
|
||||
}
|
||||
|
||||
# Transfers the annotated image from CPU back to GPU memory, to be sent out of
|
||||
# the graph.
|
||||
node: {
|
||||
calculator: "ImageFrameToGpuBufferCalculator"
|
||||
input_stream: "output_video_cpu"
|
||||
output_stream: "output_video"
|
||||
}
@ -0,0 +1,175 @@
# MediaPipe graph that performs object detection with TensorFlow Lite on GPU.
|
||||
# Used in the examples in
|
||||
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectiongpu and
|
||||
# mediapipe/examples/ios/objectdetectiongpu.
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for
|
||||
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
|
||||
# generating the corresponding detections before it passes through another
|
||||
# image. All images that come in while waiting are dropped, limiting the number
|
||||
# of in-flight images between this calculator and
|
||||
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
|
||||
# from queuing up incoming images and data excessively, which leads to increased
|
||||
# latency and memory usage, unwanted in real-time mobile applications. It also
|
||||
# eliminates unnecessarily computation, e.g., a transformed image produced by
|
||||
# ImageTransformationCalculator may get dropped downstream if the subsequent
|
||||
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
|
||||
# processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:detections"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
output_stream: "IMAGE_GPU:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 320
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on GPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE_GPU:transformed_input_video"
|
||||
output_stream: "TENSORS_GPU:image_tensor"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS_GPU:image_tensor"
|
||||
output_stream: "TENSORS_GPU:detection_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 320
|
||||
input_size_width: 320
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS_GPU:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 91
|
||||
num_boxes: 2034
|
||||
num_coords: 4
|
||||
ignore_classes: 0
|
||||
sigmoid_score: true
|
||||
apply_exponential_on_box_size: true
|
||||
x_scale: 10.0
|
||||
y_scale: 10.0
|
||||
h_scale: 5.0
|
||||
w_scale: 5.0
|
||||
min_score_thresh: 0.6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
max_num_detections: 3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
return_empty_detections: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "IMAGE_GPU:throttled_input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "IMAGE_GPU:output_video"
|
||||
}
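As the comments in these graphs explain, ImageTransformationCalculator stretches every frame to 320x320 and deliberately ignores the aspect ratio. For experimenting outside the graph, the equivalent CPU preprocessing can be reproduced with the opencv crate already pinned in Cargo.toml; this is only a sketch, and the blank frame built in main() is a placeholder standing in for a real camera image.

```rust
use opencv::{core, imgproc, prelude::*, Result};

/// Stretch `src` to the 320x320 input expected by the SSD model, mirroring the
/// STRETCH scale mode of ImageTransformationCalculator (aspect ratio is not kept).
fn stretch_to_model_input(src: &core::Mat) -> Result<core::Mat> {
    let mut dst = core::Mat::default();
    imgproc::resize(
        src,
        &mut dst,
        core::Size::new(320, 320),
        0.0, // fx/fy are unused when an explicit target size is given
        0.0,
        imgproc::INTER_LINEAR,
    )?;
    Ok(dst)
}

fn main() -> Result<()> {
    // Placeholder input: a blank 480x640 BGR frame instead of a camera capture.
    let src = core::Mat::new_rows_cols_with_default(
        480,
        640,
        core::CV_8UC3,
        core::Scalar::all(0.0),
    )?;
    let resized = stretch_to_model_input(&src)?;
    println!("resized to {:?}", resized.size()?);
    Ok(())
}
```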
80
mediapipe/graphs/object_detection_3d/BUILD
Normal file
@ -0,0 +1,80 @@
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
exports_files(glob([
|
||||
"*.pbtxt",
|
||||
]))
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_cropping_calculator",
|
||||
"//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
|
||||
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
|
||||
"//mediapipe/modules/objectron:objectron_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators_1stage",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:packet_resampler_calculator",
|
||||
"//mediapipe/calculators/image:image_cropping_calculator",
|
||||
"//mediapipe/gpu:gl_scaler_calculator",
|
||||
"//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
|
||||
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
|
||||
"//mediapipe/modules/objectron:objectron_detection_1stage_gpu",
|
||||
"//mediapipe/modules/objectron:objectron_tracking_1stage_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_cpu_calculators",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||
"//mediapipe/graphs/object_detection_3d/subgraphs:renderer_cpu",
|
||||
"//mediapipe/modules/objectron:objectron_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_gpu_binary_graph",
|
||||
graph = "object_occlusion_tracking.pbtxt",
|
||||
output_name = "mobile_gpu_binary_graph.binarypb",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_gpu_1stage_binary_graph",
|
||||
graph = "object_occlusion_tracking_1stage.pbtxt",
|
||||
output_name = "mobile_gpu_1stage_binary_graph.binarypb",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":mobile_calculators_1stage"],
|
||||
)
|
113
mediapipe/graphs/object_detection_3d/calculators/BUILD
Normal file
@ -0,0 +1,113 @@
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "gl_animation_overlay_calculator_proto",
|
||||
srcs = ["gl_animation_overlay_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "annotations_to_model_matrices_calculator_proto",
|
||||
srcs = ["annotations_to_model_matrices_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "model_matrix_proto",
|
||||
srcs = ["model_matrix.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "annotations_to_render_data_calculator_proto",
|
||||
srcs = ["annotations_to_render_data_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/util:color_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "gl_animation_overlay_calculator",
|
||||
srcs = ["gl_animation_overlay_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":gl_animation_overlay_calculator_cc_proto",
|
||||
":model_matrix_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/gpu:gl_calculator_helper",
|
||||
"//mediapipe/gpu:shader_util",
|
||||
"//mediapipe/modules/objectron/calculators:camera_parameters_cc_proto",
|
||||
"//mediapipe/util/android:asset_manager_util",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "annotations_to_model_matrices_calculator",
|
||||
srcs = ["annotations_to_model_matrices_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":annotations_to_model_matrices_calculator_cc_proto",
|
||||
":model_matrix_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/modules/objectron/calculators:annotation_cc_proto",
|
||||
"//mediapipe/modules/objectron/calculators:box",
|
||||
"//mediapipe/util:color_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "annotations_to_render_data_calculator",
|
||||
srcs = ["annotations_to_render_data_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":annotations_to_render_data_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/modules/objectron/calculators:annotation_cc_proto",
|
||||
"//mediapipe/util:color_cc_proto",
|
||||
"//mediapipe/util:render_data_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
@ -0,0 +1,215 @@
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "Eigen/Dense"
|
||||
#include "Eigen/Geometry"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_model_matrices_calculator.pb.h"
|
||||
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
|
||||
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
|
||||
#include "mediapipe/modules/objectron/calculators/box.h"
|
||||
#include "mediapipe/util/color.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kAnnotationTag[] = "ANNOTATIONS";
|
||||
constexpr char kModelMatricesTag[] = "MODEL_MATRICES";
|
||||
|
||||
using Matrix3fRM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
|
||||
using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Converts the box prediction from Objectron Model to the Model matrices
|
||||
// to be rendered.
|
||||
//
|
||||
// Input:
|
||||
// ANNOTATIONS - Frame annotations with lifted 3D points, the points are in
|
||||
// Objectron coordinate system.
|
||||
// Output:
|
||||
// MODEL_MATRICES - Result ModelMatrices, in OpenGL coordinate system.
|
||||
//
|
||||
// Usage example:
|
||||
// node {
|
||||
// calculator: "AnnotationsToModelMatricesCalculator"
|
||||
// input_stream: "ANNOTATIONS:objects"
|
||||
// output_stream: "MODEL_MATRICES:model_matrices"
|
||||
//}
|
||||
|
||||
class AnnotationsToModelMatricesCalculator : public CalculatorBase {
|
||||
public:
|
||||
AnnotationsToModelMatricesCalculator() {}
|
||||
~AnnotationsToModelMatricesCalculator() override {}
|
||||
AnnotationsToModelMatricesCalculator(
|
||||
const AnnotationsToModelMatricesCalculator&) = delete;
|
||||
AnnotationsToModelMatricesCalculator& operator=(
|
||||
const AnnotationsToModelMatricesCalculator&) = delete;
|
||||
|
||||
static absl::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override;
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
absl::Status GetModelMatricesForAnnotations(
|
||||
const FrameAnnotation& annotations,
|
||||
TimedModelMatrixProtoList* model_matrix_list);
|
||||
|
||||
AnnotationsToModelMatricesCalculatorOptions options_;
|
||||
Eigen::Vector3f model_scale_;
|
||||
Matrix4fRM model_transformation_;
|
||||
};
|
||||
REGISTER_CALCULATOR(AnnotationsToModelMatricesCalculator);
|
||||
|
||||
absl::Status AnnotationsToModelMatricesCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
|
||||
if (cc->Inputs().HasTag(kAnnotationTag)) {
|
||||
cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kModelMatricesTag)) {
|
||||
cc->Outputs().Tag(kModelMatricesTag).Set<TimedModelMatrixProtoList>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("MODEL_SCALE")) {
|
||||
cc->InputSidePackets().Tag("MODEL_SCALE").Set<float[]>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("MODEL_TRANSFORMATION")) {
|
||||
cc->InputSidePackets().Tag("MODEL_TRANSFORMATION").Set<float[]>();
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status AnnotationsToModelMatricesCalculator::Open(CalculatorContext* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag));
|
||||
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
options_ = cc->Options<AnnotationsToModelMatricesCalculatorOptions>();
|
||||
|
||||
if (cc->InputSidePackets().HasTag("MODEL_SCALE")) {
|
||||
model_scale_ = Eigen::Map<const Eigen::Vector3f>(
|
||||
cc->InputSidePackets().Tag("MODEL_SCALE").Get<float[]>());
|
||||
} else if (options_.model_scale_size() == 3) {
|
||||
model_scale_ =
|
||||
Eigen::Map<const Eigen::Vector3f>(options_.model_scale().data());
|
||||
} else {
|
||||
model_scale_.setOnes();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("MODEL_TRANSFORMATION")) {
|
||||
model_transformation_ = Eigen::Map<const Matrix4fRM>(
|
||||
cc->InputSidePackets().Tag("MODEL_TRANSFORMATION").Get<float[]>());
|
||||
} else if (options_.model_transformation_size() == 16) {
|
||||
model_transformation_ =
|
||||
Eigen::Map<const Matrix4fRM>(options_.model_transformation().data());
|
||||
} else {
|
||||
model_transformation_.setIdentity();
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status AnnotationsToModelMatricesCalculator::Process(
|
||||
CalculatorContext* cc) {
|
||||
auto model_matrices = std::make_unique<TimedModelMatrixProtoList>();
|
||||
|
||||
const FrameAnnotation& annotations =
|
||||
cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
|
||||
|
||||
if (!GetModelMatricesForAnnotations(annotations, model_matrices.get()).ok()) {
|
||||
return absl::InvalidArgumentError("Error in GetModelMatricesForBoxes");
|
||||
}
|
||||
cc->Outputs()
|
||||
.Tag(kModelMatricesTag)
|
||||
.Add(model_matrices.release(), cc->InputTimestamp());
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status
|
||||
AnnotationsToModelMatricesCalculator::GetModelMatricesForAnnotations(
|
||||
const FrameAnnotation& annotations,
|
||||
TimedModelMatrixProtoList* model_matrix_list) {
|
||||
if (model_matrix_list == nullptr) {
|
||||
return absl::InvalidArgumentError("model_matrix_list is nullptr");
|
||||
}
|
||||
model_matrix_list->clear_model_matrix();
|
||||
|
||||
for (const auto& object : annotations.annotations()) {
|
||||
TimedModelMatrixProto* model_matrix = model_matrix_list->add_model_matrix();
|
||||
model_matrix->set_id(object.object_id());
|
||||
|
||||
// Get object rotation, translation and scale.
|
||||
const auto object_rotation =
|
||||
Eigen::Map<const Matrix3fRM>(object.rotation().data());
|
||||
const auto object_translation =
|
||||
Eigen::Map<const Eigen::Vector3f>(object.translation().data());
|
||||
const auto object_scale =
|
||||
Eigen::Map<const Eigen::Vector3f>(object.scale().data());
|
||||
|
||||
// Compose object transformation matrix.
|
||||
Matrix4fRM object_transformation;
|
||||
object_transformation.setIdentity();
|
||||
object_transformation.topLeftCorner<3, 3>() = object_rotation;
|
||||
object_transformation.topRightCorner<3, 1>() = object_translation;
|
||||
|
||||
Matrix4fRM model_view;
|
||||
Matrix4fRM objectron_model;
|
||||
// The reference view is
|
||||
//
|
||||
// ref << 0., 0., 1., 0.,
|
||||
// -1., 0., 0., 0.,
|
||||
// 0., -1., 0., 0.,
|
||||
// 0., 0., 0., 1.;
|
||||
// We have objectron_model * model = model_view, to get objectron_model:
|
||||
// objectron_model = model_view * model^-1
|
||||
// clang-format off
|
||||
objectron_model << 1.0, 0.0, 0.0, 0.0,
|
||||
0.0, -1., 0.0, 0.0,
|
||||
0.0, 0.0, 1.0, 0.0,
|
||||
0.0, 0.0, 0.0, 1.0;
|
||||
// clang-format on
|
||||
|
||||
// Re-scale the CAD model to the scale of the estimated bounding box.
|
||||
const Eigen::Vector3f scale = model_scale_.cwiseProduct(object_scale);
|
||||
const Matrix4fRM model =
|
||||
model_transformation_.array().colwise() * scale.homogeneous().array();
|
||||
|
||||
// Finally compute the model_view matrix.
|
||||
model_view = objectron_model * object_transformation * model;
|
||||
|
||||
for (int i = 0; i < model_view.rows(); ++i) {
|
||||
for (int j = 0; j < model_view.cols(); ++j) {
|
||||
model_matrix->add_matrix_entries(model_view(i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace mediapipe
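The Eigen-based composition in Process() above reduces to model_view = objectron_model * object_transformation * (diag(scale, 1) * model_transformation), with scale being model_scale multiplied elementwise by object_scale. Restated with cgmath (already a dependency of this crate) purely as an illustration of the math, not a port of the calculator; the values in main() are invented placeholders.

```rust
use cgmath::{Matrix4, SquareMatrix, Vector3};

/// Mirrors GetModelMatricesForAnnotations(): compose the OpenGL model_view
/// matrix from the detected box pose, the box size, and the CAD-model options.
fn compose_model_view(
    object_rotation: Matrix4<f32>,      // rotation of the detected box
    object_translation: Vector3<f32>,   // translation of the detected box
    object_scale: Vector3<f32>,         // estimated box size
    model_scale: Vector3<f32>,          // user re-scale of the CAD model
    model_transformation: Matrix4<f32>, // model -> Objectron coordinate change
) -> Matrix4<f32> {
    // Hard-coded y-flip, as in the objectron_model matrix of the C++ code.
    let objectron_model = Matrix4::from_nonuniform_scale(1.0, -1.0, 1.0);

    // Rigid transform of the detected object: translation * rotation.
    let object_transformation =
        Matrix4::from_translation(object_translation) * object_rotation;

    // Row-wise re-scale of the CAD model, i.e. diag(scale, 1) * model_transformation.
    let s = Vector3::new(
        model_scale.x * object_scale.x,
        model_scale.y * object_scale.y,
        model_scale.z * object_scale.z,
    );
    let scaled_model =
        Matrix4::from_nonuniform_scale(s.x, s.y, s.z) * model_transformation;

    objectron_model * object_transformation * scaled_model
}

fn main() {
    // Identity rotation/transformation and unit scales: placeholder values only.
    let m = compose_model_view(
        Matrix4::identity(),
        Vector3::new(0.0, 0.0, -1.0),
        Vector3::new(1.0, 1.0, 1.0),
        Vector3::new(1.0, 1.0, 1.0),
        Matrix4::identity(),
    );
    println!("{:?}", m);
}
```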
@ -0,0 +1,33 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message AnnotationsToModelMatricesCalculatorOptions {
  extend CalculatorOptions {
    optional AnnotationsToModelMatricesCalculatorOptions ext = 290166283;
  }

  // Vector of size 3 indicating the scale vector [x, y, z]. We will re-scale
  // the model size with this vector. (Defaults to [1., 1., 1.])
  repeated float model_scale = 1;

  // 4x4 Row major matrix denoting the transformation from the model to the
  // Deep Pursuit 3D coordinate system (where front is +z, and up is +y).
  repeated float model_transformation = 2;
}
@ -0,0 +1,271 @@
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_render_data_calculator.pb.h"
|
||||
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
|
||||
#include "mediapipe/util/color.pb.h"
|
||||
#include "mediapipe/util/render_data.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kAnnotationTag[] = "ANNOTATIONS";
|
||||
constexpr char kRenderDataTag[] = "RENDER_DATA";
|
||||
constexpr char kKeypointLabel[] = "KEYPOINT";
|
||||
constexpr int kMaxLandmarkThickness = 18;
|
||||
|
||||
inline void SetColor(RenderAnnotation* annotation, const Color& color) {
|
||||
annotation->mutable_color()->set_r(color.r());
|
||||
annotation->mutable_color()->set_g(color.g());
|
||||
annotation->mutable_color()->set_b(color.b());
|
||||
}
|
||||
|
||||
// Remap x from range [lo hi] to range [0 1] then multiply by scale.
|
||||
inline float Remap(float x, float lo, float hi, float scale) {
|
||||
return (x - lo) / (hi - lo + 1e-6) * scale;
|
||||
}
|
||||
|
||||
inline void GetMinMaxZ(const FrameAnnotation& annotations, float* z_min,
|
||||
float* z_max) {
|
||||
*z_min = std::numeric_limits<float>::max();
|
||||
*z_max = std::numeric_limits<float>::min();
|
||||
// Use a global depth scale for all the objects in the scene
|
||||
for (const auto& object : annotations.annotations()) {
|
||||
for (const auto& keypoint : object.keypoints()) {
|
||||
*z_min = std::min(keypoint.point_2d().depth(), *z_min);
|
||||
*z_max = std::max(keypoint.point_2d().depth(), *z_max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SetColorSizeValueFromZ(float z, float z_min, float z_max,
|
||||
RenderAnnotation* render_annotation) {
|
||||
const int color_value = 255 - static_cast<int>(Remap(z, z_min, z_max, 255));
|
||||
::mediapipe::Color color;
|
||||
color.set_r(color_value);
|
||||
color.set_g(color_value);
|
||||
color.set_b(color_value);
|
||||
SetColor(render_annotation, color);
|
||||
const int thickness = static_cast<int>((1.f - Remap(z, z_min, z_max, 1)) *
|
||||
kMaxLandmarkThickness);
|
||||
render_annotation->set_thickness(thickness);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// A calculator that converts FrameAnnotation proto to RenderData proto for
|
||||
// visualization. The input should be the FrameAnnotation proto buffer. It is
|
||||
// also possible to specify the connections between landmarks.
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "AnnotationsToRenderDataCalculator"
|
||||
// input_stream: "ANNOTATIONS:annotations"
|
||||
// output_stream: "RENDER_DATA:render_data"
|
||||
// options {
|
||||
// [AnnotationsToRenderDataCalculator.ext] {
|
||||
// landmark_connections: [0, 1, 1, 2]
|
||||
// landmark_color { r: 0 g: 255 b: 0 }
|
||||
// connection_color { r: 0 g: 255 b: 0 }
|
||||
// thickness: 4.0
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class AnnotationsToRenderDataCalculator : public CalculatorBase {
|
||||
public:
|
||||
AnnotationsToRenderDataCalculator() {}
|
||||
~AnnotationsToRenderDataCalculator() override {}
|
||||
AnnotationsToRenderDataCalculator(const AnnotationsToRenderDataCalculator&) =
|
||||
delete;
|
||||
AnnotationsToRenderDataCalculator& operator=(
|
||||
const AnnotationsToRenderDataCalculator&) = delete;
|
||||
|
||||
static absl::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override;
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
static void SetRenderAnnotationColorThickness(
|
||||
const AnnotationsToRenderDataCalculatorOptions& options,
|
||||
RenderAnnotation* render_annotation);
|
||||
static RenderAnnotation* AddPointRenderData(
|
||||
const AnnotationsToRenderDataCalculatorOptions& options,
|
||||
RenderData* render_data);
|
||||
|
||||
// Adds a command to draw a line in the rendering queue. The line is drawn
// from (start_x, start_y) to (end_x, end_y). The input x, y can be either in
// pixels or in normalized coordinates [0, 1], as indicated by the normalized
// flag.
|
||||
static void AddConnectionToRenderData(
|
||||
float start_x, float start_y, float end_x, float end_y,
|
||||
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
|
||||
RenderData* render_data);
|
||||
|
||||
// Same as the function above, but instead of using color data to render the
// line, it re-colors the line according to the two depth values: gray_val1 is
// the color of the starting point and gray_val2 is the color of the ending
// point. The line is colored with a gradient from gray_val1 to gray_val2,
// where gray_val ranges from 0 (black) to 255 (white).
|
||||
static void AddConnectionToRenderData(
|
||||
float start_x, float start_y, float end_x, float end_y,
|
||||
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
|
||||
int gray_val1, int gray_val2, RenderData* render_data);
|
||||
|
||||
AnnotationsToRenderDataCalculatorOptions options_;
|
||||
};
|
||||
REGISTER_CALCULATOR(AnnotationsToRenderDataCalculator);
|
||||
|
||||
absl::Status AnnotationsToRenderDataCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
|
||||
if (cc->Inputs().HasTag(kAnnotationTag)) {
|
||||
cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
|
||||
}
|
||||
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status AnnotationsToRenderDataCalculator::Open(CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
options_ = cc->Options<AnnotationsToRenderDataCalculatorOptions>();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status AnnotationsToRenderDataCalculator::Process(CalculatorContext* cc) {
|
||||
auto render_data = absl::make_unique<RenderData>();
|
||||
bool visualize_depth = options_.visualize_landmark_depth();
|
||||
float z_min = 0.f;
|
||||
float z_max = 0.f;
|
||||
|
||||
if (cc->Inputs().HasTag(kAnnotationTag)) {
|
||||
const auto& annotations =
|
||||
cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
|
||||
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
|
||||
<< "Number of entries in landmark connections must be a multiple of 2";
|
||||
|
||||
if (visualize_depth) {
|
||||
GetMinMaxZ(annotations, &z_min, &z_max);
|
||||
// Only change rendering if there are actually z values other than 0.
|
||||
visualize_depth &= ((z_max - z_min) > 1e-3);
|
||||
}
|
||||
|
||||
for (const auto& object : annotations.annotations()) {
|
||||
for (const auto& keypoint : object.keypoints()) {
|
||||
auto* keypoint_data_render =
|
||||
AddPointRenderData(options_, render_data.get());
|
||||
auto* point = keypoint_data_render->mutable_point();
|
||||
if (visualize_depth) {
|
||||
SetColorSizeValueFromZ(keypoint.point_2d().depth(), z_min, z_max,
|
||||
keypoint_data_render);
|
||||
}
|
||||
|
||||
point->set_normalized(true);
|
||||
point->set_x(keypoint.point_2d().x());
|
||||
point->set_y(keypoint.point_2d().y());
|
||||
}
|
||||
|
||||
// Add edges
|
||||
for (int i = 0; i < options_.landmark_connections_size(); i += 2) {
|
||||
const auto& ld0 =
|
||||
object.keypoints(options_.landmark_connections(i)).point_2d();
|
||||
const auto& ld1 =
|
||||
object.keypoints(options_.landmark_connections(i + 1)).point_2d();
|
||||
const bool normalized = true;
|
||||
|
||||
if (visualize_depth) {
|
||||
const int gray_val1 =
|
||||
255 - static_cast<int>(Remap(ld0.depth(), z_min, z_max, 255));
|
||||
const int gray_val2 =
|
||||
255 - static_cast<int>(Remap(ld1.depth(), z_min, z_max, 255));
|
||||
AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
|
||||
options_, normalized, gray_val1, gray_val2,
|
||||
render_data.get());
|
||||
} else {
|
||||
AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
|
||||
options_, normalized, render_data.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cc->Outputs()
|
||||
.Tag(kRenderDataTag)
|
||||
.Add(render_data.release(), cc->InputTimestamp());
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
|
||||
float start_x, float start_y, float end_x, float end_y,
|
||||
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
|
||||
int gray_val1, int gray_val2, RenderData* render_data) {
|
||||
auto* connection_annotation = render_data->add_render_annotations();
|
||||
RenderAnnotation::GradientLine* line =
|
||||
connection_annotation->mutable_gradient_line();
|
||||
line->set_x_start(start_x);
|
||||
line->set_y_start(start_y);
|
||||
line->set_x_end(end_x);
|
||||
line->set_y_end(end_y);
|
||||
line->set_normalized(normalized);
|
||||
line->mutable_color1()->set_r(gray_val1);
|
||||
line->mutable_color1()->set_g(gray_val1);
|
||||
line->mutable_color1()->set_b(gray_val1);
|
||||
line->mutable_color2()->set_r(gray_val2);
|
||||
line->mutable_color2()->set_g(gray_val2);
|
||||
line->mutable_color2()->set_b(gray_val2);
|
||||
connection_annotation->set_thickness(options.thickness());
|
||||
}
|
||||
|
||||
void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
|
||||
float start_x, float start_y, float end_x, float end_y,
|
||||
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
|
||||
RenderData* render_data) {
|
||||
auto* connection_annotation = render_data->add_render_annotations();
|
||||
RenderAnnotation::Line* line = connection_annotation->mutable_line();
|
||||
line->set_x_start(start_x);
|
||||
line->set_y_start(start_y);
|
||||
line->set_x_end(end_x);
|
||||
line->set_y_end(end_y);
|
||||
line->set_normalized(normalized);
|
||||
SetColor(connection_annotation, options.connection_color());
|
||||
connection_annotation->set_thickness(options.thickness());
|
||||
}
|
||||
|
||||
RenderAnnotation* AnnotationsToRenderDataCalculator::AddPointRenderData(
|
||||
const AnnotationsToRenderDataCalculatorOptions& options,
|
||||
RenderData* render_data) {
|
||||
auto* landmark_data_annotation = render_data->add_render_annotations();
|
||||
landmark_data_annotation->set_scene_tag(kKeypointLabel);
|
||||
SetRenderAnnotationColorThickness(options, landmark_data_annotation);
|
||||
return landmark_data_annotation;
|
||||
}
|
||||
|
||||
void AnnotationsToRenderDataCalculator::SetRenderAnnotationColorThickness(
|
||||
const AnnotationsToRenderDataCalculatorOptions& options,
|
||||
RenderAnnotation* render_annotation) {
|
||||
SetColor(render_annotation, options.landmark_color());
|
||||
render_annotation->set_thickness(options.thickness());
|
||||
}
|
||||
|
||||
} // namespace mediapipe
@ -0,0 +1,43 @@
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/util/color.proto";
|
||||
|
||||
message AnnotationsToRenderDataCalculatorOptions {
|
||||
extend CalculatorOptions {
|
||||
optional AnnotationsToRenderDataCalculatorOptions ext = 267644238;
|
||||
}
|
||||
|
||||
// Specifies the landmarks to be connected in the drawing. For example, the
|
||||
// landmark_connections value of [0, 1, 1, 2] specifies two connections: one
|
||||
// that connects landmarks with index 0 and 1, and another that connects
|
||||
// landmarks with index 1 and 2.
|
||||
repeated int32 landmark_connections = 1;
|
||||
|
||||
// Color of the landmarks.
|
||||
optional Color landmark_color = 2;
|
||||
// Color of the connections.
|
||||
optional Color connection_color = 3;
|
||||
|
||||
// Thickness of the drawing of landmarks and connections.
|
||||
optional double thickness = 4 [default = 1.0];
|
||||
|
||||
// Change the color and size of rendered landmarks based on their z values.
|
||||
optional bool visualize_landmark_depth = 5 [default = true];
|
||||
}
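A minimal sketch of how these options could be filled in from C++ through the generated protobuf API; the landmark indices below are hypothetical and only illustrate that landmark_connections is consumed in pairs.

#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_render_data_calculator.pb.h"

// Connects landmarks 0-1, 1-2 and 2-3 and draws everything in green with a
// 4 px stroke. The number of entries must stay a multiple of 2, matching the
// RET_CHECK in AnnotationsToRenderDataCalculator::Process.
mediapipe::AnnotationsToRenderDataCalculatorOptions MakeExampleOptions() {
  mediapipe::AnnotationsToRenderDataCalculatorOptions options;
  for (int index : {0, 1, 1, 2, 2, 3}) {
    options.add_landmark_connections(index);
  }
  options.mutable_landmark_color()->set_g(255);
  options.mutable_connection_color()->set_g(255);
  options.set_thickness(4.0);
  options.set_visualize_landmark_depth(true);
  return options;
}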
@ -0,0 +1,947 @@
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
#include "mediapipe/util/android/asset_manager_util.h"
|
||||
#else
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#endif
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/gpu/gl_calculator_helper.h"
|
||||
#include "mediapipe/gpu/shader_util.h"
|
||||
#include "mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.pb.h"
|
||||
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
|
||||
#include "mediapipe/modules/objectron/calculators/camera_parameters.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
#if defined(GL_DEBUG)
|
||||
#define GLCHECK(command) \
|
||||
command; \
|
||||
if (int err = glGetError()) LOG(ERROR) << "GL error detected: " << err;
|
||||
#else
|
||||
#define GLCHECK(command) command
|
||||
#endif
|
||||
|
||||
// For ease of use, we prefer ImageFrame on Android and GpuBuffer otherwise.
|
||||
#if defined(__ANDROID__)
|
||||
typedef ImageFrame AssetTextureFormat;
|
||||
#else
|
||||
typedef GpuBuffer AssetTextureFormat;
|
||||
#endif
|
||||
|
||||
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, ATTRIB_NORMAL, NUM_ATTRIBUTES };
|
||||
static const int kNumMatrixEntries = 16;
|
||||
|
||||
// Hard-coded MVP Matrix for testing.
|
||||
static const float kModelMatrix[] = {0.83704215, -0.36174262, 0.41049102, 0.0,
|
||||
0.06146407, 0.8076706, 0.5864218, 0.0,
|
||||
-0.54367524, -0.4656292, 0.69828844, 0.0,
|
||||
0.0, 0.0, -98.64117, 1.0};
|
||||
|
||||
// Loads a texture from an input side packet, streams in an animation file
// from a filename given in another input side packet, and renders the
// animation over the screen according to the input timestamp and desired
// animation FPS.
|
||||
//
|
||||
// Inputs:
|
||||
// VIDEO (GpuBuffer, optional):
|
||||
// If provided, the input buffer will be assumed to be unique, and will be
|
||||
// consumed by this calculator and rendered to directly. The output video
|
||||
// buffer will then be the released reference to the input video buffer.
|
||||
// MODEL_MATRICES (TimedModelMatrixProtoList, optional):
|
||||
// If provided, will set the model matrices for the objects to be rendered
|
||||
// during future rendering calls.
|
||||
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
|
||||
// Texture to use with animation file. Texture is REQUIRED to be passed into
|
||||
// the calculator, but can be passed in as a Side Packet OR Input Stream.
|
||||
//
|
||||
// Input side packets:
|
||||
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
|
||||
// Texture to use with animation file. Texture is REQUIRED to be passed into
|
||||
// the calculator, but can be passed in as a Side Packet OR Input Stream.
|
||||
// ANIMATION_ASSET (String, required):
|
||||
// Path of animation file to load and render. The file format expects an
|
||||
// arbitrary number of animation frames, concatenated directly together,
|
||||
// with each animation frame looking like:
|
||||
// HEADER
|
||||
// VERTICES
|
||||
// TEXTURE_COORDS
|
||||
// INDICES
|
||||
// The header consists of 3 int32 lengths, the sizes of the vertex data,
|
||||
// the texcoord data, and the index data, respectively. Let us call those
|
||||
// N1, N2, and N3. Then we expect N1 float32's for vertex information
|
||||
// (x1,y1,z1,x2,y2,z2,etc.), followed by N2 float32's for texcoord
|
||||
// information (u1,v1,u2,v2,u3,v3,etc.), followed by N3 shorts/int16's
|
||||
// for triangle indices (a1,b1,c1,a2,b2,c2,etc.).
|
||||
// CAMERA_PARAMETERS_PROTO_STRING (String, optional):
|
||||
// Serialized proto std::string of CameraParametersProto. We need this to
|
||||
// get the right aspect ratio and field of view.
|
||||
// Options:
|
||||
// aspect_ratio: the ratio between the rendered image width and height.
|
||||
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
|
||||
// is provided.
|
||||
// vertical_fov_degrees: vertical field of view in degrees.
|
||||
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
|
||||
// is provided.
|
||||
// z_clipping_plane_near: near plane value for z-clipping.
|
||||
// z_clipping_plane_far: far plane value for z-clipping.
|
||||
// animation_speed_fps: speed at which to cycle through animation frames (in
|
||||
// frames per second).
|
||||
//
|
||||
// Outputs:
|
||||
// OUTPUT, or index 0 (GpuBuffer):
|
||||
// Frames filled with the given texture.
|
||||
|
||||
// Simple helper-struct for containing the parsed geometry data from a 3D
|
||||
// animation frame for rendering.
|
||||
struct TriangleMesh {
|
||||
int index_count = 0; // Needed for glDrawElements rendering call
|
||||
std::unique_ptr<float[]> normals = nullptr;
|
||||
std::unique_ptr<float[]> vertices = nullptr;
|
||||
std::unique_ptr<float[]> texture_coords = nullptr;
|
||||
std::unique_ptr<int16[]> triangle_indices = nullptr;
|
||||
};
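// A stand-alone sketch (not taken from this calculator) of a writer for the
// ANIMATION_ASSET frame layout described above: three int32 counts followed
// by the raw vertex floats, texture-coordinate floats and int16 triangle
// indices, in host byte order, one frame after another. WriteAnimationFrame
// is a hypothetical helper name.
#include <cstdint>
#include <fstream>
#include <vector>

bool WriteAnimationFrame(std::ofstream& out,
                         const std::vector<float>& vertices,
                         const std::vector<float>& texture_coords,
                         const std::vector<int16_t>& indices) {
  const int32_t lengths[3] = {static_cast<int32_t>(vertices.size()),
                              static_cast<int32_t>(texture_coords.size()),
                              static_cast<int32_t>(indices.size())};
  // Header: the three array lengths expected by the loader.
  out.write(reinterpret_cast<const char*>(lengths), sizeof(lengths));
  // Body: raw dumps of the three arrays, in order.
  out.write(reinterpret_cast<const char*>(vertices.data()),
            vertices.size() * sizeof(float));
  out.write(reinterpret_cast<const char*>(texture_coords.data()),
            texture_coords.size() * sizeof(float));
  out.write(reinterpret_cast<const char*>(indices.data()),
            indices.size() * sizeof(int16_t));
  return static_cast<bool>(out);
}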
|
||||
|
||||
typedef std::unique_ptr<float[]> ModelMatrix;
|
||||
|
||||
} // namespace
|
||||
|
||||
class GlAnimationOverlayCalculator : public CalculatorBase {
|
||||
public:
|
||||
GlAnimationOverlayCalculator() {}
|
||||
~GlAnimationOverlayCalculator();
|
||||
|
||||
static absl::Status GetContract(CalculatorContract *cc);
|
||||
|
||||
absl::Status Open(CalculatorContext *cc) override;
|
||||
absl::Status Process(CalculatorContext *cc) override;
|
||||
|
||||
private:
|
||||
bool has_video_stream_ = false;
|
||||
bool has_model_matrix_stream_ = false;
|
||||
bool has_mask_model_matrix_stream_ = false;
|
||||
bool has_occlusion_mask_ = false;
|
||||
|
||||
GlCalculatorHelper helper_;
|
||||
bool initialized_ = false;
|
||||
GlTexture texture_;
|
||||
GlTexture mask_texture_;
|
||||
|
||||
GLuint renderbuffer_ = 0;
|
||||
bool depth_buffer_created_ = false;
|
||||
|
||||
GLuint program_ = 0;
|
||||
GLint texture_uniform_ = -1;
|
||||
GLint perspective_matrix_uniform_ = -1;
|
||||
GLint model_matrix_uniform_ = -1;
|
||||
|
||||
std::vector<TriangleMesh> triangle_meshes_;
|
||||
std::vector<TriangleMesh> mask_meshes_;
|
||||
Timestamp animation_start_time_;
|
||||
int frame_count_ = 0;
|
||||
float animation_speed_fps_;
|
||||
|
||||
std::vector<ModelMatrix> current_model_matrices_;
|
||||
std::vector<ModelMatrix> current_mask_model_matrices_;
|
||||
|
||||
// Perspective matrix for rendering, to be applied to all model matrices
|
||||
// prior to passing through to the shader as a MVP matrix. Initialized during
|
||||
// first image packet read.
|
||||
float perspective_matrix_[kNumMatrixEntries];
|
||||
|
||||
void ComputeAspectRatioAndFovFromCameraParameters(
|
||||
const CameraParametersProto &camera_parameters, float *aspect_ratio,
|
||||
float *vertical_fov_degrees);
|
||||
|
||||
int GetAnimationFrameIndex(Timestamp timestamp);
|
||||
absl::Status GlSetup();
|
||||
absl::Status GlBind(const TriangleMesh &triangle_mesh,
|
||||
const GlTexture &texture);
|
||||
absl::Status GlRender(const TriangleMesh &triangle_mesh,
|
||||
const float *model_matrix);
|
||||
void InitializePerspectiveMatrix(float aspect_ratio,
|
||||
float vertical_fov_degrees, float z_near,
|
||||
float z_far);
|
||||
void LoadModelMatrices(const TimedModelMatrixProtoList &model_matrices,
|
||||
std::vector<ModelMatrix> *current_model_matrices);
|
||||
void CalculateTriangleMeshNormals(int normals_len,
|
||||
TriangleMesh *triangle_mesh);
|
||||
void Normalize3f(float input[3]);
|
||||
|
||||
#if !defined(__ANDROID__)
|
||||
// Asset loading routine for all non-Android platforms.
|
||||
bool LoadAnimation(const std::string &filename);
|
||||
#else
|
||||
// Asset loading for all Android platforms.
|
||||
bool LoadAnimationAndroid(const std::string &filename,
|
||||
std::vector<TriangleMesh> *mesh);
|
||||
bool ReadBytesFromAsset(AAsset *asset, void *buffer, int num_bytes_to_read);
|
||||
#endif
|
||||
};
|
||||
REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
|
||||
|
||||
// static
|
||||
absl::Status GlAnimationOverlayCalculator::GetContract(CalculatorContract *cc) {
|
||||
MP_RETURN_IF_ERROR(
|
||||
GlCalculatorHelper::SetupInputSidePackets(&(cc->InputSidePackets())));
|
||||
if (cc->Inputs().HasTag("VIDEO")) {
|
||||
// Currently used only for size and timestamp.
|
||||
cc->Inputs().Tag("VIDEO").Set<GpuBuffer>();
|
||||
}
|
||||
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0).Set<GpuBuffer>();
|
||||
|
||||
if (cc->Inputs().HasTag("MODEL_MATRICES")) {
|
||||
cc->Inputs().Tag("MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
|
||||
}
|
||||
if (cc->Inputs().HasTag("MASK_MODEL_MATRICES")) {
|
||||
cc->Inputs().Tag("MASK_MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
|
||||
}
|
||||
|
||||
// Must have texture as Input Stream or Side Packet
|
||||
if (cc->InputSidePackets().HasTag("TEXTURE")) {
|
||||
cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
|
||||
} else {
|
||||
cc->Inputs().Tag("TEXTURE").Set<AssetTextureFormat>();
|
||||
}
|
||||
|
||||
cc->InputSidePackets().Tag("ANIMATION_ASSET").Set<std::string>();
|
||||
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
|
||||
cc->InputSidePackets()
|
||||
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
|
||||
.Set<std::string>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
|
||||
cc->InputSidePackets().Tag("MASK_TEXTURE").Set<AssetTextureFormat>();
|
||||
}
|
||||
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
|
||||
cc->InputSidePackets().Tag("MASK_ASSET").Set<std::string>();
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void GlAnimationOverlayCalculator::CalculateTriangleMeshNormals(
|
||||
int normals_len, TriangleMesh *triangle_mesh) {
|
||||
// Set triangle_mesh normals for shader usage
|
||||
triangle_mesh->normals.reset(new float[normals_len]);
|
||||
// Used for storing the vertex normals prior to averaging
|
||||
std::vector<float> vertex_normals_sum(normals_len, 0.0f);
|
||||
// Compute every triangle surface normal and store them for averaging
|
||||
for (int idx = 0; idx < triangle_mesh->index_count; idx += 3) {
|
||||
int v_idx[3];
|
||||
v_idx[0] = triangle_mesh->triangle_indices.get()[idx];
|
||||
v_idx[1] = triangle_mesh->triangle_indices.get()[idx + 1];
|
||||
v_idx[2] = triangle_mesh->triangle_indices.get()[idx + 2];
|
||||
// (V1) vertex X,Y,Z indices in triangle_mesh.vertices
|
||||
const float v1x = triangle_mesh->vertices[v_idx[0] * 3];
|
||||
const float v1y = triangle_mesh->vertices[v_idx[0] * 3 + 1];
|
||||
const float v1z = triangle_mesh->vertices[v_idx[0] * 3 + 2];
|
||||
// (V2) vertex X,Y,Z indices in triangle_mesh.vertices
|
||||
const float v2x = triangle_mesh->vertices[v_idx[1] * 3];
|
||||
const float v2y = triangle_mesh->vertices[v_idx[1] * 3 + 1];
|
||||
const float v2z = triangle_mesh->vertices[v_idx[1] * 3 + 2];
|
||||
// (V3) vertex X,Y,Z indices in triangle_mesh.vertices
|
||||
const float v3x = triangle_mesh->vertices[v_idx[2] * 3];
|
||||
const float v3y = triangle_mesh->vertices[v_idx[2] * 3 + 1];
|
||||
const float v3z = triangle_mesh->vertices[v_idx[2] * 3 + 2];
|
||||
// Calculate normals from vertices
|
||||
// V2 - V1
|
||||
const float ax = v2x - v1x;
|
||||
const float ay = v2y - v1y;
|
||||
const float az = v2z - v1z;
|
||||
// V3 - V1
|
||||
const float bx = v3x - v1x;
|
||||
const float by = v3y - v1y;
|
||||
const float bz = v3z - v1z;
|
||||
// Calculate cross product
|
||||
const float normal_x = ay * bz - az * by;
|
||||
const float normal_y = az * bx - ax * bz;
|
||||
const float normal_z = ax * by - ay * bx;
|
||||
// The normals calculated above must be normalized if we wish to prevent
// triangles with a larger surface area from dominating the normal
// calculations; however, none of our current models require this
// normalization.
|
||||
|
||||
// Add connected normal to each associated vertex
|
||||
// It is also necessary to increment each vertex denominator for averaging
|
||||
for (int i = 0; i < 3; i++) {
|
||||
vertex_normals_sum[v_idx[i] * 3] += normal_x;
|
||||
vertex_normals_sum[v_idx[i] * 3 + 1] += normal_y;
|
||||
vertex_normals_sum[v_idx[i] * 3 + 2] += normal_z;
|
||||
}
|
||||
}
|
||||
|
||||
// Combine all triangle normals connected to each vertex by adding the X,Y,Z
|
||||
// value of each adjacent triangle surface normal to every vertex and then
|
||||
// averaging the combined value.
|
||||
for (int idx = 0; idx < normals_len; idx += 3) {
|
||||
float normal[3];
|
||||
normal[0] = vertex_normals_sum[idx];
|
||||
normal[1] = vertex_normals_sum[idx + 1];
|
||||
normal[2] = vertex_normals_sum[idx + 2];
|
||||
Normalize3f(normal);
|
||||
triangle_mesh->normals.get()[idx] = normal[0];
|
||||
triangle_mesh->normals.get()[idx + 1] = normal[1];
|
||||
triangle_mesh->normals.get()[idx + 2] = normal[2];
|
||||
}
|
||||
}
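// Worked example of the triangle normal computed in the loop above
// (illustrative values only): for a triangle V1 = (0,0,0), V2 = (1,0,0),
// V3 = (0,1,0) we get A = V2 - V1 = (1,0,0) and B = V3 - V1 = (0,1,0), so
//   cross(A, B) = (Ay*Bz - Az*By, Az*Bx - Ax*Bz, Ax*By - Ay*Bx) = (0, 0, 1)
// and the triangle faces +Z. Each of its three vertices accumulates this
// vector in vertex_normals_sum before the final normalization pass.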
|
||||
|
||||
void GlAnimationOverlayCalculator::Normalize3f(float input[3]) {
|
||||
float product = 0.0;
|
||||
product += input[0] * input[0];
|
||||
product += input[1] * input[1];
|
||||
product += input[2] * input[2];
|
||||
float magnitude = sqrt(product);
|
||||
input[0] /= magnitude;
|
||||
input[1] /= magnitude;
|
||||
input[2] /= magnitude;
|
||||
}
|
||||
|
||||
// Helper function for initializing our perspective matrix.
|
||||
void GlAnimationOverlayCalculator::InitializePerspectiveMatrix(
|
||||
float aspect_ratio, float fov_degrees, float z_near, float z_far) {
|
||||
// Standard perspective projection matrix calculations.
|
||||
const float f = 1.0f / std::tan(fov_degrees * M_PI / 360.0f);
|
||||
for (int i = 0; i < kNumMatrixEntries; i++) {
|
||||
perspective_matrix_[i] = 0;
|
||||
}
|
||||
const float denom = 1.0f / (z_near - z_far);
|
||||
perspective_matrix_[0] = f / aspect_ratio;
|
||||
perspective_matrix_[5] = f;
|
||||
perspective_matrix_[10] = (z_near + z_far) * denom;
|
||||
perspective_matrix_[11] = -1.0f;
|
||||
perspective_matrix_[14] = 2.0f * z_far * z_near * denom;
|
||||
}
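// With the option defaults from gl_animation_overlay_calculator.proto
// (aspect_ratio = 0.75, vertical_fov_degrees = 70, z_near = 0.1,
// z_far = 1000) the matrix built above works out to roughly:
//   f = 1 / tan(35 deg)                        ~  1.428
//   perspective_matrix_[0]  = f / 0.75         ~  1.904
//   perspective_matrix_[5]  = f                ~  1.428
//   perspective_matrix_[10] = 1000.1 / -999.9  ~ -1.0002
//   perspective_matrix_[11] = -1
//   perspective_matrix_[14] = 200 / -999.9     ~ -0.2
// i.e. the standard column-major OpenGL projection with w_clip = -z_eye.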
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
// Helper function for reading in a specified number of bytes from an Android
|
||||
// asset. Returns true if successfully reads in all bytes into buffer.
|
||||
bool GlAnimationOverlayCalculator::ReadBytesFromAsset(AAsset *asset,
|
||||
void *buffer,
|
||||
int num_bytes_to_read) {
|
||||
// Most file systems use block sizes of 4KB or 8KB; ideally we'd choose a
// small multiple of the block size for best input streaming performance, so
// we go for a reasonably safe buffer size of 8KB = 8 * 1024 bytes.
|
||||
static const int kMaxChunkSize = 8192;
|
||||
|
||||
int bytes_left = num_bytes_to_read;
|
||||
int bytes_read = 1; // any value > 0 here just to start looping.
|
||||
|
||||
// Treat as uint8_t array so we can deal in single byte arithmetic easily.
|
||||
uint8_t *currBufferIndex = reinterpret_cast<uint8_t *>(buffer);
|
||||
while (bytes_read > 0 && bytes_left > 0) {
|
||||
bytes_read = AAsset_read(asset, (void *)currBufferIndex,
|
||||
std::min(bytes_left, kMaxChunkSize));
|
||||
bytes_left -= bytes_read;
|
||||
currBufferIndex += bytes_read;
|
||||
}
|
||||
// At least log any I/O errors encountered.
|
||||
if (bytes_read < 0) {
|
||||
LOG(ERROR) << "Error reading from AAsset: " << bytes_read;
|
||||
return false;
|
||||
}
|
||||
if (bytes_left > 0) {
|
||||
// Reached EOF before reading in specified number of bytes.
|
||||
LOG(WARNING) << "Reached EOF before reading in specified number of bytes.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// The below asset streaming code is Android-only, making use of the platform
|
||||
// JNI helper classes AAssetManager and AAsset.
|
||||
bool GlAnimationOverlayCalculator::LoadAnimationAndroid(
|
||||
const std::string &filename, std::vector<TriangleMesh> *meshes) {
|
||||
mediapipe::AssetManager *mediapipe_asset_manager =
|
||||
Singleton<mediapipe::AssetManager>::get();
|
||||
AAssetManager *asset_manager = mediapipe_asset_manager->GetAssetManager();
|
||||
if (!asset_manager) {
|
||||
LOG(ERROR) << "Failed to access Android asset manager.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// New read-bytes stuff here! First we open file for streaming.
|
||||
AAsset *asset = AAssetManager_open(asset_manager, filename.c_str(),
|
||||
AASSET_MODE_STREAMING);
|
||||
if (!asset) {
|
||||
LOG(ERROR) << "Failed to open animation asset: " << filename;
|
||||
return false;
|
||||
}
|
||||
|
||||
// And now, while we are able to stream in more frames, we do so.
|
||||
frame_count_ = 0;
|
||||
int32 lengths[3];
|
||||
while (ReadBytesFromAsset(asset, (void *)lengths, sizeof(lengths[0]) * 3)) {
|
||||
// About to start reading the next animation frame. Stream it in here.
|
||||
// Each frame stores first the object counts of its three arrays
|
||||
// (vertices, texture coordinates, triangle indices; respectively), and
|
||||
// then stores each of those arrays as a byte dump, in order.
|
||||
meshes->emplace_back();
|
||||
TriangleMesh &triangle_mesh = meshes->back();
|
||||
// Try to read in vertices (4-byte floats)
|
||||
triangle_mesh.vertices.reset(new float[lengths[0]]);
|
||||
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.vertices.get(),
|
||||
sizeof(float) * lengths[0])) {
|
||||
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
|
||||
return false;
|
||||
}
|
||||
// Try to read in texture coordinates (4-byte floats)
|
||||
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
|
||||
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.texture_coords.get(),
|
||||
sizeof(float) * lengths[1])) {
|
||||
LOG(ERROR) << "Failed to read tex-coords for frame " << frame_count_;
|
||||
return false;
|
||||
}
|
||||
// Try to read in indices (2-byte shorts)
|
||||
triangle_mesh.index_count = lengths[2];
|
||||
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
|
||||
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.triangle_indices.get(),
|
||||
sizeof(int16) * lengths[2])) {
|
||||
LOG(ERROR) << "Failed to read indices for frame " << frame_count_;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the normals for this triangle_mesh
|
||||
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
|
||||
|
||||
frame_count_++;
|
||||
}
|
||||
AAsset_close(asset);
|
||||
|
||||
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
|
||||
if (meshes->empty()) {
|
||||
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#else // defined(__ANDROID__)
|
||||
|
||||
bool GlAnimationOverlayCalculator::LoadAnimation(const std::string &filename) {
|
||||
std::ifstream infile(filename.c_str(), std::ifstream::binary);
|
||||
if (!infile) {
|
||||
LOG(ERROR) << "Error opening asset with filename: " << filename;
|
||||
return false;
|
||||
}
|
||||
|
||||
frame_count_ = 0;
|
||||
int32 lengths[3];
|
||||
while (true) {
|
||||
// See if we have more initial size counts to read in.
|
||||
infile.read((char *)(lengths), sizeof(lengths[0]) * 3);
|
||||
if (!infile) {
|
||||
// No more frames to read. Close out.
|
||||
infile.close();
|
||||
break;
|
||||
}
|
||||
|
||||
triangle_meshes_.emplace_back();
|
||||
TriangleMesh &triangle_mesh = triangle_meshes_.back();
|
||||
|
||||
// Try to read in vertices (4-byte floats).
|
||||
triangle_mesh.vertices.reset(new float[lengths[0]]);
|
||||
infile.read((char *)(triangle_mesh.vertices.get()),
|
||||
sizeof(float) * lengths[0]);
|
||||
if (!infile) {
|
||||
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to read in texture coordinates (4-byte floats)
|
||||
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
|
||||
infile.read((char *)(triangle_mesh.texture_coords.get()),
|
||||
sizeof(float) * lengths[1]);
|
||||
if (!infile) {
|
||||
LOG(ERROR) << "Failed to read texture coordinates for frame "
|
||||
<< frame_count_;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to read in the triangle indices (2-byte shorts)
|
||||
triangle_mesh.index_count = lengths[2];
|
||||
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
|
||||
infile.read((char *)(triangle_mesh.triangle_indices.get()),
|
||||
sizeof(int16) * lengths[2]);
|
||||
if (!infile) {
|
||||
LOG(ERROR) << "Failed to read triangle indices for frame "
|
||||
<< frame_count_;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the normals for this triangle_mesh
|
||||
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
|
||||
|
||||
frame_count_++;
|
||||
}
|
||||
|
||||
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
|
||||
if (triangle_meshes_.empty()) {
|
||||
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void GlAnimationOverlayCalculator::ComputeAspectRatioAndFovFromCameraParameters(
|
||||
const CameraParametersProto &camera_parameters, float *aspect_ratio,
|
||||
float *vertical_fov_degrees) {
|
||||
CHECK(aspect_ratio != nullptr);
|
||||
CHECK(vertical_fov_degrees != nullptr);
|
||||
*aspect_ratio =
|
||||
camera_parameters.portrait_width() / camera_parameters.portrait_height();
|
||||
*vertical_fov_degrees =
|
||||
std::atan(camera_parameters.portrait_height() * 0.5f) * 2 * 180 / M_PI;
|
||||
}
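// Note that 2 * atan(portrait_height * 0.5) only gives a sensible angle when
// portrait_height is expressed in units of the focal length. Hypothetical
// numbers: portrait_width = 1.125 and portrait_height = 1.5 yield
//   aspect_ratio         = 1.125 / 1.5 = 0.75
//   vertical_fov_degrees = 2 * atan(0.75) * 180 / pi ~ 73.7
// which matches the 3:4 aspect ratio assumed by the calculator options.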
|
||||
|
||||
absl::Status GlAnimationOverlayCalculator::Open(CalculatorContext *cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
MP_RETURN_IF_ERROR(helper_.Open(cc));
|
||||
|
||||
const auto &options = cc->Options<GlAnimationOverlayCalculatorOptions>();
|
||||
|
||||
animation_speed_fps_ = options.animation_speed_fps();
|
||||
|
||||
// Construct projection matrix using input side packets or option
|
||||
float aspect_ratio;
|
||||
float vertical_fov_degrees;
|
||||
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
|
||||
const std::string &camera_parameters_proto_string =
|
||||
cc->InputSidePackets()
|
||||
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
|
||||
.Get<std::string>();
|
||||
CameraParametersProto camera_parameters_proto;
|
||||
camera_parameters_proto.ParseFromString(camera_parameters_proto_string);
|
||||
ComputeAspectRatioAndFovFromCameraParameters(
|
||||
camera_parameters_proto, &aspect_ratio, &vertical_fov_degrees);
|
||||
} else {
|
||||
aspect_ratio = options.aspect_ratio();
|
||||
vertical_fov_degrees = options.vertical_fov_degrees();
|
||||
}
|
||||
|
||||
// Use these values when constructing the projection matrix.
|
||||
InitializePerspectiveMatrix(aspect_ratio, vertical_fov_degrees,
|
||||
options.z_clipping_plane_near(),
|
||||
options.z_clipping_plane_far());
|
||||
|
||||
// See what streams we have.
|
||||
has_video_stream_ = cc->Inputs().HasTag("VIDEO");
|
||||
has_model_matrix_stream_ = cc->Inputs().HasTag("MODEL_MATRICES");
|
||||
has_mask_model_matrix_stream_ = cc->Inputs().HasTag("MASK_MODEL_MATRICES");
|
||||
|
||||
// Try to load in the animation asset in a platform-specific manner.
|
||||
const std::string &asset_name =
|
||||
cc->InputSidePackets().Tag("ANIMATION_ASSET").Get<std::string>();
|
||||
bool loaded_animation = false;
|
||||
#if defined(__ANDROID__)
|
||||
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
|
||||
has_occlusion_mask_ = true;
|
||||
const std::string &mask_asset_name =
|
||||
cc->InputSidePackets().Tag("MASK_ASSET").Get<std::string>();
|
||||
loaded_animation = LoadAnimationAndroid(mask_asset_name, &mask_meshes_);
|
||||
if (!loaded_animation) {
|
||||
LOG(ERROR) << "Failed to load mask asset.";
|
||||
return absl::UnknownError("Failed to load mask asset.");
|
||||
}
|
||||
}
|
||||
loaded_animation = LoadAnimationAndroid(asset_name, &triangle_meshes_);
|
||||
#else
|
||||
loaded_animation = LoadAnimation(asset_name);
|
||||
#endif
|
||||
if (!loaded_animation) {
|
||||
LOG(ERROR) << "Failed to load animation asset.";
|
||||
return absl::UnknownError("Failed to load animation asset.");
|
||||
}
|
||||
|
||||
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
|
||||
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
|
||||
const auto &mask_texture =
|
||||
cc->InputSidePackets().Tag("MASK_TEXTURE").Get<AssetTextureFormat>();
|
||||
mask_texture_ = helper_.CreateSourceTexture(mask_texture);
|
||||
}
|
||||
|
||||
// Load in all static texture data if it exists
|
||||
if (cc->InputSidePackets().HasTag("TEXTURE")) {
|
||||
const auto &input_texture =
|
||||
cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
|
||||
texture_ = helper_.CreateSourceTexture(input_texture);
|
||||
}
|
||||
|
||||
VLOG(2) << "Input texture size: " << texture_.width() << ", "
|
||||
<< texture_.height() << std::endl;
|
||||
|
||||
return absl::OkStatus();
|
||||
});
|
||||
}
|
||||
|
||||
int GlAnimationOverlayCalculator::GetAnimationFrameIndex(Timestamp timestamp) {
|
||||
double seconds_delta = timestamp.Seconds() - animation_start_time_.Seconds();
|
||||
int64_t frame_index =
|
||||
static_cast<int64_t>(seconds_delta * animation_speed_fps_);
|
||||
frame_index %= frame_count_;
|
||||
return static_cast<int>(frame_index);
|
||||
}
|
||||
|
||||
void GlAnimationOverlayCalculator::LoadModelMatrices(
|
||||
const TimedModelMatrixProtoList &model_matrices,
|
||||
std::vector<ModelMatrix> *current_model_matrices) {
|
||||
current_model_matrices->clear();
|
||||
for (int i = 0; i < model_matrices.model_matrix_size(); ++i) {
|
||||
const auto &model_matrix = model_matrices.model_matrix(i);
|
||||
CHECK(model_matrix.matrix_entries_size() == kNumMatrixEntries)
|
||||
<< "Invalid Model Matrix";
|
||||
current_model_matrices->emplace_back();
|
||||
ModelMatrix &new_matrix = current_model_matrices->back();
|
||||
new_matrix.reset(new float[kNumMatrixEntries]);
|
||||
for (int j = 0; j < kNumMatrixEntries; j++) {
|
||||
// Model matrices streamed in using ROW-MAJOR format, but we want
|
||||
// COLUMN-MAJOR for rendering, so we transpose here.
|
||||
int col = j % 4;
|
||||
int row = j / 4;
|
||||
new_matrix[row + col * 4] = model_matrix.matrix_entries(j);
|
||||
}
|
||||
}
|
||||
}
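// Example of the transposition above (illustrative): streamed entry j = 7 is
// row 1, column 3 of the row-major input. With col = 7 % 4 = 3 and
// row = 7 / 4 = 1 it lands at new_matrix[1 + 3 * 4] = new_matrix[13], which
// is where the column-major layout expected by glUniformMatrix4fv
// (transpose = GL_FALSE) keeps the (row 1, column 3) entry; the translation
// column j = 3, 7, 11 therefore ends up at indices 12..14.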
|
||||
|
||||
absl::Status GlAnimationOverlayCalculator::Process(CalculatorContext *cc) {
|
||||
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
|
||||
if (!initialized_) {
|
||||
MP_RETURN_IF_ERROR(GlSetup());
|
||||
initialized_ = true;
|
||||
animation_start_time_ = cc->InputTimestamp();
|
||||
}
|
||||
|
||||
// Process model matrices, if any are being streamed in, and update our
|
||||
// list.
|
||||
current_model_matrices_.clear();
|
||||
if (has_model_matrix_stream_ &&
|
||||
!cc->Inputs().Tag("MODEL_MATRICES").IsEmpty()) {
|
||||
const TimedModelMatrixProtoList &model_matrices =
|
||||
cc->Inputs().Tag("MODEL_MATRICES").Get<TimedModelMatrixProtoList>();
|
||||
LoadModelMatrices(model_matrices, ¤t_model_matrices_);
|
||||
}
|
||||
|
||||
current_mask_model_matrices_.clear();
|
||||
if (has_mask_model_matrix_stream_ &&
|
||||
!cc->Inputs().Tag("MASK_MODEL_MATRICES").IsEmpty()) {
|
||||
const TimedModelMatrixProtoList &model_matrices =
|
||||
cc->Inputs()
|
||||
.Tag("MASK_MODEL_MATRICES")
|
||||
.Get<TimedModelMatrixProtoList>();
|
||||
LoadModelMatrices(model_matrices, ¤t_mask_model_matrices_);
|
||||
}
|
||||
|
||||
// Arbitrary default width and height for output destination texture, in the
|
||||
// event that we don't have a valid and unique input buffer to overlay.
|
||||
int width = 640;
|
||||
int height = 480;
|
||||
|
||||
GlTexture dst;
|
||||
std::unique_ptr<GpuBuffer> input_frame(nullptr);
|
||||
if (has_video_stream_ && !(cc->Inputs().Tag("VIDEO").IsEmpty())) {
|
||||
auto result = cc->Inputs().Tag("VIDEO").Value().Consume<GpuBuffer>();
|
||||
if (result.ok()) {
|
||||
input_frame = std::move(result).value();
|
||||
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
input_frame->GetGlTextureBufferSharedPtr()->Reuse();
|
||||
#endif
|
||||
width = input_frame->width();
|
||||
height = input_frame->height();
|
||||
dst = helper_.CreateSourceTexture(*input_frame);
|
||||
} else {
|
||||
LOG(ERROR) << "Unable to consume input video frame for overlay!";
|
||||
LOG(ERROR) << "Status returned was: " << result.status();
|
||||
dst = helper_.CreateDestinationTexture(width, height);
|
||||
}
|
||||
} else if (!has_video_stream_) {
|
||||
dst = helper_.CreateDestinationTexture(width, height);
|
||||
} else {
|
||||
// We have an input video stream, but not for this frame. Don't render!
|
||||
return absl::OkStatus();
|
||||
}
|
||||
helper_.BindFramebuffer(dst);
|
||||
|
||||
if (!depth_buffer_created_) {
|
||||
// Create our private depth buffer.
|
||||
GLCHECK(glGenRenderbuffers(1, &renderbuffer_));
|
||||
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
|
||||
GLCHECK(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
|
||||
width, height));
|
||||
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
|
||||
GL_RENDERBUFFER, renderbuffer_));
|
||||
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
|
||||
depth_buffer_created_ = true;
|
||||
}
|
||||
|
||||
// Re-bind our depth renderbuffer to our FBO depth attachment here.
|
||||
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
|
||||
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
|
||||
GL_RENDERBUFFER, renderbuffer_));
|
||||
GLenum status = GLCHECK(glCheckFramebufferStatus(GL_FRAMEBUFFER));
|
||||
if (status != GL_FRAMEBUFFER_COMPLETE) {
|
||||
LOG(ERROR) << "Incomplete framebuffer with status: " << status;
|
||||
}
|
||||
GLCHECK(glClear(GL_DEPTH_BUFFER_BIT));
|
||||
|
||||
if (has_occlusion_mask_) {
|
||||
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
|
||||
const TriangleMesh &mask_frame = mask_meshes_.front();
|
||||
MP_RETURN_IF_ERROR(GlBind(mask_frame, mask_texture_));
|
||||
// Draw objects using our latest model matrix stream packet.
|
||||
for (const ModelMatrix &model_matrix : current_mask_model_matrices_) {
|
||||
MP_RETURN_IF_ERROR(GlRender(mask_frame, model_matrix.get()));
|
||||
}
|
||||
}
|
||||
|
||||
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||
int frame_index = GetAnimationFrameIndex(cc->InputTimestamp());
|
||||
const TriangleMesh ¤t_frame = triangle_meshes_[frame_index];
|
||||
|
||||
// Load dynamic texture if it exists
|
||||
if (cc->Inputs().HasTag("TEXTURE")) {
|
||||
const auto &input_texture =
|
||||
cc->Inputs().Tag("TEXTURE").Get<AssetTextureFormat>();
|
||||
texture_ = helper_.CreateSourceTexture(input_texture);
|
||||
}
|
||||
|
||||
MP_RETURN_IF_ERROR(GlBind(current_frame, texture_));
|
||||
if (has_model_matrix_stream_) {
|
||||
// Draw objects using our latest model matrix stream packet.
|
||||
for (const ModelMatrix &model_matrix : current_model_matrices_) {
|
||||
MP_RETURN_IF_ERROR(GlRender(current_frame, model_matrix.get()));
|
||||
}
|
||||
} else {
|
||||
// Just draw one object to a static model matrix.
|
||||
MP_RETURN_IF_ERROR(GlRender(current_frame, kModelMatrix));
|
||||
}
|
||||
|
||||
// Disable vertex attributes
|
||||
GLCHECK(glDisableVertexAttribArray(ATTRIB_VERTEX));
|
||||
GLCHECK(glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
|
||||
GLCHECK(glDisableVertexAttribArray(ATTRIB_NORMAL));
|
||||
|
||||
// Disable depth test
|
||||
GLCHECK(glDisable(GL_DEPTH_TEST));
|
||||
|
||||
// Unbind texture
|
||||
GLCHECK(glActiveTexture(GL_TEXTURE1));
|
||||
GLCHECK(glBindTexture(texture_.target(), 0));
|
||||
|
||||
// Unbind depth buffer
|
||||
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
|
||||
|
||||
GLCHECK(glFlush());
|
||||
|
||||
auto output = dst.GetFrame<GpuBuffer>();
|
||||
dst.Release();
|
||||
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0)
|
||||
.Add(output.release(), cc->InputTimestamp());
|
||||
GLCHECK(glFrontFace(GL_CCW));
|
||||
return absl::OkStatus();
|
||||
});
|
||||
}
|
||||
|
||||
absl::Status GlAnimationOverlayCalculator::GlSetup() {
|
||||
// Load vertex and fragment shaders
|
||||
const GLint attr_location[NUM_ATTRIBUTES] = {
|
||||
ATTRIB_VERTEX,
|
||||
ATTRIB_TEXTURE_POSITION,
|
||||
ATTRIB_NORMAL,
|
||||
};
|
||||
const GLchar *attr_name[NUM_ATTRIBUTES] = {
|
||||
"position",
|
||||
"texture_coordinate",
|
||||
"normal",
|
||||
};
|
||||
|
||||
const GLchar *vert_src = R"(
|
||||
// Perspective projection matrix for rendering / clipping
|
||||
uniform mat4 perspectiveMatrix;
|
||||
|
||||
// Matrix defining the currently rendered object model
|
||||
uniform mat4 modelMatrix;
|
||||
|
||||
// vertex position in threespace
|
||||
attribute vec4 position;
|
||||
attribute vec3 normal;
|
||||
|
||||
// texture coordinate for each vertex in normalized texture space (0..1)
|
||||
attribute mediump vec4 texture_coordinate;
|
||||
|
||||
// texture coordinate for fragment shader (will be interpolated)
|
||||
varying mediump vec2 sampleCoordinate;
|
||||
varying mediump vec3 vNormal;
|
||||
|
||||
void main() {
|
||||
sampleCoordinate = texture_coordinate.xy;
|
||||
mat4 mvpMatrix = perspectiveMatrix * modelMatrix;
|
||||
gl_Position = mvpMatrix * position;
|
||||
|
||||
// TODO: Pass in rotation submatrix with no scaling or transforms to prevent
|
||||
// breaking vNormal in case of model matrix having non-uniform scaling
|
||||
vec4 tmpNormal = mvpMatrix * vec4(normal, 1.0);
|
||||
vec4 transformedZero = mvpMatrix * vec4(0.0, 0.0, 0.0, 1.0);
|
||||
tmpNormal = tmpNormal - transformedZero;
|
||||
vNormal = normalize(tmpNormal.xyz);
|
||||
}
|
||||
)";
|
||||
|
||||
const GLchar *frag_src = R"(
|
||||
precision mediump float;
|
||||
|
||||
varying vec2 sampleCoordinate; // texture coordinate (0..1)
|
||||
varying vec3 vNormal;
|
||||
uniform sampler2D texture; // texture to shade with
|
||||
const float kPi = 3.14159265359;
|
||||
|
||||
// Define ambient lighting factor that is applied to our texture in order to
|
||||
// generate ambient lighting of the scene on the object. Range is [0.0-1.0],
|
||||
// with the factor being proportional to the brightness of the lighting in the
|
||||
// scene being applied to the object
|
||||
const float kAmbientLighting = 0.75;
|
||||
|
||||
// Define RGB values for light source
|
||||
const vec3 kLightColor = vec3(0.25);
|
||||
// Exponent for directional lighting that governs diffusion of surface light
|
||||
const float kExponent = 1.0;
|
||||
// Define direction of lighting effect source
|
||||
const vec3 lightDir = vec3(0.0, -1.0, -0.6);
|
||||
// Hard-coded view direction
|
||||
const vec3 viewDir = vec3(0.0, 0.0, -1.0);
|
||||
|
||||
// DirectionalLighting procedure imported from Lullaby @ https://github.com/google/lullaby
|
||||
// Calculate and return the color (diffuse and specular together) reflected by
|
||||
// a directional light.
|
||||
vec3 GetDirectionalLight(vec3 pos, vec3 normal, vec3 viewDir, vec3 lightDir, vec3 lightColor, float exponent) {
|
||||
// Intensity of the diffuse light. Saturate to keep within the 0-1 range.
|
||||
float normal_dot_light_dir = dot(-normal, -lightDir);
|
||||
float intensity = clamp(normal_dot_light_dir, 0.0, 1.0);
|
||||
// Calculate the diffuse light
|
||||
vec3 diffuse = intensity * lightColor;
|
||||
// http://www.rorydriscoll.com/2009/01/25/energy-conservation-in-games/
|
||||
float kEnergyConservation = (2.0 + exponent) / (2.0 * kPi);
|
||||
vec3 reflect_dir = reflect(lightDir, -normal);
|
||||
// Intensity of the specular light
|
||||
float view_dot_reflect = dot(-viewDir, reflect_dir);
|
||||
// Use an epsilon for pow because pow(x,y) is undefined if x < 0 or x == 0
|
||||
// and y <= 0 (GLSL Spec 8.2)
|
||||
const float kEpsilon = 1e-5;
|
||||
intensity = kEnergyConservation * pow(clamp(view_dot_reflect, kEpsilon, 1.0),
|
||||
exponent);
|
||||
// Specular color:
|
||||
vec3 specular = intensity * lightColor;
|
||||
return diffuse + specular;
|
||||
}
|
||||
|
||||
void main() {
|
||||
// Sample the texture, retrieving an rgba pixel value
|
||||
vec4 pixel = texture2D(texture, sampleCoordinate);
|
||||
// If the alpha (background) value is near transparent, discard the pixel;
// this allows rendering of GIFs with transparent backgrounds.
// TODO: Add a toggle to perform pixel alpha discarding for transparent
// GIFs (prevent interference with the Objectron system).
if (pixel.a < 0.2) discard;
|
||||
|
||||
// Generate directional lighting effect
|
||||
vec3 lighting = GetDirectionalLight(gl_FragCoord.xyz, vNormal, viewDir, lightDir, kLightColor, kExponent);
|
||||
// Apply both ambient and directional lighting to our texture
|
||||
gl_FragColor = vec4((vec3(kAmbientLighting) + lighting) * pixel.rgb, 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
// Shader program
|
||||
GLCHECK(GlhCreateProgram(vert_src, frag_src, NUM_ATTRIBUTES,
|
||||
(const GLchar **)&attr_name[0], attr_location,
|
||||
&program_));
|
||||
RET_CHECK(program_) << "Problem initializing the program.";
|
||||
texture_uniform_ = GLCHECK(glGetUniformLocation(program_, "texture"));
|
||||
perspective_matrix_uniform_ =
|
||||
GLCHECK(glGetUniformLocation(program_, "perspectiveMatrix"));
|
||||
model_matrix_uniform_ =
|
||||
GLCHECK(glGetUniformLocation(program_, "modelMatrix"));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status GlAnimationOverlayCalculator::GlBind(
|
||||
const TriangleMesh &triangle_mesh, const GlTexture &texture) {
|
||||
GLCHECK(glUseProgram(program_));
|
||||
|
||||
// Disable backface culling to allow occlusion effects.
|
||||
// Some options for solid arbitrary 3D geometry rendering
|
||||
GLCHECK(glEnable(GL_BLEND));
|
||||
GLCHECK(glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA));
|
||||
GLCHECK(glEnable(GL_DEPTH_TEST));
|
||||
GLCHECK(glFrontFace(GL_CW));
|
||||
GLCHECK(glDepthMask(GL_TRUE));
|
||||
GLCHECK(glDepthFunc(GL_LESS));
|
||||
|
||||
// Clear our depth buffer before starting draw calls
|
||||
GLCHECK(glVertexAttribPointer(ATTRIB_VERTEX, 3, GL_FLOAT, 0, 0,
|
||||
triangle_mesh.vertices.get()));
|
||||
GLCHECK(glEnableVertexAttribArray(ATTRIB_VERTEX));
|
||||
GLCHECK(glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
|
||||
triangle_mesh.texture_coords.get()));
|
||||
GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
|
||||
GLCHECK(glVertexAttribPointer(ATTRIB_NORMAL, 3, GL_FLOAT, 0, 0,
|
||||
triangle_mesh.normals.get()));
|
||||
GLCHECK(glEnableVertexAttribArray(ATTRIB_NORMAL));
|
||||
GLCHECK(glActiveTexture(GL_TEXTURE1));
|
||||
GLCHECK(glBindTexture(texture.target(), texture.name()));
|
||||
|
||||
// We previously bound it to GL_TEXTURE1
|
||||
GLCHECK(glUniform1i(texture_uniform_, 1));
|
||||
|
||||
GLCHECK(glUniformMatrix4fv(perspective_matrix_uniform_, 1, GL_FALSE,
|
||||
perspective_matrix_));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status GlAnimationOverlayCalculator::GlRender(
|
||||
const TriangleMesh &triangle_mesh, const float *model_matrix) {
|
||||
GLCHECK(glUniformMatrix4fv(model_matrix_uniform_, 1, GL_FALSE, model_matrix));
|
||||
GLCHECK(glDrawElements(GL_TRIANGLES, triangle_mesh.index_count,
|
||||
GL_UNSIGNED_SHORT,
|
||||
triangle_mesh.triangle_indices.get()));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
GlAnimationOverlayCalculator::~GlAnimationOverlayCalculator() {
|
||||
helper_.RunInGlContext([this] {
|
||||
if (program_) {
|
||||
GLCHECK(glDeleteProgram(program_));
|
||||
program_ = 0;
|
||||
}
|
||||
if (depth_buffer_created_) {
|
||||
GLCHECK(glDeleteRenderbuffers(1, &renderbuffer_));
|
||||
renderbuffer_ = 0;
|
||||
}
|
||||
if (texture_.width() > 0) {
|
||||
texture_.Release();
|
||||
}
|
||||
if (mask_texture_.width() > 0) {
|
||||
mask_texture_.Release();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace mediapipe
@ -0,0 +1,41 @@
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
|
||||
message GlAnimationOverlayCalculatorOptions {
|
||||
extend CalculatorOptions {
|
||||
optional GlAnimationOverlayCalculatorOptions ext = 174760573;
|
||||
}
|
||||
|
||||
// Default aspect ratio of rendering target width over height.
|
||||
// This specific value is for 3:4 view. Do not change this default value.
|
||||
optional float aspect_ratio = 1 [default = 0.75];
|
||||
// Default vertical field of view in degrees. This specific default value
|
||||
// is arbitrary. Do not change this default value. If you want to use
|
||||
// a different vertical_fov_degrees, set it in the options.
|
||||
optional float vertical_fov_degrees = 2 [default = 70.0];
|
||||
|
||||
// Perspective projection matrix z-clipping near plane value.
|
||||
optional float z_clipping_plane_near = 3 [default = 0.1];
|
||||
// Perspective projection matrix z-clipping far plane value.
|
||||
optional float z_clipping_plane_far = 4 [default = 1000.0];
|
||||
|
||||
// Speed at which to play the animation (in frames per second).
|
||||
optional float animation_speed_fps = 5 [default = 25.0];
|
||||
}
@ -0,0 +1,48 @@
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
message TimedModelMatrixProto {
|
||||
// 4x4 model matrix stored in ROW major order.
|
||||
repeated float matrix_entries = 1 [packed = true];
|
||||
// Timestamp of this model matrix in milliseconds.
|
||||
optional int64 time_msec = 2 [default = 0];
|
||||
// Unique per object id
|
||||
optional int32 id = 3 [default = -1];
|
||||
}
|
||||
|
||||
message TimedModelMatrixProtoList {
|
||||
repeated TimedModelMatrixProto model_matrix = 1;
|
||||
}
|
||||
|
||||
// For convenience, when the desired information or transformation can be
|
||||
// encoded into vectors (e.g. when the matrix represents a scale or Euler-angle-
|
||||
// based rotation operation.)
|
||||
message TimedVectorProto {
|
||||
// The vector values themselves.
|
||||
repeated float vector_entries = 1 [packed = true];
|
||||
|
||||
// Timestamp of this vector in milliseconds.
|
||||
optional int64 time_msec = 2 [default = 0];
|
||||
|
||||
// Unique per object id
|
||||
optional int32 id = 3 [default = -1];
|
||||
}
|
||||
|
||||
message TimedVectorProtoList {
|
||||
repeated TimedVectorProto vector_list = 1;
|
||||
}
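A minimal sketch (single object, identity transform; MakeIdentityMatrixList is a hypothetical helper) of how a producer could fill a TimedModelMatrixProtoList for the MODEL_MATRICES stream consumed by GlAnimationOverlayCalculator; note the ROW-major ordering that LoadModelMatrices later transposes for OpenGL.

#include <cstdint>

#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"

mediapipe::TimedModelMatrixProtoList MakeIdentityMatrixList(int64_t time_msec) {
  mediapipe::TimedModelMatrixProtoList list;
  mediapipe::TimedModelMatrixProto* matrix = list.add_model_matrix();
  matrix->set_time_msec(time_msec);
  matrix->set_id(0);
  // 16 entries in ROW-major order; the identity is symmetric, so the
  // row/column convention only matters once rotation or translation is added.
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      matrix->add_matrix_entries(row == col ? 1.0f : 0.0f);
    }
  }
  return list;
}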
33
mediapipe/graphs/object_detection_3d/obj_parser/BUILD
Normal file
@ -0,0 +1,33 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

java_library(
    name = "obj_parser_lib",
    srcs = [
        "ObjParserMain.java",
        "SimpleObjParser.java",
    ],
    javacopts = ["-Xep:DefaultPackage:OFF"],
)

java_binary(
    name = "ObjParser",
    javacopts = ["-Xep:DefaultPackage:OFF"],
    main_class = "ObjParserMain",
    runtime_deps = [
        ":obj_parser_lib",
    ],
)
@ -0,0 +1,205 @@
// Copyright 2021 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Class for running desktop-side parsing/packing routines on .obj AR assets. Usage: ObjParser
* --input_dir=[INPUT_DIRECTORY] --output_dir=[OUTPUT_DIRECTORY], where INPUT_DIRECTORY is the
* folder with asset .obj files to process, and OUTPUT_DIRECTORY is the folder where the processed
* asset .uuu file should be placed.
|
||||
*
|
||||
* <p>NOTE: Directories are assumed to be absolute paths.
|
||||
*/
|
||||
public final class ObjParserMain {
|
||||
// Simple FileFilter implementation to let us walk over only our .obj files in a particular
|
||||
// directory.
|
||||
private static final class ObjFileFilter implements FileFilter {
|
||||
ObjFileFilter() {
|
||||
// Nothing to do here.
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(File file) {
|
||||
return file.getName().endsWith(".obj");
|
||||
}
|
||||
}
|
||||
|
||||
// File extension for binary output files; tagged onto end of initial file extension.
|
||||
private static final String BINARY_FILE_EXT = ".uuu";
|
||||
private static final String INPUT_DIR_FLAG = "--input_dir=";
|
||||
private static final String OUTPUT_DIR_FLAG = "--output_dir=";
|
||||
private static final float DEFAULT_VERTEX_SCALE_FACTOR = 30.0f;
|
||||
private static final double NS_TO_SECONDS = 1e9;
|
||||
|
||||
public final PrintWriter writer;
|
||||
|
||||
public ObjParserMain() {
|
||||
super();
|
||||
this.writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(System.out, UTF_8)));
|
||||
}
|
||||
|
||||
// Simple overridable logging function.
|
||||
protected void logString(String infoLog) {
|
||||
writer.println(infoLog);
|
||||
}
|
||||
|
||||
/*
|
||||
* Main program logic: parse command-line arguments and perform actions.
|
||||
*/
|
||||
public void run(String inDirectory, String outDirectory) {
|
||||
if (inDirectory.isEmpty()) {
|
||||
logString("Error: Must provide input directory with " + INPUT_DIR_FLAG);
|
||||
return;
|
||||
}
|
||||
if (outDirectory.isEmpty()) {
|
||||
logString("Error: Must provide output directory with " + OUTPUT_DIR_FLAG);
|
||||
return;
|
||||
}
|
||||
|
||||
File dirAsFile = new File(inDirectory);
|
||||
ObjFileFilter objFileFilter = new ObjFileFilter();
|
||||
File[] objFiles = dirAsFile.listFiles(objFileFilter);
|
||||
|
||||
FileOutputStream outputStream = null;
|
||||
logString("Parsing directory: " + inDirectory);
|
||||
// We need frames processed in correct order.
|
||||
Arrays.sort(objFiles);
|
||||
|
||||
for (File objFile : objFiles) {
|
||||
String fileName = objFile.getAbsolutePath();
|
||||
|
||||
// Just take the file name of the first processed frame.
|
||||
if (outputStream == null) {
|
||||
String outputFileName = outDirectory + objFile.getName() + BINARY_FILE_EXT;
|
||||
try {
|
||||
// Create new file here, if we can.
|
||||
outputStream = new FileOutputStream(outputFileName);
|
||||
logString("Created outfile: " + outputFileName);
|
||||
} catch (Exception e) {
|
||||
logString("Error creating outfile: " + e.toString());
|
||||
e.printStackTrace(writer);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Process each file into the stream.
|
||||
logString("Processing file: " + fileName);
|
||||
processFile(fileName, outputStream);
|
||||
}
|
||||
|
||||
// Finally close the stream out.
|
||||
try {
|
||||
if (outputStream != null) {
|
||||
outputStream.close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logString("Error trying to close output stream: " + e.toString());
|
||||
e.printStackTrace(writer);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Entrypoint for command-line executable.
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
// Parse flags
|
||||
String inDirectory = "";
|
||||
String outDirectory = "";
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
if (args[i].startsWith(INPUT_DIR_FLAG)) {
|
||||
inDirectory = args[i].substring(INPUT_DIR_FLAG.length());
|
||||
// Make sure this will be treated as a directory
|
||||
if (!inDirectory.endsWith("/")) {
|
||||
inDirectory += "/";
|
||||
}
|
||||
}
|
||||
if (args[i].startsWith(OUTPUT_DIR_FLAG)) {
|
||||
outDirectory = args[i].substring(OUTPUT_DIR_FLAG.length());
|
||||
// Make sure this will be treated as a directory
|
||||
if (!outDirectory.endsWith("/")) {
|
||||
outDirectory += "/";
|
||||
}
|
||||
}
|
||||
}
|
||||
ObjParserMain parser = new ObjParserMain();
|
||||
parser.run(inDirectory, outDirectory);
|
||||
parser.writer.flush();
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal helper function to parse a .obj from an infile name and stream the resulting data
|
||||
* directly out in binary-dump format to outputStream.
|
||||
*/
|
||||
private void processFile(String infileName, OutputStream outputStream) {
|
||||
long start = System.nanoTime();
|
||||
|
||||
// First we parse the obj.
|
||||
SimpleObjParser objParser = new SimpleObjParser(infileName, DEFAULT_VERTEX_SCALE_FACTOR);
|
||||
if (!objParser.parse()) {
|
||||
logString("Error parsing .obj file before processing");
|
||||
return;
|
||||
}
|
||||
|
||||
final float[] vertices = objParser.getVertices();
|
||||
final float[] textureCoords = objParser.getTextureCoords();
|
||||
final ArrayList<Short> triangleList = objParser.getTriangles();
|
||||
|
||||
// Overall byte count to stream: 12 for the 3 list-length ints, and then 4 for each vertex and
|
||||
// texCoord int, and finally 2 for each triangle index short.
|
||||
final int bbSize =
|
||||
12 + 4 * vertices.length + 4 * textureCoords.length + 2 * triangleList.size();
|
||||
|
||||
// Ensure ByteBuffer is native order, just like we want to read it in, but is NOT direct, so
|
||||
// we can call .array() on it.
|
||||
ByteBuffer bb = ByteBuffer.allocate(bbSize);
|
||||
bb.order(ByteOrder.nativeOrder());
|
||||
|
||||
bb.putInt(vertices.length);
|
||||
bb.putInt(textureCoords.length);
|
||||
bb.putInt(triangleList.size());
|
||||
logString(String.format("Writing... Vertices: %d, TextureCoords: %d, Indices: %d.%n",
|
||||
vertices.length, textureCoords.length, triangleList.size()));
|
||||
for (float vertex : vertices) {
|
||||
bb.putFloat(vertex);
|
||||
}
|
||||
for (float textureCoord : textureCoords) {
|
||||
bb.putFloat(textureCoord);
|
||||
}
|
||||
for (Short vertexIndex : triangleList) {
|
||||
bb.putShort(vertexIndex.shortValue());
|
||||
}
|
||||
bb.position(0);
|
||||
try {
|
||||
outputStream.write(bb.array(), 0, bbSize);
|
||||
logString(String.format("Processing successful! Took %.4f seconds.%n",
|
||||
(System.nanoTime() - start) / NS_TO_SECONDS));
|
||||
} catch (Exception e) {
|
||||
logString("Error writing during processing: " + e.toString());
|
||||
e.printStackTrace(writer);
|
||||
}
|
||||
}
|
||||
}
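Not part of this commit, but as a sketch of how the binary stream written by processFile() could be read back: each processed frame contributes one block of three native-order ints (vertex-float count, texture-coord-float count, triangle-index count) followed by the corresponding floats and shorts. The class name and command-line handling below are hypothetical.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Paths;

final class UuuDump {
  public static void main(String[] args) throws IOException {
    // args[0]: path to a .uuu file produced by ObjParser (hypothetical usage).
    ByteBuffer bb = ByteBuffer.wrap(Files.readAllBytes(Paths.get(args[0])));
    bb.order(ByteOrder.nativeOrder()); // must match the writer's byte order
    int frame = 0;
    while (bb.hasRemaining()) {
      int numVertexFloats = bb.getInt();
      int numTexCoordFloats = bb.getInt();
      int numIndices = bb.getInt();
      // Skip this frame's payload: floats are 4 bytes each, index shorts are 2 bytes each.
      bb.position(bb.position() + 4 * (numVertexFloats + numTexCoordFloats) + 2 * numIndices);
      System.out.printf("frame %d: %d vertex floats, %d texCoord floats, %d indices (%d triangles)%n",
          frame++, numVertexFloats, numTexCoordFloats, numIndices, numIndices / 3);
    }
  }
}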
mediapipe/graphs/object_detection_3d/obj_parser/SimpleObjParser.java
@@ -0,0 +1,386 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Class for parsing a single .obj file into openGL-usable pieces.
 *
 * <p>Usage:
 *
 * <p>SimpleObjParser objParser = new SimpleObjParser("animations/cow/cow320.obj", .015f);
 *
 * <p>if (objParser.parse()) { ... }
 */
public class SimpleObjParser {
  private static class ShortPair {
    private final Short first;
    private final Short second;

    public ShortPair(Short newFirst, Short newSecond) {
      first = newFirst;
      second = newSecond;
    }

    public Short getFirst() {
      return first;
    }

    public Short getSecond() {
      return second;
    }
  }

  private static final String TAG = SimpleObjParser.class.getSimpleName();
  private static final boolean DEBUG = false;
  private static final int INVALID_INDEX = -1;
  private static final int POSITIONS_COORDS_PER_VERTEX = 3;
  private static final int TEXTURE_COORDS_PER_VERTEX = 2;
  private final String fileName;

  // Since .obj doesn't tie together texture coordinates and vertex
  // coordinates, but OpenGL does, we need to keep a map of all such pairings that occur in
  // our face list.
  private final HashMap<ShortPair, Short> vertexTexCoordMap;

  // Internal (de-coupled) unique vertices and texture coordinates
  private ArrayList<Float> vertices;
  private ArrayList<Float> textureCoords;

  // Data we expose to openGL for rendering
  private float[] finalizedVertices;
  private float[] finalizedTextureCoords;
  private ArrayList<Short> finalizedTriangles;

  // So we only display warnings about dropped w-coordinates once
  private boolean vertexCoordIgnoredWarning;
  private boolean textureCoordIgnoredWarning;
  private boolean startedProcessingFaces;

  private int numPrimitiveVertices;
  private int numPrimitiveTextureCoords;
  private int numPrimitiveFaces;

  // For scratchwork, so we don't have to keep reallocating
  private float[] tempCoords;

  // We scale all our position coordinates uniformly by this factor
  private float objectUniformScaleFactor;

  public SimpleObjParser(String objFile, float scaleFactor) {
    objectUniformScaleFactor = scaleFactor;

    fileName = objFile;
    vertices = new ArrayList<Float>();
    textureCoords = new ArrayList<Float>();

    vertexTexCoordMap = new HashMap<ShortPair, Short>();
    finalizedTriangles = new ArrayList<Short>();

    tempCoords = new float[Math.max(POSITIONS_COORDS_PER_VERTEX, TEXTURE_COORDS_PER_VERTEX)];
    numPrimitiveFaces = 0;

    vertexCoordIgnoredWarning = false;
    textureCoordIgnoredWarning = false;
    startedProcessingFaces = false;
  }

  // Simple helper wrapper function
  private void debugLogString(String message) {
    if (DEBUG) {
      System.out.println(message);
    }
  }

  private void parseVertex(String[] linePieces) {
    // Note: Traditionally xyzw is acceptable as a format, with w defaulting to 1.0, but for now
    // we only parse xyz.
    if (linePieces.length < POSITIONS_COORDS_PER_VERTEX + 1
        || linePieces.length > POSITIONS_COORDS_PER_VERTEX + 2) {
      System.out.println("Malformed vertex coordinate specification, assuming xyz format only.");
      return;
    } else if (linePieces.length == POSITIONS_COORDS_PER_VERTEX + 2 && !vertexCoordIgnoredWarning) {
      System.out.println(
          "Only x, y, and z parsed for vertex coordinates; w coordinates will be ignored.");
      vertexCoordIgnoredWarning = true;
    }

    boolean success = true;
    try {
      for (int i = 1; i < POSITIONS_COORDS_PER_VERTEX + 1; i++) {
        tempCoords[i - 1] = Float.parseFloat(linePieces[i]);
      }
    } catch (NumberFormatException e) {
      success = false;
      System.out.println("Malformed vertex coordinate error: " + e.toString());
    }

    if (success) {
      for (int i = 0; i < POSITIONS_COORDS_PER_VERTEX; i++) {
        vertices.add(Float.valueOf(tempCoords[i] * objectUniformScaleFactor));
      }
    }
  }

  private void parseTextureCoordinate(String[] linePieces) {
    // Similar to vertices, uvw is acceptable as a format, with w defaulting to 0.0, but for now we
    // only parse uv.
    if (linePieces.length < TEXTURE_COORDS_PER_VERTEX + 1
        || linePieces.length > TEXTURE_COORDS_PER_VERTEX + 2) {
      System.out.println("Malformed texture coordinate specification, assuming uv format only.");
      return;
    } else if (linePieces.length == (TEXTURE_COORDS_PER_VERTEX + 2)
        && !textureCoordIgnoredWarning) {
      debugLogString("Only u and v parsed for texture coordinates; w coordinates will be ignored.");
      textureCoordIgnoredWarning = true;
    }

    boolean success = true;
    try {
      for (int i = 1; i < TEXTURE_COORDS_PER_VERTEX + 1; i++) {
        tempCoords[i - 1] = Float.parseFloat(linePieces[i]);
      }
    } catch (NumberFormatException e) {
      success = false;
      System.out.println("Malformed texture coordinate error: " + e.toString());
    }

    if (success) {
      // .obj files treat (0,0) as top-left, compared to bottom-left for openGL. So invert "v"
      // texture coordinate only here.
      textureCoords.add(Float.valueOf(tempCoords[0]));
      textureCoords.add(Float.valueOf(1.0f - tempCoords[1]));
    }
  }

  // Will return INVALID_INDEX if error occurs, and otherwise will return finalized (combined)
  // index, adding and hashing new combinations as it sees them.
  private short parseAndProcessCombinedVertexCoord(String coordString) {
    String[] coords = coordString.split("/");
    try {
      // Parse vertex and texture indices; 1-indexed from front if positive and from end of list if
      // negative.
      short vertexIndex = Short.parseShort(coords[0]);
      short textureIndex = Short.parseShort(coords[1]);
      if (vertexIndex > 0) {
        vertexIndex--;
      } else {
        vertexIndex = (short) (vertexIndex + numPrimitiveVertices);
      }
      if (textureIndex > 0) {
        textureIndex--;
      } else {
        textureIndex = (short) (textureIndex + numPrimitiveTextureCoords);
      }

      // Combine indices and look up in pair map.
      ShortPair indexPair = new ShortPair(Short.valueOf(vertexIndex), Short.valueOf(textureIndex));
      Short combinedIndex = vertexTexCoordMap.get(indexPair);
      if (combinedIndex == null) {
        short numIndexPairs = (short) vertexTexCoordMap.size();
        vertexTexCoordMap.put(indexPair, numIndexPairs);
        return numIndexPairs;
      } else {
        return combinedIndex.shortValue();
      }
    } catch (NumberFormatException e) {
      // Failure to parse coordinates as shorts
      return INVALID_INDEX;
    }
  }

  // Note: it is assumed that face list occurs AFTER vertex and texture coordinate lists finish in
  // the obj file format.
  private void parseFace(String[] linePieces) {
    if (linePieces.length < 4) {
      System.out.println("Malformed face index list: there must be at least 3 indices per face");
      return;
    }

    short[] faceIndices = new short[linePieces.length - 1];
    boolean success = true;
    for (int i = 1; i < linePieces.length; i++) {
      short faceIndex = parseAndProcessCombinedVertexCoord(linePieces[i]);

      if (faceIndex < 0) {
        System.out.println(faceIndex);
        System.out.println("Malformed face index: " + linePieces[i]);
        success = false;
        break;
      }
      faceIndices[i - 1] = faceIndex;
    }

    if (success) {
      numPrimitiveFaces++;
      // Manually triangulate the face under the assumption that the points are coplanar, the poly
      // is convex, and the points are listed in either clockwise or anti-clockwise orientation.
      for (int i = 1; i < faceIndices.length - 1; i++) {
        // We use a triangle fan here, so first point is part of all triangles
        finalizedTriangles.add(faceIndices[0]);
        finalizedTriangles.add(faceIndices[i]);
        finalizedTriangles.add(faceIndices[i + 1]);
      }
    }
  }

  // Iterate over map and reconstruct proper vertex/texture coordinate pairings.
  private boolean constructFinalCoordinatesFromMap() {
    final int numIndexPairs = vertexTexCoordMap.size();
    // XYZ vertices and UV texture coordinates
    finalizedVertices = new float[POSITIONS_COORDS_PER_VERTEX * numIndexPairs];
    finalizedTextureCoords = new float[TEXTURE_COORDS_PER_VERTEX * numIndexPairs];
    try {
      for (Map.Entry<ShortPair, Short> entry : vertexTexCoordMap.entrySet()) {
        ShortPair indexPair = entry.getKey();
        short rawVertexIndex = indexPair.getFirst().shortValue();
        short rawTexCoordIndex = indexPair.getSecond().shortValue();
        short finalIndex = entry.getValue().shortValue();
        for (int i = 0; i < POSITIONS_COORDS_PER_VERTEX; i++) {
          finalizedVertices[POSITIONS_COORDS_PER_VERTEX * finalIndex + i]
              = vertices.get(rawVertexIndex * POSITIONS_COORDS_PER_VERTEX + i);
        }
        for (int i = 0; i < TEXTURE_COORDS_PER_VERTEX; i++) {
          finalizedTextureCoords[TEXTURE_COORDS_PER_VERTEX * finalIndex + i]
              = textureCoords.get(rawTexCoordIndex * TEXTURE_COORDS_PER_VERTEX + i);
        }
      }
    } catch (NumberFormatException e) {
      System.out.println("Malformed index in vertex/texture coordinate mapping.");
      return false;
    }
    return true;
  }

  /**
   * Returns the vertex position coordinate list (x1, y1, z1, x2, y2, z2, ...) after a successful
   * call to parse().
   */
  public float[] getVertices() {
    return finalizedVertices;
  }

  /**
   * Returns the vertex texture coordinate list (u1, v1, u2, v2, ...) after a successful call to
   * parse().
   */
  public float[] getTextureCoords() {
    return finalizedTextureCoords;
  }

  /**
   * Returns the list of indices (a1, b1, c1, a2, b2, c2, ...) after a successful call to parse().
   * Each (a, b, c) triplet specifies a triangle to be rendered, with a, b, and c Short objects used
   * to index into the coordinates returned by getVertices() and getTextureCoords().<p></p>
   * For example, a Short index representing 5 should be used to index into vertices[15],
   * vertices[16], and vertices[17], as well as textureCoords[10] and textureCoords[11].
   */
  public ArrayList<Short> getTriangles() {
    return finalizedTriangles;
  }

  /**
   * Attempts to locate and read the specified .obj file, and parse it accordingly. None of the
   * getter functions in this class will return valid results until a value of true is returned
   * from this function.
   * @return true on success.
   */
  public boolean parse() {
    boolean success = true;
    BufferedReader reader = null;
    try {
      reader = Files.newBufferedReader(Paths.get(fileName), UTF_8);
      String line;
      while ((line = reader.readLine()) != null) {
        // Skip over lines with no characters
        if (line.length() < 1) {
          continue;
        }

        // Ignore comment lines entirely
        if (line.charAt(0) == '#') {
          continue;
        }

        // Split into pieces based on whitespace, and process according to first command piece
        String[] linePieces = line.split(" +");
        switch (linePieces[0]) {
          case "v":
            // Add vertex
            if (startedProcessingFaces) {
              throw new IOException("Vertices must all be declared before faces in obj files.");
            }
            parseVertex(linePieces);
            break;
          case "vt":
            // Add texture coordinate
            if (startedProcessingFaces) {
              throw new IOException(
                  "Texture coordinates must all be declared before faces in obj files.");
            }
            parseTextureCoordinate(linePieces);
            break;
          case "f":
            // Vertex and texture coordinate lists should be locked into place by now
            if (!startedProcessingFaces) {
              startedProcessingFaces = true;
              numPrimitiveVertices = vertices.size() / POSITIONS_COORDS_PER_VERTEX;
              numPrimitiveTextureCoords = textureCoords.size() / TEXTURE_COORDS_PER_VERTEX;
            }
            // Add face
            parseFace(linePieces);
            break;
          default:
            // Unknown or unused directive: ignoring
            // Note: We do not yet process vertex normals or curves, so we ignore {vp, vn, s}
            // Note: We assume only a single object, so we ignore {g, o}
            // Note: We also assume a single texture, which we process independently, so we ignore
            // {mtllib, usemtl}
            break;
        }
      }

      // If we made it all the way through, then we have a vertex-to-tex-coord pair mapping, so
      // construct our final vertex and texture coordinate lists now.
      success = constructFinalCoordinatesFromMap();

    } catch (IOException e) {
      success = false;
      System.out.println("Failure to parse obj file: " + e.toString());
    } finally {
      try {
        if (reader != null) {
          reader.close();
        }
      } catch (IOException e) {
        System.out.println("Couldn't close reader");
      }
    }
    if (success) {
      debugLogString("Successfully parsed " + numPrimitiveVertices + " vertices and "
          + numPrimitiveTextureCoords + " texture coordinates into " + vertexTexCoordMap.size()
          + " combined vertices and " + numPrimitiveFaces + " faces, represented as a mesh of "
          + finalizedTriangles.size() / 3 + " triangles.");
    }
    return success;
  }
}
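Again not part of the diff, a minimal usage sketch of the class above, assuming it is compiled into the same (default) package; the .obj path mirrors the Javadoc example and is a placeholder. It follows the indexing convention documented on getTriangles(): a combined index k addresses vertices[3*k .. 3*k+2] and textureCoords[2*k .. 2*k+1].

import java.util.ArrayList;

public class SimpleObjParserDemo {
  public static void main(String[] args) {
    SimpleObjParser objParser = new SimpleObjParser("animations/cow/cow320.obj", .015f);
    if (!objParser.parse()) {
      System.out.println("parse() failed");
      return;
    }
    float[] vertices = objParser.getVertices();
    float[] textureCoords = objParser.getTextureCoords();
    ArrayList<Short> triangles = objParser.getTriangles();
    // Print the three corners of the first triangle, if any.
    for (int corner = 0; corner < 3 && corner < triangles.size(); corner++) {
      int k = triangles.get(corner);
      System.out.printf("corner %d: pos=(%.3f, %.3f, %.3f) uv=(%.3f, %.3f)%n", corner,
          vertices[3 * k], vertices[3 * k + 1], vertices[3 * k + 2],
          textureCoords[2 * k], textureCoords[2 * k + 1]);
    }
  }
}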
Some files were not shown because too many files have changed in this diff