add multi pose estimation and multi person holistic tracking

parent f405c764b9 · commit b72fc70c01

@@ -35,7 +35,7 @@ fn face_mesh() -> Result<()> {
             highgui::imshow(window, &mut flip_frame)?;
 
             if !result.is_empty() {
-                let landmark = result[0][0];
+                let landmark = result[0].data[0];
                 println!("LANDMARK: {} {} {}", landmark.x, landmark.y, landmark.z);
             }
         } else {

@@ -35,7 +35,7 @@ pub fn hand_tracking() -> Result<()> {
             highgui::imshow(window, &mut flip_frame)?;
 
             if !result.is_empty() {
-                let landmark = result[0][0];
+                let landmark = result[0].data[0];
                 println!("LANDMARK: {} {} {}", landmark.x, landmark.y, landmark.z);
             }
         } else {
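
Note: both hunks above track an API change made later in this commit, not an example-only tweak: `FaceMeshDetector::process` and `HandDetector::process` now return typed `Vec<FaceMesh>` / `Vec<Hand>` instead of nested `Vec<Vec<Landmark>>`. A minimal sketch of the new access pattern for the hand example, assuming at least one hand was detected:

    // result: Vec<Hand>; each Hand carries 21 landmarks in `data`
    let hand = &result[0];
    let wrist = hand.data[hands::HandLandmark::WRIST as usize];
    println!("wrist at {} {} {}", wrist.x, wrist.y, wrist.z);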

@@ -31,12 +31,11 @@ fn face_mesh() -> Result<()> {
 
             println!("processing");
             let result = detector.process(&flip_frame);
-            println!("received {} types of landmarks", result.len());
 
             highgui::imshow(window, &mut flip_frame)?;
 
-            if !result[0].is_empty() {
-                let landmark = result[0][0][0];
+            if let Some(pose) = result.pose {
+                let landmark = pose.data[0];
                 println!("LANDMARK: {} {} {}", landmark.x, landmark.y, landmark.z);
             }
         } else {
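
This hunk follows the new `HolisticDetector::process` signature, which returns a single `HolisticDetection` with optional parts instead of a triple-nested landmark vector. The remaining fields follow the same pattern as `pose`; a sketch using the field names from the new struct in src/holistic.rs:

    if let Some(face) = result.face {
        println!("face has {} landmarks", face.data.len()); // 478
    }
    if let Some(left) = result.left_hand {
        let wrist = left.data[0];
        println!("left wrist at {} {} {}", wrist.x, wrist.y, wrist.z);
    }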

examples/multi_person_holistic_tracking.rs (new file, 57 lines)

@@ -0,0 +1,57 @@
+use mediapipe::*;
+use opencv::prelude::*;
+use opencv::{highgui, imgproc, videoio, Result};
+
+fn holistic_tracking() -> Result<()> {
+    let window = "video capture";
+
+    highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
+
+    let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
+    if !cap.is_opened()? {
+        panic!("Unable to open default cam")
+    }
+
+    cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
+    cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
+    cap.set(videoio::CAP_PROP_FPS, 30.0)?;
+
+    let mut detector = holistic::MultiPersonHolisticDetector::default();
+
+    let mut raw_frame = Mat::default();
+    let mut rgb_frame = Mat::default();
+    let mut flip_frame = Mat::default();
+    loop {
+        cap.read(&mut raw_frame)?;
+
+        let size = raw_frame.size()?;
+        if size.width > 0 && !raw_frame.empty() {
+            imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
+            opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
+
+            println!("processing");
+            let result = detector.process(&flip_frame);
+
+            highgui::imshow(window, &mut flip_frame)?;
+
+            if !result.is_empty() {
+                if let Some(pose) = &result[0].pose {
+                    let landmark = pose.data[0];
+                    println!("LANDMARK: {} {} {}", landmark.x, landmark.y, landmark.z);
+                }
+            }
+        } else {
+            println!("WARN: Skip empty frame");
+        }
+
+        let key = highgui::wait_key(10)?;
+        if key > 0 && key != 255 {
+            break;
+        }
+    }
+    Ok(())
+}
+
+fn main() {
+    holistic_tracking().unwrap()
+}
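
The example above only inspects the first detection; with several people in frame, `result` holds one `HolisticDetection` per person. A sketch iterating all of them:

    for (i, person) in result.iter().enumerate() {
        if let Some(pose) = &person.pose {
            let nose = pose.data[pose::PoseLandmark::NOSE as usize];
            println!("person {}: nose at {} {} {}", i, nose.x, nose.y, nose.z);
        }
    }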

examples/multi_pose_estimation.rs (new file, 55 lines)

@@ -0,0 +1,55 @@
+use mediapipe::*;
+use opencv::prelude::*;
+use opencv::{highgui, imgproc, videoio, Result};
+
+pub fn pose_estimation() -> Result<()> {
+    let window = "video capture";
+
+    highgui::named_window(window, highgui::WINDOW_AUTOSIZE)?;
+
+    let mut cap = videoio::VideoCapture::new(0, videoio::CAP_ANY)?;
+    if !cap.is_opened()? {
+        panic!("Unable to open default cam")
+    }
+
+    cap.set(videoio::CAP_PROP_FRAME_WIDTH, 640.0)?;
+    cap.set(videoio::CAP_PROP_FRAME_HEIGHT, 480.0)?;
+    cap.set(videoio::CAP_PROP_FPS, 30.0)?;
+
+    let mut detector = pose::MultiPoseDetector::default();
+
+    let mut raw_frame = Mat::default();
+    let mut rgb_frame = Mat::default();
+    let mut flip_frame = Mat::default();
+    loop {
+        cap.read(&mut raw_frame)?;
+
+        let size = raw_frame.size()?;
+        if size.width > 0 && !raw_frame.empty() {
+            imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
+            opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
+
+            println!("processing");
+            let result = detector.process(&flip_frame);
+
+            highgui::imshow(window, &mut flip_frame)?;
+
+            if !result.is_empty() {
+                let landmark = result[0].data[0];
+                println!("LANDMARK: {} {} {}", landmark.x, landmark.y, landmark.z);
+            }
+        } else {
+            println!("WARN: Skip empty frame");
+        }
+
+        let key = highgui::wait_key(10)?;
+        if key > 0 && key != 255 {
+            break;
+        }
+    }
+    Ok(())
+}
+
+fn main() {
+    pose_estimation().unwrap()
+}
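
Likewise, `MultiPoseDetector::process` returns one `Pose` per person, while the example prints only the first. A fuller sketch:

    println!("detected {} poses", result.len());
    for pose in &result {
        let nose = pose.data[pose::PoseLandmark::NOSE as usize];
        println!("nose at {} {} {}", nose.x, nose.y, nose.z);
    }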

@@ -10,7 +10,7 @@ impl FaceMeshDetector {
         let graph = Detector::new(
             include_str!("graphs/face_mesh_desktop_live.pbtxt"),
             vec![Output {
-                type_: FeatureType::Face,
+                type_: FeatureType::Faces,
                 name: "multi_face_landmarks".into(),
             }],
         );

@@ -19,9 +19,17 @@ impl FaceMeshDetector {
     }
 
     /// Processes the input frame, returns a face mesh if detected.
-    pub fn process(&mut self, input: &Mat) -> Vec<Vec<Landmark>> {
+    pub fn process(&mut self, input: &Mat) -> Vec<FaceMesh> {
         let landmarks = self.graph.process(input);
-        landmarks[0].clone()
+        let mut faces = vec![];
+
+        for face_landmarks in landmarks[0].iter() {
+            let mut face = FaceMesh::default();
+            face.data.copy_from_slice(&face_landmarks[..]);
+            faces.push(face);
+        }
+
+        faces
     }
 }
 
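
One caveat: `copy_from_slice` panics when the source length differs from the destination array (478 landmarks for a face mesh), so this conversion assumes the graph always emits complete landmark lists. A hypothetical defensive variant would filter first:

    let mut faces = vec![];
    for face_landmarks in landmarks[0].iter() {
        // skip incomplete detections instead of panicking on length mismatch
        if face_landmarks.len() == 478 {
            let mut face = FaceMesh::default();
            face.data.copy_from_slice(&face_landmarks[..]);
            faces.push(face);
        }
    }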
src/graphs/multi_person_holistic_tracking_cpu.pbtxt (new file, 55 lines)

@@ -0,0 +1,55 @@
+# Tracks pose + hands + face landmarks.
+
+# CPU image. (ImageFrame)
+input_stream: "input_video"
+
+output_stream: "multi_pose_landmarks"
+
+output_stream: "pose_rois"
+
+output_stream: "pose_detections"
+
+output_stream: "multi_left_hand_landmarks"
+
+output_stream: "multi_right_hand_landmarks"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph
+# to 1. This prevents the downstream nodes from queuing up incoming images and
+# data excessively, which leads to increased latency and memory usage, unwanted
+# in real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
+      max_in_flight: 1
+      max_in_queue: 1
+      # Timeout is disabled (set to 0) as first frame processing can take more
+      # than 1 second.
+      in_flight_timeout: 0
+    }
+  }
+}
+
+node {
+  calculator: "MultiPersonHolisticLandmarkCpu"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "POSE_LANDMARKS:multi_pose_landmarks"
+  output_stream: "POSE_ROI:pose_rois"
+  output_stream: "POSE_DETECTION:pose_detections"
+  output_stream: "FACE_LANDMARKS:multi_face_landmarks"
+  output_stream: "LEFT_HAND_LANDMARKS:multi_left_hand_landmarks"
+  output_stream: "RIGHT_HAND_LANDMARKS:multi_right_hand_landmarks"
+}
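
The stream names declared in this graph are the handles the Rust side subscribes to: each one is registered as an `Output` in `MultiPersonHolisticDetector::new()` later in this commit, for example:

    Output {
        type_: FeatureType::Poses,
        name: "multi_pose_landmarks".into(),
    }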

src/graphs/multi_person_pose_tracking_cpu.pbtxt (new file, 53 lines)

@@ -0,0 +1,53 @@
+# MediaPipe graph that performs pose tracking with TensorFlow Lite on CPU.
+
+# CPU buffer. (ImageFrame)
+input_stream: "input_video"
+
+# Detected pose landmarks, one list per person. (NormalizedLandmarkList)
+output_stream: "multi_pose_landmarks"
+
+output_stream: "pose_detections"
+
+output_stream: "roi_from_landmarks"
+
+# Generates side packet to enable segmentation.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:enable_segmentation"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { bool_value: true }
+    }
+  }
+}
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph
+# to 1. This prevents the downstream nodes from queuing up incoming images and
+# data excessively, which leads to increased latency and memory usage, unwanted
+# in real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Subgraph that detects poses and corresponding landmarks.
+node {
+  calculator: "MultiPoseLandmarkCpu"
+  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "LANDMARKS:multi_pose_landmarks"
+  output_stream: "DETECTION:pose_detections"
+  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
+}
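
Note: "multi_pose_landmarks" is the stream that `pose::MultiPoseDetector` (added below) registers as its `FeatureType::Poses` output, so the name here and the one in src/pose.rs must stay in sync.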

src/hands.rs

@@ -1,6 +1,8 @@
 //! Hand detection utilities.
 use super::*;
 
+pub const NUM_HAND_LANDMARKS: usize = 21;
+
 /// Hand landmark indices.
 pub enum HandLandmark {
     WRIST = 0,

@@ -44,9 +46,17 @@ impl HandDetector {
     }
 
     /// Processes the input frame, returns a list of hands
-    pub fn process(&mut self, input: &Mat) -> Vec<Vec<Landmark>> {
+    pub fn process(&mut self, input: &Mat) -> Vec<Hand> {
         let result = self.graph.process(input);
-        result[0].clone()
+        let mut hands = vec![];
+
+        for hand_landmarks in result[0].iter() {
+            let mut hand = Hand::default();
+            hand.data.copy_from_slice(&hand_landmarks[..]);
+            hands.push(hand);
+        }
+
+        hands
     }
 }
 
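
Callers can now rely on the fixed landmark count instead of checking inner vector lengths. A usage sketch, assuming a `HandDetector` and an input frame as in the examples:

    let hands: Vec<Hand> = detector.process(&flip_frame);
    for hand in &hands {
        // hand.data is [Landmark; 21], i.e. NUM_HAND_LANDMARKS entries
        let wrist = hand.data[HandLandmark::WRIST as usize];
        println!("wrist at {} {} {}", wrist.x, wrist.y, wrist.z);
    }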

src/holistic.rs

@@ -5,6 +5,14 @@ pub struct HolisticDetector {
     graph: Detector,
 }
 
+#[derive(Clone, Debug)]
+pub struct HolisticDetection {
+    pub pose: Option<Pose>,
+    pub face: Option<FaceMesh>,
+    pub left_hand: Option<Hand>,
+    pub right_hand: Option<Hand>,
+}
+
 impl HolisticDetector {
     pub fn new() -> Self {
         let outputs = vec![

@@ -32,9 +40,44 @@ impl HolisticDetector {
     }
 
     /// Processes the input frame, returns landmarks if detected
-    pub fn process(&mut self, input: &Mat) -> Vec<Vec<Vec<Landmark>>> {
+    pub fn process(&mut self, input: &Mat) -> HolisticDetection {
         let landmarks = self.graph.process(input);
-        landmarks.clone()
+
+        let mut pose = None;
+        let mut face = None;
+        let mut left_hand = None;
+        let mut right_hand = None;
+
+        if !landmarks[0].is_empty() {
+            let mut p = Pose::default();
+            p.data.copy_from_slice(&landmarks[0][0][..]);
+            pose = Some(p);
+        }
+
+        if !landmarks[1].is_empty() {
+            let mut f = FaceMesh::default();
+            f.data.copy_from_slice(&landmarks[1][0][..]);
+            face = Some(f);
+        }
+
+        if !landmarks[2].is_empty() {
+            let mut l = Hand::default();
+            l.data.copy_from_slice(&landmarks[2][0][..]);
+            left_hand = Some(l);
+        }
+
+        if !landmarks[3].is_empty() {
+            let mut r = Hand::default();
+            r.data.copy_from_slice(&landmarks[3][0][..]);
+            right_hand = Some(r);
+        }
+
+        HolisticDetection {
+            pose,
+            face,
+            left_hand,
+            right_hand,
+        }
     }
 }
 
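
The indices into `landmarks` follow the order in which the outputs are registered in `new()`: 0 = pose, 1 = face, 2 = left hand, 3 = right hand. The multi-person variant below relies on the same positional convention.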

@@ -43,3 +86,96 @@ impl Default for HolisticDetector {
         Self::new()
     }
 }
+
+pub struct MultiPersonHolisticDetector {
+    graph: Detector,
+}
+
+impl MultiPersonHolisticDetector {
+    pub fn new() -> Self {
+        let outputs = vec![
+            Output {
+                type_: FeatureType::Poses,
+                name: "multi_pose_landmarks".into(),
+            },
+            Output {
+                type_: FeatureType::Faces,
+                name: "multi_face_landmarks".into(),
+            },
+            Output {
+                type_: FeatureType::Hands,
+                name: "multi_left_hand_landmarks".into(),
+            },
+            Output {
+                type_: FeatureType::Hands,
+                name: "multi_right_hand_landmarks".into(),
+            },
+        ];
+
+        let graph = Detector::new(
+            include_str!("graphs/multi_person_holistic_tracking_cpu.pbtxt"),
+            outputs,
+        );
+
+        Self { graph }
+    }
+
+    /// Processes the input frame, returns one detection per person found.
+    pub fn process(&mut self, input: &Mat) -> Vec<HolisticDetection> {
+        let landmarks = self.graph.process(input);
+
+        // Streams may report different person counts; iterate to the longest.
+        let max_landmarks = landmarks.iter().map(|l| l.len()).max().unwrap_or(0);
+
+        let mut detections = vec![];
+
+        for i in 0..max_landmarks {
+            let mut pose = None;
+            let mut face = None;
+            let mut left_hand = None;
+            let mut right_hand = None;
+
+            if landmarks[0].len() > i {
+                let mut p = Pose::default();
+                p.data.copy_from_slice(&landmarks[0][i][..]);
+                pose = Some(p);
+            }
+
+            if landmarks[1].len() > i {
+                let mut f = FaceMesh::default();
+                f.data.copy_from_slice(&landmarks[1][i][..]);
+                face = Some(f);
+            }
+
+            if landmarks[2].len() > i {
+                let mut l = Hand::default();
+                l.data.copy_from_slice(&landmarks[2][i][..]);
+                left_hand = Some(l);
+            }
+
+            if landmarks[3].len() > i {
+                let mut r = Hand::default();
+                r.data.copy_from_slice(&landmarks[3][i][..]);
+                right_hand = Some(r);
+            }
+
+            detections.push(HolisticDetection {
+                pose,
+                face,
+                left_hand,
+                right_hand,
+            });
+        }
+
+        detections
+    }
+}
+
+impl Default for MultiPersonHolisticDetector {
+    fn default() -> Self {
+        Self::new()
+    }
+}
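
Because the four landmark streams are zipped purely by index, this pairing assumes the graph reports people in the same order on every stream; any detection may carry `None` fields when a stream saw fewer people. A consuming sketch:

    let people = detector.process(&flip_frame);
    for person in &people {
        let both_hands = person.left_hand.is_some() && person.right_hand.is_some();
        println!("pose: {}, both hands: {}", person.pose.is_some(), both_hands);
    }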

@@ -43,7 +43,7 @@ impl FeatureType {
             FeatureType::Face => 478,
             FeatureType::Faces => 478,
             FeatureType::Hand => 21,
-            FeatureType::Hands => 42,
+            FeatureType::Hands => 21,
             FeatureType::Pose => 33,
             FeatureType::Poses => 33,
         }
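
The `Hands` count changes from 42 to 21 to match the per-hand landmark count: the new multi-hand streams deliver one 21-landmark list per hand (`Hand { data: [Landmark; 21] }`, `NUM_HAND_LANDMARKS`), whereas the old value of 42 presumably counted a left/right pair together.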

@@ -100,6 +100,7 @@ impl Default for Landmark {
 
 /// Represents a detected pose, as 33 landmarks.
 /// Landmark names are in [pose::PoseLandmark].
+#[derive(Clone, Debug)]
 pub struct Pose {
     pub data: [Landmark; 33],
 }

@@ -114,12 +115,13 @@ impl Default for Pose {
 
 /// Represents a detected hand, as 21 landmarks.
 /// Landmark names are in [hands::HandLandmark]
-#[derive(Default)]
+#[derive(Clone, Debug, Default)]
 pub struct Hand {
     pub data: [Landmark; 21],
 }
 
 /// Represents a detected face mesh, as 478 landmarks.
+#[derive(Clone, Debug)]
 pub struct FaceMesh {
     pub data: [Landmark; 478],
 }
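
With `Clone` and `Debug` now derived across `Pose`, `Hand`, and `FaceMesh`, detections can be stored or logged directly. A small sketch, assuming the `MultiPoseDetector` from src/pose.rs:

    let poses: Vec<Pose> = detector.process(&flip_frame);
    let snapshot = poses.clone(); // possible now that Pose: Clone
    if let Some(first) = snapshot.first() {
        println!("{:?}", first.data[0]); // Landmark already implements Debug
    }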

src/pose.rs

@@ -1,6 +1,8 @@
 //! Pose detection utilities.
 use super::*;
 
+pub const NUM_POSE_LANDMARKS: usize = 33;
+
 /// Pose landmark indices.
 pub enum PoseLandmark {
     NOSE = 0,

@@ -76,3 +78,41 @@ impl Default for PoseDetector {
         Self::new()
     }
 }
+
+pub struct MultiPoseDetector {
+    graph: Detector,
+}
+
+impl MultiPoseDetector {
+    pub fn new() -> Self {
+        let graph = Detector::new(
+            include_str!("graphs/multi_person_pose_tracking_cpu.pbtxt"),
+            vec![Output {
+                type_: FeatureType::Poses,
+                name: "multi_pose_landmarks".into(),
+            }],
+        );
+
+        Self { graph }
+    }
+
+    /// Processes the input frame, returns poses if detected.
+    pub fn process(&mut self, input: &Mat) -> Vec<Pose> {
+        let result = self.graph.process(input);
+        let mut poses = vec![];
+
+        for pose_landmarks in result[0].iter() {
+            let mut pose = Pose::default();
+            pose.data.copy_from_slice(&pose_landmarks[..]);
+            poses.push(pose);
+        }
+
+        poses
+    }
+}
+
+impl Default for MultiPoseDetector {
+    fn default() -> Self {
+        Self::new()
+    }
+}