// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#import "FaceEffectViewController.h"
|
|
|
|
#import "mediapipe/objc/MPPCameraInputSource.h"
|
|
#import "mediapipe/objc/MPPGraph.h"
|
|
#import "mediapipe/objc/MPPLayerRenderer.h"
|
|
|
|
#include <map>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
|
#include "mediapipe/framework/calculator_framework.h"
|
|
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
|
|
|
static NSString* const kGraphName = @"face_effect_gpu";

static const char* kInputStream = "input_video";
static const char* kOutputStream = "output_video";
static const char* kMultiFaceGeometryStream = "multi_face_geometry";
static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue";
static const char* kSelectedEffectIdInputStream = "selected_effect_id";
static const char* kUseFaceDetectionInputSourceInputSidePacket = "use_face_detection_input_source";

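// Selects which geometry pipeline the graph runs: YES uses the face detection input source, which
// only renders the axis effect, while NO uses the face landmark input source, which supports all
// three effects. The value is passed into the graph as an input side packet.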
static const BOOL kUseFaceDetectionInputSource = NO;
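// Index of the Z-translation component in the packed (column-major) 4x4 face pose transform
// matrix: column 3, row 2.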
static const int kMatrixTranslationZIndex = 14;

static const int kSelectedEffectIdAxis = 0;
static const int kSelectedEffectIdFacepaint = 1;
static const int kSelectedEffectIdGlasses = 2;

@interface FaceEffectViewController () <MPPGraphDelegate, MPPInputSourceDelegate>

// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and
// sent video frames on _videoQueue.
@property(nonatomic) MPPGraph* graph;

@end

@implementation FaceEffectViewController {
  /// Handle tap gestures.
  UITapGestureRecognizer* _tapGestureRecognizer;
  int _selectedEffectId;

  /// Handles camera access via AVCaptureSession library.
  MPPCameraInputSource* _cameraSource;

  /// Inform the user when camera is unavailable.
  IBOutlet UILabel* _noCameraLabel;
  /// Inform the user about how to switch between effects.
  UILabel* _effectSwitchingHintLabel;
  /// Display the camera preview frames.
  IBOutlet UIView* _liveView;
  /// Render frames in a layer.
  MPPLayerRenderer* _renderer;

  /// Process camera frames on this queue.
  dispatch_queue_t _videoQueue;
}

#pragma mark - Cleanup methods

- (void)dealloc {
  self.graph.delegate = nil;
  [self.graph cancel];
  // Ignore errors since we're cleaning up.
  [self.graph closeAllInputStreamsWithError:nil];
  [self.graph waitUntilDoneWithError:nil];
}

#pragma mark - MediaPipe graph methods

+ (MPPGraph*)loadGraphFromResource:(NSString*)resource {
  // Load the graph config resource.
  NSError* configLoadError = nil;
  NSBundle* bundle = [NSBundle bundleForClass:[self class]];
  if (!resource || resource.length == 0) {
    return nil;
  }
  NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"];
  NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError];
  if (!data) {
    NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError);
    return nil;
  }

  // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object.
  mediapipe::CalculatorGraphConfig config;
  config.ParseFromArray(data.bytes, data.length);

  // Pass the kUseFaceDetectionInputSource flag value as an input side packet into the graph.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets[kUseFaceDetectionInputSourceInputSidePacket] =
      mediapipe::MakePacket<bool>(kUseFaceDetectionInputSource);

  // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object.
  MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config];
  [newGraph addSidePackets:side_packets];
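  // Request the two outputs this app consumes: rendered frames as pixel buffers and the face
  // geometry as raw (protobuf) packets, both delivered through the MPPGraphDelegate callbacks
  // below.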
  [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer];
  [newGraph addFrameOutputStream:kMultiFaceGeometryStream outputPacketType:MPPPacketTypeRaw];
  return newGraph;
}

#pragma mark - UIViewController methods

- (void)viewDidLoad {
  [super viewDidLoad];

  _effectSwitchingHintLabel.hidden = YES;
  _tapGestureRecognizer = [[UITapGestureRecognizer alloc] initWithTarget:self
                                                                  action:@selector(handleTap)];
  [self.view addGestureRecognizer:_tapGestureRecognizer];

  // By default, render the axis effect for the face detection input source and the glasses effect
  // for the face landmark input source.
  if (kUseFaceDetectionInputSource) {
    _selectedEffectId = kSelectedEffectIdAxis;
  } else {
    _selectedEffectId = kSelectedEffectIdGlasses;
  }

  _renderer = [[MPPLayerRenderer alloc] init];
  _renderer.layer.frame = _liveView.layer.bounds;
  [_liveView.layer insertSublayer:_renderer.layer atIndex:0];
  _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop;
  _renderer.mirrored = NO;

  dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class(
      DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0);
  _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute);

  _cameraSource = [[MPPCameraInputSource alloc] init];
  [_cameraSource setDelegate:self queue:_videoQueue];
  _cameraSource.sessionPreset = AVCaptureSessionPresetHigh;
  _cameraSource.cameraPosition = AVCaptureDevicePositionFront;
  // The frame's native format is rotated with respect to the portrait orientation.
  _cameraSource.orientation = AVCaptureVideoOrientationPortrait;
  _cameraSource.videoMirrored = YES;

  self.graph = [[self class] loadGraphFromResource:kGraphName];
  self.graph.delegate = self;
  // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing.
  self.graph.maxFramesInFlight = 2;
}

// In this application, there is only one ViewController which has no navigation to other view
// controllers, and there is only one View with live display showing the result of running the
// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph
// setup/teardown and camera start/stop logic should be updated appropriately in response to the
// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times
// depending on the application navigation flow in that case.
- (void)viewWillAppear:(BOOL)animated {
  [super viewWillAppear:animated];

  [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) {
    if (granted) {
      [self startGraphAndCamera];
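      // The permission completion handler may not be delivered on the main queue, so hop back to
      // it before updating UIKit state.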
      dispatch_async(dispatch_get_main_queue(), ^{
        _noCameraLabel.hidden = YES;
      });
    }
  }];
}

- (void)startGraphAndCamera {
  // Start running self.graph.
  NSError* error;
  if (![self.graph startWithError:&error]) {
    NSLog(@"Failed to start graph: %@", error);
  }

  // Start fetching frames from the camera.
  dispatch_async(_videoQueue, ^{
    [_cameraSource start];
  });
}

#pragma mark - UITapGestureRecognizer methods

// We use the tap gesture recognizer to switch between face effects. This allows users to try
// multiple pre-bundled face effects without a need to recompile the app.
- (void)handleTap {
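  // Hop onto _videoQueue so that _selectedEffectId is only read and written on the same serial
  // queue that processVideoFrame:timestamp:fromSource: runs on.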
  dispatch_async(_videoQueue, ^{
    // Effect switching is disabled for the face detection input source, which only renders the
    // axis effect.
    if (kUseFaceDetectionInputSource) {
      return;
    }

    // Looped effect order: glasses -> facepaint -> axis -> glasses -> ...
    switch (_selectedEffectId) {
      case kSelectedEffectIdAxis: {
        _selectedEffectId = kSelectedEffectIdGlasses;
        break;
      }

      case kSelectedEffectIdFacepaint: {
        _selectedEffectId = kSelectedEffectIdAxis;
        break;
      }

      case kSelectedEffectIdGlasses: {
        _selectedEffectId = kSelectedEffectIdFacepaint;
        break;
      }
    }
  });
}

#pragma mark - MPPGraphDelegate methods

// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread.
- (void)mediapipeGraph:(MPPGraph*)graph
  didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer
            fromStream:(const std::string&)streamName {
  if (streamName == kOutputStream) {
    // Display the captured image on the screen.
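    // Retain the buffer while it crosses over to the main queue; the matching release happens
    // after rendering.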
    CVPixelBufferRetain(pixelBuffer);
    dispatch_async(dispatch_get_main_queue(), ^{
      _effectSwitchingHintLabel.hidden = kUseFaceDetectionInputSource;
      [_renderer renderPixelBuffer:pixelBuffer];
      CVPixelBufferRelease(pixelBuffer);
    });
  }
}

// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
//
// This callback demonstrates how the output face geometry packet can be obtained and used in an
// iOS app. As an example, the Z-translation component of the face pose transform matrix is logged
// for each face; its magnitude approximates that face's distance from the camera in centimeters.
- (void)mediapipeGraph:(MPPGraph*)graph
       didOutputPacket:(const ::mediapipe::Packet&)packet
            fromStream:(const std::string&)streamName {
  if (streamName == kMultiFaceGeometryStream) {
    if (packet.IsEmpty()) {
      NSLog(@"[TS:%lld] No face geometry", packet.Timestamp().Value());
      return;
    }

    const auto& multiFaceGeometry =
        packet.Get<std::vector<::mediapipe::face_geometry::FaceGeometry>>();
    NSLog(@"[TS:%lld] Number of face instances with geometry: %lu ", packet.Timestamp().Value(),
          multiFaceGeometry.size());
    for (int faceIndex = 0; faceIndex < multiFaceGeometry.size(); ++faceIndex) {
      const auto& faceGeometry = multiFaceGeometry[faceIndex];
      NSLog(@"\tApprox. distance away from camera for face[%d]: %.6f cm", faceIndex,
            -faceGeometry.pose_transform_matrix().packed_data(kMatrixTranslationZIndex));
    }
  }
}

#pragma mark - MPPInputSourceDelegate methods

// Must be invoked on _videoQueue.
- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer
                timestamp:(CMTime)timestamp
               fromSource:(MPPInputSource*)source {
  if (source != _cameraSource) {
    NSLog(@"Unknown source: %@", source);
    return;
  }

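  // Convert the CMTime into MediaPipe's timestamp units (kTimestampUnitsPerSecond per second) so
  // that the camera frame and the effect-id packet below carry the same timestamp.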
  mediapipe::Timestamp graphTimestamp(static_cast<mediapipe::TimestampBaseType>(
      mediapipe::Timestamp::kTimestampUnitsPerSecond * CMTimeGetSeconds(timestamp)));

  mediapipe::Packet selectedEffectIdPacket =
      mediapipe::MakePacket<int>(_selectedEffectId).At(graphTimestamp);

  [self.graph sendPixelBuffer:imageBuffer
                   intoStream:kInputStream
                   packetType:MPPPacketTypePixelBuffer
                    timestamp:graphTimestamp];

  // Alongside the input camera frame, we also send the `selected_effect_id` int packet to indicate
  // which effect should be rendered on this frame.
  [self.graph movePacket:std::move(selectedEffectIdPacket)
              intoStream:kSelectedEffectIdInputStream
                   error:nil];
}

@end