add script to build facemesh ios xcframework

This commit is contained in:
nguyencse 2023-06-06 17:57:21 +07:00
parent 37290f0224
commit 156038543c
8 changed files with 498 additions and 0 deletions

View File

@ -0,0 +1,31 @@
#!/bin/sh
# Create output directories
mkdir -p ./frameworkbuild/FaceMeshSDK/arm64
# The XCFramework is the artifact we'll actually consume.
mkdir -p ./frameworkbuild/FaceMeshSDK/xcframework
# Note: Bazel stores cached build outputs under `/private/var/tmp/...` and, on subsequent builds,
# symlinks the cached files/folders into the `bazel-bin` folder in the project root. So don't be
# afraid of re-running builds; cached results are reused.
# build the arm64 binary framework
# bazel build --copt=-fembed-bitcode --apple_bitcode=embedded --config=ios_arm64 mediapipe/examples/ios/facemeshioslib:FaceMeshIOSLibFramework
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/facemeshioslib:FaceMeshSDK
# use --cxxopt=-O3 to reduce framework size
# bazel build --copt=-O3 --cxxopt=-O3 --config=ios_arm64 mediapipe/examples/ios/facemeshioslib:FaceMeshIOSLibFramework
# The arm64 framework zip will be located at ./bazel-bin/mediapipe/examples/ios/facemeshioslib/FaceMeshSDK.zip
# Call the framework patcher (first argument = the compressed framework .zip, remaining arguments = header file names; in this case FaceMesh.h)
sudo bash ./mediapipe/examples/ios/facemeshioslib/patch_ios_framework.sh ./bazel-bin/mediapipe/examples/ios/facemeshioslib/FaceMeshSDK.zip FaceMesh.h
# The patched .framework folder is produced in the same directory; it is the arm64 build, so copy it into our arm64 folder.
sudo cp -a ./bazel-bin/mediapipe/examples/ios/facemeshioslib/FaceMeshSDK.framework ./frameworkbuild/FaceMeshSDK/arm64
# Create the XCFramework. The classic lipo approach with a plain .framework no longer works:
# Xcode reports "Building for iOS Simulator, but the linked and embedded framework was built for iOS + iOS Simulator".
sudo xcodebuild -create-xcframework \
-framework ./frameworkbuild/FaceMeshSDK/arm64/FaceMeshSDK.framework \
-output ./frameworkbuild/FaceMeshSDK/xcframework/FaceMeshSDK.xcframework
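# NOTE (sketch, not run by this script): only the device (arm64) slice is packaged above. To also
# cover the iOS Simulator, one could build a second slice with a simulator config (e.g.
# --config=ios_x86_64, or ios_sim_arm64 if your .bazelrc defines it), patch and copy it the same
# way (e.g. into ./frameworkbuild/FaceMeshSDK/x86_64), and pass both frameworks to xcodebuild:
# sudo xcodebuild -create-xcframework \
#   -framework ./frameworkbuild/FaceMeshSDK/arm64/FaceMeshSDK.framework \
#   -framework ./frameworkbuild/FaceMeshSDK/x86_64/FaceMeshSDK.framework \
#   -output ./frameworkbuild/FaceMeshSDK/xcframework/FaceMeshSDK.xcframework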

View File

@ -0,0 +1,66 @@
load(
"@build_bazel_rules_apple//apple:ios.bzl",
"ios_framework"
)
load(
"//mediapipe/examples/ios:bundle_id.bzl",
"BUNDLE_ID_PREFIX",
"example_provisioning",
)
licenses(["notice"]) # Apache 2.0
MIN_IOS_VERSION = "11.1"
IOS_FAMILIES = [
"iphone",
"ipad",
]
FRAMEWORK_HEADERS = [
"FaceMesh.h",
]
ios_framework(
name = "FaceMeshSDK",
hdrs = FRAMEWORK_HEADERS,
bundle_id = BUNDLE_ID_PREFIX + ".FaceMeshSDK",
bundle_name = "FaceMeshSDK",
families = IOS_FAMILIES,
infoplists = [
"//mediapipe/examples/ios/common:Info.plist",
# "Info.plist",
],
minimum_os_version = MIN_IOS_VERSION,
visibility = ["//visibility:public"],
deps = [
":FaceMeshObj",
"@ios_opencv//:OpencvFramework",
],
)
objc_library(
name = "FaceMeshObj",
srcs = [
"FaceMesh.mm",
],
hdrs = FRAMEWORK_HEADERS,
copts = ["-std=c++17"],
data = [
"//mediapipe/graphs/face_mesh:face_mesh_ios_lib_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
],
deps = [
"//mediapipe/objc:mediapipe_framework_ios",
"//mediapipe/objc:mediapipe_input_sources_ios",
"//mediapipe/calculators/core:packet_presence_calculator",
# "//mediapipe/objc:mediapipe_layer_renderer", # no need for layer renderer since I don't render
] + select({
"//conditions:default": [
"//mediapipe/graphs/face_mesh:mobile_calculators",
"//mediapipe/framework/formats:landmark_cc_proto",
],
}),
)

View File

@ -0,0 +1,35 @@
#import <CoreVideo/CoreVideo.h>
#import <Foundation/Foundation.h>
@interface FaceMeshLandmarkPoint : NSObject
@property(nonatomic) float x;
@property(nonatomic) float y;
@property(nonatomic) float z;
@end
@interface FaceMeshNormalizedRect : NSObject
@property(nonatomic) float centerX;
@property(nonatomic) float centerY;
@property(nonatomic) float height;
@property(nonatomic) float width;
@property(nonatomic) float rotation;
@end
@protocol FaceMeshDelegate <NSObject>
@optional
/**
* Array of faces, with faces represented by arrays of face landmarks
*/
- (void)didReceiveFaces:(NSArray<NSArray<FaceMeshLandmarkPoint *> *> *)faces;
@end
@interface FaceMesh : NSObject
- (instancetype)init;
- (void)startGraph;
- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer;
- (CVPixelBufferRef)resize:(CVPixelBufferRef)pixelBuffer
width:(int)width
height:(int)height;
@property(weak, nonatomic) id<FaceMeshDelegate> delegate;
@property(nonatomic) size_t timestamp;
@end
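
For anyone consuming this header, a minimal client might look like the sketch below. FaceMeshClient and handleCameraFrame: are illustrative names, not part of this commit; when linking the built FaceMeshSDK.xcframework, the import would typically go through the umbrella header generated by patch_ios_framework.sh. Note that didReceiveFaces: is invoked on a MediaPipe worker thread (see FaceMesh.mm below), so UI work is dispatched back to the main queue.

// Illustrative sketch only -- FaceMeshClient is a hypothetical consumer, not part of this commit.
#import <Foundation/Foundation.h>
#import <CoreVideo/CoreVideo.h>
#import "FaceMesh.h" // or: #import <FaceMeshSDK/FaceMeshSDK.h> when linking the xcframework

@interface FaceMeshClient : NSObject <FaceMeshDelegate>
@property(strong, nonatomic) FaceMesh *faceMesh;
@end

@implementation FaceMeshClient

- (instancetype)init {
  self = [super init];
  if (self) {
    _faceMesh = [[FaceMesh alloc] init];
    _faceMesh.delegate = self; // delegate is weak, so this client must outlive the graph
    [_faceMesh startGraph];
  }
  return self;
}

// Feed each camera frame here (e.g. from an AVCaptureVideoDataOutput callback).
- (void)handleCameraFrame:(CVPixelBufferRef)pixelBuffer {
  [self.faceMesh processVideoFrame:pixelBuffer];
}

#pragma mark - FaceMeshDelegate

// Called on a MediaPipe worker thread; hop to the main queue before touching UI.
- (void)didReceiveFaces:(NSArray<NSArray<FaceMeshLandmarkPoint *> *> *)faces {
  dispatch_async(dispatch_get_main_queue(), ^{
    for (NSArray<FaceMeshLandmarkPoint *> *face in faces) {
      NSLog(@"Face with %lu landmarks", (unsigned long)face.count);
    }
  });
}

@end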

View File

@ -0,0 +1,229 @@
#import "FaceMesh.h"
#import "mediapipe/objc/MPPCameraInputSource.h"
#import "mediapipe/objc/MPPGraph.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
//#import "mediapipe/objc/MPPLayerRenderer.h"
// The graph name must match the name of the bundled .binarypb resource (face_mesh_ios_lib_gpu.binarypb).
static NSString* const kGraphName = @"face_mesh_ios_lib_gpu";
static const char* kInputStream = "input_video";
static const char* kNumFacesInputSidePacket = "num_faces";
static const char* kLandmarksOutputStream = "multi_face_landmarks";
// Max number of faces to detect/process.
static const int kNumFaces = 1;
@interface FaceMesh () <MPPGraphDelegate>
@property(nonatomic) MPPGraph* mediapipeGraph;
@end
@implementation FaceMesh {}
#pragma mark - Cleanup methods
- (void)dealloc {
self.mediapipeGraph.delegate = nil;
[self.mediapipeGraph cancel];
// Ignore errors since we're cleaning up.
[self.mediapipeGraph closeAllInputStreamsWithError:nil];
[self.mediapipeGraph waitUntilDoneWithError:nil];
}
#pragma mark - MediaPipe graph methods
+ (MPPGraph*)loadGraphFromResource:(NSString*)resource {
// Load the graph config resource.
NSError* configLoadError = nil;
NSBundle* bundle = [NSBundle bundleForClass:[self class]];
if (!resource || resource.length == 0) {
return nil;
}
NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"];
NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError];
if (!data) {
NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError);
return nil;
}
// Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object.
mediapipe::CalculatorGraphConfig config;
config.ParseFromArray(data.bytes, data.length);
// Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object.
MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config];
// Set graph configurations
[newGraph setSidePacket:(mediapipe::MakePacket<int>(kNumFaces))
named:kNumFacesInputSidePacket];
[newGraph addFrameOutputStream:kLandmarksOutputStream
outputPacketType:MPPPacketTypeRaw];
return newGraph;
}
- (instancetype)init {
self = [super init];
if (self) {
self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName];
self.mediapipeGraph.delegate = self;
// // Set maxFramesInFlight to a small value to avoid memory contention
// // for real-time processing.
// self.mediapipeGraph.maxFramesInFlight = 2;
NSLog(@"inited graph %@", kGraphName);
}
return self;
}
- (void)startGraph {
NSError* error;
if (![self.mediapipeGraph startWithError:&error]) {
NSLog(@"Failed to start graph: %@", error);
}
NSLog(@"Started graph %@", kGraphName);
}
#pragma mark - MPPGraphDelegate methods
// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread.
- (void)mediapipeGraph:(MPPGraph*)graph
didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer
fromStream:(const std::string&)streamName {
NSLog(@"recv pixelBuffer from %@", @(streamName.c_str()));
}
// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
- (void)mediapipeGraph:(MPPGraph*)graph
didOutputPacket:(const ::mediapipe::Packet&)packet
fromStream:(const std::string&)streamName {
if (streamName == kLandmarksOutputStream) {
if (packet.IsEmpty()) { // In practice this branch is never taken: FaceLandmarkFrontGpu emits no packet when there are no detections
return;
}
const auto& multi_face_landmarks = packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
// NSLog(@"[TS:%lld] Number of face instances with landmarks: %lu", packet.Timestamp().Value(),
// multi_face_landmarks.size());
NSMutableArray <NSArray <FaceMeshLandmarkPoint *>*>*faceLandmarks = [NSMutableArray new];
for (int face_index = 0; face_index < multi_face_landmarks.size(); ++face_index) {
NSMutableArray *thisFaceLandmarks = [NSMutableArray new];
const auto& landmarks = multi_face_landmarks[face_index];
// NSLog(@"\tNumber of landmarks for face[%d]: %d", face_index, landmarks.landmark_size());
for (int i = 0; i < landmarks.landmark_size(); ++i) {
// NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(),
// landmarks.landmark(i).y(), landmarks.landmark(i).z());
FaceMeshLandmarkPoint *obj_landmark = [FaceMeshLandmarkPoint new];
obj_landmark.x = landmarks.landmark(i).x();
obj_landmark.y = landmarks.landmark(i).y();
obj_landmark.z = landmarks.landmark(i).z();
[thisFaceLandmarks addObject:obj_landmark];
}
[faceLandmarks addObject:thisFaceLandmarks];
}
if([self.delegate respondsToSelector:@selector(didReceiveFaces:)]) {
[self.delegate didReceiveFaces:faceLandmarks];
}
} else {
NSLog(@"Unknown %@ packet with stream name %s", packet.IsEmpty() ? @"EMPTY" : @"NON-EMPTY",streamName.c_str());
}
}
#pragma mark - MPPInputSourceDelegate methods
- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer {
const auto ts =
mediapipe::Timestamp(self.timestamp++ * mediapipe::Timestamp::kTimestampUnitsPerSecond);
NSError* err = nil;
// NSLog(@"sending imageBuffer @%@ to %s", @(ts.DebugString().c_str()), kInputStream);
auto sent = [self.mediapipeGraph sendPixelBuffer:imageBuffer
intoStream:kInputStream
packetType:MPPPacketTypePixelBuffer
timestamp:ts
allowOverwrite:NO
error:&err];
// NSLog(@"imageBuffer %s", sent ? "sent!" : "not sent.");
if (err) {
NSLog(@"sendPixelBuffer error: %@", err);
}
}
// Resize the CVPixelBufferRef with INTER_AREA.
- (CVPixelBufferRef)resize:(CVPixelBufferRef)pixelBuffer
width:(int)width
height:(int)height {
OSType srcType = CVPixelBufferGetPixelFormatType(pixelBuffer);
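// Bytes-per-pixel heuristic: assume 2 channels by default (e.g. 2-bytes-per-pixel 4:2:2 formats); 32-bit ARGB/BGRA buffers use 4.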
size_t channels = 2;
if (srcType == kCVPixelFormatType_32ARGB || srcType == kCVPixelFormatType_32BGRA) {
channels = 4;
}
// Lock the CVPixelBuffer
CVPixelBufferLockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);
// Get the pixel buffer attributes
size_t srcWidth = CVPixelBufferGetWidth(pixelBuffer);
size_t srcHeight = CVPixelBufferGetHeight(pixelBuffer);
size_t bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer);
// Get the base address of the pixel buffer
unsigned char *baseAddress = (unsigned char *)CVPixelBufferGetBaseAddress(pixelBuffer);
// Create a cv::Mat without copying the data
cv::Mat argbImage(srcHeight, srcWidth, CV_8UC(channels), baseAddress, bytesPerRow);
// Create a cv::Mat to hold the resized image
cv::Mat resizedImage;
// Resize the image using cv::resize
cv::resize(argbImage, resizedImage, cv::Size(width, height), 0, 0, cv::INTER_AREA);
// Unlock the CVPixelBuffer
CVPixelBufferUnlockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);
// Create a new CVPixelBuffer with the desired size and format
CVPixelBufferRef resizedPixelBuffer;
CVReturn result = CVPixelBufferCreate(NULL, width, height, srcType, NULL, &resizedPixelBuffer);
if (result != kCVReturnSuccess) {
NSLog(@"Failed to create CVPixelBuffer. Error: %d", result);
return nil;
}
// Lock the resized CVPixelBuffer
CVPixelBufferLockBaseAddress(resizedPixelBuffer, 0);
// Get the base address and bytes per row of the resized pixel buffer
void *resizedBaseAddress = CVPixelBufferGetBaseAddress(resizedPixelBuffer);
size_t resizedBytesPerRow = CVPixelBufferGetBytesPerRow(resizedPixelBuffer);
// Create a cv::Mat wrapper for the resized pixel buffer
cv::Mat resizedPixelBufferMat(height, width, CV_8UC(channels), resizedBaseAddress, resizedBytesPerRow);
// Convert the resized image (cv::Mat) to the resized pixel buffer (CVPixelBuffer)
resizedImage.copyTo(resizedPixelBufferMat);
// Unlock the resized CVPixelBuffer
CVPixelBufferUnlockBaseAddress(resizedPixelBuffer, 0);
// Return the resized CVPixelBuffer (the caller owns it and must release it with CVPixelBufferRelease)
return resizedPixelBuffer;
}
@end
@implementation FaceMeshLandmarkPoint
@end
@implementation FaceMeshNormalizedRect
@end

View File

@ -0,0 +1,60 @@
#!/bin/bash
set -eu
set -o pipefail
[[ $# -lt 2 ]] && echo "Usage: $0 <path/to/zipped .framework> <hdrs>..." && exit 1
zipped=$(python3 -c "import os; print(os.path.realpath('$1'))"); shift
name=$(basename "$zipped" .zip)
parent=$(dirname "$zipped")
named="$parent"/"$name".framework
unzip "$zipped" -d "$parent"
mkdir "$named"/Modules
cat << EOF >"$named"/Modules/module.modulemap
framework module $name {
umbrella header "$name.h"
export *
module * { export * }
link framework "AVFoundation"
link framework "Accelerate"
link framework "AssetsLibrary"
link framework "CoreFoundation"
link framework "CoreGraphics"
link framework "CoreImage"
link framework "CoreMedia"
link framework "CoreVideo"
link framework "GLKit"
link framework "Metal"
link framework "MetalKit"
link framework "OpenGLES"
link framework "QuartzCore"
link framework "UIKit"
}
EOF
# NOTE: All these linked frameworks are required by mediapipe/objc.
cat << EOF >"$named"/Headers/$name.h
//
// $name.h
// $name
//
#import <Foundation/Foundation.h>
//! Project version number for $name.
FOUNDATION_EXPORT double ${name}VersionNumber;
//! Project version string for $name.
FOUNDATION_EXPORT const unsigned char ${name}VersionString[];
// In this header, you should import all the public headers of your framework using statements like #import <$name/PublicHeader.h>
EOF
until [[ $# -eq 0 ]]; do
printf '#import "%s"\n' "$1" >>"$named"/Headers/$name.h
shift
done

BIN
mediapipe/graphs/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -67,3 +67,10 @@ mediapipe_binary_graph(
output_name = "face_mesh_mobile_gpu.binarypb", output_name = "face_mesh_mobile_gpu.binarypb",
deps = [":mobile_calculators"], deps = [":mobile_calculators"],
) )
mediapipe_binary_graph(
name = "face_mesh_ios_lib_gpu_binary_graph",
graph = "face_mesh_ios_lib.pbtxt",
output_name = "face_mesh_ios_lib_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,70 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# Max number of faces to detect/process. (int)
input_side_packet: "num_faces"
# Output image with rendered results. (GpuBuffer)
# No rendering in this library build, so the output stream below stays disabled.
# output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs. (The throttling node below is
# disabled in this graph; frames are fed directly to FaceLandmarkFrontGpu.)
# node {
# calculator: "FlowLimiterCalculator"
# input_stream: "input_video"
# input_stream: "FINISHED:output_video"
# input_stream_info: {
# tag_index: "FINISHED"
# back_edge: true
# }
# output_stream: "throttled_input_video"
#}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
# input_stream: "IMAGE:throttled_input_video"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# output_stream: "DETECTIONS:face_detections"
# output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
# node {
# calculator: "FaceRendererGpu"
# input_stream: "IMAGE:throttled_input_video"
# input_stream: "LANDMARKS:multi_face_landmarks"
# input_stream: "NORM_RECTS:face_rects_from_landmarks"
# input_stream: "DETECTIONS:face_detections"
# output_stream: "IMAGE:output_video"
#}