diff --git a/.bazelrc b/.bazelrc index 44bc3d0a1..7774473c8 100644 --- a/.bazelrc +++ b/.bazelrc @@ -98,6 +98,9 @@ build:darwin_arm64 --apple_platform_type=macos build:darwin_arm64 --macos_minimum_os=10.16 build:darwin_arm64 --cpu=darwin_arm64 +# Turn off maximum stdout size +build --experimental_ui_max_stdouterr_bytes=-1 + # This bazelrc file is meant to be written by a setup script. try-import %workspace%/.configure.bazelrc diff --git a/WORKSPACE b/WORKSPACE index df2c4f93b..3a539569f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -513,6 +513,9 @@ http_archive( "@//third_party:org_tensorflow_system_python.diff", # Diff is generated with a script, don't update it manually. "@//third_party:org_tensorflow_custom_ops.diff", + # Works around Bazel issue with objc_library. + # See https://github.com/bazelbuild/bazel/issues/19912 + "@//third_party:org_tensorflow_objc_build_fixes.diff", ], patch_args = [ "-p1", diff --git a/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.pbxproj b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.pbxproj new file mode 100644 index 000000000..8a95288c9 --- /dev/null +++ b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.pbxproj @@ -0,0 +1,342 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 56; + objects = { + +/* Begin PBXBuildFile section */ + 8566B55D2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h in Headers */ = {isa = PBXBuildFile; fileRef = 8566B55C2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h */; settings = {ATTRIBUTES = (Public, ); }; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 8566B5592ABABF9A00AAB22A /* MediaPipeTasksDocGen.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = MediaPipeTasksDocGen.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 8566B55C2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MediaPipeTasksDocGen.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8566B5562ABABF9A00AAB22A /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 8566B54F2ABABF9A00AAB22A = { + isa = PBXGroup; + children = ( + 8566B55B2ABABF9A00AAB22A /* MediaPipeTasksDocGen */, + 8566B55A2ABABF9A00AAB22A /* Products */, + ); + sourceTree = ""; + }; + 8566B55A2ABABF9A00AAB22A /* Products */ = { + isa = PBXGroup; + children = ( + 8566B5592ABABF9A00AAB22A /* MediaPipeTasksDocGen.framework */, + ); + name = Products; + sourceTree = ""; + }; + 8566B55B2ABABF9A00AAB22A /* MediaPipeTasksDocGen */ = { + isa = PBXGroup; + children = ( + 8566B55C2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h */, + ); + path = MediaPipeTasksDocGen; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 8566B5542ABABF9A00AAB22A /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 8566B55D2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 8566B5582ABABF9A00AAB22A /* MediaPipeTasksDocGen */ = { + isa = PBXNativeTarget; + buildConfigurationList = 8566B5602ABABF9A00AAB22A /* Build configuration list for PBXNativeTarget "MediaPipeTasksDocGen" */; 
+ buildPhases = ( + 8566B5542ABABF9A00AAB22A /* Headers */, + 8566B5552ABABF9A00AAB22A /* Sources */, + 8566B5562ABABF9A00AAB22A /* Frameworks */, + 8566B5572ABABF9A00AAB22A /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = MediaPipeTasksDocGen; + productName = MediaPipeTasksDocGen; + productReference = 8566B5592ABABF9A00AAB22A /* MediaPipeTasksDocGen.framework */; + productType = "com.apple.product-type.framework"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 8566B5502ABABF9A00AAB22A /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = 1; + LastUpgradeCheck = 1430; + TargetAttributes = { + 8566B5582ABABF9A00AAB22A = { + CreatedOnToolsVersion = 14.3.1; + }; + }; + }; + buildConfigurationList = 8566B5532ABABF9A00AAB22A /* Build configuration list for PBXProject "MediaPipeTasksDocGen" */; + compatibilityVersion = "Xcode 14.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 8566B54F2ABABF9A00AAB22A; + productRefGroup = 8566B55A2ABABF9A00AAB22A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8566B5582ABABF9A00AAB22A /* MediaPipeTasksDocGen */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 8566B5572ABABF9A00AAB22A /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 8566B5552ABABF9A00AAB22A /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 8566B55E2ABABF9A00AAB22A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = 
YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 16.4; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Debug; + }; + 8566B55F2ABABF9A00AAB22A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 16.4; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Release; + }; + 8566B5612ABABF9A00AAB22A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + DEFINES_MODULE = YES; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + ENABLE_MODULE_VERIFIER = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSHumanReadableCopyright = ""; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + MARKETING_VERSION = 1.0; + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++"; + MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu11 gnu++20"; + PRODUCT_BUNDLE_IDENTIFIER = com.google.mediapipe.MediaPipeTasksDocGen; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 
8566B5622ABABF9A00AAB22A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + DEFINES_MODULE = YES; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + ENABLE_MODULE_VERIFIER = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSHumanReadableCopyright = ""; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + MARKETING_VERSION = 1.0; + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++"; + MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu11 gnu++20"; + PRODUCT_BUNDLE_IDENTIFIER = com.google.mediapipe.MediaPipeTasksDocGen; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 8566B5532ABABF9A00AAB22A /* Build configuration list for PBXProject "MediaPipeTasksDocGen" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 8566B55E2ABABF9A00AAB22A /* Debug */, + 8566B55F2ABABF9A00AAB22A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 8566B5602ABABF9A00AAB22A /* Build configuration list for PBXNativeTarget "MediaPipeTasksDocGen" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 8566B5612ABABF9A00AAB22A /* Debug */, + 8566B5622ABABF9A00AAB22A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 8566B5502ABABF9A00AAB22A /* Project object */; +} diff --git a/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..919434a62 --- /dev/null +++ b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 000000000..b3ea17378 --- /dev/null +++ b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/xcuserdata/macd.xcuserdatad/UserInterfaceState.xcuserstate b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/xcuserdata/macd.xcuserdatad/UserInterfaceState.xcuserstate new file mode 100644 index 000000000..d667b462e Binary files /dev/null and b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/project.xcworkspace/xcuserdata/macd.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git a/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/xcuserdata/macd.xcuserdatad/xcschemes/xcschememanagement.plist b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/xcuserdata/macd.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 000000000..adc534a03 --- /dev/null 
+++ b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen.xcodeproj/xcuserdata/macd.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,14 @@ + + + + + SchemeUserState + + MediaPipeTasksDocGen.xcscheme_^#shared#^_ + + orderHint + 0 + + + + diff --git a/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen/MediaPipeTasksDocGen.h b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen/MediaPipeTasksDocGen.h new file mode 100644 index 000000000..2ce44b27b --- /dev/null +++ b/docs/MediaPipeTasksDocGen/MediaPipeTasksDocGen/MediaPipeTasksDocGen.h @@ -0,0 +1,17 @@ +// +// MediaPipeTasksDocGen.h +// MediaPipeTasksDocGen +// +// Created by Mark McDonald on 20/9/2023. +// + +#import + +//! Project version number for MediaPipeTasksDocGen. +FOUNDATION_EXPORT double MediaPipeTasksDocGenVersionNumber; + +//! Project version string for MediaPipeTasksDocGen. +FOUNDATION_EXPORT const unsigned char MediaPipeTasksDocGenVersionString[]; + +// In this header, you should import all the public headers of your framework using statements like +// #import diff --git a/docs/MediaPipeTasksDocGen/Podfile b/docs/MediaPipeTasksDocGen/Podfile new file mode 100644 index 000000000..3c8d8f09d --- /dev/null +++ b/docs/MediaPipeTasksDocGen/Podfile @@ -0,0 +1,11 @@ +# Uncomment the next line to define a global platform for your project +platform :ios, '15.0' + +target 'MediaPipeTasksDocGen' do + # Comment the next line if you don't want to use dynamic frameworks + use_frameworks! + + # Pods for MediaPipeTasksDocGen + pod 'MediaPipeTasksText' + pod 'MediaPipeTasksVision' +end diff --git a/docs/MediaPipeTasksDocGen/README.md b/docs/MediaPipeTasksDocGen/README.md new file mode 100644 index 000000000..475253057 --- /dev/null +++ b/docs/MediaPipeTasksDocGen/README.md @@ -0,0 +1,9 @@ +# MediaPipeTasksDocGen + +This empty project is used to generate reference documentation for the +ObjectiveC and Swift libraries. + +Docs are generated using [Jazzy](https://github.com/realm/jazzy) and published +to [the developer site](https://developers.google.com/mediapipe/solutions/). + +To bump the API version used, edit [`Podfile`](./Podfile). diff --git a/mediapipe/calculators/audio/spectrogram_calculator.proto b/mediapipe/calculators/audio/spectrogram_calculator.proto index d8bca3f76..ac7181f4f 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator.proto +++ b/mediapipe/calculators/audio/spectrogram_calculator.proto @@ -80,7 +80,7 @@ message SpectrogramCalculatorOptions { // If use_local_timestamp is true, the output packet's timestamp is based on // the last sample of the packet and it's inferred from the latest input // packet's timestamp. If false, the output packet's timestamp is based on - // the cumulative timestamping, which is inferred from the intial input + // the cumulative timestamping, which is inferred from the initial input // timestamp and the cumulative number of samples. optional bool use_local_timestamp = 8 [default = false]; } diff --git a/mediapipe/calculators/audio/time_series_framer_calculator.proto b/mediapipe/calculators/audio/time_series_framer_calculator.proto index 9e5b07462..16ecfc97c 100644 --- a/mediapipe/calculators/audio/time_series_framer_calculator.proto +++ b/mediapipe/calculators/audio/time_series_framer_calculator.proto @@ -66,7 +66,7 @@ message TimeSeriesFramerCalculatorOptions { // If use_local_timestamp is true, the output packet's timestamp is based on // the last sample of the packet and it's inferred from the latest input // packet's timestamp. 
If false, the output packet's timestamp is based on - // the cumulative timestamping, which is inferred from the intial input + // the cumulative timestamping, which is inferred from the initial input // timestamp and the cumulative number of samples. optional bool use_local_timestamp = 6 [default = false]; } diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index aacf694c1..729e91492 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -727,6 +727,7 @@ cc_library( "//mediapipe/framework/port:logging", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", + "@com_google_absl//absl/status", ], alwayslink = 1, ) @@ -742,6 +743,7 @@ cc_test( "//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/port:status", "//mediapipe/framework/tool:options_util", + "//mediapipe/util:packet_test_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], diff --git a/mediapipe/calculators/core/packet_sequencer_calculator_test.cc b/mediapipe/calculators/core/packet_sequencer_calculator_test.cc index c08e6bb12..6502fa4e9 100644 --- a/mediapipe/calculators/core/packet_sequencer_calculator_test.cc +++ b/mediapipe/calculators/core/packet_sequencer_calculator_test.cc @@ -71,7 +71,7 @@ TEST_F(PacketSequencerCalculatorTest, IsRegistered) { CalculatorBaseRegistry::IsRegistered("PacketSequencerCalculator")); } -// Shows how control packets recieve timestamps before and after frame packets +// Shows how control packets receive timestamps before and after frame packets // have arrived. TEST_F(PacketSequencerCalculatorTest, ChannelEarly) { CalculatorGraphConfig::Node node_config = BuildNodeConfig(); diff --git a/mediapipe/calculators/core/side_packet_to_stream_calculator.cc b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc index 311f7d815..686d705dd 100644 --- a/mediapipe/calculators/core/side_packet_to_stream_calculator.cc +++ b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc @@ -17,6 +17,7 @@ #include #include +#include "absl/status/status.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/ret_check.h" @@ -32,6 +33,7 @@ namespace { constexpr char kTagAtPreStream[] = "AT_PRESTREAM"; constexpr char kTagAtPostStream[] = "AT_POSTSTREAM"; constexpr char kTagAtZero[] = "AT_ZERO"; +constexpr char kTagAtFirstTick[] = "AT_FIRST_TICK"; constexpr char kTagAtTick[] = "AT_TICK"; constexpr char kTagTick[] = "TICK"; constexpr char kTagAtTimestamp[] = "AT_TIMESTAMP"; @@ -43,6 +45,7 @@ static std::map* kTimestampMap = []() { res->emplace(kTagAtPostStream, Timestamp::PostStream()); res->emplace(kTagAtZero, Timestamp(0)); res->emplace(kTagAtTick, Timestamp::Unset()); + res->emplace(kTagAtFirstTick, Timestamp::Unset()); res->emplace(kTagAtTimestamp, Timestamp::Unset()); return res; }(); @@ -59,8 +62,8 @@ std::string GetOutputTag(const CC& cc) { // timestamp, depending on the tag used to define output stream(s). (One tag can // be used only.) // -// Valid tags are AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, AT_TIMESTAMP -// and corresponding timestamps are Timestamp::PreStream(), +// Valid tags are AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, AT_FIRST_TICK, +// AT_TIMESTAMP and corresponding timestamps are Timestamp::PreStream(), // Timestamp::PostStream(), Timestamp(0), timestamp of a packet received in TICK // input, and timestamp received from a side input. 
// @@ -96,6 +99,7 @@ class SidePacketToStreamCalculator : public CalculatorBase { private: bool is_tick_processing_ = false; + bool close_on_first_tick_ = false; std::string output_tag_; }; REGISTER_CALCULATOR(SidePacketToStreamCalculator); @@ -103,13 +107,16 @@ REGISTER_CALCULATOR(SidePacketToStreamCalculator); absl::Status SidePacketToStreamCalculator::GetContract(CalculatorContract* cc) { const auto& tags = cc->Outputs().GetTags(); RET_CHECK(tags.size() == 1 && kTimestampMap->count(*tags.begin()) == 1) - << "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and " - "AT_TIMESTAMP tags is allowed and required to specify output " - "stream(s)."; - RET_CHECK( - (cc->Outputs().HasTag(kTagAtTick) && cc->Inputs().HasTag(kTagTick)) || - (!cc->Outputs().HasTag(kTagAtTick) && !cc->Inputs().HasTag(kTagTick))) - << "Either both of TICK and AT_TICK should be used or none of them."; + << "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, " + "AT_FIRST_TICK and AT_TIMESTAMP tags is allowed and required to " + "specify output stream(s)."; + const bool has_tick_output = + cc->Outputs().HasTag(kTagAtTick) || cc->Outputs().HasTag(kTagAtFirstTick); + const bool has_tick_input = cc->Inputs().HasTag(kTagTick); + RET_CHECK((has_tick_output && has_tick_input) || + (!has_tick_output && !has_tick_input)) + << "Either both TICK input and tick (AT_TICK/AT_FIRST_TICK) output " + "should be used or none of them."; RET_CHECK((cc->Outputs().HasTag(kTagAtTimestamp) && cc->InputSidePackets().HasTag(kTagSideInputTimestamp)) || (!cc->Outputs().HasTag(kTagAtTimestamp) && @@ -148,11 +155,17 @@ absl::Status SidePacketToStreamCalculator::Open(CalculatorContext* cc) { // timestamp bound update. cc->SetOffset(TimestampDiff(0)); } + if (output_tag_ == kTagAtFirstTick) { + close_on_first_tick_ = true; + } return absl::OkStatus(); } absl::Status SidePacketToStreamCalculator::Process(CalculatorContext* cc) { if (is_tick_processing_) { + if (cc->Outputs().Get(output_tag_, 0).IsClosed()) { + return absl::OkStatus(); + } // TICK input is guaranteed to be non-empty, as it's the only input stream // for this calculator. 
const auto& timestamp = cc->Inputs().Tag(kTagTick).Value().Timestamp(); @@ -160,6 +173,9 @@ absl::Status SidePacketToStreamCalculator::Process(CalculatorContext* cc) { cc->Outputs() .Get(output_tag_, i) .AddPacket(cc->InputSidePackets().Index(i).At(timestamp)); + if (close_on_first_tick_) { + cc->Outputs().Get(output_tag_, i).Close(); + } } return absl::OkStatus(); @@ -170,6 +186,7 @@ absl::Status SidePacketToStreamCalculator::Process(CalculatorContext* cc) { absl::Status SidePacketToStreamCalculator::Close(CalculatorContext* cc) { if (!cc->Outputs().HasTag(kTagAtTick) && + !cc->Outputs().HasTag(kTagAtFirstTick) && !cc->Outputs().HasTag(kTagAtTimestamp)) { const auto& timestamp = kTimestampMap->at(output_tag_); for (int i = 0; i < cc->Outputs().NumEntries(output_tag_); ++i) { diff --git a/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc b/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc index 086b73fcd..6c0941b44 100644 --- a/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc +++ b/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc @@ -27,13 +27,17 @@ #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/tool/options_util.h" +#include "mediapipe/util/packet_test_util.h" namespace mediapipe { namespace { -using testing::HasSubstr; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::IsEmpty; -TEST(SidePacketToStreamCalculator, WrongConfig_MissingTick) { +TEST(SidePacketToStreamCalculator, WrongConfigWithMissingTick) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -52,10 +56,35 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MissingTick) { EXPECT_THAT( status.message(), HasSubstr( - "Either both of TICK and AT_TICK should be used or none of them.")); + "Either both TICK input and tick (AT_TICK/AT_FIRST_TICK) output " + "should be used or none of them.")); } -TEST(SidePacketToStreamCalculator, WrongConfig_MissingTimestampSideInput) { +TEST(SidePacketToStreamCalculator, + WrongConfigWithMissingTickForFirstTickProcessing) { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie( + R"pb( + input_stream: "tick" + input_side_packet: "side_packet" + output_stream: "packet" + node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "side_packet" + output_stream: "AT_FIRST_TICK:packet" + } + )pb"); + CalculatorGraph graph; + auto status = graph.Initialize(graph_config); + EXPECT_FALSE(status.ok()); + EXPECT_THAT( + status.message(), + HasSubstr( + "Either both TICK input and tick (AT_TICK/AT_FIRST_TICK) output " + "should be used or none of them.")); +} + +TEST(SidePacketToStreamCalculator, WrongConfigWithMissingTimestampSideInput) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -76,7 +105,7 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MissingTimestampSideInput) { "or none of them.")); } -TEST(SidePacketToStreamCalculator, WrongConfig_NonExistentTag) { +TEST(SidePacketToStreamCalculator, WrongConfigWithNonExistentTag) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -92,14 +121,13 @@ TEST(SidePacketToStreamCalculator, WrongConfig_NonExistentTag) { CalculatorGraph graph; auto status = graph.Initialize(graph_config); EXPECT_FALSE(status.ok()); - EXPECT_THAT( - status.message(), - HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and " - "AT_TIMESTAMP tags is allowed and required to specify output " - 
"stream(s).")); + EXPECT_THAT(status.message(), + HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, " + "AT_TICK, AT_FIRST_TICK and AT_TIMESTAMP tags is " + "allowed and required to specify output stream(s).")); } -TEST(SidePacketToStreamCalculator, WrongConfig_MixedTags) { +TEST(SidePacketToStreamCalculator, WrongConfigWithMixedTags) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -117,14 +145,13 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MixedTags) { CalculatorGraph graph; auto status = graph.Initialize(graph_config); EXPECT_FALSE(status.ok()); - EXPECT_THAT( - status.message(), - HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and " - "AT_TIMESTAMP tags is allowed and required to specify output " - "stream(s).")); + EXPECT_THAT(status.message(), + HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, " + "AT_TICK, AT_FIRST_TICK and AT_TIMESTAMP tags is " + "allowed and required to specify output stream(s).")); } -TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughSidePackets) { +TEST(SidePacketToStreamCalculator, WrongConfigWithNotEnoughSidePackets) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -146,7 +173,7 @@ TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughSidePackets) { "Same number of input side packets and output streams is required.")); } -TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughOutputStreams) { +TEST(SidePacketToStreamCalculator, WrongConfigWithNotEnoughOutputStreams) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -248,7 +275,50 @@ TEST(SidePacketToStreamCalculator, AtTick) { tick_and_verify(/*at_timestamp=*/1025); } -TEST(SidePacketToStreamCalculator, AtTick_MultipleSidePackets) { +TEST(SidePacketToStreamCalculator, AtFirstTick) { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie( + R"pb( + input_stream: "tick" + input_side_packet: "side_packet" + output_stream: "packet" + node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:tick" + input_side_packet: "side_packet" + output_stream: "AT_FIRST_TICK:packet" + } + )pb"); + std::vector output_packets; + tool::AddVectorSink("packet", &graph_config, &output_packets); + CalculatorGraph graph; + + MP_ASSERT_OK(graph.Initialize(graph_config)); + const int expected_value = 20; + const Timestamp kTestTimestamp(1234); + MP_ASSERT_OK( + graph.StartRun({{"side_packet", MakePacket(expected_value)}})); + + auto insert_tick = [&graph](Timestamp at_timestamp) { + MP_ASSERT_OK(graph.AddPacketToInputStream( + "tick", MakePacket(/*doesn't matter*/ 1).At(at_timestamp))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + }; + + insert_tick(kTestTimestamp); + + EXPECT_THAT(output_packets, + ElementsAre(PacketContainsTimestampAndPayload( + Eq(kTestTimestamp), Eq(expected_value)))); + + output_packets.clear(); + + // Should not result in an additional output. 
+ insert_tick(kTestTimestamp + 1); + EXPECT_THAT(output_packets, IsEmpty()); +} + +TEST(SidePacketToStreamCalculator, AtTickWithMultipleSidePackets) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( @@ -302,6 +372,62 @@ TEST(SidePacketToStreamCalculator, AtTick_MultipleSidePackets) { tick_and_verify(/*at_timestamp=*/1025); } +TEST(SidePacketToStreamCalculator, AtFirstTickWithMultipleSidePackets) { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie( + R"pb( + input_stream: "tick" + input_side_packet: "side_packet0" + input_side_packet: "side_packet1" + output_stream: "packet0" + output_stream: "packet1" + node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:tick" + input_side_packet: "side_packet0" + input_side_packet: "side_packet1" + output_stream: "AT_FIRST_TICK:0:packet0" + output_stream: "AT_FIRST_TICK:1:packet1" + } + )pb"); + std::vector output_packets0; + tool::AddVectorSink("packet0", &graph_config, &output_packets0); + std::vector output_packets1; + tool::AddVectorSink("packet1", &graph_config, &output_packets1); + CalculatorGraph graph; + + MP_ASSERT_OK(graph.Initialize(graph_config)); + const int expected_value0 = 20; + const int expected_value1 = 128; + const Timestamp kTestTimestamp(1234); + MP_ASSERT_OK( + graph.StartRun({{"side_packet0", MakePacket(expected_value0)}, + {"side_packet1", MakePacket(expected_value1)}})); + + auto insert_tick = [&graph](Timestamp at_timestamp) { + MP_ASSERT_OK(graph.AddPacketToInputStream( + "tick", MakePacket(/*doesn't matter*/ 1).At(at_timestamp))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + }; + + insert_tick(kTestTimestamp); + + EXPECT_THAT(output_packets0, + ElementsAre(PacketContainsTimestampAndPayload( + Eq(kTestTimestamp), Eq(expected_value0)))); + EXPECT_THAT(output_packets1, + ElementsAre(PacketContainsTimestampAndPayload( + Eq(kTestTimestamp), Eq(expected_value1)))); + + output_packets0.clear(); + output_packets1.clear(); + + // Should not result in an additional output. + insert_tick(kTestTimestamp + 1); + EXPECT_THAT(output_packets0, IsEmpty()); + EXPECT_THAT(output_packets1, IsEmpty()); +} + TEST(SidePacketToStreamCalculator, AtTimestamp) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( @@ -334,7 +460,7 @@ TEST(SidePacketToStreamCalculator, AtTimestamp) { EXPECT_EQ(expected_value, output_packets.back().Get()); } -TEST(SidePacketToStreamCalculator, AtTimestamp_MultipleOutputs) { +TEST(SidePacketToStreamCalculator, AtTimestampWithMultipleOutputs) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( R"pb( diff --git a/mediapipe/calculators/core/value_or_default_calculator_test.cc b/mediapipe/calculators/core/value_or_default_calculator_test.cc index acd1415ad..12a043bc6 100644 --- a/mediapipe/calculators/core/value_or_default_calculator_test.cc +++ b/mediapipe/calculators/core/value_or_default_calculator_test.cc @@ -174,7 +174,7 @@ TEST(ValueOrDefaultCalculatorTest, DefaultAndValues) { ElementsAre(kDefaultValue, 1, 2, kDefaultValue, 3, kDefaultValue)); } -TEST(ValueOrDefaultCalculatorTest, TimestampsMissmatch) { +TEST(ValueOrDefaultCalculatorTest, TimestampsMismatch) { // Check that when we provide the inputs not on time - we don't get them. 
ValueOrDefaultRunner runner; const std::vector ticks = {1, 2, 5, 8, 12, 33, 231}; diff --git a/mediapipe/calculators/image/affine_transformation_runner_opencv.cc b/mediapipe/calculators/image/affine_transformation_runner_opencv.cc index c43d73ff7..b58e035ee 100644 --- a/mediapipe/calculators/image/affine_transformation_runner_opencv.cc +++ b/mediapipe/calculators/image/affine_transformation_runner_opencv.cc @@ -59,7 +59,7 @@ class OpenCvRunner const ImageFrame& input, const std::array& matrix, const AffineTransformation::Size& size, AffineTransformation::BorderMode border_mode) override { - // OpenCV warpAffine works in absolute coordinates, so the transfom (which + // OpenCV warpAffine works in absolute coordinates, so the transform (which // accepts and produces relative coordinates) should be adjusted to first // normalize coordinates and then scale them. // clang-format off diff --git a/mediapipe/calculators/image/image_clone_calculator.cc b/mediapipe/calculators/image/image_clone_calculator.cc index 563b4a4ad..0929e81e5 100644 --- a/mediapipe/calculators/image/image_clone_calculator.cc +++ b/mediapipe/calculators/image/image_clone_calculator.cc @@ -65,7 +65,7 @@ class ImageCloneCalculator : public Node { } #else MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract( - cc, /*requesst_gpu_as_optional=*/true)); + cc, /*request_gpu_as_optional=*/true)); #endif // MEDIAPIPE_DISABLE_GPU return absl::OkStatus(); } diff --git a/mediapipe/calculators/image/image_cropping_calculator.proto b/mediapipe/calculators/image/image_cropping_calculator.proto index 55d3467d1..17e4cb3e8 100644 --- a/mediapipe/calculators/image/image_cropping_calculator.proto +++ b/mediapipe/calculators/image/image_cropping_calculator.proto @@ -24,7 +24,7 @@ message ImageCroppingCalculatorOptions { } // Output texture buffer dimensions. The values defined in the options will be - // overriden by the WIDTH and HEIGHT input streams if they exist. + // overridden by the WIDTH and HEIGHT input streams if they exist. 
optional int32 width = 1; optional int32 height = 2; diff --git a/mediapipe/calculators/image/image_file_properties_calculator.cc b/mediapipe/calculators/image/image_file_properties_calculator.cc index db01400cd..01a1bd2c1 100644 --- a/mediapipe/calculators/image/image_file_properties_calculator.cc +++ b/mediapipe/calculators/image/image_file_properties_calculator.cc @@ -77,7 +77,7 @@ absl::StatusOr ComputeFocalLengthInPixels(int image_width, return focal_length_pixels; } -absl::StatusOr GetImageFileProperites( +absl::StatusOr GetImageFileProperties( const std::string& image_bytes) { easyexif::EXIFInfo result; int code = result.parseFrom(image_bytes); @@ -151,7 +151,7 @@ class ImageFilePropertiesCalculator : public CalculatorBase { if (cc->InputSidePackets().NumEntries() == 1) { const std::string& image_bytes = cc->InputSidePackets().Index(0).Get(); - MP_ASSIGN_OR_RETURN(properties_, GetImageFileProperites(image_bytes)); + MP_ASSIGN_OR_RETURN(properties_, GetImageFileProperties(image_bytes)); read_properties_ = true; } @@ -169,7 +169,7 @@ class ImageFilePropertiesCalculator : public CalculatorBase { return absl::OkStatus(); } const std::string& image_bytes = cc->Inputs().Index(0).Get(); - MP_ASSIGN_OR_RETURN(properties_, GetImageFileProperites(image_bytes)); + MP_ASSIGN_OR_RETURN(properties_, GetImageFileProperties(image_bytes)); read_properties_ = true; } if (read_properties_) { diff --git a/mediapipe/calculators/image/segmentation_smoothing_calculator.cc b/mediapipe/calculators/image/segmentation_smoothing_calculator.cc index d238975c6..ab2148f36 100644 --- a/mediapipe/calculators/image/segmentation_smoothing_calculator.cc +++ b/mediapipe/calculators/image/segmentation_smoothing_calculator.cc @@ -118,7 +118,7 @@ absl::Status SegmentationSmoothingCalculator::GetContract( #if !MEDIAPIPE_DISABLE_GPU MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract( - cc, /*requesst_gpu_as_optional=*/true)); + cc, /*request_gpu_as_optional=*/true)); #endif // !MEDIAPIPE_DISABLE_GPU return absl::OkStatus(); diff --git a/mediapipe/calculators/image/warp_affine_calculator.cc b/mediapipe/calculators/image/warp_affine_calculator.cc index dba500dfa..0bbf6c72d 100644 --- a/mediapipe/calculators/image/warp_affine_calculator.cc +++ b/mediapipe/calculators/image/warp_affine_calculator.cc @@ -206,7 +206,7 @@ class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl { if constexpr (std::is_same_v || std::is_same_v) { MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract( - cc, /*requesst_gpu_as_optional=*/true)); + cc, /*request_gpu_as_optional=*/true)); } return absl::OkStatus(); } diff --git a/mediapipe/calculators/image/warp_affine_calculator_test.cc b/mediapipe/calculators/image/warp_affine_calculator_test.cc index 8a4c2429e..90bf41233 100644 --- a/mediapipe/calculators/image/warp_affine_calculator_test.cc +++ b/mediapipe/calculators/image/warp_affine_calculator_test.cc @@ -284,7 +284,7 @@ std::array GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi, .IgnoreError(); mediapipe::GetRotatedSubRectToRectTransformMatrix( roi_absolute, input.cols, input.rows, - /*flip_horizontaly=*/false, &transform_mat); + /*flip_horizontally=*/false, &transform_mat); return transform_mat; } diff --git a/mediapipe/calculators/image/yuv_to_image_calculator.cc b/mediapipe/calculators/image/yuv_to_image_calculator.cc index 6a82877c3..e177ba589 100644 --- a/mediapipe/calculators/image/yuv_to_image_calculator.cc +++ b/mediapipe/calculators/image/yuv_to_image_calculator.cc @@ -49,7 +49,7 @@ std::string 
FourCCToString(libyuv::FourCC fourcc) { // The input `YUVImage` is expected to be in the NV12, NV21, YV12 or I420 (aka // YV21) format (as per the `fourcc()` property). This covers the most commonly // used YUV image formats used on mobile devices. Other formats are not -// supported and wil result in an `InvalidArgumentError`. +// supported and will result in an `InvalidArgumentError`. class YUVToImageCalculator : public Node { public: static constexpr Input kInput{"YUV_IMAGE"}; diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD index 6c433e9b5..618624430 100644 --- a/mediapipe/calculators/tensor/BUILD +++ b/mediapipe/calculators/tensor/BUILD @@ -657,6 +657,7 @@ cc_library( }), deps = [ ":tensor_converter_calculator_cc_proto", + ":tensor_converter_cpu", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:port", "//mediapipe/framework/formats:image_frame", @@ -665,6 +666,7 @@ cc_library( "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", "//mediapipe/framework/port:statusor", + "//mediapipe/gpu:gpu_buffer", "//mediapipe/gpu:gpu_buffer_format", "//mediapipe/gpu:gpu_origin_cc_proto", "//mediapipe/util:resource_util", @@ -674,10 +676,17 @@ cc_library( "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ] + select({ "//mediapipe/gpu:disable_gpu": [], - "//conditions:default": ["tensor_converter_calculator_gpu_deps"], + "//conditions:default": [ + "tensor_converter_calculator_gpu_deps", + "//mediapipe/gpu:gl_base", + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gl_simple_shaders", + "//mediapipe/gpu:shader_util", + ], }) + select({ "//mediapipe:apple": [ "//third_party/apple_frameworks:MetalKit", @@ -687,6 +696,35 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tensor_converter_cpu", + srcs = ["tensor_converter_cpu.cc"], + hdrs = ["tensor_converter_cpu.h"], + deps = [ + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_test( + name = "tensor_converter_cpu_test", + srcs = ["tensor_converter_cpu_test.cc"], + deps = [ + ":tensor_converter_cpu", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:gtest", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:status_matchers", + "//mediapipe/util:image_test_utils", + ], +) + cc_library( name = "tensor_converter_calculator_gpu_deps", visibility = ["//visibility:private"], @@ -1414,6 +1452,8 @@ cc_library( }), deps = [ ":tensors_to_segmentation_calculator_cc_proto", + ":tensors_to_segmentation_converter", + ":tensors_to_segmentation_utils", "//mediapipe/framework:calculator_context", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:port", @@ -1421,9 +1461,11 @@ cc_library( "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/formats:tensor", "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", "//mediapipe/framework/port:statusor", "//mediapipe/gpu:gpu_origin_cc_proto", "//mediapipe/util:resource_util", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", 
"@com_google_absl//absl/types:span", @@ -1434,6 +1476,7 @@ cc_library( "//mediapipe/gpu:gl_calculator_helper", "//mediapipe/gpu:gl_simple_shaders", "//mediapipe/gpu:gpu_buffer", + "//mediapipe/gpu:gpu_buffer_format", "//mediapipe/gpu:shader_util", ], }) + selects.with_or({ @@ -1453,19 +1496,96 @@ cc_library( }) + select({ "//mediapipe/framework/port:disable_opencv": [], "//conditions:default": [ - "//mediapipe/framework/formats:image_opencv", - "//mediapipe/framework/port:opencv_imgproc", + ":tensors_to_segmentation_converter_opencv", ], }), alwayslink = 1, ) +cc_library( + name = "tensors_to_segmentation_utils", + srcs = ["tensors_to_segmentation_utils.cc"], + hdrs = ["tensors_to_segmentation_utils.h"], + deps = [ + "//mediapipe/framework:port", + "//mediapipe/framework/port:ret_check", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_test( + name = "tensors_to_segmentation_utils_test", + srcs = ["tensors_to_segmentation_utils_test.cc"], + deps = [ + ":tensors_to_segmentation_utils", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:status_matchers", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "tensors_to_segmentation_converter", + hdrs = ["tensors_to_segmentation_converter.h"], + deps = [ + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:tensor", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "tensors_to_segmentation_converter_opencv", + srcs = ["tensors_to_segmentation_converter_opencv.cc"], + hdrs = ["tensors_to_segmentation_converter_opencv.h"], + deps = [ + ":tensors_to_segmentation_calculator_cc_proto", + ":tensors_to_segmentation_converter", + ":tensors_to_segmentation_utils", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_opencv", + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "tensors_to_segmentation_calculator_test_utils", + testonly = 1, + srcs = ["tensors_to_segmentation_calculator_test_utils.cc"], + hdrs = ["tensors_to_segmentation_calculator_test_utils.h"], + deps = [ + ":tensors_to_segmentation_calculator_cc_proto", + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/framework/port:parse_text_proto", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "tensors_to_segmentation_calculator_test_utils_test", + srcs = ["tensors_to_segmentation_calculator_test_utils_test.cc"], + deps = [ + ":tensors_to_segmentation_calculator_cc_proto", + ":tensors_to_segmentation_calculator_test_utils", + "//mediapipe/framework/port:gtest_main", + ], +) + cc_test( name = "tensors_to_segmentation_calculator_test", srcs = ["tensors_to_segmentation_calculator_test.cc"], deps = [ ":tensors_to_segmentation_calculator", ":tensors_to_segmentation_calculator_cc_proto", + ":tensors_to_segmentation_calculator_test_utils", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework:packet", @@ -1476,11 +1596,6 @@ cc_test( "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/formats:tensor", "//mediapipe/framework/port:gtest_main", - "//mediapipe/framework/port:parse_text_proto", - "@com_google_absl//absl/log", 
- "@com_google_absl//absl/log:absl_log", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", ], ) diff --git a/mediapipe/calculators/tensor/audio_to_tensor_calculator.cc b/mediapipe/calculators/tensor/audio_to_tensor_calculator.cc index c8d38a653..7230e178d 100644 --- a/mediapipe/calculators/tensor/audio_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensor/audio_to_tensor_calculator.cc @@ -109,7 +109,7 @@ bool IsValidFftSize(int size) { // Non-streaming mode: when "stream_mode" is set to false in the calculator // options, the calculators treats the packets in the input audio stream as // a batch of unrelated audio buffers. In each Process() call, the input -// buffer will be frist resampled, and framed as fixed-sized, possibly +// buffer will be first resampled, and framed as fixed-sized, possibly // overlapping tensors. The last tensor produced by a Process() invocation // will be zero-padding if the remaining samples are insufficient. As the // calculator treats the input packets as unrelated, all samples will be @@ -159,7 +159,7 @@ class AudioToTensorCalculator : public Node { public: static constexpr Input kAudioIn{"AUDIO"}; // TODO: Removes this optional input stream when the "AUDIO" stream - // uses the new mediapipe audio data containers that carry audio metatdata, + // uses the new mediapipe audio data containers that carry audio metadata, // such as sample rate. static constexpr Input::Optional kAudioSampleRateIn{"SAMPLE_RATE"}; static constexpr Output> kTensorsOut{"TENSORS"}; diff --git a/mediapipe/calculators/tensor/audio_to_tensor_calculator.proto b/mediapipe/calculators/tensor/audio_to_tensor_calculator.proto index 948c82a36..a49825586 100644 --- a/mediapipe/calculators/tensor/audio_to_tensor_calculator.proto +++ b/mediapipe/calculators/tensor/audio_to_tensor_calculator.proto @@ -37,7 +37,7 @@ message AudioToTensorCalculatorOptions { // will be converted into tensors. optional double target_sample_rate = 4; - // Whether to treat the input audio stream as a continous stream or a batch + // Whether to treat the input audio stream as a continuous stream or a batch // of unrelated audio buffers. optional bool stream_mode = 5 [default = true]; diff --git a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc index 171b28eb4..924df6af3 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc @@ -82,7 +82,7 @@ namespace api2 { // // Outputs: // TENSORS - std::vector -// Vector containing a single Tensor populated with an extrated RGB image. +// Vector containing a single Tensor populated with an extracted RGB image. 
// MATRIX - std::array @Optional // An std::array representing a 4x4 row-major-order matrix that // maps a point on the input image to a point on the output tensor, and @@ -212,7 +212,7 @@ class ImageToTensorCalculator : public Node { std::array matrix; GetRotatedSubRectToRectTransformMatrix( roi, image->width(), image->height(), - /*flip_horizontaly=*/false, &matrix); + /*flip_horizontally=*/false, &matrix); kOutMatrix(cc).Send(std::move(matrix)); } diff --git a/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc b/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc index 7017c1e3a..51150a1ca 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc @@ -206,7 +206,7 @@ mediapipe::ImageFormat::Format GetImageFormat(int image_channels) { } else if (image_channels == 1) { return ImageFormat::GRAY8; } - ABSL_CHECK(false) << "Unsupported input image channles: " << image_channels; + ABSL_CHECK(false) << "Unsupported input image channels: " << image_channels; } Packet MakeImageFramePacket(cv::Mat input) { diff --git a/mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.cc b/mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.cc index b32b67869..04b791bd4 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.cc @@ -57,7 +57,7 @@ class SubRectExtractorGl { absl::Status ExtractSubRectToBuffer( const tflite::gpu::gl::GlTexture& texture, const tflite::gpu::HW& texture_size, const RotatedRect& sub_rect, - bool flip_horizontaly, float alpha, float beta, + bool flip_horizontally, float alpha, float beta, const tflite::gpu::HW& destination_size, tflite::gpu::gl::CommandQueue* command_queue, tflite::gpu::gl::GlBuffer* destination); @@ -154,13 +154,13 @@ void main() { absl::Status SubRectExtractorGl::ExtractSubRectToBuffer( const tflite::gpu::gl::GlTexture& texture, const tflite::gpu::HW& texture_size, const RotatedRect& texture_sub_rect, - bool flip_horizontaly, float alpha, float beta, + bool flip_horizontally, float alpha, float beta, const tflite::gpu::HW& destination_size, tflite::gpu::gl::CommandQueue* command_queue, tflite::gpu::gl::GlBuffer* destination) { std::array transform_mat; GetRotatedSubRectToRectTransformMatrix(texture_sub_rect, texture_size.w, - texture_size.h, flip_horizontaly, + texture_size.h, flip_horizontally, &transform_mat); MP_RETURN_IF_ERROR(texture.BindAsSampler2D(0)); @@ -308,7 +308,7 @@ class GlProcessor : public ImageToTensorConverter { input_texture, tflite::gpu::HW(source_texture.height(), source_texture.width()), roi, - /*flip_horizontaly=*/false, transform.scale, transform.offset, + /*flip_horizontally=*/false, transform.scale, transform.offset, tflite::gpu::HW(output_shape.dims[1], output_shape.dims[2]), command_queue_.get(), &output)); diff --git a/mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.cc b/mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.cc index 2522cae85..930d9fe21 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.cc @@ -199,7 +199,7 @@ class GlProcessor : public ImageToTensorConverter { range_min, range_max)); auto tensor_view = output_tensor.GetOpenGlTexture2dWriteView(); MP_RETURN_IF_ERROR(ExtractSubRect(input_texture, roi, - /*flip_horizontaly=*/false, + /*flip_horizontally=*/false, 
transform.scale, transform.offset, output_shape, &tensor_view)); return absl::OkStatus(); @@ -210,7 +210,7 @@ class GlProcessor : public ImageToTensorConverter { absl::Status ExtractSubRect(const mediapipe::GlTexture& texture, const RotatedRect& sub_rect, - bool flip_horizontaly, float alpha, float beta, + bool flip_horizontally, float alpha, float beta, const Tensor::Shape& output_shape, Tensor::OpenGlTexture2dView* output) { const int output_height = output_shape.dims[1]; @@ -263,13 +263,13 @@ class GlProcessor : public ImageToTensorConverter { ABSL_LOG_IF(FATAL, !gl_context) << "GlContext is not bound to the thread."; if (gl_context->GetGlVersion() == mediapipe::GlVersion::kGLES2) { GetTransposedRotatedSubRectToRectTransformMatrix( - sub_rect, texture.width(), texture.height(), flip_horizontaly, + sub_rect, texture.width(), texture.height(), flip_horizontally, &transform_mat); glUniformMatrix4fv(matrix_id_, 1, GL_FALSE, transform_mat.data()); } else { GetRotatedSubRectToRectTransformMatrix(sub_rect, texture.width(), - texture.height(), flip_horizontaly, - &transform_mat); + texture.height(), + flip_horizontally, &transform_mat); glUniformMatrix4fv(matrix_id_, 1, GL_TRUE, transform_mat.data()); } diff --git a/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc b/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc index cef2abcd7..f47d2da9a 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc @@ -179,13 +179,13 @@ class SubRectExtractorMetal { } absl::Status Execute(id input_texture, - const RotatedRect& sub_rect, bool flip_horizontaly, + const RotatedRect& sub_rect, bool flip_horizontally, float alpha, float beta, const tflite::gpu::HW& destination_size, id command_buffer, id destination) { auto output_texture = MTLTextureWithBuffer(destination_size, destination); - return InternalExecute(input_texture, sub_rect, flip_horizontaly, alpha, + return InternalExecute(input_texture, sub_rect, flip_horizontally, alpha, beta, destination_size, command_buffer, output_texture); } @@ -211,7 +211,7 @@ class SubRectExtractorMetal { absl::Status InternalExecute(id input_texture, const RotatedRect& sub_rect, - bool flip_horizontaly, float alpha, float beta, + bool flip_horizontally, float alpha, float beta, const tflite::gpu::HW& destination_size, id command_buffer, id output_texture) { @@ -223,7 +223,7 @@ class SubRectExtractorMetal { std::array transform_mat; GetRotatedSubRectToRectTransformMatrix(sub_rect, input_texture.width, input_texture.height, - flip_horizontaly, &transform_mat); + flip_horizontally, &transform_mat); id transform_mat_buffer = [device_ newBufferWithBytes:&transform_mat length:sizeof(transform_mat) @@ -383,7 +383,7 @@ class MetalProcessor : public ImageToTensorConverter { MtlBufferView::GetWriteView(output_tensor, command_buffer); MP_RETURN_IF_ERROR(extractor_->Execute( texture, roi, - /*flip_horizontaly=*/false, transform.scale, transform.offset, + /*flip_horizontally=*/false, transform.scale, transform.offset, tflite::gpu::HW(output_shape.dims[1], output_shape.dims[2]), command_buffer, buffer_view.buffer())); [command_buffer commit]; diff --git a/mediapipe/calculators/tensor/image_to_tensor_utils.cc b/mediapipe/calculators/tensor/image_to_tensor_utils.cc index 3f91f3dc2..b6ed5216c 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_utils.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_utils.cc @@ -92,7 +92,7 @@ absl::StatusOr 
GetValueRangeTransformation( void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect, int rect_width, int rect_height, - bool flip_horizontaly, + bool flip_horizontally, std::array* matrix_ptr) { std::array& matrix = *matrix_ptr; // The resulting matrix is multiplication of below commented out matrices: @@ -118,7 +118,7 @@ void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect, // {0.0f, 0.0f, a, 0.0f} // {0.0f, 0.0f, 0.0f, 1.0f} - const float flip = flip_horizontaly ? -1 : 1; + const float flip = flip_horizontally ? -1 : 1; // Matrix for optional horizontal flip around middle of output image. // { fl , 0.0f, 0.0f, 0.0f} // { 0.0f, 1.0f, 0.0f, 0.0f} @@ -177,13 +177,13 @@ void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect, void GetTransposedRotatedSubRectToRectTransformMatrix( const RotatedRect& sub_rect, int rect_width, int rect_height, - bool flip_horizontaly, std::array* matrix_ptr) { + bool flip_horizontally, std::array* matrix_ptr) { std::array& matrix = *matrix_ptr; // See comments in GetRotatedSubRectToRectTransformMatrix for detailed // calculations. const float a = sub_rect.width; const float b = sub_rect.height; - const float flip = flip_horizontaly ? -1 : 1; + const float flip = flip_horizontally ? -1 : 1; const float c = std::cos(sub_rect.rotation); const float d = std::sin(sub_rect.rotation); const float e = sub_rect.center_x; diff --git a/mediapipe/calculators/tensor/image_to_tensor_utils.h b/mediapipe/calculators/tensor/image_to_tensor_utils.h index a73529dce..63810923d 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_utils.h +++ b/mediapipe/calculators/tensor/image_to_tensor_utils.h @@ -74,7 +74,7 @@ absl::StatusOr> PadRoi(int input_tensor_width, // Represents a transformation of value which involves scaling and offsetting. // To apply transformation: // ValueTransformation transform = ... -// float transformed_value = transform.scale * value + transfrom.offset; +// float transformed_value = transform.scale * value + transform.offset; struct ValueTransformation { float scale; float offset; @@ -99,11 +99,11 @@ absl::StatusOr GetValueRangeTransformation( // @sub_rect - rotated sub rect in absolute coordinates // @rect_width - rect width // @rect_height - rect height -// @flip_horizontaly - we need to flip the output buffer. +// @flip_horizontally - we need to flip the output buffer. // @matrix - 4x4 matrix (array of 16 elements) to populate void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect, int rect_width, int rect_height, - bool flip_horizontaly, + bool flip_horizontally, std::array* matrix); // Returns the transpose of the matrix found with @@ -118,11 +118,11 @@ void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect, // @sub_rect - rotated sub rect in absolute coordinates // @rect_width - rect width // @rect_height - rect height -// @flip_horizontaly - we need to flip the output buffer. +// @flip_horizontally - we need to flip the output buffer. // @matrix - 4x4 matrix (array of 16 elements) to populate void GetTransposedRotatedSubRectToRectTransformMatrix( const RotatedRect& sub_rect, int rect_width, int rect_height, - bool flip_horizontaly, std::array* matrix); + bool flip_horizontally, std::array* matrix); // Validates the output dimensions set in the option proto. 
The input option // proto is expected to have to following fields: diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.cc b/mediapipe/calculators/tensor/tensor_converter_calculator.cc index b42cb0b17..03eb2ff80 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include "absl/log/absl_check.h" @@ -21,17 +22,22 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_format.h" +#include "absl/strings/substitute.h" +#include "absl/types/optional.h" #include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h" +#include "mediapipe/calculators/tensor/tensor_converter_cpu.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/matrix.h" #include "mediapipe/framework/formats/tensor.h" #include "mediapipe/framework/port.h" #include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status_macros.h" #include "mediapipe/gpu/gpu_buffer_format.h" #include "mediapipe/gpu/gpu_origin.pb.h" #if !MEDIAPIPE_DISABLE_GPU +#include "mediapipe/gpu/gl_base.h" #include "mediapipe/gpu/gpu_buffer.h" #if MEDIAPIPE_METAL_ENABLED #import @@ -94,16 +100,13 @@ absl::StatusOr ShouldFlipVertically( } } -typedef Eigen::Matrix - RowMajorMatrixXf; -typedef Eigen::Matrix - ColMajorMatrixXf; - constexpr char kImageFrameTag[] = "IMAGE"; constexpr char kGpuBufferTag[] = "IMAGE_GPU"; constexpr char kTensorsTag[] = "TENSORS"; constexpr char kMatrixTag[] = "MATRIX"; +constexpr std::pair kDefaultOutputRange = {0.0f, 1.0f}; + } // namespace namespace mediapipe { @@ -156,10 +159,6 @@ class TensorConverterCalculator : public CalculatorBase { private: absl::Status InitGpu(CalculatorContext* cc); absl::Status LoadOptions(CalculatorContext* cc, bool use_gpu); - template - absl::Status NormalizeImage(const ImageFrame& image_frame, - bool flip_vertically, float* tensor_ptr); - absl::Status CopyMatrixToTensor(const Matrix& matrix, float* tensor_ptr); absl::Status ProcessCPU(CalculatorContext* cc); absl::Status ProcessGPU(CalculatorContext* cc); @@ -279,46 +278,21 @@ absl::Status TensorConverterCalculator::ProcessCPU(CalculatorContext* cc) { } const auto& image_frame = cc->Inputs().Tag(kImageFrameTag).Get(); - const int height = image_frame.Height(); - const int width = image_frame.Width(); - const int channels = image_frame.NumberOfChannels(); - const int channels_preserved = std::min(channels, max_num_channels_); - const mediapipe::ImageFormat::Format format = image_frame.Format(); - - if (!(format == mediapipe::ImageFormat::SRGBA || - format == mediapipe::ImageFormat::SRGB || - format == mediapipe::ImageFormat::GRAY8 || - format == mediapipe::ImageFormat::VEC32F1)) - RET_CHECK_FAIL() << "Unsupported CPU input format."; - - output_tensors->emplace_back( - Tensor::ElementType::kFloat32, - Tensor::Shape{1, height, width, channels_preserved}); - auto cpu_view = output_tensors->back().GetCpuWriteView(); - - // Copy image data into tensor. 
- if (image_frame.ByteDepth() == 1) { - MP_RETURN_IF_ERROR(NormalizeImage(image_frame, flip_vertically_, - cpu_view.buffer())); - } else if (image_frame.ByteDepth() == 4) { - MP_RETURN_IF_ERROR(NormalizeImage(image_frame, flip_vertically_, - cpu_view.buffer())); - } else { - return absl::InternalError( - "Only byte-based (8 bit) and float (32 bit) images supported."); - } + MP_ASSIGN_OR_RETURN(Tensor output, + ConvertImageFrameToTensorOnCpu( + image_frame, + output_range_.has_value() ? output_range_.value() + : kDefaultOutputRange, + flip_vertically_, max_num_channels_)); + output_tensors->emplace_back(std::move(output)); } else if (cc->Inputs().HasTag(kMatrixTag)) { if (cc->Inputs().Tag(kMatrixTag).IsEmpty()) { return absl::OkStatus(); } const auto& matrix = cc->Inputs().Tag(kMatrixTag).Get(); - const int height = matrix.rows(); - const int width = matrix.cols(); - const int channels = 1; - output_tensors->emplace_back(Tensor::ElementType::kFloat32, - Tensor::Shape{1, height, width, channels}); - MP_RETURN_IF_ERROR(CopyMatrixToTensor( - matrix, output_tensors->back().GetCpuWriteView().buffer())); + MP_ASSIGN_OR_RETURN(Tensor output, + ConvertMatrixToTensorOnCpu(matrix, row_major_matrix_)); + output_tensors->emplace_back(std::move(output)); } else { return absl::OkStatus(); } @@ -669,67 +643,4 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc, return absl::OkStatus(); } -template -absl::Status TensorConverterCalculator::NormalizeImage( - const ImageFrame& image_frame, bool flip_vertically, float* tensor_ptr) { - const int height = image_frame.Height(); - const int width = image_frame.Width(); - const int channels = image_frame.NumberOfChannels(); - const int channels_preserved = std::min(channels, max_num_channels_); - const int channels_ignored = channels - channels_preserved; - - if (output_range_.has_value()) { - // If the output float range is set and we are not using custom - // normalization, normalize the pixel values from [0, 255] to the specified - // output range. - RET_CHECK_NE(output_range_->first, output_range_->second); - const float scale = (output_range_->second - output_range_->first) / 255.0f; - const float bias = output_range_->first; - - for (int i = 0; i < height; ++i) { - const T* image_ptr = reinterpret_cast( - image_frame.PixelData() + - (flip_vertically ? height - 1 - i : i) * image_frame.WidthStep()); - for (int j = 0; j < width; ++j) { - for (int c = 0; c < channels_preserved; ++c) { - *tensor_ptr++ = *image_ptr++ * scale + bias; - } - image_ptr += channels_ignored; - } - } - } else { - // [0,1], scale only (bias == 0) - // Verified that there are no precision issues with 1.0f / 255.0f expression - const float scale = 1.0f / 255.0f; - for (int i = 0; i < height; ++i) { - const T* image_ptr = reinterpret_cast( - image_frame.PixelData() + - (flip_vertically ? 
height - 1 - i : i) * image_frame.WidthStep()); - for (int j = 0; j < width; ++j) { - for (int c = 0; c < channels_preserved; ++c) { - *tensor_ptr++ = *image_ptr++ * scale; - } - image_ptr += channels_ignored; - } - } - } - - return absl::OkStatus(); -} - -absl::Status TensorConverterCalculator::CopyMatrixToTensor(const Matrix& matrix, - float* tensor_ptr) { - if (row_major_matrix_) { - auto matrix_map = - Eigen::Map(tensor_ptr, matrix.rows(), matrix.cols()); - matrix_map = matrix; - } else { - auto matrix_map = - Eigen::Map(tensor_ptr, matrix.rows(), matrix.cols()); - matrix_map = matrix; - } - - return absl::OkStatus(); -} - } // namespace mediapipe diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.proto b/mediapipe/calculators/tensor/tensor_converter_calculator.proto index 2c5e0be56..b80d1e805 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.proto +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.proto @@ -32,7 +32,7 @@ message TensorConverterCalculatorOptions { // Custom settings to override the internal scaling factors `div` and `sub`. // Both values must be set to non-negative values. Will only take effect on // CPU AND when |use_custom_normalization| is set to true. When these custom - // values take effect, the |zero_center| setting above will be overriden, and + // values take effect, the |zero_center| setting above will be overridden, and // the normalized_value will be calculated as: // normalized_value = input / custom_div - custom_sub. optional bool use_custom_normalization = 6 [default = false]; diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc b/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc index 0394daebd..3446ea301 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc @@ -321,6 +321,61 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) { } } +TEST_F(TensorConverterCalculatorTest, + ShouldConvertImageWithDefaultOutputRange) { + CalculatorGraph graph; + CalculatorGraphConfig graph_config = + mediapipe::ParseTextProtoOrDie( + R"pb( + input_stream: "input_image" + node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TensorConverterCalculatorOptions.ext] { + zero_center: false + } + } + } + )pb"); + std::vector output_packets; + tool::AddVectorSink("tensor", &graph_config, &output_packets); + + // Run the graph. + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + auto input_image = std::make_unique(ImageFormat::GRAY8, 1, 1); + cv::Mat mat = mediapipe::formats::MatView(input_image.get()); + mat.at(0, 0) = 200; + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", Adopt(input_image.release()).At(Timestamp(0)))); + + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + ASSERT_EQ(output_packets.size(), 1); + + // Get and process results. 
+ const std::vector& tensor_vec = + output_packets[0].Get>(); + ASSERT_EQ(tensor_vec.size(), 1); + + const Tensor* tensor = &tensor_vec[0]; + + // Calculate the expected normalized value: + float expected_value = 200.0 / 255.0; + + EXPECT_EQ(tensor->element_type(), Tensor::ElementType::kFloat32); + auto view = tensor->GetCpuReadView(); + float actual_value = *view.buffer(); + EXPECT_FLOAT_EQ(actual_value, expected_value); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). + MP_ASSERT_OK(graph.CloseInputStream("input_image")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + TEST_F(TensorConverterCalculatorTest, FlipVertically) { CalculatorGraph graph; CalculatorGraphConfig graph_config = diff --git a/mediapipe/calculators/tensor/tensor_converter_cpu.cc b/mediapipe/calculators/tensor/tensor_converter_cpu.cc new file mode 100644 index 000000000..f72a24c31 --- /dev/null +++ b/mediapipe/calculators/tensor/tensor_converter_cpu.cc @@ -0,0 +1,145 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/tensor/tensor_converter_cpu.h" + +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status_macros.h" + +namespace mediapipe { +namespace { + +typedef Eigen::Matrix + RowMajorMatrixXf; +typedef Eigen::Matrix + ColMajorMatrixXf; + +template +absl::Status NormalizeImage(const ImageFrame& image_frame, bool flip_vertically, + const std::pair& output_range, + int max_num_channels, float* tensor_ptr) { + const int height = image_frame.Height(); + const int width = image_frame.Width(); + const int channels = image_frame.NumberOfChannels(); + const int channels_preserved = std::min(channels, max_num_channels); + const int channels_ignored = channels - channels_preserved; + + RET_CHECK_NE(output_range.first, output_range.second); + const float scale = (output_range.second - output_range.first) / 255.0f; + const float bias = output_range.first; + + for (int i = 0; i < height; ++i) { + const T* image_ptr = reinterpret_cast( + image_frame.PixelData() + + (flip_vertically ? 
height - 1 - i : i) * image_frame.WidthStep()); + for (int j = 0; j < width; ++j) { + for (int c = 0; c < channels_preserved; ++c) { + *tensor_ptr++ = *image_ptr++ * scale + bias; + } + image_ptr += channels_ignored; + } + } + return absl::OkStatus(); +} + +} // namespace + +absl::Status NormalizeUInt8Image(const ImageFrame& image_frame, + bool flip_vertically, + const std::pair& output_range, + int max_num_channels, float* tensor_ptr) { + return NormalizeImage(image_frame, flip_vertically, output_range, + max_num_channels, tensor_ptr); +} + +absl::Status NormalizeFloatImage(const ImageFrame& image_frame, + bool flip_vertically, + const std::pair& output_range, + int max_num_channels, float* tensor_ptr) { + return NormalizeImage(image_frame, flip_vertically, output_range, + max_num_channels, tensor_ptr); +} + +absl::Status CopyMatrixToTensor(const Matrix& matrix, bool is_row_major_matrix, + float* tensor_ptr) { + if (is_row_major_matrix) { + auto matrix_map = + Eigen::Map(tensor_ptr, matrix.rows(), matrix.cols()); + matrix_map = matrix; + } else { + auto matrix_map = + Eigen::Map(tensor_ptr, matrix.rows(), matrix.cols()); + matrix_map = matrix; + } + return absl::OkStatus(); +} + +absl::StatusOr ConvertImageFrameToTensorOnCpu( + const ImageFrame& image_frame, const std::pair& output_range, + bool flip_vertically, int max_num_channels) { + const int height = image_frame.Height(); + const int width = image_frame.Width(); + const int channels = image_frame.NumberOfChannels(); + const int channels_preserved = std::min(channels, max_num_channels); + const mediapipe::ImageFormat::Format format = image_frame.Format(); + + if (!(format == mediapipe::ImageFormat::SRGBA || + format == mediapipe::ImageFormat::SRGB || + format == mediapipe::ImageFormat::GRAY8 || + format == mediapipe::ImageFormat::VEC32F1)) + RET_CHECK_FAIL() << "Unsupported CPU input format."; + + Tensor output_tensor(Tensor::ElementType::kFloat32, + Tensor::Shape{1, height, width, channels_preserved}); + auto cpu_view = output_tensor.GetCpuWriteView(); + + // Copy image data into tensor. + if (image_frame.ByteDepth() == 1) { + MP_RETURN_IF_ERROR(NormalizeUInt8Image(image_frame, flip_vertically, + output_range, max_num_channels, + cpu_view.buffer())); + } else if (image_frame.ByteDepth() == 4) { + MP_RETURN_IF_ERROR(NormalizeFloatImage(image_frame, flip_vertically, + output_range, max_num_channels, + cpu_view.buffer())); + } else { + return absl::InternalError( + "Only byte-based (8 bit) and float (32 bit) images supported."); + } + return output_tensor; +} + +absl::StatusOr ConvertMatrixToTensorOnCpu(const Matrix& matrix, + bool row_major_matrix) { + const int height = matrix.rows(); + const int width = matrix.cols(); + const int channels = 1; + Tensor output_tensor(Tensor::ElementType::kFloat32, + Tensor::Shape{1, height, width, channels}); + MP_RETURN_IF_ERROR( + CopyMatrixToTensor(matrix, row_major_matrix, + output_tensor.GetCpuWriteView().buffer())); + return output_tensor; +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensor/tensor_converter_cpu.h b/mediapipe/calculators/tensor/tensor_converter_cpu.h new file mode 100644 index 000000000..784bade80 --- /dev/null +++ b/mediapipe/calculators/tensor/tensor_converter_cpu.h @@ -0,0 +1,61 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_CONVERTER_CPU_H_ +#define MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_CONVERTER_CPU_H_ + +#include <utility> + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/formats/tensor.h" + +namespace mediapipe { + +// Converts an ImageFrame to a Tensor. +// @flip_vertically flips the image vertically during conversion. +// @max_num_channels can be used to reserve extra channels in the output +// tensors. +// Returns output Tensor. +absl::StatusOr<Tensor> ConvertImageFrameToTensorOnCpu( + const ImageFrame& image_frame, const std::pair<float, float>& output_range, + bool flip_vertically, int max_num_channels); + +// Converts a Matrix to a Tensor. +// @row_major_matrix defines the ordering in the input matrix. +// Returns output Tensor. +absl::StatusOr<Tensor> ConvertMatrixToTensorOnCpu(const Matrix& matrix, + bool row_major_matrix); + +// For testing only below. +absl::Status NormalizeUInt8Image(const ImageFrame& image_frame, + bool flip_vertically, + const std::pair<float, float>& output_range, + int max_num_channels, float* tensor_ptr); + +absl::Status NormalizeFloatImage(const ImageFrame& image_frame, + bool flip_vertically, + const std::pair<float, float>& output_range, + int max_num_channels, float* tensor_ptr); + +absl::Status CopyMatrixToTensor(const Matrix& matrix, bool is_row_major_matrix, + float* tensor_ptr); + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_CONVERTER_CPU_H_ diff --git a/mediapipe/calculators/tensor/tensor_converter_cpu_test.cc b/mediapipe/calculators/tensor/tensor_converter_cpu_test.cc new file mode 100644 index 000000000..478a9c6dc --- /dev/null +++ b/mediapipe/calculators/tensor/tensor_converter_cpu_test.cc @@ -0,0 +1,175 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
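For reference, a minimal usage sketch of the helpers declared above (the pixel value, dimensions, and ranges are illustrative; the arithmetic mirrors NormalizeImage's scale = (max - min) / 255 and bias = min):

  // Hypothetical caller, not part of this patch.
  ImageFrame frame(ImageFormat::GRAY8, /*width=*/1, /*height=*/1);
  // Suppose the single pixel holds the value 200.
  MP_ASSIGN_OR_RETURN(
      Tensor tensor,
      ConvertImageFrameToTensorOnCpu(frame, /*output_range=*/{0.0f, 1.0f},
                                     /*flip_vertically=*/false,
                                     /*max_num_channels=*/1));
  auto view = tensor.GetCpuReadView();
  const float* data = view.buffer<float>();
  // With range [0, 1]:  200 -> 200 * (1.0f / 255.0f)       ≈ 0.784
  // With range [-1, 1]: 200 -> 200 * (2.0f / 255.0f) - 1.0f ≈ 0.569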
+ +#include "mediapipe/calculators/tensor/tensor_converter_cpu.h" + +#include +#include +#include + +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/status_matchers.h" +#include "mediapipe/util/image_test_utils.h" + +namespace mediapipe { +namespace { + +Matrix CreateTestMatrix(int num_rows, int num_columns) { + Matrix matrix(num_rows, num_columns); + for (int r = 0; r < num_rows; ++r) { + for (int c = 0; c < num_columns; ++c) { + matrix(r, c) = r * num_columns + c; + } + } + return matrix; +} + +TEST(TensorConverterCpuTest, ShouldCopyMatrixInRowMajorFormatToTensor) { + auto test_matrix = CreateTestMatrix(/* num_rows=*/3, /*num_columns=*/4); + std::vector tensor_data(test_matrix.size(), 0.0f); + + MP_EXPECT_OK(CopyMatrixToTensor(test_matrix, /*is_row_major_matrix=*/true, + tensor_data.data())); + + for (int i = 0; i < tensor_data.size(); ++i) { + const int row = i / test_matrix.cols(); + const int column = i % test_matrix.cols(); + EXPECT_FLOAT_EQ(tensor_data[i], (test_matrix)(row, column)); + } +} + +TEST(TensorConverterCpuTest, ShouldCopyMatrixInColumnMajorFormatToTensor) { + auto test_matrix = CreateTestMatrix(/*num_rows=*/3, /*num_columns=*/4); + std::vector tensor_data(test_matrix.size(), 0.0f); + + MP_EXPECT_OK(CopyMatrixToTensor(test_matrix, /*is_row_major_matrix=*/false, + tensor_data.data())); + + for (int i = 0; i < tensor_data.size(); ++i) { + const int row = i % test_matrix.rows(); + const int column = i / test_matrix.rows(); + EXPECT_FLOAT_EQ(tensor_data[i], (test_matrix)(row, column)); + } +} + +TEST(TensorConverterCpuTest, ShouldNormalizeGrey8ImageWithDefaultRange) { + auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4); + std::vector tensor_data( + grey8_image_frame.Width() * grey8_image_frame.Height(), 0.0f); + + MP_EXPECT_OK(NormalizeUInt8Image(grey8_image_frame, /*flip_vertically=*/false, + {0.0f, 1.0f}, /*num_tensor_channels=*/1, + tensor_data.data())); + + for (int i = 0; i < tensor_data.size(); ++i) { + EXPECT_FLOAT_EQ( + tensor_data[i], + static_cast(grey8_image_frame.PixelData()[i]) / 255.0f); + } +} + +TEST(TensorConverterCpuTest, ShouldNormalizeGrey8ImageWithSpecifiedRange) { + auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4); + std::vector tensor_data( + grey8_image_frame.Width() * grey8_image_frame.Height(), 0.0f); + const auto range = std::make_pair(2.0f, 3.0f); + + MP_EXPECT_OK( + NormalizeUInt8Image(grey8_image_frame, /*flip_vertically=*/false, range, + /*num_tensor_channels=*/1, tensor_data.data())); + + for (int i = 0; i < tensor_data.size(); ++i) { + EXPECT_FLOAT_EQ(tensor_data[i], + static_cast(grey8_image_frame.PixelData()[i]) / + 255.0f * (range.second - range.first) + + range.first); + } +} + +TEST(TensorConverterCpuTest, ShouldNormalizeGrey8ImageFlipped) { + auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4); + std::vector tensor_data( + grey8_image_frame.Width() * grey8_image_frame.Height(), 0.0f); + + MP_EXPECT_OK(NormalizeUInt8Image(grey8_image_frame, /*flip_vertically=*/true, + {0.0f, 1.0f}, /*num_tensor_channels=*/1, + tensor_data.data())); + + for (int i = 0; i < tensor_data.size(); ++i) { + const int x = i % grey8_image_frame.Width(); + const int y = i / grey8_image_frame.Width(); + const int flipped_y = grey8_image_frame.Height() - y - 1; + + const int index = flipped_y * 
grey8_image_frame.Width() + x; + EXPECT_FLOAT_EQ( + tensor_data[index], + static_cast(grey8_image_frame.PixelData()[i]) / 255.0f); + } +} + +TEST(TensorConverterCpuTest, ShouldNormalizeFloatImageWithDefaultRange) { + auto float_image_frame = + CreateTestFloat32ImageFrame(/*width=*/3, /*height=*/4); + std::vector tensor_data( + float_image_frame.Width() * float_image_frame.Height(), 0.0f); + + MP_EXPECT_OK(NormalizeFloatImage(float_image_frame, /*flip_vertically=*/false, + {0.0f, 1.0f}, /*num_tensor_channels=*/1, + tensor_data.data())); + + for (int i = 0; i < tensor_data.size(); ++i) { + EXPECT_FLOAT_EQ(tensor_data[i], reinterpret_cast( + float_image_frame.PixelData())[i] / + 255.0f); + } +} + +TEST(TensorConverterCpuTest, ConvertImageFrameToTensorOnCpu) { + auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4); + + MP_ASSERT_OK_AND_ASSIGN(Tensor output, ConvertImageFrameToTensorOnCpu( + grey8_image_frame, {0.0f, 1.0f}, + /*flip_vertically=*/false, + /*max_num_channels=*/1)); + + const auto cpu_read_view = output.GetCpuReadView(); + const float* tensor_ptr = cpu_read_view.buffer(); + for (int i = 0; i < grey8_image_frame.Width() * grey8_image_frame.Height(); + ++i) { + EXPECT_FLOAT_EQ( + tensor_ptr[i], + static_cast(grey8_image_frame.PixelData()[i]) / 255.0); + } +} + +TEST(TensorConverterCpuTest, ConvertMatrixToTensorOnCpu) { + auto test_matrix = CreateTestMatrix(/*num_rows=*/3, /*num_columns=*/4); + + MP_ASSERT_OK_AND_ASSIGN( + Tensor output, ConvertMatrixToTensorOnCpu(test_matrix, + /*row_major_matrix=*/false)); + + const auto cpu_read_view = output.GetCpuReadView(); + const float* tensor_ptr = cpu_read_view.buffer(); + for (int i = 0; i < test_matrix.size(); ++i) { + EXPECT_FLOAT_EQ(tensor_ptr[i], test_matrix.data()[i]); + } +} + +} // namespace + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensor/tensors_to_classification_calculator.proto b/mediapipe/calculators/tensor/tensors_to_classification_calculator.proto index 32bc4b63a..28012a455 100644 --- a/mediapipe/calculators/tensor/tensors_to_classification_calculator.proto +++ b/mediapipe/calculators/tensor/tensors_to_classification_calculator.proto @@ -34,7 +34,7 @@ message TensorsToClassificationCalculatorOptions { repeated Entry entries = 1; } - // Score threshold for perserving the class. + // Score threshold for preserving the class. optional float min_score_threshold = 1; // Number of highest scoring labels to output. If top_k is not positive then // all labels are used. diff --git a/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc b/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc index 8e649c0a1..2b4a22fc6 100644 --- a/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc @@ -15,7 +15,6 @@ #include #include -#include "absl/log/absl_log.h" #include "absl/strings/str_format.h" #include "absl/types/span.h" #include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h" @@ -147,7 +146,7 @@ BoxFormat GetBoxFormat(const TensorsToDetectionsCalculatorOptions& options) { // TENSORS - Vector of Tensors of type kFloat32. The vector of tensors can have // 2 or 3 tensors. First tensor is the predicted raw boxes/keypoints. // The size of the values must be (num_boxes * num_predicted_values). -// Second tensor is the score tensor. The size of the valuse must be +// Second tensor is the score tensor. The size of the values must be // (num_boxes * num_classes). 
It's optional to pass in a third tensor // for anchors (e.g. for SSD models) depend on the outputs of the // detection model. The size of anchor tensor must be (num_boxes * @@ -215,7 +214,8 @@ class TensorsToDetectionsCalculator : public Node { const int* detection_classes, std::vector* output_detections); Detection ConvertToDetection(float box_ymin, float box_xmin, float box_ymax, - float box_xmax, float score, int class_id, + float box_xmax, absl::Span scores, + absl::Span class_ids, bool flip_vertically); bool IsClassIndexAllowed(int class_index); @@ -223,6 +223,7 @@ class TensorsToDetectionsCalculator : public Node { int num_boxes_ = 0; int num_coords_ = 0; int max_results_ = -1; + int classes_per_detection_ = 1; BoxFormat box_output_format_ = mediapipe::TensorsToDetectionsCalculatorOptions::YXHW; @@ -267,7 +268,7 @@ absl::Status TensorsToDetectionsCalculator::UpdateContract( if (CanUseGpu()) { #ifndef MEDIAPIPE_DISABLE_GL_COMPUTE MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract( - cc, /*requesst_gpu_as_optional=*/true)); + cc, /*request_gpu_as_optional=*/true)); #elif MEDIAPIPE_METAL_ENABLED MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); #endif // !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) @@ -484,6 +485,16 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU( auto num_boxes_view = num_boxes_tensor->GetCpuReadView(); auto num_boxes = num_boxes_view.buffer(); num_boxes_ = num_boxes[0]; + // The detection model with Detection_PostProcess op may output duplicate + // boxes with different classes, in the following format: + // num_boxes_tensor = [num_boxes] + // detection_classes_tensor = [box_1_class_1, box_1_class_2, ...] + // detection_scores_tensor = [box_1_score_1, box_1_score_2, ... ] + // detection_boxes_tensor = [box_1, box1, ... ] + // Each box repeats classes_per_detection_ times. + // Note Detection_PostProcess op is only supported in CPU. 
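A worked example of the layout described in the comment above (the counts are illustrative, not taken from the patch): with max_detections = 12 reported by the score/class tensors and num_boxes_ = 4 reported by the num-boxes tensor,

  // detection_boxes_tensor   = [box_0, box_0, box_0, box_1, box_1, box_1, ...]
  // detection_scores_tensor  = [s_0_c0, s_0_c1, s_0_c2, s_1_c0, ...]
  // detection_classes_tensor = [c_0_0,  c_0_1,  c_0_2,  c_1_0, ...]
  const int max_detections = 12;
  const int num_boxes = 4;
  // The RET_CHECK below holds: 12 % 4 == 0.
  const int classes_per_detection = max_detections / num_boxes;  // == 3
  // ConvertToDetections then walks the arrays with stride 3, so each emitted
  // Detection can carry up to three (score, label_id) pairs for one box.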
+ RET_CHECK_EQ(max_detections % num_boxes_, 0); + classes_per_detection_ = max_detections / num_boxes_; auto detection_boxes_view = detection_boxes_tensor->GetCpuReadView(); auto detection_boxes = detection_boxes_view.buffer(); @@ -493,8 +504,8 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU( auto detection_classes_view = detection_classes_tensor->GetCpuReadView(); auto detection_classes_ptr = detection_classes_view.buffer(); - std::vector detection_classes(num_boxes_); - for (int i = 0; i < num_boxes_; ++i) { + std::vector detection_classes(num_boxes_ * classes_per_detection_); + for (int i = 0; i < detection_classes.size(); ++i) { detection_classes[i] = static_cast(detection_classes_ptr[i]); } MP_RETURN_IF_ERROR(ConvertToDetections(detection_boxes, detection_scores, @@ -863,24 +874,25 @@ absl::Status TensorsToDetectionsCalculator::DecodeBoxes( absl::Status TensorsToDetectionsCalculator::ConvertToDetections( const float* detection_boxes, const float* detection_scores, const int* detection_classes, std::vector* output_detections) { - for (int i = 0; i < num_boxes_; ++i) { + for (int i = 0; i < num_boxes_ * classes_per_detection_; + i += classes_per_detection_) { if (max_results_ > 0 && output_detections->size() == max_results_) { break; } - if (options_.has_min_score_thresh() && - detection_scores[i] < options_.min_score_thresh()) { - continue; - } - if (!IsClassIndexAllowed(detection_classes[i])) { - continue; - } const int box_offset = i * num_coords_; Detection detection = ConvertToDetection( /*box_ymin=*/detection_boxes[box_offset + box_indices_[0]], /*box_xmin=*/detection_boxes[box_offset + box_indices_[1]], /*box_ymax=*/detection_boxes[box_offset + box_indices_[2]], /*box_xmax=*/detection_boxes[box_offset + box_indices_[3]], - detection_scores[i], detection_classes[i], options_.flip_vertically()); + absl::MakeConstSpan(detection_scores + i, classes_per_detection_), + absl::MakeConstSpan(detection_classes + i, classes_per_detection_), + options_.flip_vertically()); + // if all the scores and classes are filtered out, we skip the empty + // detection. 
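With the span-based ConvertToDetection above, the score threshold and the class allowlist are applied per class candidate rather than once per box. A small illustration with assumed values:

  // min_score_thresh = 0.3, one box, classes_per_detection_ = 3:
  //   scores    = {0.9f, 0.2f, 0.05f}, class_ids = {17, 4, 9}
  // Only the (0.9, 17) pair survives, so the Detection ends up with a single
  // score/label_id entry. If no candidate passes (e.g. all scores < 0.3),
  // detection.score() is empty and the check below drops the box entirely.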
+ if (detection.score().empty()) { + continue; + } const auto& bbox = detection.location_data().relative_bounding_box(); if (bbox.width() < 0 || bbox.height() < 0 || std::isnan(bbox.width()) || std::isnan(bbox.height())) { @@ -910,11 +922,21 @@ absl::Status TensorsToDetectionsCalculator::ConvertToDetections( } Detection TensorsToDetectionsCalculator::ConvertToDetection( - float box_ymin, float box_xmin, float box_ymax, float box_xmax, float score, - int class_id, bool flip_vertically) { + float box_ymin, float box_xmin, float box_ymax, float box_xmax, + absl::Span scores, absl::Span class_ids, + bool flip_vertically) { Detection detection; - detection.add_score(score); - detection.add_label_id(class_id); + for (int i = 0; i < scores.size(); ++i) { + if (!IsClassIndexAllowed(class_ids[i])) { + continue; + } + if (options_.has_min_score_thresh() && + scores[i] < options_.min_score_thresh()) { + continue; + } + detection.add_score(scores[i]); + detection.add_label_id(class_ids[i]); + } LocationData* location_data = detection.mutable_location_data(); location_data->set_format(LocationData::RELATIVE_BOUNDING_BOX); diff --git a/mediapipe/calculators/tensor/tensors_to_detections_calculator.proto b/mediapipe/calculators/tensor/tensors_to_detections_calculator.proto index 5cedff6c7..49db8e3e7 100644 --- a/mediapipe/calculators/tensor/tensors_to_detections_calculator.proto +++ b/mediapipe/calculators/tensor/tensors_to_detections_calculator.proto @@ -75,7 +75,7 @@ message TensorsToDetectionsCalculatorOptions { // representation has a bottom-left origin (e.g., in OpenGL). optional bool flip_vertically = 18 [default = false]; - // Score threshold for perserving decoded detections. + // Score threshold for preserving decoded detections. optional float min_score_thresh = 19; // The maximum number of the detection results to return. If < 0, all diff --git a/mediapipe/calculators/tensor/tensors_to_landmarks_calculator.cc b/mediapipe/calculators/tensor/tensors_to_landmarks_calculator.cc index 5942f234d..77488443f 100644 --- a/mediapipe/calculators/tensor/tensors_to_landmarks_calculator.cc +++ b/mediapipe/calculators/tensor/tensors_to_landmarks_calculator.cc @@ -124,7 +124,7 @@ absl::Status TensorsToLandmarksCalculator::Open(CalculatorContext* cc) { kFlipVertically(cc).IsConnected())) { RET_CHECK(options_.has_input_image_height() && options_.has_input_image_width()) - << "Must provide input width/height for using flipping when outputing " + << "Must provide input width/height for using flipping when outputting " "landmarks in absolute coordinates."; } return absl::OkStatus(); diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc index 6456126ae..6164c7b0a 100644 --- a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc @@ -12,32 +12,35 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include +#include +#include +#include #include -#include "absl/strings/str_format.h" -#include "absl/types/span.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h" +#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter.h" +#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h" #include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/tensor.h" #include "mediapipe/framework/port.h" #include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/framework/port/status_macros.h" #include "mediapipe/gpu/gpu_origin.pb.h" -#include "mediapipe/util/resource_util.h" -#include "tensorflow/lite/interpreter.h" #if !MEDIAPIPE_DISABLE_GPU #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gl_simple_shaders.h" -#include "mediapipe/gpu/gpu_buffer.h" +#include "mediapipe/gpu/gpu_buffer_format.h" #include "mediapipe/gpu/shader_util.h" #endif // !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_OPENCV -#include "mediapipe/framework/formats/image_opencv.h" -#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h" #endif // !MEDIAPIPE_DISABLE_OPENCV #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 @@ -62,37 +65,9 @@ namespace { constexpr int kWorkgroupSize = 8; // Block size for GPU shader. enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES }; -// Commonly used to compute the number of blocks to launch in a kernel. -int NumGroups(const int size, const int group_size) { // NOLINT - return (size + group_size - 1) / group_size; -} - -bool CanUseGpu() { -#if !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED - // TODO: Configure GPU usage policy in individual calculators. 
- constexpr bool kAllowGpuProcessing = true; - return kAllowGpuProcessing; -#else - return false; -#endif // !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED -} - constexpr char kTensorsTag[] = "TENSORS"; constexpr char kOutputSizeTag[] = "OUTPUT_SIZE"; constexpr char kMaskTag[] = "MASK"; - -absl::StatusOr> GetHwcFromDims( - const std::vector& dims) { - if (dims.size() == 3) { - return std::make_tuple(dims[0], dims[1], dims[2]); - } else if (dims.size() == 4) { - // BHWC format check B == 1 - RET_CHECK_EQ(1, dims[0]) << "Expected batch to be 1 for BHWC heatmap"; - return std::make_tuple(dims[1], dims[2], dims[3]); - } else { - RET_CHECK(false) << "Invalid shape for segmentation tensor " << dims.size(); - } -} } // namespace namespace mediapipe { @@ -156,19 +131,28 @@ class TensorsToSegmentationCalculator : public CalculatorBase { private: absl::Status LoadOptions(CalculatorContext* cc); absl::Status InitGpu(CalculatorContext* cc); - absl::Status ProcessGpu(CalculatorContext* cc); - absl::Status ProcessCpu(CalculatorContext* cc); + absl::Status ProcessGpu(CalculatorContext* cc, + const std::vector& input_tensors, + std::tuple hwc, int output_width, + int output_height); void GlRender(); bool DoesGpuTextureStartAtBottom() { return options_.gpu_origin() != mediapipe::GpuOrigin_Mode_TOP_LEFT; } - + absl::Status InitConverterIfNecessary() { #if !MEDIAPIPE_DISABLE_OPENCV - template - absl::Status ApplyActivation(cv::Mat& tensor_mat, cv::Mat* small_mask_mat); + if (!cpu_converter_) { + MP_ASSIGN_OR_RETURN(cpu_converter_, CreateOpenCvConverter(options_)); + } +#else + RET_CHECK_FAIL() << "OpenCV processing disabled."; #endif // !MEDIAPIPE_DISABLE_OPENCV - ::mediapipe::TensorsToSegmentationCalculatorOptions options_; + return absl::OkStatus(); + } + + mediapipe::TensorsToSegmentationCalculatorOptions options_; + std::unique_ptr cpu_converter_; #if !MEDIAPIPE_DISABLE_GPU mediapipe::GlCalculatorHelper gpu_helper_; @@ -208,7 +192,7 @@ absl::Status TensorsToSegmentationCalculator::GetContract( if (CanUseGpu()) { #if !MEDIAPIPE_DISABLE_GPU MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract( - cc, /*requesst_gpu_as_optional=*/true)); + cc, /*request_gpu_as_optional=*/true)); #if MEDIAPIPE_METAL_ENABLED MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); #endif // MEDIAPIPE_METAL_ENABLED @@ -261,7 +245,7 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) { MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims)); int tensor_channels = std::get<2>(hwc); - typedef mediapipe::TensorsToSegmentationCalculatorOptions Options; + using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions; switch (options_.activation()) { case Options::NONE: RET_CHECK_EQ(tensor_channels, 1); @@ -275,6 +259,17 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) { } } + // Get dimensions. 
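GetHwcFromDims (removed above along with NumGroups and CanUseGpu) now lives in the new tensors_to_segmentation_utils.h included at the top of this file; that header is not part of this excerpt. Its removed implementation, with the template arguments spelled out in full, reads approximately:

  // Returns (height, width, channels); for 4-D shapes the batch must be 1.
  absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
      const std::vector<int>& dims) {
    if (dims.size() == 3) {
      return std::make_tuple(dims[0], dims[1], dims[2]);
    } else if (dims.size() == 4) {
      RET_CHECK_EQ(1, dims[0]) << "Expected batch to be 1 for BHWC heatmap";
      return std::make_tuple(dims[1], dims[2], dims[3]);
    } else {
      RET_CHECK(false) << "Invalid shape for segmentation tensor "
                       << dims.size();
    }
  }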
+ MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims)); + auto [tensor_height, tensor_width, tensor_channels] = hwc; + int output_width = tensor_width, output_height = tensor_height; + if (cc->Inputs().HasTag(kOutputSizeTag)) { + const auto& size = + cc->Inputs().Tag(kOutputSizeTag).Get>(); + output_width = size.first; + output_height = size.second; + } + if (use_gpu) { #if !MEDIAPIPE_DISABLE_GPU if (!gpu_initialized_) { @@ -286,16 +281,25 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) { #endif // !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU - MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status { - MP_RETURN_IF_ERROR(ProcessGpu(cc)); - return absl::OkStatus(); - })); + MP_RETURN_IF_ERROR( + gpu_helper_.RunInGlContext([this, cc, &input_tensors, output_width, + output_height, hwc]() -> absl::Status { + MP_RETURN_IF_ERROR( + ProcessGpu(cc, input_tensors, hwc, output_width, output_height)); + return absl::OkStatus(); + })); #else RET_CHECK_FAIL() << "GPU processing disabled."; #endif // !MEDIAPIPE_DISABLE_GPU } else { #if !MEDIAPIPE_DISABLE_OPENCV - MP_RETURN_IF_ERROR(ProcessCpu(cc)); + // Lazily initialize converter. + MP_RETURN_IF_ERROR(InitConverterIfNecessary()); + MP_ASSIGN_OR_RETURN( + std::unique_ptr output_mask, + cpu_converter_->Convert(input_tensors, output_width, output_height)); + cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), + cc->InputTimestamp()); #else RET_CHECK_FAIL() << "OpenCV processing disabled."; #endif // !MEDIAPIPE_DISABLE_OPENCV @@ -329,132 +333,15 @@ absl::Status TensorsToSegmentationCalculator::Close(CalculatorContext* cc) { return absl::OkStatus(); } -absl::Status TensorsToSegmentationCalculator::ProcessCpu( - CalculatorContext* cc) { -#if !MEDIAPIPE_DISABLE_OPENCV - // Get input streams, and dimensions. - const auto& input_tensors = - cc->Inputs().Tag(kTensorsTag).Get>(); - MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims)); - auto [tensor_height, tensor_width, tensor_channels] = hwc; - int output_width = tensor_width, output_height = tensor_height; - if (cc->Inputs().HasTag(kOutputSizeTag)) { - const auto& size = - cc->Inputs().Tag(kOutputSizeTag).Get>(); - output_width = size.first; - output_height = size.second; - } - - // Create initial working mask. - cv::Mat small_mask_mat(cv::Size(tensor_width, tensor_height), CV_32FC1); - - // Wrap input tensor. - auto raw_input_tensor = &input_tensors[0]; - auto raw_input_view = raw_input_tensor->GetCpuReadView(); - const float* raw_input_data = raw_input_view.buffer(); - cv::Mat tensor_mat(cv::Size(tensor_width, tensor_height), - CV_MAKETYPE(CV_32F, tensor_channels), - const_cast(raw_input_data)); - - // Process mask tensor and apply activation function. - if (tensor_channels == 2) { - MP_RETURN_IF_ERROR(ApplyActivation(tensor_mat, &small_mask_mat)); - } else if (tensor_channels == 1) { - RET_CHECK(mediapipe::TensorsToSegmentationCalculatorOptions::SOFTMAX != - options_.activation()); // Requires 2 channels. - if (mediapipe::TensorsToSegmentationCalculatorOptions::NONE == - options_.activation()) // Pass-through optimization. - tensor_mat.copyTo(small_mask_mat); - else - MP_RETURN_IF_ERROR(ApplyActivation(tensor_mat, &small_mask_mat)); - } else { - RET_CHECK_FAIL() << "Unsupported number of tensor channels " - << tensor_channels; - } - - // Send out image as CPU packet. 
- std::shared_ptr mask_frame = std::make_shared( - ImageFormat::VEC32F1, output_width, output_height); - std::unique_ptr output_mask = absl::make_unique(mask_frame); - auto output_mat = formats::MatView(output_mask.get()); - // Upsample small mask into output. - cv::resize(small_mask_mat, *output_mat, - cv::Size(output_width, output_height)); - cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp()); -#endif // !MEDIAPIPE_DISABLE_OPENCV - - return absl::OkStatus(); -} - -#if !MEDIAPIPE_DISABLE_OPENCV -template -absl::Status TensorsToSegmentationCalculator::ApplyActivation( - cv::Mat& tensor_mat, cv::Mat* small_mask_mat) { - // Configure activation function. - const int output_layer_index = options_.output_layer_index(); - typedef mediapipe::TensorsToSegmentationCalculatorOptions Options; - const auto activation_fn = [&](const cv::Vec2f& mask_value) { - float new_mask_value = 0; - // TODO consider moving switch out of the loop, - // and also avoid float/Vec2f casting. - switch (options_.activation()) { - case Options::NONE: { - new_mask_value = mask_value[0]; - break; - } - case Options::SIGMOID: { - const float pixel0 = mask_value[0]; - new_mask_value = 1.0 / (std::exp(-pixel0) + 1.0); - break; - } - case Options::SOFTMAX: { - const float pixel0 = mask_value[0]; - const float pixel1 = mask_value[1]; - const float max_pixel = std::max(pixel0, pixel1); - const float min_pixel = std::min(pixel0, pixel1); - const float softmax_denom = - /*exp(max_pixel - max_pixel)=*/1.0f + - std::exp(min_pixel - max_pixel); - new_mask_value = std::exp(mask_value[output_layer_index] - max_pixel) / - softmax_denom; - break; - } - } - return new_mask_value; - }; - - // Process mask tensor. - for (int i = 0; i < tensor_mat.rows; ++i) { - for (int j = 0; j < tensor_mat.cols; ++j) { - const T& input_pix = tensor_mat.at(i, j); - const float mask_value = activation_fn(input_pix); - small_mask_mat->at(i, j) = mask_value; - } - } - - return absl::OkStatus(); -} -#endif // !MEDIAPIPE_DISABLE_OPENCV - // Steps: // 1. receive tensor // 2. process segmentation tensor into small mask // 3. upsample small mask into output mask to be same size as input image absl::Status TensorsToSegmentationCalculator::ProcessGpu( - CalculatorContext* cc) { + CalculatorContext* cc, const std::vector& input_tensors, + std::tuple hwc, int output_width, int output_height) { #if !MEDIAPIPE_DISABLE_GPU - // Get input streams, and dimensions. - const auto& input_tensors = - cc->Inputs().Tag(kTensorsTag).Get>(); - MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims)); auto [tensor_height, tensor_width, tensor_channels] = hwc; - int output_width = tensor_width, output_height = tensor_height; - if (cc->Inputs().HasTag(kOutputSizeTag)) { - const auto& size = - cc->Inputs().Tag(kOutputSizeTag).Get>(); - output_width = size.first; - output_height = size.second; - } // Create initial working mask texture. #if !(MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31) @@ -632,7 +519,7 @@ void TensorsToSegmentationCalculator::GlRender() { absl::Status TensorsToSegmentationCalculator::LoadOptions( CalculatorContext* cc) { // Get calculator options specified in the graph. - options_ = cc->Options<::mediapipe::TensorsToSegmentationCalculatorOptions>(); + options_ = cc->Options(); return absl::OkStatus(); } @@ -826,7 +713,7 @@ void main() { #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 // Shader defines. 
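The ApplyActivation template removed above (its per-pixel math presumably now lives in the OpenCV converter created by CreateOpenCvConverter) boils down to the following, restated from the deleted code as a sketch:

  // NONE:    out = x0
  // SIGMOID: out = 1 / (1 + exp(-x0))
  // SOFTMAX over two channels, computed in a numerically stable way:
  float TwoChannelSoftmax(float x0, float x1, int output_layer_index) {
    const float max_pixel = std::max(x0, x1);
    const float min_pixel = std::min(x0, x1);
    // exp(max_pixel - max_pixel) == 1, hence the 1.0f term.
    const float softmax_denom = 1.0f + std::exp(min_pixel - max_pixel);
    const float selected = output_layer_index == 0 ? x0 : x1;
    return std::exp(selected - max_pixel) / softmax_denom;
  }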
- typedef mediapipe::TensorsToSegmentationCalculatorOptions Options; + using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions; const std::string output_layer_index = "\n#define OUTPUT_LAYER_INDEX int(" + std::to_string(options_.output_layer_index()) + ")"; diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test.cc index 3db9145d2..9ac63f31a 100644 --- a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test.cc +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test.cc @@ -17,10 +17,8 @@ #include #include -#include "absl/log/absl_log.h" -#include "absl/log/log.h" -#include "absl/strings/substitute.h" #include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h" +#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_runner.h" #include "mediapipe/framework/formats/image.h" @@ -30,7 +28,6 @@ #include "mediapipe/framework/formats/tensor.h" #include "mediapipe/framework/packet.h" #include "mediapipe/framework/port/gtest.h" -#include "mediapipe/framework/port/parse_text_proto.h" #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/timestamp.h" @@ -40,62 +37,17 @@ namespace { using ::testing::SizeIs; using ::testing::TestWithParam; using Options = mediapipe::TensorsToSegmentationCalculatorOptions; +namespace test_utils = ::mediapipe::tensors_to_segmentation_utils; -std::string ActivationTypeToString(Options::Activation activation) { - switch (activation) { - case Options::NONE: - return "NONE"; - case Options::SIGMOID: - return "SIGMOID"; - case Options::SOFTMAX: - return "SOFTMAX"; - default: - ABSL_LOG(FATAL) << "Unknown activation type: " << activation; - return "UNKNOWN"; - } -} +using TensorsToSegmentationCalculatorTest = + TestWithParam; -struct FormattingTestCase { - std::string test_name; - std::vector inputs; - std::vector expected_outputs; - Options::Activation activation; - int rows; - int cols; - int channels; -}; - -using TensorsToSegmentationCalculatorTest = TestWithParam; - -// Currently only useable for tests with no output resize. 
TEST_P(TensorsToSegmentationCalculatorTest, ParameterizedTests) { - const FormattingTestCase& test_case = GetParam(); - std::vector inputs = test_case.inputs; - std::vector expected_outputs = test_case.expected_outputs; - Options::Activation activation = test_case.activation; - int rows = test_case.rows; - int cols = test_case.cols; - int channels = test_case.channels; + const auto& [test_name, inputs, expected_outputs, activation, rows, cols, + rows_new, cols_new, channels, max_abs_diff] = GetParam(); - std::string string_config = absl::Substitute( - R"pb( - input_stream: "tensors" - input_stream: "size" - node { - calculator: "TensorsToSegmentationCalculator" - input_stream: "TENSORS:tensors" - input_stream: "OUTPUT_SIZE:size" - output_stream: "MASK:image_as_mask" - options: { - [mediapipe.TensorsToSegmentationCalculatorOptions.ext] { - activation: $0 - } - } - } - )pb", - ActivationTypeToString(activation)); auto graph_config = - mediapipe::ParseTextProtoOrDie(string_config); + test_utils::CreateGraphConfigForTest(/*test_gpu=*/false, activation); std::vector output_packets; tool::AddVectorSink("image_as_mask", &graph_config, &output_packets); @@ -119,28 +71,34 @@ TEST_P(TensorsToSegmentationCalculatorTest, ParameterizedTests) { MP_ASSERT_OK(graph.AddPacketToInputStream( "tensors", mediapipe::Adopt(tensors.release()).At(Timestamp(0)))); } + + // The output size is defined as pair(new_width, new_height). MP_ASSERT_OK(graph.AddPacketToInputStream( - "size", - mediapipe::Adopt(new std::pair(rows, cols)).At(Timestamp(0)))); + "size", mediapipe::Adopt(new std::pair(cols_new, rows_new)) + .At(Timestamp(0)))); MP_ASSERT_OK(graph.WaitUntilIdle()); ASSERT_THAT(output_packets, SizeIs(1)); const Image& image_as_mask = output_packets[0].Get(); + EXPECT_FALSE(image_as_mask.UsesGpu()); + std::shared_ptr result_mat = formats::MatView(&image_as_mask); - EXPECT_EQ(result_mat->rows, rows); - EXPECT_EQ(result_mat->cols, cols); - EXPECT_EQ(result_mat->channels(), channels); + EXPECT_EQ(result_mat->rows, rows_new); + EXPECT_EQ(result_mat->cols, cols_new); + EXPECT_EQ(result_mat->channels(), 1); // Compare the real result with the expected result. - cv::Mat expected_result = cv::Mat( - rows, cols, CV_32FC1, const_cast(expected_outputs.data())); + cv::Mat expected_result = + cv::Mat(rows_new, cols_new, CV_32FC1, + const_cast(expected_outputs.data())); cv::Mat diff; cv::absdiff(*result_mat, expected_result, diff); double max_val; cv::minMaxLoc(diff, nullptr, &max_val); - // Expects the maximum absolute pixel-by-pixel difference is less than 1e-5. - // This delta is for passthorugh accuracy only. - EXPECT_LE(max_val, 1e-5); + + // The max allowable diff between output and expected output varies between + // tests. 
+ EXPECT_LE(max_val, max_abs_diff); MP_ASSERT_OK(graph.CloseInputStream("tensors")); MP_ASSERT_OK(graph.CloseInputStream("size")); @@ -149,18 +107,97 @@ TEST_P(TensorsToSegmentationCalculatorTest, ParameterizedTests) { INSTANTIATE_TEST_SUITE_P( TensorsToSegmentationCalculatorTests, TensorsToSegmentationCalculatorTest, - testing::ValuesIn({ - {/*test_name=*/"NoActivationAndNoOutputResize", - /*inputs=*/ - {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0}, - /*expected_outputs=*/ - {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, - 14.0, 15.0, 16.0}, - /*activation=*/Options::NONE, - /*rows=*/4, - /*cols=*/4, - /*channels=*/1}, + testing::ValuesIn({ + {.test_name = "NoActivationAndNoOutputResize", + .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0}, + .expected_outputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, + .activation = Options::NONE, + .rows = 4, + .cols = 4, + .rows_new = 4, + .cols_new = 4, + .channels = 1, + .max_abs_diff = 1e-7}, + {.test_name = "OutputResizeOnly", + .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0}, + .expected_outputs = {1, 1.5, 2.166667, 2.833333, 3.5, 4, + 3.8, 4.3, 4.966667, 5.633333, 6.3, 6.8, + 7, 7.5, 8.166667, 8.833333, 9.5, 10, + 10.2, 10.7, 11.366667, 12.033333, 12.7, 13.2, + 13, 13.5, 14.166667, 14.833333, 15.5, 16}, + .activation = Options::NONE, + .rows = 4, + .cols = 4, + .rows_new = 5, + .cols_new = 6, + .channels = 1, + .max_abs_diff = 1e-6}, + {.test_name = "SigmoidActivationWithNoOutputResize", + .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0}, + .expected_outputs = {0.731059, 0.880797, 0.952574, 0.982014, 0.993307, + 0.997527, 0.999089, 0.999665, 0.999877, 0.999955, + 0.999983, 0.999994, 0.999998, 0.999999, 1.0, 1.0}, + .activation = Options::SIGMOID, + .rows = 4, + .cols = 4, + .rows_new = 4, + .cols_new = 4, + .channels = 1, + .max_abs_diff = 1e-6}, + {.test_name = "SigmoidActivationWithOutputResize", + .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0}, + .expected_outputs = {0.731059, 0.805928, 0.89276, 0.940611, 0.967294, + 0.982014, 0.914633, 0.93857, 0.966279, 0.981363, + 0.989752, 0.994369, 0.996592, 0.997666, 0.998873, + 0.999404, 0.999683, 0.999829, 0.999913, 0.99994, + 0.999971, 0.999985, 0.999992, 0.999996, 0.999998, + 0.999998, 0.999999, 1.0, 1.0, 1.0}, + .activation = Options::SIGMOID, + .rows = 4, + .cols = 4, + .rows_new = 5, + .cols_new = 6, + .channels = 1, + .max_abs_diff = 1e-6}, + {.test_name = "SoftmaxActivationWithNoOutputResize", + .inputs = {1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 6.0, 1.5, + 7.0, 10.0, 11.0, 4.0, 12.0, 15.0, 16.0, 18.5, + 19.0, 20.0, 22.0, 23.0, 24.5, 23.4, 25.6, 28.3, + 29.2, 30.0, 24.6, 29.2, 30.0, 24.9, 31.2, 30.3}, + .expected_outputs = {0.731059, 0.119203, 0.880797, 0.0109869, 0.952574, + 0.000911051, 0.952574, 0.924142, 0.731059, + 0.731059, 0.24974, 0.937027, 0.689974, 0.990048, + 0.0060598, 0.28905}, + .activation = Options::SOFTMAX, + .rows = 4, + .cols = 4, + .rows_new = 4, + .cols_new = 4, + .channels = 2, + .max_abs_diff = 1e-6}, + {.test_name = "SoftmaxActivationWithOutputResize", + .inputs = {1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 6.0, 1.5, + 7.0, 10.0, 11.0, 4.0, 12.0, 15.0, 16.0, 18.5, + 19.0, 20.0, 22.0, 23.0, 24.5, 23.4, 25.6, 28.3, + 29.2, 30.0, 24.6, 29.2, 30.0, 24.9, 31.2, 30.3}, + 
.expected_outputs = {0.731059, 0.425131, 0.246135, 0.753865, 0.445892, + 0.0109869, 0.886119, 0.461259, 0.185506, 0.781934, + 0.790618, 0.650195, 0.841816, 0.603901, 0.40518, + 0.561962, 0.765871, 0.930584, 0.718733, 0.763744, + 0.703402, 0.281989, 0.459635, 0.742634, 0.689974, + 0.840011, 0.82605, 0.170058, 0.147555, 0.28905}, + .activation = Options::SOFTMAX, + .rows = 4, + .cols = 4, + .rows_new = 5, + .cols_new = 6, + .channels = 2, + .max_abs_diff = 1e-6}, }), [](const testing::TestParamInfo< TensorsToSegmentationCalculatorTest::ParamType>& info) { diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.cc new file mode 100644 index 000000000..2fc9019c2 --- /dev/null +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.cc @@ -0,0 +1,111 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h" + +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/strings/substitute.h" +#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h" +#include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/port/parse_text_proto.h" + +namespace mediapipe { +namespace tensors_to_segmentation_utils { + +std::string ActivationTypeToString( + const TensorsToSegmentationCalculatorOptions::Activation& activation) { + switch (activation) { + case TensorsToSegmentationCalculatorOptions::NONE: + return "NONE"; + case TensorsToSegmentationCalculatorOptions::SIGMOID: + return "SIGMOID"; + case TensorsToSegmentationCalculatorOptions::SOFTMAX: + return "SOFTMAX"; + } + ABSL_LOG(FATAL) << "Unknown activation type: " << activation; + return "UNKNOWN"; +} + +std::vector ArrayFloatToUnsignedChar( + const std::vector& array) { + std::vector result; + result.reserve(array.size()); + for (int i = 0; i < array.size(); ++i) { + result.push_back(static_cast(array[i])); + } + return result; +} + +std::vector MakeRedAlphaMatrix(const std::vector& values) { + std::vector result; + result.reserve(values.size() * 4); + for (const float& value : values) { + result.push_back(value); + result.push_back(0); + result.push_back(0); + result.push_back(value); + } + return result; +} + +// For GPU tests, the input tensor needs to be moved to GPU, using +// TensorViewRequestor. After calculation, the output needs to be moved back +// to CPU, using ToImageCalculator. The output is an ImageFrame. 
+mediapipe::CalculatorGraphConfig CreateGraphConfigForTest( + bool test_gpu, + const TensorsToSegmentationCalculatorOptions::Activation& activation) { + std::string pre_process = R"pb( + node { + calculator: "mediapipe.aimatter.TensorViewRequestor" + input_stream: "TENSORS:tensors" + output_stream: "TENSORS:tensors_gpu" + options { + [mediapipe.aimatter.TensorViewRequestorOptions.ext] { gpu {} } + } + } + )pb"; + std::string post_process = R"pb( + node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:image_as_mask_gpu" + output_stream: "IMAGE_CPU:image_as_mask" + } + )pb"; + return mediapipe::ParseTextProtoOrDie( + absl::Substitute( + R"pb( + input_stream: "tensors" + input_stream: "size" $0 + node { + calculator: "TensorsToSegmentationCalculator" + input_stream: "TENSORS:tensors$1" + input_stream: "OUTPUT_SIZE:size" + output_stream: "MASK:image_as_mask$2" + options: { + [mediapipe.TensorsToSegmentationCalculatorOptions.ext] { + activation: $3 + gpu_origin: TOP_LEFT + } + } + } $4 + )pb", + test_gpu ? pre_process : "", test_gpu ? "_gpu" : "", + test_gpu ? "_gpu" : "", ActivationTypeToString(activation), + test_gpu ? post_process : "")); +} +} // namespace tensors_to_segmentation_utils +} // namespace mediapipe diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h new file mode 100644 index 000000000..abeda546b --- /dev/null +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h @@ -0,0 +1,57 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
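For test_gpu == false, the Substitute call above drops the pre/post-processing nodes and the "_gpu" suffixes, so the generated graph reduces to roughly the following (activation shown as NONE; a sketch of the expansion, not a string taken from the patch):

  constexpr char kExpectedCpuConfig[] = R"pb(
    input_stream: "tensors"
    input_stream: "size"
    node {
      calculator: "TensorsToSegmentationCalculator"
      input_stream: "TENSORS:tensors"
      input_stream: "OUTPUT_SIZE:size"
      output_stream: "MASK:image_as_mask"
      options: {
        [mediapipe.TensorsToSegmentationCalculatorOptions.ext] {
          activation: NONE
          gpu_origin: TOP_LEFT
        }
      }
    }
  )pb";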
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h
new file mode 100644
index 000000000..abeda546b
--- /dev/null
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h
@@ -0,0 +1,57 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CALCULATOR_TEST_UTILS_H_
+#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CALCULATOR_TEST_UTILS_H_
+
+#include <string>
+#include <vector>
+
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
+#include "mediapipe/framework/calculator.pb.h"
+
+namespace mediapipe {
+namespace tensors_to_segmentation_utils {
+std::string ActivationTypeToString(
+    const mediapipe::TensorsToSegmentationCalculatorOptions::Activation&
+        activation);
+
+std::vector<unsigned char> ArrayFloatToUnsignedChar(
+    const std::vector<float>& array);
+
+std::vector<float> MakeRedAlphaMatrix(const std::vector<float>& values);
+
+mediapipe::CalculatorGraphConfig CreateGraphConfigForTest(
+    bool test_gpu,
+    const mediapipe::TensorsToSegmentationCalculatorOptions::Activation&
+        activation);
+
+struct FormattingTestCase {
+  std::string test_name;
+  std::vector<float> inputs;
+  std::vector<float> expected_outputs;
+  mediapipe::TensorsToSegmentationCalculatorOptions::Activation activation;
+  int rows = 1;
+  int cols = 1;
+  int rows_new = 1;
+  int cols_new = 1;
+  int channels = 1;
+  double max_abs_diff = 1e-7;
+};
+}  // namespace tensors_to_segmentation_utils
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CALCULATOR_TEST_UTILS_H_
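For readers unfamiliar with the pattern (illustrative aside, not part of the diff): `FormattingTestCase` is meant to drive a value-parameterized gtest, with `test_name` used by the name generator. The sketch below shows that wiring under those assumptions; the class name `...Sketch` and the single sample case are invented for illustration.

```cpp
#include <vector>

#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h"
#include "mediapipe/framework/port/gtest.h"

namespace mediapipe::tensors_to_segmentation_utils {
namespace {

class TensorsToSegmentationCalculatorTestSketch
    : public testing::TestWithParam<FormattingTestCase> {};

TEST_P(TensorsToSegmentationCalculatorTestSketch, OutputsMatchWithinTolerance) {
  const FormattingTestCase& tc = GetParam();
  // Build a graph from CreateGraphConfigForTest(/*test_gpu=*/false,
  // tc.activation), feed tc.inputs as a tc.rows x tc.cols x tc.channels
  // tensor, and compare the MASK output against tc.expected_outputs
  // elementwise, within tc.max_abs_diff.
}

INSTANTIATE_TEST_SUITE_P(
    Sketch, TensorsToSegmentationCalculatorTestSketch,
    testing::ValuesIn(std::vector<FormattingTestCase>{
        {.test_name = "Passthrough",
         .inputs = {0.5f},
         .expected_outputs = {0.5f},
         .activation = TensorsToSegmentationCalculatorOptions::NONE}}),
    [](const testing::TestParamInfo<FormattingTestCase>& info) {
      return info.param.test_name;
    });

}  // namespace
}  // namespace mediapipe::tensors_to_segmentation_utils
```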
+ +#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h" + +#include + +#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h" +#include "mediapipe/framework/port/gtest.h" + +namespace mediapipe::tensors_to_segmentation_utils { +namespace { + +using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions; + +TEST(TensorsToSegmentationCalculatorTestUtilsTest, + ActivationTypeToStringWorksCorrectly) { + EXPECT_EQ(ActivationTypeToString(Options::NONE), "NONE"); + EXPECT_EQ(ActivationTypeToString(Options::SIGMOID), "SIGMOID"); + EXPECT_EQ(ActivationTypeToString(Options::SOFTMAX), "SOFTMAX"); +} + +TEST(TensorsToSegmentationCalculatorTestUtilsTest, + ArrayFloatToUnsignedCharWorksCorrectly) { + std::vector input = {1.0, 2.0, 3.0}; + std::vector expected = {1, 2, 3}; + EXPECT_EQ(ArrayFloatToUnsignedChar(input), expected); +} + +TEST(TensorsToSegmentationCalculatorTestUtilsTest, + MakeRedAlphaMatrixWorksCorrectly) { + std::vector input = {1.0, 2.0, 3.0}; + std::vector expected = {1.0, 0.0, 0.0, 1.0, 2.0, 0.0, + 0.0, 2.0, 3.0, 0.0, 0.0, 3.0}; + EXPECT_EQ(MakeRedAlphaMatrix(input), expected); +} + +} // namespace +} // namespace mediapipe::tensors_to_segmentation_utils diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_converter.h b/mediapipe/calculators/tensor/tensors_to_segmentation_converter.h new file mode 100644 index 000000000..61d95dfe0 --- /dev/null +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_converter.h @@ -0,0 +1,43 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_H_ +#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_H_ + +#include +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/tensor.h" + +namespace mediapipe { + +class TensorsToSegmentationConverter { + public: + virtual ~TensorsToSegmentationConverter() = default; + + // Converts tensors to image mask. + // Returns a unique pointer containing the converted image. + // @input_tensors contains the tensors needed to be processed. + // @output_width/height describes output dimensions to reshape the output mask + // into. + virtual absl::StatusOr> Convert( + const std::vector& input_tensors, int output_width, + int output_height) = 0; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_H_ diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.cc new file mode 100644 index 000000000..1ee2e172b --- /dev/null +++ b/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.cc @@ -0,0 +1,157 @@ +// Copyright 2023 The MediaPipe Authors. 
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.cc
new file mode 100644
index 000000000..1ee2e172b
--- /dev/null
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.cc
@@ -0,0 +1,157 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h"
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter.h"
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
+#include "mediapipe/framework/formats/image.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_opencv.h"
+#include "mediapipe/framework/formats/tensor.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status_macros.h"
+
+namespace mediapipe {
+namespace {
+
+class OpenCvProcessor : public TensorsToSegmentationConverter {
+ public:
+  absl::Status Init(const TensorsToSegmentationCalculatorOptions& options) {
+    options_ = options;
+    return absl::OkStatus();
+  }
+
+  absl::StatusOr<std::unique_ptr<Image>> Convert(
+      const std::vector<Tensor>& input_tensors, int output_width,
+      int output_height) override;
+
+ private:
+  template <class T>
+  absl::Status ApplyActivation(cv::Mat& tensor_mat, cv::Mat* small_mask_mat);
+
+  TensorsToSegmentationCalculatorOptions options_;
+};
+
+absl::StatusOr<std::unique_ptr<Image>> OpenCvProcessor::Convert(
+    const std::vector<Tensor>& input_tensors, int output_width,
+    int output_height) {
+  MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
+  auto [tensor_height, tensor_width, tensor_channels] = hwc;
+  // Create initial working mask.
+  cv::Mat small_mask_mat(cv::Size(tensor_width, tensor_height), CV_32FC1);
+
+  // Wrap input tensor.
+  auto raw_input_tensor = &input_tensors[0];
+  auto raw_input_view = raw_input_tensor->GetCpuReadView();
+  const float* raw_input_data = raw_input_view.buffer<float>();
+  cv::Mat tensor_mat(cv::Size(tensor_width, tensor_height),
+                     CV_MAKETYPE(CV_32F, tensor_channels),
+                     const_cast<float*>(raw_input_data));
+
+  // Process mask tensor and apply activation function.
+  if (tensor_channels == 2) {
+    MP_RETURN_IF_ERROR(ApplyActivation<cv::Vec2f>(tensor_mat, &small_mask_mat));
+  } else if (tensor_channels == 1) {
+    RET_CHECK(mediapipe::TensorsToSegmentationCalculatorOptions::SOFTMAX !=
+              options_.activation());  // Requires 2 channels.
+    if (mediapipe::TensorsToSegmentationCalculatorOptions::NONE ==
+        options_.activation())  // Pass-through optimization.
+      tensor_mat.copyTo(small_mask_mat);
+    else
+      MP_RETURN_IF_ERROR(ApplyActivation<float>(tensor_mat, &small_mask_mat));
+  } else {
+    RET_CHECK_FAIL() << "Unsupported number of tensor channels "
+                     << tensor_channels;
+  }
+
+  // Send out image as CPU packet.
+  std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
+      ImageFormat::VEC32F1, output_width, output_height);
+  auto output_mask = std::make_unique<Image>(mask_frame);
+  auto output_mat = formats::MatView(output_mask.get());
+  // Upsample small mask into output.
+  cv::resize(small_mask_mat, *output_mat,
+             cv::Size(output_width, output_height));
+  return output_mask;
+}
+
+template <class T>
+absl::Status OpenCvProcessor::ApplyActivation(cv::Mat& tensor_mat,
+                                              cv::Mat* small_mask_mat) {
+  // Configure activation function.
+  const int output_layer_index = options_.output_layer_index();
+  using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions;
+  const auto activation_fn = [&](const cv::Vec2f& mask_value) {
+    float new_mask_value = 0;
+    // TODO consider moving switch out of the loop,
+    // and also avoid float/Vec2f casting.
+    switch (options_.activation()) {
+      case Options::NONE: {
+        new_mask_value = mask_value[0];
+        break;
+      }
+      case Options::SIGMOID: {
+        const float pixel0 = mask_value[0];
+        new_mask_value = 1.0 / (std::exp(-pixel0) + 1.0);
+        break;
+      }
+      case Options::SOFTMAX: {
+        const float pixel0 = mask_value[0];
+        const float pixel1 = mask_value[1];
+        const float max_pixel = std::max(pixel0, pixel1);
+        const float min_pixel = std::min(pixel0, pixel1);
+        const float softmax_denom =
+            /*exp(max_pixel - max_pixel)=*/1.0f +
+            std::exp(min_pixel - max_pixel);
+        new_mask_value = std::exp(mask_value[output_layer_index] - max_pixel) /
+                         softmax_denom;
+        break;
+      }
+    }
+    return new_mask_value;
+  };
+
+  // Process mask tensor.
+  for (int i = 0; i < tensor_mat.rows; ++i) {
+    for (int j = 0; j < tensor_mat.cols; ++j) {
+      const T& input_pix = tensor_mat.at<T>(i, j);
+      const float mask_value = activation_fn(input_pix);
+      small_mask_mat->at<float>(i, j) = mask_value;
+    }
+  }
+
+  return absl::OkStatus();
+}
+
+}  // namespace
+
+absl::StatusOr<std::unique_ptr<TensorsToSegmentationConverter>>
+CreateOpenCvConverter(const TensorsToSegmentationCalculatorOptions& options) {
+  auto converter = std::make_unique<OpenCvProcessor>();
+  MP_RETURN_IF_ERROR(converter->Init(options));
+  return converter;
+}
+
+}  // namespace mediapipe
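A quick sanity check on the numerically stable two-class softmax used in `ApplyActivation` above (illustrative, not part of the diff): subtracting the per-pixel maximum before exponentiating does not change the result, it only prevents overflow for large logits. The standalone function below restates the computation with a worked value in the comments.

```cpp
#include <algorithm>
#include <cmath>

// For pixel0 = 2.0f, pixel1 = 0.0f, output_layer_index = 1:
//   naive:  exp(0) / (exp(2) + exp(0))      ~= 1 / 8.389  ~= 0.1192
//   stable: exp(0 - 2) / (1 + exp(0 - 2))   ~= 0.1353 / 1.1353 ~= 0.1192
float StableTwoClassSoftmax(float pixel0, float pixel1,
                            int output_layer_index) {
  const float max_pixel = std::max(pixel0, pixel1);
  const float min_pixel = std::min(pixel0, pixel1);
  // exp(max_pixel - max_pixel) == 1, so the denominator has only one exp call.
  const float denom = 1.0f + std::exp(min_pixel - max_pixel);
  const float selected = output_layer_index == 0 ? pixel0 : pixel1;
  return std::exp(selected - max_pixel) / denom;
}
```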
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h b/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h
new file mode 100644
index 000000000..3ae41b5e0
--- /dev/null
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h
@@ -0,0 +1,31 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_OPENCV_H_
+#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_OPENCV_H_
+
+#include <memory>
+
+#include "absl/status/statusor.h"
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter.h"
+
+namespace mediapipe {
+// Creates OpenCV tensors-to-segmentation converter.
+absl::StatusOr<std::unique_ptr<TensorsToSegmentationConverter>>
+CreateOpenCvConverter(
+    const mediapipe::TensorsToSegmentationCalculatorOptions& options);
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_OPENCV_H_
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_utils.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_utils.cc
new file mode 100644
index 000000000..ab1e9c139
--- /dev/null
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_utils.cc
@@ -0,0 +1,52 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
+
+#include <tuple>
+#include <vector>
+
+#include "absl/status/statusor.h"
+#include "mediapipe/framework/port.h"
+#include "mediapipe/framework/port/ret_check.h"
+
+namespace mediapipe {
+
+int NumGroups(int size, int group_size) {
+  return (size + group_size - 1) / group_size;
+}
+
+bool CanUseGpu() {
+#if !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
+  // TODO: Configure GPU usage policy in individual calculators.
+  constexpr bool kAllowGpuProcessing = true;
+  return kAllowGpuProcessing;
+#else
+  return false;
+#endif  // !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
+}
+
+absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
+    const std::vector<int>& dims) {
+  if (dims.size() == 3) {
+    return std::make_tuple(dims[0], dims[1], dims[2]);
+  } else if (dims.size() == 4) {
+    // BHWC format check B == 1
+    RET_CHECK_EQ(dims[0], 1) << "Expected batch to be 1 for BHWC heatmap";
+    return std::make_tuple(dims[1], dims[2], dims[3]);
+  } else {
+    RET_CHECK(false) << "Invalid shape for segmentation tensor " << dims.size();
+  }
+}
+}  // namespace mediapipe
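For orientation (illustrative, not part of the diff): `GetHwcFromDims` normalizes a 3-D HWC or 4-D BHWC shape into a (height, width, channels) tuple, and `NumGroups` rounds up to the number of workgroups needed to cover a dimension. A typical pairing when dispatching a compute pass might look like the sketch below; the 8x8 workgroup size and the function name are assumptions for illustration.

```cpp
#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/status_macros.h"

namespace mediapipe {

// Sketch: derive dispatch dimensions for an h x w x c tensor.
absl::Status ComputeDispatchExample(const Tensor& tensor) {
  MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(tensor.shape().dims));
  const auto [height, width, channels] = hwc;
  const int groups_x = NumGroups(width, /*group_size=*/8);
  const int groups_y = NumGroups(height, /*group_size=*/8);
  // A GL compute path would then call glDispatchCompute(groups_x, groups_y, 1)
  // inside the GL context; NumGroups(13, 4) == 4, so partial tiles are covered.
  (void)channels;
  (void)groups_x;
  (void)groups_y;
  return absl::OkStatus();
}

}  // namespace mediapipe
```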
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_utils.h b/mediapipe/calculators/tensor/tensors_to_segmentation_utils.h
new file mode 100644
index 000000000..44893073b
--- /dev/null
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_utils.h
@@ -0,0 +1,34 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_UTILS_H_
+#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_UTILS_H_
+
+#include <tuple>
+#include <vector>
+
+#include "absl/status/statusor.h"
+
+namespace mediapipe {
+
+// Commonly used to compute the number of blocks to launch in a kernel.
+int NumGroups(const int size, const int group_size);  // NOLINT
+
+bool CanUseGpu();
+
+absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
+    const std::vector<int>& dims);
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_UTILS_H_
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_utils_test.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_utils_test.cc
new file mode 100644
index 000000000..5535d159d
--- /dev/null
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_utils_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
+
+#include <tuple>
+#include <vector>
+
+#include "absl/status/statusor.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/status_matchers.h"
+
+namespace mediapipe {
+namespace {
+
+using ::testing::HasSubstr;
+
+TEST(TensorsToSegmentationUtilsTest, NumGroupsWorksProperly) {
+  EXPECT_EQ(NumGroups(13, 4), 4);
+  EXPECT_EQ(NumGroups(4, 13), 1);
+}
+
+TEST(TensorsToSegmentationUtilsTest, GetHwcFromDimsWorksProperly) {
+  std::vector<int> dims_3 = {2, 3, 4};
+  absl::StatusOr<std::tuple<int, int, int>> result_1 = GetHwcFromDims(dims_3);
+  MP_ASSERT_OK(result_1);
+  EXPECT_EQ(result_1.value(), (std::make_tuple(2, 3, 4)));
+  std::vector<int> dims_4 = {1, 3, 4, 5};
+  absl::StatusOr<std::tuple<int, int, int>> result_2 = GetHwcFromDims(dims_4);
+  MP_ASSERT_OK(result_2);
+  EXPECT_EQ(result_2.value(), (std::make_tuple(3, 4, 5)));
+}
+
+TEST(TensorsToSegmentationUtilsTest, GetHwcFromDimsBatchCheckFail) {
+  std::vector<int> dims_4 = {2, 3, 4, 5};
+  absl::StatusOr<std::tuple<int, int, int>> result = GetHwcFromDims(dims_4);
+  EXPECT_FALSE(result.ok());
+  EXPECT_THAT(result.status().message(),
+              HasSubstr("Expected batch to be 1 for BHWC heatmap"));
+}
+
+TEST(TensorsToSegmentationUtilsTest, GetHwcFromDimsInvalidShape) {
+  std::vector<int> dims_5 = {1, 2, 3, 4, 5};
+  absl::StatusOr<std::tuple<int, int, int>> result = GetHwcFromDims(dims_5);
+  EXPECT_FALSE(result.ok());
+  EXPECT_THAT(result.status().message(),
+              HasSubstr("Invalid shape for segmentation tensor"));
+}
+
+}  // namespace
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc
index 4972b202d..95962c261 100644
--- a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc
+++ b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc
@@ -79,7 +79,7 @@ namespace mpms = mediapipe::mediasequence;
 // and label and label_id are optional but at least one of them should be set.
 // "IMAGE_${NAME}", "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store
 // prefixed versions of each stream, which allows for multiple image streams to
-// be included. However, the default names are suppored by more tools.
+// be included. However, the default names are supported by more tools.
// // Example config: // node { diff --git a/mediapipe/calculators/tensorflow/tensor_to_matrix_calculator.cc b/mediapipe/calculators/tensorflow/tensor_to_matrix_calculator.cc index dc3d97844..ed234b3fa 100644 --- a/mediapipe/calculators/tensorflow/tensor_to_matrix_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensor_to_matrix_calculator.cc @@ -67,8 +67,8 @@ absl::Status FillTimeSeriesHeaderIfValid(const Packet& header_packet, // -- 1-D or 2-D Tensor // Output: // -- Matrix with the same values as the Tensor -// If input tensor is 1 dimensional, the ouput Matrix is of (1xn) shape. -// If input tensor is 2 dimensional (batched), the ouput Matrix is (mxn) shape. +// If input tensor is 1 dimensional, the output Matrix is of (1xn) shape. +// If input tensor is 2 dimensional (batched), the output Matrix is (mxn) shape. // // Example Config // node: { diff --git a/mediapipe/calculators/tensorflow/tensor_to_vector_int_calculator.cc b/mediapipe/calculators/tensorflow/tensor_to_vector_int_calculator.cc index 7adb26daa..b4fcf6a01 100644 --- a/mediapipe/calculators/tensorflow/tensor_to_vector_int_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensor_to_vector_int_calculator.cc @@ -15,9 +15,9 @@ // Calculator converts from one-dimensional Tensor of DT_FLOAT to vector // OR from (batched) two-dimensional Tensor of DT_FLOAT to vector. +#include #include -#include "absl/base/integral_types.h" #include "mediapipe/calculators/tensorflow/tensor_to_vector_int_calculator_options.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/status.h" diff --git a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc index 84c32fed6..39993ada0 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc @@ -111,8 +111,8 @@ class InferenceState { // input_side_packet. // // The input and output streams are TensorFlow tensors labeled by tags. The tags -// for the streams are matched to feeds and fetchs in a TensorFlow session using -// a named_signature.generic_signature in the ModelManifest. The +// for the streams are matched to feeds and fetches in a TensorFlow session +// using a named_signature.generic_signature in the ModelManifest. The // generic_signature is used as key-value pairs between the MediaPipe tag and // the TensorFlow tensor. The signature_name in the options proto determines // which named_signature is used. The keys in the generic_signature must be @@ -128,7 +128,7 @@ class InferenceState { // addition. Once batch_size inputs have been provided, the batch will be run // and the output tensors sent out on the output streams with timestamps // corresponding to the input stream packets. Setting the batch_size to 1 -// completely disables batching, but is indepdent of add_batch_dim_to_tensors. +// completely disables batching, but is independent of add_batch_dim_to_tensors. // // The TensorFlowInferenceCalculator also support feeding states recurrently for // RNNs and LSTMs. 
Simply set the recurrent_tag_pair options to define the diff --git a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto index a243412c0..f09664592 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto +++ b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto @@ -42,7 +42,7 @@ message TensorFlowInferenceCalculatorOptions { // If the 0th dimension is the batch dimension, then the tensors are // concatenated on that dimension. If the 0th is a data dimension, then a 0th // dimension is added before concatenating. If added, the extra dimension is - // removed before outputing the tensor. Examples of each case: If you want + // removed before outputting the tensor. Examples of each case: If you want // to batch spectra of audio over time for an LSTM, a time-frequency // representation has a 0th dimension as the batch dimension. If you want to // batch frames of video that are [width, height, channels], the batch diff --git a/mediapipe/calculators/tflite/testdata/README.md b/mediapipe/calculators/tflite/testdata/README.md index c0efdcf07..ffafe0df9 100644 --- a/mediapipe/calculators/tflite/testdata/README.md +++ b/mediapipe/calculators/tflite/testdata/README.md @@ -1,2 +1,2 @@ The model files add.bin, add_quantized.bin -(and corresponding metatada json files) come from tensorflow/lite/testdata/ +(and corresponding metadata json files) come from tensorflow/lite/testdata/ diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.cc b/mediapipe/calculators/tflite/tflite_converter_calculator.cc index 7188cbc59..682dd3b7b 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.cc @@ -95,7 +95,7 @@ struct GPUData { // into a TfLiteTensor (float 32) or a GpuBuffer to a tflite::gpu::GlBuffer // or MTLBuffer. // -// This calculator is designed to be used with the TfLiteInferenceCalcualtor, +// This calculator is designed to be used with the TfLiteInferenceCalculator, // as a pre-processing step for calculator inputs. // // IMAGE and IMAGE_GPU inputs are normalized to [-1,1] (default) or [0,1], diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.proto b/mediapipe/calculators/tflite/tflite_converter_calculator.proto index 5ed70879d..930545831 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.proto @@ -31,7 +31,7 @@ message TfLiteConverterCalculatorOptions { // Custom settings to override the internal scaling factors `div` and `sub`. // Both values must be set to non-negative values. Will only take effect on // CPU AND when |use_custom_normalization| is set to true. When these custom - // values take effect, the |zero_center| setting above will be overriden, and + // values take effect, the |zero_center| setting above will be overridden, and // the normalized_value will be calculated as: // normalized_value = input / custom_div - custom_sub. 
optional bool use_custom_normalization = 6 [default = false]; diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto index c6c9d915d..aa141eee5 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto @@ -25,7 +25,7 @@ message TfLiteTensorsToClassificationCalculatorOptions { optional TfLiteTensorsToClassificationCalculatorOptions ext = 266399463; } - // Score threshold for perserving the class. + // Score threshold for preserving the class. optional float min_score_threshold = 1; // Number of highest scoring labels to output. If top_k is not positive then // all labels are used. diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc index 269661f73..0eaba9eb0 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc @@ -116,7 +116,7 @@ void ConvertAnchorsToRawValues(const std::vector& anchors, // tensors can have 2 or 3 tensors. First tensor is the predicted // raw boxes/keypoints. The size of the values must be (num_boxes // * num_predicted_values). Second tensor is the score tensor. The -// size of the valuse must be (num_boxes * num_classes). It's +// size of the values must be (num_boxes * num_classes). It's // optional to pass in a third tensor for anchors (e.g. for SSD // models) depend on the outputs of the detection model. The size // of anchor tensor must be (num_boxes * 4). diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto index 41ad903de..f054608a6 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto @@ -69,6 +69,6 @@ message TfLiteTensorsToDetectionsCalculatorOptions { // representation has a bottom-left origin (e.g., in OpenGL). optional bool flip_vertically = 18 [default = false]; - // Score threshold for perserving decoded detections. + // Score threshold for preserving decoded detections. 
optional float min_score_thresh = 19; } diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc index 6740f0afa..c25776de3 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc @@ -158,7 +158,7 @@ absl::Status TfLiteTensorsToLandmarksCalculator::Open(CalculatorContext* cc) { RET_CHECK(options_.has_input_image_height() && options_.has_input_image_width()) << "Must provide input width/height for using flip_vertically option " - "when outputing landmarks in absolute coordinates."; + "when outputting landmarks in absolute coordinates."; } flip_horizontally_ = diff --git a/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.jar b/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.jar index afba10928..7f93135c4 100644 Binary files a/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.jar and b/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.jar differ diff --git a/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.properties b/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.properties index 4e86b9270..3fa8f862f 100644 --- a/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.properties +++ b/mediapipe/examples/android/solutions/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip networkTimeout=10000 +validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/mediapipe/examples/android/solutions/gradlew b/mediapipe/examples/android/solutions/gradlew index 65dcd68d6..1aa94a426 100755 --- a/mediapipe/examples/android/solutions/gradlew +++ b/mediapipe/examples/android/solutions/gradlew @@ -83,10 +83,8 @@ done # This is normally unused # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} -APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum @@ -133,10 +131,13 @@ location of your Java installation." fi else JAVACMD=java - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. @@ -144,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac @@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! 
"$nonstop" ; then '' | soft) :;; #( *) # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then done fi -# Collect all arguments for the java command; -# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of -# shell script including quotes and variable substitutions, so put them in -# double quotes to make sure that they get re-expanded; and -# * put everything else in single quotes, so that it's not re-expanded. + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ diff --git a/mediapipe/examples/desktop/media_sequence/run_graph_file_io_main.cc b/mediapipe/examples/desktop/media_sequence/run_graph_file_io_main.cc index a14c7734d..80e19a355 100644 --- a/mediapipe/examples/desktop/media_sequence/run_graph_file_io_main.cc +++ b/mediapipe/examples/desktop/media_sequence/run_graph_file_io_main.cc @@ -55,7 +55,7 @@ absl::Status RunMPPGraph() { for (const std::string& kv_pair : kv_pairs) { std::vector name_and_value = absl::StrSplit(kv_pair, '='); RET_CHECK(name_and_value.size() == 2); - RET_CHECK(!mediapipe::ContainsKey(input_side_packets, name_and_value[0])); + RET_CHECK(!input_side_packets.contains(name_and_value[0])); std::string input_side_packet_contents; MP_RETURN_IF_ERROR(mediapipe::file::GetContents( name_and_value[1], &input_side_packet_contents)); diff --git a/mediapipe/examples/desktop/youtube8m/extract_yt8m_features.cc b/mediapipe/examples/desktop/youtube8m/extract_yt8m_features.cc index dbabf84b1..f958924f0 100644 --- a/mediapipe/examples/desktop/youtube8m/extract_yt8m_features.cc +++ b/mediapipe/examples/desktop/youtube8m/extract_yt8m_features.cc @@ -56,7 +56,7 @@ absl::Status RunMPPGraph() { for (const std::string& kv_pair : kv_pairs) { std::vector name_and_value = absl::StrSplit(kv_pair, '='); RET_CHECK(name_and_value.size() == 2); - RET_CHECK(!mediapipe::ContainsKey(input_side_packets, name_and_value[0])); + RET_CHECK(!input_side_packets.contains(name_and_value[0])); std::string input_side_packet_contents; MP_RETURN_IF_ERROR(mediapipe::file::GetContents( name_and_value[1], &input_side_packet_contents)); diff --git a/mediapipe/framework/api2/builder.h b/mediapipe/framework/api2/builder.h index fde281121..c82998040 100644 --- a/mediapipe/framework/api2/builder.h +++ b/mediapipe/framework/api2/builder.h @@ -330,6 +330,14 @@ using MultiSideDestination = MultiPort>; class NodeBase { public: + NodeBase() = default; + ~NodeBase() = default; + NodeBase(NodeBase&&) = default; + NodeBase& operator=(NodeBase&&) = default; + // Explicitly delete copies to improve error messages. + NodeBase(const NodeBase&) = delete; + NodeBase& operator=(const NodeBase&) = delete; + // TODO: right now access to an indexed port is made directly by // specifying both a tag and an index. 
It would be better to represent this // as a two-step lookup, first getting a multi-port, and then accessing one @@ -585,6 +593,14 @@ class PacketGenerator { class Graph { public: + Graph() = default; + ~Graph() = default; + Graph(Graph&&) = default; + Graph& operator=(Graph&&) = default; + // Explicitly delete copies to improve error messages. + Graph(const Graph&) = delete; + Graph& operator=(const Graph&) = delete; + void SetType(std::string type) { type_ = std::move(type); } // Creates a node of a specific type. Should be used for calculators whose diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD index b36ea0211..047b95d32 100644 --- a/mediapipe/framework/formats/BUILD +++ b/mediapipe/framework/formats/BUILD @@ -124,6 +124,15 @@ cc_library( ], ) +cc_library( + name = "ahwb_view", + hdrs = ["ahwb_view.h"], + deps = [ + "//mediapipe/framework:port", + "//mediapipe/gpu:gpu_buffer_storage", + ], +) + cc_library( name = "affine_transform", srcs = ["affine_transform.cc"], diff --git a/mediapipe/framework/formats/ahwb_view.h b/mediapipe/framework/formats/ahwb_view.h new file mode 100644 index 000000000..0c8ad6323 --- /dev/null +++ b/mediapipe/framework/formats/ahwb_view.h @@ -0,0 +1,54 @@ +#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_AHWB_VIEW_H_ +#define MEDIAPIPE_FRAMEWORK_FORMATS_AHWB_VIEW_H_ + +#include "mediapipe/framework/port.h" +#ifdef MEDIAPIPE_GPU_BUFFER_USE_AHWB +#include + +#include "mediapipe/gpu/gpu_buffer_storage.h" + +namespace mediapipe { + +// Wrapper to facilitate short lived access to Android Hardware Buffer objects. +// Intended use cases: +// - Extracting an AHWB for processing in another library after it's produced by +// MediaPipe. +// - Sending AHWBs to compute devices that are able to map the memory for their +// own usage. +// The AHWB abstractions in GpuBuffer and Tensor are likely more suitable for +// other CPU/GPU uses of AHWBs. +class AhwbView { + public: + explicit AhwbView(AHardwareBuffer* handle) : handle_(handle) {} + // Non-copyable + AhwbView(const AhwbView&) = delete; + AhwbView& operator=(const AhwbView&) = delete; + // Non-movable + AhwbView(AhwbView&&) = delete; + + // Only supports synchronous usage. All users of GetHandle must finish + // accessing the buffer before this view object is destroyed to avoid race + // conditions. + // TODO: Support asynchronous usage. + const AHardwareBuffer* GetHandle() const { return handle_; } + + private: + const AHardwareBuffer* handle_; +}; + +namespace internal { +// Makes this class available as a GpuBuffer view. +template <> +class ViewProvider { + public: + virtual ~ViewProvider() = default; + virtual const AhwbView GetReadView(types) const = 0; + virtual AhwbView GetWriteView(types) = 0; +}; + +} // namespace internal + +} // namespace mediapipe + +#endif // MEDIAPIPE_GPU_BUFFER_USE_AHWB +#endif // MEDIAPIPE_FRAMEWORK_FORMATS_AHWB_VIEW_H_ diff --git a/mediapipe/framework/port.h b/mediapipe/framework/port.h index a18080637..1bb4d4cdf 100644 --- a/mediapipe/framework/port.h +++ b/mediapipe/framework/port.h @@ -50,7 +50,7 @@ // but may or may not still be able to run other OpenGL code. #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) && \ (defined(__APPLE__) || defined(__EMSCRIPTEN__) || MEDIAPIPE_DISABLE_GPU || \ - MEDIAPIPE_USING_SWIFTSHADER) + MEDIAPIPE_USING_LEGACY_SWIFTSHADER) #define MEDIAPIPE_DISABLE_GL_COMPUTE #endif @@ -104,4 +104,9 @@ #endif #endif // MEDIAPIPE_HAS_RTTI +// AHardware buffers are only available since Android API 26. 
+#if (__ANDROID_API__ >= 26) +#define MEDIAPIPE_GPU_BUFFER_USE_AHWB 1 +#endif + #endif // MEDIAPIPE_FRAMEWORK_PORT_H_ diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD index 77e3ab16d..7a4b5a112 100644 --- a/mediapipe/framework/tool/BUILD +++ b/mediapipe/framework/tool/BUILD @@ -616,6 +616,7 @@ cc_test( "//mediapipe/framework:calculator_runner", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:parse_text_proto", + "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/strings", ], ) @@ -856,6 +857,7 @@ cc_library( mediapipe_cc_test( name = "switch_demux_calculator_test", srcs = ["switch_demux_calculator_test.cc"], + requires_full_emulation = False, deps = [ ":container_util", ":switch_demux_calculator", @@ -891,6 +893,7 @@ cc_library( mediapipe_cc_test( name = "switch_mux_calculator_test", srcs = ["switch_mux_calculator_test.cc"], + requires_full_emulation = False, deps = [ ":container_util", ":switch_mux_calculator", diff --git a/mediapipe/framework/tool/sink_test.cc b/mediapipe/framework/tool/sink_test.cc index c5316af4d..9769aeeee 100644 --- a/mediapipe/framework/tool/sink_test.cc +++ b/mediapipe/framework/tool/sink_test.cc @@ -17,6 +17,7 @@ #include #include +#include "absl/functional/bind_front.h" #include "absl/strings/string_view.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_runner.h" diff --git a/mediapipe/framework/tool/validate_name.cc b/mediapipe/framework/tool/validate_name.cc index 8f9be7687..4415f76b5 100644 --- a/mediapipe/framework/tool/validate_name.cc +++ b/mediapipe/framework/tool/validate_name.cc @@ -134,7 +134,7 @@ absl::Status ParseTagAndName(absl::string_view tag_and_name, std::string* tag, RET_CHECK(name); absl::Status tag_status = absl::OkStatus(); absl::Status name_status = absl::UnknownError(""); - int name_index = 0; + int name_index = -1; std::vector v = absl::StrSplit(tag_and_name, ':'); if (v.size() == 1) { name_status = ValidateName(v[0]); @@ -143,7 +143,7 @@ absl::Status ParseTagAndName(absl::string_view tag_and_name, std::string* tag, tag_status = ValidateTag(v[0]); name_status = ValidateName(v[1]); name_index = 1; - } + } // else omitted, name_index == -1, triggering error. 
if (name_index == -1 || tag_status != absl::OkStatus() || name_status != absl::OkStatus()) { tag->clear(); diff --git a/mediapipe/gpu/BUILD b/mediapipe/gpu/BUILD index 27770acaa..88864e894 100644 --- a/mediapipe/gpu/BUILD +++ b/mediapipe/gpu/BUILD @@ -511,11 +511,19 @@ cc_library( ], }), deps = [ + ":gl_base_hdr", + ":gl_context", ":gl_texture_buffer", + ":gl_texture_view", ":gpu_buffer_format", ":gpu_buffer_storage", ":image_frame_view", + "//mediapipe/framework:port", + "//mediapipe/framework/formats:ahwb_view", "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/port:ret_check", + "//third_party/GL:EGL_headers", + "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/strings:str_format", ], ) @@ -526,12 +534,14 @@ mediapipe_proto_library( visibility = ["//visibility:public"], ) -objc_library( +cc_library( name = "pixel_buffer_pool_util", - srcs = ["pixel_buffer_pool_util.mm"], + srcs = ["pixel_buffer_pool_util.cc"], hdrs = ["pixel_buffer_pool_util.h"], copts = [ + "-x objective-c++", "-Wno-shorten-64-to-32", + "-fobjc-arc", # enable reference-counting ], visibility = ["//visibility:public"], deps = [ @@ -542,13 +552,14 @@ objc_library( ], ) -objc_library( +cc_library( name = "metal_shared_resources", - srcs = ["metal_shared_resources.mm"], + srcs = ["metal_shared_resources.cc"], hdrs = ["metal_shared_resources.h"], copts = [ "-x objective-c++", "-Wno-shorten-64-to-32", + "-fobjc-arc", # enable reference-counting ], features = ["-layering_check"], visibility = ["//visibility:public"], @@ -557,15 +568,17 @@ objc_library( "@google_toolbox_for_mac//:GTM_Defines", ] + [ ], + alwayslink = 1, ) -objc_library( +cc_library( name = "MPPMetalUtil", - srcs = ["MPPMetalUtil.mm"], + srcs = ["MPPMetalUtil.cc"], hdrs = ["MPPMetalUtil.h"], copts = [ "-x objective-c++", "-Wno-shorten-64-to-32", + "-fobjc-arc", # enable reference-counting ], visibility = ["//visibility:public"], deps = [ @@ -575,6 +588,7 @@ objc_library( "@com_google_absl//absl/time", "@google_toolbox_for_mac//:GTM_Defines", ], + alwayslink = 1, ) mediapipe_proto_library( @@ -655,6 +669,8 @@ cc_library( "//mediapipe/framework/port:ret_check", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/log:absl_check", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", ] + select({ "//conditions:default": [], "//mediapipe:apple": [ @@ -857,12 +873,14 @@ cc_library( }), ) -objc_library( +cc_library( name = "MPPMetalHelper", - srcs = ["MPPMetalHelper.mm"], + srcs = ["MPPMetalHelper.cc"], hdrs = ["MPPMetalHelper.h"], copts = [ "-Wno-shorten-64-to-32", + "-x objective-c++", + "-fobjc-arc", ], features = ["-layering_check"], visibility = ["//visibility:public"], @@ -1215,9 +1233,13 @@ mediapipe_cc_test( ], requires_full_emulation = True, deps = [ + ":gl_texture_buffer", + ":gl_texture_util", ":gpu_buffer_format", ":gpu_buffer_storage_ahwb", + ":gpu_test_base", "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/tool:test_util", ], ) diff --git a/mediapipe/gpu/MPPMetalHelper.mm b/mediapipe/gpu/MPPMetalHelper.cc similarity index 74% rename from mediapipe/gpu/MPPMetalHelper.mm rename to mediapipe/gpu/MPPMetalHelper.cc index c66483698..e92d6aae7 100644 --- a/mediapipe/gpu/MPPMetalHelper.mm +++ b/mediapipe/gpu/MPPMetalHelper.cc @@ -14,15 +14,14 @@ #import "mediapipe/gpu/MPPMetalHelper.h" +#import "GTMDefines.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" +#include "mediapipe/framework/port/ret_check.h" #import "mediapipe/gpu/gpu_buffer.h" #import 
"mediapipe/gpu/gpu_service.h" #import "mediapipe/gpu/graph_support.h" #import "mediapipe/gpu/metal_shared_resources.h" -#import "GTMDefines.h" - -#include "mediapipe/framework/port/ret_check.h" @interface MPPMetalHelper () { mediapipe::GpuResources* _gpuResources; @@ -31,7 +30,8 @@ namespace mediapipe { -// Using a C++ class so it can be declared as a friend of LegacyCalculatorSupport. +// Using a C++ class so it can be declared as a friend of +// LegacyCalculatorSupport. class MetalHelperLegacySupport { public: static CalculatorContract* GetCalculatorContract() { @@ -61,7 +61,8 @@ class MetalHelperLegacySupport { - (instancetype)initWithCalculatorContext:(mediapipe::CalculatorContext*)cc { if (!cc) return nil; - return [self initWithGpuResources:&cc->Service(mediapipe::kGpuService).GetObject()]; + return [self + initWithGpuResources:&cc->Service(mediapipe::kGpuService).GetObject()]; } + (absl::Status)updateContract:(mediapipe::CalculatorContract*)cc { @@ -77,7 +78,8 @@ class MetalHelperLegacySupport { } // Legacy support. -- (instancetype)initWithSidePackets:(const mediapipe::PacketSet&)inputSidePackets { +- (instancetype)initWithSidePackets: + (const mediapipe::PacketSet&)inputSidePackets { auto cc = mediapipe::MetalHelperLegacySupport::GetCalculatorContext(); if (cc) { ABSL_CHECK_EQ(&inputSidePackets, &cc->InputSidePackets()); @@ -85,16 +87,19 @@ class MetalHelperLegacySupport { } // TODO: remove when we can. - ABSL_LOG(WARNING) << "CalculatorContext not available. If this calculator uses " - "CalculatorBase, call initWithCalculatorContext instead."; + ABSL_LOG(WARNING) + << "CalculatorContext not available. If this calculator uses " + "CalculatorBase, call initWithCalculatorContext instead."; mediapipe::GpuSharedData* gpu_shared = - inputSidePackets.Tag(mediapipe::kGpuSharedTagName).Get(); + inputSidePackets.Tag(mediapipe::kGpuSharedTagName) + .Get(); return [self initWithGpuResources:gpu_shared->gpu_resources.get()]; } // Legacy support. -+ (absl::Status)setupInputSidePackets:(mediapipe::PacketTypeSet*)inputSidePackets { ++ (absl::Status)setupInputSidePackets: + (mediapipe::PacketTypeSet*)inputSidePackets { auto cc = mediapipe::MetalHelperLegacySupport::GetCalculatorContract(); if (cc) { ABSL_CHECK_EQ(inputSidePackets, &cc->InputSidePackets()); @@ -102,12 +107,12 @@ class MetalHelperLegacySupport { } // TODO: remove when we can. - ABSL_LOG(WARNING) << "CalculatorContract not available. If you're calling this " - "from a GetContract method, call updateContract instead."; + ABSL_LOG(WARNING) + << "CalculatorContract not available. 
If you're calling this " + "from a GetContract method, call updateContract instead."; auto id = inputSidePackets->GetId(mediapipe::kGpuSharedTagName, 0); - RET_CHECK(id.IsValid()) - << "A " << mediapipe::kGpuSharedTagName - << " input side packet is required here."; + RET_CHECK(id.IsValid()) << "A " << mediapipe::kGpuSharedTagName + << " input side packet is required here."; inputSidePackets->Get(id).Set(); return absl::OkStatus(); } @@ -125,10 +130,12 @@ class MetalHelperLegacySupport { } - (id)commandBuffer { - return [_gpuResources->metal_shared().resources().mtlCommandQueue commandBuffer]; + return + [_gpuResources->metal_shared().resources().mtlCommandQueue commandBuffer]; } -- (CVMetalTextureRef)copyCVMetalTextureWithGpuBuffer:(const mediapipe::GpuBuffer&)gpuBuffer +- (CVMetalTextureRef)copyCVMetalTextureWithGpuBuffer: + (const mediapipe::GpuBuffer&)gpuBuffer plane:(size_t)plane { CVPixelBufferRef pixel_buffer = mediapipe::GetCVPixelBufferRef(gpuBuffer); OSType pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer); @@ -178,41 +185,48 @@ class MetalHelperLegacySupport { CVMetalTextureRef texture; CVReturn err = CVMetalTextureCacheCreateTextureFromImage( NULL, _gpuResources->metal_shared().resources().mtlTextureCache, - mediapipe::GetCVPixelBufferRef(gpuBuffer), NULL, metalPixelFormat, width, height, plane, - &texture); + mediapipe::GetCVPixelBufferRef(gpuBuffer), NULL, metalPixelFormat, width, + height, plane, &texture); ABSL_CHECK_EQ(err, kCVReturnSuccess); return texture; } -- (CVMetalTextureRef)copyCVMetalTextureWithGpuBuffer:(const mediapipe::GpuBuffer&)gpuBuffer { +- (CVMetalTextureRef)copyCVMetalTextureWithGpuBuffer: + (const mediapipe::GpuBuffer&)gpuBuffer { return [self copyCVMetalTextureWithGpuBuffer:gpuBuffer plane:0]; } -- (id)metalTextureWithGpuBuffer:(const mediapipe::GpuBuffer&)gpuBuffer { +- (id)metalTextureWithGpuBuffer: + (const mediapipe::GpuBuffer&)gpuBuffer { return [self metalTextureWithGpuBuffer:gpuBuffer plane:0]; } -- (id)metalTextureWithGpuBuffer:(const mediapipe::GpuBuffer&)gpuBuffer - plane:(size_t)plane { +- (id)metalTextureWithGpuBuffer: + (const mediapipe::GpuBuffer&)gpuBuffer + plane:(size_t)plane { CFHolder cvTexture; cvTexture.adopt([self copyCVMetalTextureWithGpuBuffer:gpuBuffer plane:plane]); return CVMetalTextureGetTexture(*cvTexture); } -- (mediapipe::GpuBuffer)mediapipeGpuBufferWithWidth:(int)width height:(int)height { +- (mediapipe::GpuBuffer)mediapipeGpuBufferWithWidth:(int)width + height:(int)height { return _gpuResources->gpu_buffer_pool().GetBuffer(width, height); } - (mediapipe::GpuBuffer)mediapipeGpuBufferWithWidth:(int)width - height:(int)height - format:(mediapipe::GpuBufferFormat)format { + height:(int)height + format:(mediapipe::GpuBufferFormat) + format { return _gpuResources->gpu_buffer_pool().GetBuffer(width, height, format); } -- (id)newLibraryWithResourceName:(NSString*)name error:(NSError * _Nullable *)error { +- (id)newLibraryWithResourceName:(NSString*)name + error:(NSError* _Nullable*)error { return [_gpuResources->metal_shared().resources().mtlDevice - newLibraryWithFile:[[NSBundle bundleForClass:[self class]] pathForResource:name - ofType:@"metallib"] + newLibraryWithFile:[[NSBundle bundleForClass:[self class]] + pathForResource:name + ofType:@"metallib"] error:error]; } diff --git a/mediapipe/gpu/MPPMetalUtil.mm b/mediapipe/gpu/MPPMetalUtil.cc similarity index 95% rename from mediapipe/gpu/MPPMetalUtil.mm rename to mediapipe/gpu/MPPMetalUtil.cc index ba8be0dbd..c9bd6798d 100644 --- a/mediapipe/gpu/MPPMetalUtil.mm +++ 
b/mediapipe/gpu/MPPMetalUtil.cc @@ -69,10 +69,10 @@ while (!bufferCompleted) { auto duration = absl::Now() - start_time; // If the spin-lock takes more than 5 ms then go to blocking wait: - // - it frees the CPU core for another threads: increase the performance/decrease power - // consumption. - // - if a driver thread that notifies that the GPU buffer is completed has lower priority then - // the CPU core is allocated for the thread. + // - it frees the CPU core for another threads: increase the + // performance/decrease power consumption. + // - if a driver thread that notifies that the GPU buffer is completed has + // lower priority then the CPU core is allocated for the thread. if (duration >= absl::Milliseconds(5)) { [commandBuffer waitUntilCompleted]; break; diff --git a/mediapipe/gpu/gl_calculator_helper.cc b/mediapipe/gpu/gl_calculator_helper.cc index 1b113f8ac..20f155e15 100644 --- a/mediapipe/gpu/gl_calculator_helper.cc +++ b/mediapipe/gpu/gl_calculator_helper.cc @@ -57,8 +57,8 @@ void GlCalculatorHelper::InitializeForTest(GpuResources* gpu_resources) { // static absl::Status GlCalculatorHelper::UpdateContract(CalculatorContract* cc, - bool requesst_gpu_as_optional) { - if (requesst_gpu_as_optional) { + bool request_gpu_as_optional) { + if (request_gpu_as_optional) { cc->UseService(kGpuService).Optional(); } else { cc->UseService(kGpuService); diff --git a/mediapipe/gpu/gl_calculator_helper.h b/mediapipe/gpu/gl_calculator_helper.h index f5d98ebfe..45b25f67e 100644 --- a/mediapipe/gpu/gl_calculator_helper.h +++ b/mediapipe/gpu/gl_calculator_helper.h @@ -68,7 +68,7 @@ class GlCalculatorHelper { // This method can be called from GetContract to set up the needed GPU // resources. static absl::Status UpdateContract(CalculatorContract* cc, - bool requesst_gpu_as_optional = false); + bool request_gpu_as_optional = false); // This method can be called from FillExpectations to set the correct types // for the shared GL input side packet(s). diff --git a/mediapipe/gpu/gl_texture_buffer.cc b/mediapipe/gpu/gl_texture_buffer.cc index 48afbd219..7e4694a0e 100644 --- a/mediapipe/gpu/gl_texture_buffer.cc +++ b/mediapipe/gpu/gl_texture_buffer.cc @@ -14,6 +14,8 @@ #include "mediapipe/gpu/gl_texture_buffer.h" +#include + #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" #include "mediapipe/framework/formats/image_frame.h" @@ -131,6 +133,13 @@ bool GlTextureBuffer::CreateInternal(const void* data, int alignment) { SymbolAvailable(&glTexStorage2D)) { ABSL_CHECK(data == nullptr) << "unimplemented"; glTexStorage2D(target_, 1, info.gl_internal_format, width_, height_); + } else if (info.immutable) { + ABSL_CHECK(SymbolAvailable(&glTexStorage2D) && + context->GetGlVersion() != GlVersion::kGLES2) + << "Immutable GpuBuffer format requested is not supported in this " + << "GlContext. 
Format was " << static_cast(format_); + ABSL_CHECK(data == nullptr) << "unimplemented"; + glTexStorage2D(target_, 1, info.gl_internal_format, width_, height_); } else { glTexImage2D(target_, 0 /* level */, info.gl_internal_format, width_, height_, 0 /* border */, info.gl_format, info.gl_type, data); diff --git a/mediapipe/gpu/gl_texture_buffer.h b/mediapipe/gpu/gl_texture_buffer.h index 7b9140646..4548fce5c 100644 --- a/mediapipe/gpu/gl_texture_buffer.h +++ b/mediapipe/gpu/gl_texture_buffer.h @@ -19,6 +19,7 @@ #define MEDIAPIPE_GPU_GL_TEXTURE_BUFFER_H_ #include +#include #include "absl/memory/memory.h" #include "mediapipe/framework/formats/image_frame.h" diff --git a/mediapipe/gpu/gl_thread_collector.h b/mediapipe/gpu/gl_thread_collector.h index 58a35503c..2cc7aa57a 100644 --- a/mediapipe/gpu/gl_thread_collector.h +++ b/mediapipe/gpu/gl_thread_collector.h @@ -17,7 +17,7 @@ #include -#if defined(MEDIAPIPE_USING_SWIFTSHADER) +#if defined(MEDIAPIPE_USING_LEGACY_SWIFTSHADER) #define MEDIAPIPE_NEEDS_GL_THREAD_COLLECTOR 1 #endif diff --git a/mediapipe/gpu/gpu_buffer_format.cc b/mediapipe/gpu/gpu_buffer_format.cc index 646fb383f..510a9cd48 100644 --- a/mediapipe/gpu/gpu_buffer_format.cc +++ b/mediapipe/gpu/gpu_buffer_format.cc @@ -35,6 +35,10 @@ namespace mediapipe { #endif // GL_HALF_FLOAT_OES #endif // __EMSCRIPTEN__ +#ifndef GL_RGBA8 +#define GL_RGBA8 0x8058 +#endif // GL_RGBA8 + #if !MEDIAPIPE_DISABLE_GPU #ifdef GL_ES_VERSION_2_0 static void AdaptGlTextureInfoForGLES2(GlTextureInfo* info) { @@ -163,6 +167,14 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format, { {GL_RGBA32F, GL_RGBA, GL_FLOAT, 1}, }}, + {GpuBufferFormat::kImmutableRGBAFloat128, + { + {GL_RGBA32F, GL_RGBA, GL_FLOAT, 1, true /* immutable */}, + }}, + {GpuBufferFormat::kImmutableRGBA32, + { + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, 1, true /* immutable */}, + }}, }}; static const auto* gles2_format_info = ([] { @@ -206,6 +218,7 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format, ImageFormat::Format ImageFormatForGpuBufferFormat(GpuBufferFormat format) { switch (format) { + case GpuBufferFormat::kImmutableRGBA32: case GpuBufferFormat::kBGRA32: // TODO: verify we are handling order of channels correctly. return ImageFormat::SRGBA; @@ -221,10 +234,11 @@ ImageFormat::Format ImageFormatForGpuBufferFormat(GpuBufferFormat format) { return ImageFormat::SRGB; case GpuBufferFormat::kTwoComponentFloat32: return ImageFormat::VEC32F2; + case GpuBufferFormat::kImmutableRGBAFloat128: case GpuBufferFormat::kRGBAFloat128: return ImageFormat::VEC32F4; case GpuBufferFormat::kRGBA32: - // TODO: this likely maps to ImageFormat::SRGBA + return ImageFormat::SRGBA; case GpuBufferFormat::kGrayHalf16: case GpuBufferFormat::kOneComponent8Alpha: case GpuBufferFormat::kOneComponent8Red: diff --git a/mediapipe/gpu/gpu_buffer_format.h b/mediapipe/gpu/gpu_buffer_format.h index 06eabda77..223780939 100644 --- a/mediapipe/gpu/gpu_buffer_format.h +++ b/mediapipe/gpu/gpu_buffer_format.h @@ -53,6 +53,10 @@ enum class GpuBufferFormat : uint32_t { kRGB24 = 0x00000018, // Note: prefer BGRA32 whenever possible. kRGBAHalf64 = MEDIAPIPE_FOURCC('R', 'G', 'h', 'A'), kRGBAFloat128 = MEDIAPIPE_FOURCC('R', 'G', 'f', 'A'), + // Immutable version of kRGBA32 + kImmutableRGBA32 = MEDIAPIPE_FOURCC('4', 'C', 'I', '8'), + // Immutable version of kRGBAFloat128 + kImmutableRGBAFloat128 = MEDIAPIPE_FOURCC('4', 'C', 'I', 'f'), // 8-bit Y plane + interleaved 8-bit U/V plane with 2x2 subsampling. 
kNV12 = MEDIAPIPE_FOURCC('N', 'V', '1', '2'), // 8-bit Y plane + interleaved 8-bit V/U plane with 2x2 subsampling. @@ -78,6 +82,9 @@ struct GlTextureInfo { // For multiplane buffers, this represents how many times smaller than // the nominal image size a plane is. int downscale; + // For GLES3.1+ compute shaders, users may explicitly request immutable + // textures. + bool immutable = false; }; const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format, @@ -121,6 +128,8 @@ inline OSType CVPixelFormatForGpuBufferFormat(GpuBufferFormat format) { return kCVPixelFormatType_64RGBAHalf; case GpuBufferFormat::kRGBAFloat128: return kCVPixelFormatType_128RGBAFloat; + case GpuBufferFormat::kImmutableRGBA32: + case GpuBufferFormat::kImmutableRGBAFloat128: case GpuBufferFormat::kNV12: case GpuBufferFormat::kNV21: case GpuBufferFormat::kI420: diff --git a/mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.cc b/mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.cc index da6c5a72d..5983758f9 100644 --- a/mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.cc +++ b/mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.cc @@ -151,7 +151,7 @@ static std::shared_ptr ConvertFromImageFrame( std::shared_ptr frame) { auto status_or_buffer = CreateCVPixelBufferForImageFrame(frame->image_frame()); - ABSL_CHECK(status_or_buffer.ok()); + ABSL_CHECK_OK(status_or_buffer); return std::make_shared( std::move(status_or_buffer).value()); } diff --git a/mediapipe/gpu/gpu_origin.proto b/mediapipe/gpu/gpu_origin.proto index 9d4ae2aa1..2c116cbde 100644 --- a/mediapipe/gpu/gpu_origin.proto +++ b/mediapipe/gpu/gpu_origin.proto @@ -16,6 +16,9 @@ syntax = "proto3"; package mediapipe; +option java_package = "com.google.mediapipe.gpu"; +option java_outer_classname = "GpuOriginProto"; + message GpuOrigin { enum Mode { DEFAULT = 0; diff --git a/mediapipe/gpu/gpu_shared_data_internal.cc b/mediapipe/gpu/gpu_shared_data_internal.cc index 31bdf18b5..965a8b4c5 100644 --- a/mediapipe/gpu/gpu_shared_data_internal.cc +++ b/mediapipe/gpu/gpu_shared_data_internal.cc @@ -14,10 +14,14 @@ #include "mediapipe/gpu/gpu_shared_data_internal.h" +#include +#include + #include "absl/base/attributes.h" #include "absl/log/absl_check.h" +#include "absl/log/absl_log.h" +#include "absl/status/status.h" #include "mediapipe/framework/deps/no_destructor.h" -#include "mediapipe/framework/port/ret_check.h" #include "mediapipe/gpu/gl_context.h" #include "mediapipe/gpu/gl_context_options.pb.h" #include "mediapipe/gpu/graph_support.h" @@ -83,8 +87,25 @@ GpuResources::StatusOrGpuResources GpuResources::Create( } GpuResources::GpuResources(std::shared_ptr gl_context) + : gl_key_context_(new GlContextMapType(), + [](auto* map) { + // This flushes all pending jobs in all GL contexts, + // ensuring that all GL contexts not referenced + // elsewhere are destroyed as part of this destructor. + // Failure to do this may cause GL threads to outlast + // this destructor and execute jobs after the + // GpuResources object is destroyed. 
+ for (auto& [key, context] : *map) { + const auto status = std::move(context)->Run( + []() { return absl::OkStatus(); }); + ABSL_LOG_IF(ERROR, !status.ok()) + << "Failed to flush GlContext jobs: " << status; + } + delete map; + }) #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER - : texture_caches_(std::make_shared()), + , + texture_caches_(std::make_shared()), gpu_buffer_pool_( [tc = texture_caches_](const internal::GpuBufferSpec& spec, const MultiPoolOptions& options) { @@ -92,7 +113,7 @@ GpuResources::GpuResources(std::shared_ptr gl_context) }) #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER { - gl_key_context_[SharedContextKey()] = gl_context; + gl_key_context_->insert({SharedContextKey(), gl_context}); named_executors_[kGpuExecutorName] = std::make_shared(gl_context.get()); #if __APPLE__ @@ -104,6 +125,15 @@ GpuResources::GpuResources(std::shared_ptr gl_context) } GpuResources::~GpuResources() { + // This flushes all pending jobs in all GL contexts, + // ensuring that all existing jobs, which may refer GpuResource and kept their + // gpu resources (e.g. GpuResources::gpu_buffer_pool_) through a raw pointer, + // have finished before kept gpu resources get deleted. + for (auto& [key, context] : *gl_key_context_) { + const auto status = context->Run([]() { return absl::OkStatus(); }); + ABSL_LOG_IF(ERROR, !status.ok()) + << "Failed to flush GlContext jobs: " << status; + } #if __APPLE__ // Note: on Apple platforms, this object contains Objective-C objects. // The destructor will release them, but ARC must be on. @@ -111,7 +141,7 @@ GpuResources::~GpuResources() { #error This file must be built with ARC. #endif #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER - for (auto& kv : gl_key_context_) { + for (auto& kv : *gl_key_context_) { texture_caches_->UnregisterTextureCache(kv.second->cv_texture_cache()); } #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER @@ -173,23 +203,24 @@ absl::Status GpuResources::PrepareGpuNode(CalculatorNode* node) { const std::shared_ptr& GpuResources::gl_context( CalculatorContext* cc) { if (cc) { - auto it = gl_key_context_.find(node_key_[cc->NodeName()]); - if (it != gl_key_context_.end()) { + auto it = gl_key_context_->find(node_key_[cc->NodeName()]); + if (it != gl_key_context_->end()) { return it->second; } } - return gl_key_context_[SharedContextKey()]; + return gl_key_context_->at(SharedContextKey()); } GlContext::StatusOrGlContext GpuResources::GetOrCreateGlContext( const std::string& key) { - auto it = gl_key_context_.find(key); - if (it == gl_key_context_.end()) { - MP_ASSIGN_OR_RETURN(std::shared_ptr new_context, - GlContext::Create(*gl_key_context_[SharedContextKey()], - kGlContextUseDedicatedThread)); - it = gl_key_context_.emplace(key, new_context).first; + auto it = gl_key_context_->find(key); + if (it == gl_key_context_->end()) { + MP_ASSIGN_OR_RETURN( + std::shared_ptr new_context, + GlContext::Create(*gl_key_context_->at(SharedContextKey()), + kGlContextUseDedicatedThread)); + it = gl_key_context_->emplace(key, new_context).first; #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER texture_caches_->RegisterTextureCache(it->second->cv_texture_cache()); #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER diff --git a/mediapipe/gpu/gpu_shared_data_internal.h b/mediapipe/gpu/gpu_shared_data_internal.h index 3f7c67e2e..5e8be45ab 100644 --- a/mediapipe/gpu/gpu_shared_data_internal.h +++ b/mediapipe/gpu/gpu_shared_data_internal.h @@ -21,6 +21,8 @@ #ifndef MEDIAPIPE_GPU_GPU_SHARED_DATA_INTERNAL_H_ #define MEDIAPIPE_GPU_GPU_SHARED_DATA_INTERNAL_H_ +#include + 
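Condensed, the flush idiom the comments above describe: GlContext::Run executes queued jobs in order on the context's thread, so synchronously running an empty job acts as a barrier guaranteeing that every job queued earlier (some of which may hold raw pointers into GpuResources, e.g. the buffer pool) has finished. The same calls as in the hunk, shown in one place for reference:

    for (auto& [key, context] : *gl_key_context_) {
      // Runs an empty job and waits for it; all jobs queued on this context
      // before it have completed by the time Run returns.
      const auto status = context->Run([] { return absl::OkStatus(); });
      ABSL_LOG_IF(ERROR, !status.ok())
          << "Failed to flush GlContext jobs: " << status;
    }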
#include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_node.h" #include "mediapipe/framework/executor.h" @@ -82,7 +84,10 @@ class GpuResources { const std::string& ContextKey(const std::string& canonical_node_name); std::map node_key_; - std::map> gl_key_context_; + + using GlContextMapType = std::map>; + std::unique_ptr + gl_key_context_; #ifdef MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER std::shared_ptr texture_caches_; diff --git a/mediapipe/gpu/gpu_test_base.h b/mediapipe/gpu/gpu_test_base.h index 6ec53603b..94842c769 100644 --- a/mediapipe/gpu/gpu_test_base.h +++ b/mediapipe/gpu/gpu_test_base.h @@ -15,6 +15,9 @@ #ifndef MEDIAPIPE_GPU_GPU_TEST_BASE_H_ #define MEDIAPIPE_GPU_GPU_TEST_BASE_H_ +#include +#include + #include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/gpu/gl_calculator_helper.h" @@ -22,9 +25,9 @@ namespace mediapipe { -class GpuTestBase : public ::testing::Test { +class GpuTestEnvironment { protected: - GpuTestBase() { helper_.InitializeForTest(gpu_resources_.get()); } + GpuTestEnvironment() { helper_.InitializeForTest(gpu_resources_.get()); } void RunInGlContext(std::function gl_func) { helper_.RunInGlContext(std::move(gl_func)); @@ -35,6 +38,12 @@ class GpuTestBase : public ::testing::Test { GlCalculatorHelper helper_; }; +class GpuTestBase : public testing::Test, public GpuTestEnvironment {}; + +template +class GpuTestWithParamBase : public testing::TestWithParam, + public GpuTestEnvironment {}; + } // namespace mediapipe #endif // MEDIAPIPE_GPU_GPU_TEST_BASE_H_ diff --git a/mediapipe/gpu/metal.bzl b/mediapipe/gpu/metal.bzl index 77129baf2..4ee6dd8f8 100644 --- a/mediapipe/gpu/metal.bzl +++ b/mediapipe/gpu/metal.bzl @@ -171,7 +171,7 @@ METAL_LIBRARY_ATTRS = dicts.add(apple_support.action_required_attrs(), { metal_library = rule( implementation = _metal_library_impl, attrs = METAL_LIBRARY_ATTRS, - fragments = ["apple", "objc", "swift"], + fragments = ["apple", "objc"], output_to_genfiles = True, ) """ diff --git a/mediapipe/gpu/metal_shared_resources.mm b/mediapipe/gpu/metal_shared_resources.cc similarity index 85% rename from mediapipe/gpu/metal_shared_resources.mm rename to mediapipe/gpu/metal_shared_resources.cc index 80d755a01..925c0f995 100644 --- a/mediapipe/gpu/metal_shared_resources.mm +++ b/mediapipe/gpu/metal_shared_resources.cc @@ -50,9 +50,10 @@ - (CVMetalTextureCacheRef)mtlTextureCache { @synchronized(self) { if (!_mtlTextureCache) { - CVReturn __unused err = - CVMetalTextureCacheCreate(NULL, NULL, self.mtlDevice, NULL, &_mtlTextureCache); - NSAssert(err == kCVReturnSuccess, @"Error at CVMetalTextureCacheCreate %d ; device %@", err, + CVReturn __unused err = CVMetalTextureCacheCreate( + NULL, NULL, self.mtlDevice, NULL, &_mtlTextureCache); + NSAssert(err == kCVReturnSuccess, + @"Error at CVMetalTextureCacheCreate %d ; device %@", err, self.mtlDevice); // TODO: register and flush metal caches too. 
} diff --git a/mediapipe/gpu/pixel_buffer_pool_util.mm b/mediapipe/gpu/pixel_buffer_pool_util.cc similarity index 63% rename from mediapipe/gpu/pixel_buffer_pool_util.mm rename to mediapipe/gpu/pixel_buffer_pool_util.cc index 0b13cb194..9980d0a5d 100644 --- a/mediapipe/gpu/pixel_buffer_pool_util.mm +++ b/mediapipe/gpu/pixel_buffer_pool_util.cc @@ -24,23 +24,27 @@ namespace mediapipe { -CVPixelBufferPoolRef CreateCVPixelBufferPool( - int width, int height, OSType pixelFormat, int keepCount, - CFTimeInterval maxAge) { +CVPixelBufferPoolRef CreateCVPixelBufferPool(int width, int height, + OSType pixelFormat, int keepCount, + CFTimeInterval maxAge) { CVPixelBufferPoolRef pool = NULL; NSMutableDictionary *sourcePixelBufferOptions = - [(__bridge NSDictionary*)GetCVPixelBufferAttributesForGlCompatibility() mutableCopy]; + [(__bridge NSDictionary *)GetCVPixelBufferAttributesForGlCompatibility() + mutableCopy]; [sourcePixelBufferOptions addEntriesFromDictionary:@{ (id)kCVPixelBufferPixelFormatTypeKey : @(pixelFormat), (id)kCVPixelBufferWidthKey : @(width), (id)kCVPixelBufferHeightKey : @(height), }]; - NSMutableDictionary *pixelBufferPoolOptions = [[NSMutableDictionary alloc] init]; - pixelBufferPoolOptions[(id)kCVPixelBufferPoolMinimumBufferCountKey] = @(keepCount); + NSMutableDictionary *pixelBufferPoolOptions = + [[NSMutableDictionary alloc] init]; + pixelBufferPoolOptions[(id)kCVPixelBufferPoolMinimumBufferCountKey] = + @(keepCount); if (maxAge > 0) { - pixelBufferPoolOptions[(id)kCVPixelBufferPoolMaximumBufferAgeKey] = @(maxAge); + pixelBufferPoolOptions[(id)kCVPixelBufferPoolMaximumBufferAgeKey] = + @(maxAge); } CVPixelBufferPoolCreate( @@ -50,8 +54,9 @@ CVPixelBufferPoolRef CreateCVPixelBufferPool( return pool; } -OSStatus PreallocateCVPixelBufferPoolBuffers( - CVPixelBufferPoolRef pool, int count, CFDictionaryRef auxAttributes) { +OSStatus PreallocateCVPixelBufferPoolBuffers(CVPixelBufferPoolRef pool, + int count, + CFDictionaryRef auxAttributes) { CVReturn err = kCVReturnSuccess; NSMutableArray *pixelBuffers = [[NSMutableArray alloc] init]; for (int i = 0; i < count && err == kCVReturnSuccess; i++) { @@ -68,30 +73,37 @@ OSStatus PreallocateCVPixelBufferPoolBuffers( return err; } -CFDictionaryRef CreateCVPixelBufferPoolAuxiliaryAttributesForThreshold(int allocationThreshold) { +CFDictionaryRef CreateCVPixelBufferPoolAuxiliaryAttributesForThreshold( + int allocationThreshold) { if (allocationThreshold > 0) { - return (CFDictionaryRef)CFBridgingRetain( - @{(id)kCVPixelBufferPoolAllocationThresholdKey: @(allocationThreshold)}); + return (CFDictionaryRef)CFBridgingRetain(@{ + (id)kCVPixelBufferPoolAllocationThresholdKey : @(allocationThreshold) + }); } else { return nil; } } -CVReturn CreateCVPixelBufferWithPool( - CVPixelBufferPoolRef pool, CFDictionaryRef auxAttributes, - CVTextureCacheType textureCache, CVPixelBufferRef* outBuffer) { - return CreateCVPixelBufferWithPool(pool, auxAttributes, [textureCache](){ +CVReturn CreateCVPixelBufferWithPool(CVPixelBufferPoolRef pool, + CFDictionaryRef auxAttributes, + CVTextureCacheType textureCache, + CVPixelBufferRef *outBuffer) { + return CreateCVPixelBufferWithPool( + pool, auxAttributes, + [textureCache]() { #if TARGET_OS_OSX - CVOpenGLTextureCacheFlush(textureCache, 0); + CVOpenGLTextureCacheFlush(textureCache, 0); #else - CVOpenGLESTextureCacheFlush(textureCache, 0); + CVOpenGLESTextureCacheFlush(textureCache, 0); #endif // TARGET_OS_OSX - }, outBuffer); + }, + outBuffer); } -CVReturn CreateCVPixelBufferWithPool( - CVPixelBufferPoolRef pool, 
CFDictionaryRef auxAttributes, - std::function flush, CVPixelBufferRef* outBuffer) { +CVReturn CreateCVPixelBufferWithPool(CVPixelBufferPoolRef pool, + CFDictionaryRef auxAttributes, + std::function flush, + CVPixelBufferRef *outBuffer) { CVReturn err = CVPixelBufferPoolCreatePixelBufferWithAuxAttributes( kCFAllocatorDefault, pool, auxAttributes, outBuffer); if (err == kCVReturnWouldExceedAllocationThreshold) { @@ -103,11 +115,13 @@ CVReturn CreateCVPixelBufferWithPool( kCFAllocatorDefault, pool, auxAttributes, outBuffer); } if (err == kCVReturnWouldExceedAllocationThreshold) { - // TODO: allow the application to set the threshold. For now, disable it by - // default, since the threshold we are using is arbitrary and some graphs routinely cross it. + // TODO: allow the application to set the threshold. For now, disable it + // by default, since the threshold we are using is arbitrary and some + // graphs routinely cross it. #ifdef ENABLE_MEDIAPIPE_GPU_BUFFER_THRESHOLD_CHECK - NSLog(@"Using more buffers than expected! This is a debug-only warning, " - "you can ignore it if your app works fine otherwise."); + NSLog( + @"Using more buffers than expected! This is a debug-only warning, " + "you can ignore it if your app works fine otherwise."); #ifdef DEBUG NSLog(@"Pool status: %@", ((__bridge NSObject *)pool).description); #endif // DEBUG diff --git a/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java b/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java index 4376aeb58..e063e7ef0 100644 --- a/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java +++ b/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java @@ -17,7 +17,6 @@ package com.google.mediapipe.components; import static java.lang.Math.max; import static java.lang.Math.min; -import android.graphics.Bitmap; import android.graphics.SurfaceTexture; import android.opengl.GLES11Ext; import android.opengl.GLES20; @@ -25,6 +24,7 @@ import android.opengl.GLES31; import android.opengl.GLSurfaceView; import android.opengl.Matrix; import android.util.Log; +import android.util.Pair; import com.google.mediapipe.framework.TextureFrame; import com.google.mediapipe.glutil.CommonShaders; import com.google.mediapipe.glutil.ShaderUtil; @@ -51,11 +51,9 @@ import javax.microedition.khronos.opengles.GL10; * {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}). */ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { - /** - * Listener for Bitmap capture requests. - */ - public interface BitmapCaptureListener { - void onBitmapCaptured(Bitmap result); + /** Listener for image capture requests. */ + public interface ImageCaptureListener { + void onImageCaptured(int width, int height, int[] data); } /** @@ -86,27 +84,28 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { private float[] textureTransformMatrix = new float[16]; private SurfaceTexture surfaceTexture = null; private final AtomicReference nextFrame = new AtomicReference<>(); - private final AtomicBoolean captureNextFrameBitmap = new AtomicBoolean(); - private BitmapCaptureListener bitmapCaptureListener; + private final AtomicBoolean captureNextFrameImage = new AtomicBoolean(); + private ImageCaptureListener imageCaptureListener; // Specifies whether a black CLAMP_TO_BORDER effect should be used. private boolean shouldClampToBorder = false; private Scale scale = Scale.FILL; - /** - * Sets the {@link BitmapCaptureListener}. 
- */ - public void setBitmapCaptureListener(BitmapCaptureListener bitmapCaptureListener) { - this.bitmapCaptureListener = bitmapCaptureListener; + private float zoomFactor = 1.0f; + private Pair zoomLocation = new Pair<>(0.5f, 0.5f); + + /** Sets the {@link ImageCaptureListener}. */ + public void setImageCaptureListener(ImageCaptureListener imageCaptureListener) { + this.imageCaptureListener = imageCaptureListener; } /** - * Request to capture Bitmap of the next frame. + * Request to capture Image of the next frame. * - * The result will be provided to the {@link BitmapCaptureListener} if one is set. Please note + *
<p>
The result will be provided to the {@link ImageCaptureListener} if one is set. Please note * this is an expensive operation and the result may not be available for a while. */ - public void captureNextFrameBitmap() { - captureNextFrameBitmap.set(true); + public void captureNextFrameImage() { + captureNextFrameImage.set(true); } @Override @@ -202,9 +201,9 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4); ShaderUtil.checkGlError("glDrawArrays"); - // Capture Bitmap if requested. - BitmapCaptureListener bitmapCaptureListener = this.bitmapCaptureListener; - if (captureNextFrameBitmap.getAndSet(false) && bitmapCaptureListener != null) { + // Capture image if requested. + ImageCaptureListener imageCaptureListener = this.imageCaptureListener; + if (captureNextFrameImage.getAndSet(false) && imageCaptureListener != null) { // Find the name of the bound texture. int[] texName = new int[1]; if (surfaceTexture != null) { @@ -219,8 +218,8 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { GLES31.glGetTexLevelParameteriv(textureTarget, 0, GLES31.GL_TEXTURE_HEIGHT, texDims, 1); int texWidth = texDims[0]; int texHeight = texDims[1]; - int bitmapSize = texWidth * texHeight; - ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bitmapSize * 4); + int imageSize = texWidth * texHeight; + ByteBuffer byteBuffer = ByteBuffer.allocateDirect(imageSize * 4); byteBuffer.order(ByteOrder.nativeOrder()); // Read pixels from texture. @@ -235,27 +234,9 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { GLES20.glDeleteFramebuffers(1, fbo, 0); ShaderUtil.checkGlError("capture frame"); - int[] pixelBuffer = new int[bitmapSize]; - byteBuffer.asIntBuffer().get(pixelBuffer); - for (int i = 0; i < bitmapSize; i++) { - // Swap R and B channels. - pixelBuffer[i] = - (pixelBuffer[i] & 0xff00ff00) - | ((pixelBuffer[i] & 0x000000ff) << 16) - | ((pixelBuffer[i] & 0x00ff0000) >> 16); - } - - // Send bitmap. - Bitmap bitmap = Bitmap.createBitmap(texWidth, texHeight, Bitmap.Config.ARGB_8888); - bitmap.setPixels( - pixelBuffer, - /* offset= */ bitmapSize - texWidth, - /* stride= */ -texWidth, - /* x= */ 0, - /* y= */ 0, - texWidth, - texHeight); - bitmapCaptureListener.onBitmapCaptured(bitmap); + int[] data = new int[imageSize]; + byteBuffer.asIntBuffer().get(data); + imageCaptureListener.onImageCaptured(texWidth, texHeight, data); } GLES20.glBindTexture(textureTarget, 0); @@ -294,6 +275,15 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { float textureBottom = (1.0f - scaleHeight) * alignmentVertical; float textureTop = textureBottom + scaleHeight; + Pair currentZoomLocation = this.zoomLocation; + float zoomLocationX = currentZoomLocation.first; + float zoomLocationY = currentZoomLocation.second; + + textureLeft = (textureLeft - 0.5f) / zoomFactor + zoomLocationX; + textureRight = (textureRight - 0.5f) / zoomFactor + zoomLocationX; + textureBottom = (textureBottom - 0.5f) / zoomFactor + zoomLocationY; + textureTop = (textureTop - 0.5f) / zoomFactor + zoomLocationY; + return new float[] {textureLeft, textureRight, textureBottom, textureTop}; } @@ -380,6 +370,22 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { this.scale = scale; } + /** Zoom factor applied to the frame, must not be 0. 
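For callers migrating off the removed BitmapCaptureListener, a minimal sketch of an ImageCaptureListener that rebuilds the Bitmap the old callback produced; it repeats the R/B swap and bottom-up flip the deleted onDrawFrame code performed (the renderer variable and everything else here is illustrative):

    renderer.setImageCaptureListener(
        (width, height, data) -> {
          // glReadPixels returns RGBA; ARGB_8888 int pixels need R and B swapped.
          for (int i = 0; i < data.length; i++) {
            int p = data[i];
            data[i] = (p & 0xff00ff00) | ((p & 0x000000ff) << 16) | ((p & 0x00ff0000) >> 16);
          }
          Bitmap bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
          // A negative stride starting at the last row flips the GL (bottom-up)
          // image into Android's top-down orientation.
          bitmap.setPixels(data, data.length - width, -width, 0, 0, width, height);
          // ... hand the bitmap off as before ...
        });

On the zoom additions in the same file: calculateTextureBoundary now maps each boundary b to (b - 0.5) / zoomFactor + zoomLocation, so zoomFactor = 2 with the default (0.5, 0.5) location samples the window [0.25, 0.75] on each axis, i.e. a 2x zoom about the frame center.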
*/ + public void setZoomFactor(float zoomFactor) { + if (zoomFactor == 0.f) { + return; + } + this.zoomFactor = zoomFactor; + } + + /** + * Location where to apply the zooming of the frame to. Default is 0.5, 0.5 (scaling is applied to + * the center). + */ + public void setZoomLocation(float zoomLocationX, float zoomLocationY) { + this.zoomLocation = new Pair<>(zoomLocationX, zoomLocationY); + } + private boolean isExternalTexture() { return textureTarget == GLES11Ext.GL_TEXTURE_EXTERNAL_OES; } diff --git a/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java b/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java index 5ea12872a..3d6b16ce6 100644 --- a/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java +++ b/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java @@ -128,6 +128,16 @@ public final class PacketGetter { return ProtoUtil.unpack(result, defaultInstance); } + public static T getProto(final Packet packet, Parser messageParser) { + SerializedMessage result = new SerializedMessage(); + nativeGetProto(packet.getNativeHandle(), result); + try { + return messageParser.parseFrom(result.value); + } catch (InvalidProtocolBufferException e) { + throw new IllegalArgumentException(e); + } + } + /** * @deprecated {@link #getProto(Packet, MessageLite)} is safer to use in obfuscated builds. */ diff --git a/mediapipe/java/com/google/mediapipe/glutil/EglManager.java b/mediapipe/java/com/google/mediapipe/glutil/EglManager.java index bad59ce3a..ae52dc895 100644 --- a/mediapipe/java/com/google/mediapipe/glutil/EglManager.java +++ b/mediapipe/java/com/google/mediapipe/glutil/EglManager.java @@ -114,6 +114,16 @@ public class EglManager { } } + /** Returns the managed {@link EGLDisplay}. */ + public EGLDisplay getEglDisplay() { + return eglDisplay; + } + + /** Returns the {@link EGL10}. */ + public EGL10 getEgl() { + return egl; + } + /** Returns the managed {@link EGLContext} */ public EGLContext getContext() { return eglContext; diff --git a/mediapipe/java/com/google/mediapipe/glutil/GlThread.java b/mediapipe/java/com/google/mediapipe/glutil/GlThread.java index 24b8ca094..b8d4fa636 100644 --- a/mediapipe/java/com/google/mediapipe/glutil/GlThread.java +++ b/mediapipe/java/com/google/mediapipe/glutil/GlThread.java @@ -128,6 +128,10 @@ public class GlThread extends Thread { prepareGl(); startedSuccessfully = true; + } catch (RuntimeException e) { + releaseGl(); + eglManager.release(); + throw e; } finally { // Always stop waitUntilReady here, even if we got an exception. // Otherwise the main thread may be stuck waiting. diff --git a/mediapipe/model_maker/python/core/utils/metrics.py b/mediapipe/model_maker/python/core/utils/metrics.py index 310146168..cf0be6d08 100644 --- a/mediapipe/model_maker/python/core/utils/metrics.py +++ b/mediapipe/model_maker/python/core/utils/metrics.py @@ -94,6 +94,23 @@ def _get_sparse_metric(metric: tf.metrics.Metric): return SparseMetric +class BinaryAUC(tf.keras.metrics.AUC): + """A Binary AUC metric for binary classification tasks. + + For update state, the shapes of y_true and y_pred are expected to be: + - y_true: [batch_size x 1] array of 0 for negatives and 1 for positives + - y_pred: [batch_size x 2] array of probabilities where y_pred[:,0] are the + probabilities of the 0th(negative) class and y_pred[:,1] are the + probabilities of the 1st(positive) class + + See https://www.tensorflow.org/api_docs/python/tf/keras/metrics/AUC for + details. 
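A usage sketch for the parser-based PacketGetter.getProto overload added above. DetectionList is only an illustrative generated proto message; any message's parser() works the same way:

    // Parses the packet's serialized proto with an explicit parser instead of
    // a default instance.
    DetectionList detections = PacketGetter.getProto(packet, DetectionList.parser());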
+ """ + + def update_state(self, y_true, y_pred, sample_weight=None): + super().update_state(y_true, y_pred[:, 1], sample_weight) + + SparseRecall = _get_sparse_metric(tf.metrics.Recall) SparsePrecision = _get_sparse_metric(tf.metrics.Precision) BinarySparseRecallAtPrecision = _get_binary_sparse_metric( diff --git a/mediapipe/model_maker/python/core/utils/metrics_test.py b/mediapipe/model_maker/python/core/utils/metrics_test.py index 842335273..2ea8769d2 100644 --- a/mediapipe/model_maker/python/core/utils/metrics_test.py +++ b/mediapipe/model_maker/python/core/utils/metrics_test.py @@ -14,6 +14,7 @@ from absl.testing import parameterized +import numpy as np import tensorflow as tf from mediapipe.model_maker.python.core.utils import metrics @@ -23,16 +24,15 @@ class SparseMetricTest(tf.test.TestCase, parameterized.TestCase): def setUp(self): super().setUp() - self.y_true = [0, 0, 1, 1, 0, 1] - self.y_pred = [ + self.y_true = np.array([0, 0, 1, 1, 0, 1]) + self.y_pred = np.array([ [0.9, 0.1], # 0, 0 y [0.8, 0.2], # 0, 0 y [0.7, 0.3], # 0, 1 n [0.6, 0.4], # 0, 1 n [0.3, 0.7], # 1, 0 y [0.3, 0.7], # 1, 1 y - ] - self.num_classes = 3 + ]) def _assert_metric_equals(self, metric, value): metric.update_state(self.y_true, self.y_pred) @@ -69,6 +69,10 @@ class SparseMetricTest(tf.test.TestCase, parameterized.TestCase): ): _ = metrics.BinarySparsePrecisionAtRecall(1.0, class_id=2) + def test_binary_auc(self): + metric = metrics.BinaryAUC(num_thresholds=1000) + self._assert_metric_equals(metric, 0.7222222) + if __name__ == '__main__': tf.test.main() diff --git a/mediapipe/model_maker/python/core/utils/model_util.py b/mediapipe/model_maker/python/core/utils/model_util.py index 2b1eebf9f..32b509797 100644 --- a/mediapipe/model_maker/python/core/utils/model_util.py +++ b/mediapipe/model_maker/python/core/utils/model_util.py @@ -35,18 +35,23 @@ ESTIMITED_STEPS_PER_EPOCH = 1000 def get_default_callbacks( export_dir: str, + checkpoint_frequency: int = 5, ) -> Sequence[tf.keras.callbacks.Callback]: """Gets default callbacks.""" + callbacks = [] summary_dir = os.path.join(export_dir, 'summaries') summary_callback = tf.keras.callbacks.TensorBoard(summary_dir) + callbacks.append(summary_callback) - checkpoint_path = os.path.join(export_dir, 'checkpoint') - checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( - os.path.join(checkpoint_path, 'model-{epoch:04d}'), - save_weights_only=True, - period=5, - ) - return [summary_callback, checkpoint_callback] + if checkpoint_frequency > 0: + checkpoint_path = os.path.join(export_dir, 'checkpoint') + checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( + os.path.join(checkpoint_path, 'model-{epoch:04d}'), + save_weights_only=True, + period=checkpoint_frequency, + ) + callbacks.append(checkpoint_callback) + return callbacks def load_keras_model( diff --git a/mediapipe/model_maker/python/core/utils/model_util_test.py b/mediapipe/model_maker/python/core/utils/model_util_test.py index 57750624f..ed8ba85e5 100644 --- a/mediapipe/model_maker/python/core/utils/model_util_test.py +++ b/mediapipe/model_maker/python/core/utils/model_util_test.py @@ -25,6 +25,23 @@ from mediapipe.model_maker.python.core.utils import test_util class ModelUtilTest(tf.test.TestCase, parameterized.TestCase): + def test_get_default_callbacks(self): + callbacks = model_util.get_default_callbacks( + 'export_dir', checkpoint_frequency=5 + ) + self.assertLen(callbacks, 2) + self.assertIsInstance(callbacks[0], tf.keras.callbacks.TensorBoard) + self.assertEqual(callbacks[0].log_dir, 
'export_dir/summaries') + self.assertIsInstance(callbacks[1], tf.keras.callbacks.ModelCheckpoint) + self.assertEqual(callbacks[1].period, 5) + + callbacks = model_util.get_default_callbacks( + 'export_dir_2', checkpoint_frequency=0 + ) + self.assertLen(callbacks, 1) + self.assertIsInstance(callbacks[0], tf.keras.callbacks.TensorBoard) + self.assertEqual(callbacks[0].log_dir, 'export_dir_2/summaries') + def test_load_keras_model(self): input_dim = 4 model = test_util.build_model(input_shape=[input_dim], num_classes=2) diff --git a/mediapipe/model_maker/python/text/text_classifier/hyperparameters.py b/mediapipe/model_maker/python/text/text_classifier/hyperparameters.py index 5d16564f5..a7dc05d5b 100644 --- a/mediapipe/model_maker/python/text/text_classifier/hyperparameters.py +++ b/mediapipe/model_maker/python/text/text_classifier/hyperparameters.py @@ -56,6 +56,8 @@ class BertHParams(hp.BaseHParams): value to 0. Defaults to 2.0. tokenizer: Tokenizer to use for preprocessing. Must be one of the enum options of SupportedBertTokenizers. Defaults to FULL_TOKENIZER. + checkpoint_frequency: Frequency(in epochs) of saving checkpoints during + training. Defaults to 0 which does not save training checkpoints. """ learning_rate: float = 3e-5 @@ -75,5 +77,7 @@ class BertHParams(hp.BaseHParams): bert_tokenizer.SupportedBertTokenizers.FULL_TOKENIZER ) + checkpoint_frequency: int = 0 + HParams = Union[BertHParams, AverageWordEmbeddingHParams] diff --git a/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer.py b/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer.py index 95328fb43..a96fe1b84 100644 --- a/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer.py +++ b/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer.py @@ -32,4 +32,4 @@ class ModelWithTokenizer(tf.keras.Model): x = self._tokenizer.process_fn(input_tensor) x = {k: tf.expand_dims(v, axis=0) for k, v in x.items()} x = self._model(x) - return x + return x[0] # TODO: Add back the batch dimension diff --git a/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer_test.py b/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer_test.py index f6c5d2477..1da09ab4e 100644 --- a/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer_test.py +++ b/mediapipe/model_maker/python/text/text_classifier/model_with_tokenizer_test.py @@ -97,7 +97,7 @@ class BertTokenizerTest(tf.test.TestCase): self._tokenizer, self._model ) output = model(tf.constant(["Example input".encode("utf-8")])) - self.assertAllEqual(output.shape, (1, 2)) + self.assertAllEqual(output.shape, (2,)) self.assertEqual(tf.reduce_sum(output), 1) diff --git a/mediapipe/model_maker/python/text/text_classifier/text_classifier.py b/mediapipe/model_maker/python/text/text_classifier/text_classifier.py index 348f4cfb6..aea9224ff 100644 --- a/mediapipe/model_maker/python/text/text_classifier/text_classifier.py +++ b/mediapipe/model_maker/python/text/text_classifier/text_classifier.py @@ -372,7 +372,19 @@ class _BertClassifier(TextClassifier): ): super().__init__(model_spec, label_names, hparams.shuffle) self._hparams = hparams - self._callbacks = model_util.get_default_callbacks(self._hparams.export_dir) + self._callbacks = list( + model_util.get_default_callbacks( + self._hparams.export_dir, self._hparams.checkpoint_frequency + ) + ) + [ + tf.keras.callbacks.ModelCheckpoint( + os.path.join(self._hparams.export_dir, "best_model"), + monitor="val_auc", + mode="max", + 
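A small sketch of driving the new checkpoint_frequency hyperparameter from the model-maker API (the import path and all values are illustrative): 0, the BertHParams default, drops the per-epoch checkpoint callback entirely, while N > 0 saves weights every N epochs under <export_dir>/checkpoint/.

    from mediapipe_model_maker import text_classifier

    hparams = text_classifier.BertHParams(
        epochs=10,
        batch_size=32,
        learning_rate=3e-5,
        # Write model-{epoch:04d} weight checkpoints every 2 epochs; the
        # TensorBoard summaries and the best_model checkpoint are kept either way.
        checkpoint_frequency=2,
        export_dir="/tmp/text_classifier_export",
    )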
save_best_only=True, + save_weights_only=False, + ) + ] self._model_options = model_options self._text_preprocessor: preprocessor.BertClassifierPreprocessor = None with self._hparams.get_strategy().scope(): @@ -463,6 +475,7 @@ class _BertClassifier(TextClassifier): ), metrics.SparsePrecision(name="precision", dtype=tf.float32), metrics.SparseRecall(name="recall", dtype=tf.float32), + metrics.BinaryAUC(name="auc", num_thresholds=1000), ] if self._num_classes == 2: if self._hparams.desired_precisions: diff --git a/mediapipe/model_maker/python/text/text_classifier/text_classifier_test.py b/mediapipe/model_maker/python/text/text_classifier/text_classifier_test.py index fdc2613a9..02b3fe4d5 100644 --- a/mediapipe/model_maker/python/text/text_classifier/text_classifier_test.py +++ b/mediapipe/model_maker/python/text/text_classifier/text_classifier_test.py @@ -230,6 +230,7 @@ class TextClassifierTest(tf.test.TestCase, parameterized.TestCase): 'accuracy', 'recall', 'precision', + 'auc', 'precision_at_recall_0.2', 'recall_at_precision_0.9', ] diff --git a/mediapipe/model_maker/requirements.txt b/mediapipe/model_maker/requirements.txt index ff43fa3f0..ffb547b82 100644 --- a/mediapipe/model_maker/requirements.txt +++ b/mediapipe/model_maker/requirements.txt @@ -7,4 +7,4 @@ tensorflow-addons tensorflow-datasets tensorflow-hub tensorflow-text -tf-models-official>=2.13.1 +tf-models-official>=2.13.2 diff --git a/mediapipe/modules/face_geometry/data/BUILD b/mediapipe/modules/face_geometry/data/BUILD index 1661a2283..5a7d795a1 100644 --- a/mediapipe/modules/face_geometry/data/BUILD +++ b/mediapipe/modules/face_geometry/data/BUILD @@ -57,3 +57,17 @@ exports_files([ "canonical_face_model.obj", "canonical_face_model_uv_visualization.png", ]) + +# This metadata contains the 468 landmarks copied from geometry_pipeline_metadata_landmarks.pbtxt +# plus 10 extra landmarks representing the two irises. The position of each iris landmark was +# calculated using neighbor vertices through linear interpolation. Visual inspection suggests +# these positions are good. The UV coordinates are also estimated using a similar approach. 
+encode_binary_proto( + name = "geometry_pipeline_metadata_including_iris_landmarks", + input = "geometry_pipeline_metadata_including_iris_landmarks.pbtxt", + message_type = "mediapipe.face_geometry.GeometryPipelineMetadata", + output = "geometry_pipeline_metadata_including_iris_landmarks.binarypb", + deps = [ + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto", + ], +) diff --git a/mediapipe/modules/face_geometry/data/face_model_with_iris.obj b/mediapipe/modules/face_geometry/data/face_model_with_iris.obj new file mode 100644 index 000000000..29174b749 --- /dev/null +++ b/mediapipe/modules/face_geometry/data/face_model_with_iris.obj @@ -0,0 +1,1862 @@ +v 0.000000 -3.406404 5.979507 +v 0.000000 -1.126865 7.475604 +v 0.000000 -2.089024 6.058267 +v -0.463928 0.955357 6.633583 +v 0.000000 -0.463170 7.586580 +v 0.000000 0.365669 7.242870 +v 0.000000 2.473255 5.788627 +v -4.253081 2.577646 3.279702 +v 0.000000 4.019042 5.284764 +v 0.000000 4.885979 5.385258 +v 0.000000 8.261778 4.481535 +v 0.000000 -3.706811 5.864924 +v 0.000000 -3.918301 5.569430 +v 0.000000 -3.994436 5.219482 +v 0.000000 -4.542400 5.404754 +v 0.000000 -4.745577 5.529457 +v 0.000000 -5.019567 5.601448 +v 0.000000 -5.365123 5.535441 +v 0.000000 -6.149624 5.071372 +v 0.000000 -1.501095 7.112196 +v -0.416106 -1.466449 6.447657 +v -7.087960 5.434801 0.099620 +v -2.628639 2.035898 3.848121 +v -3.198363 1.985815 3.796952 +v -3.775151 2.039402 3.646194 +v -4.465819 2.422950 3.155168 +v -2.164289 2.189867 3.851822 +v -3.208229 3.223926 4.115822 +v -2.673803 3.205337 4.092203 +v -3.745193 3.165286 3.972409 +v -4.161018 3.059069 3.719554 +v -5.062006 1.934418 2.776093 +v -2.266659 -7.425768 4.389812 +v -4.445859 2.663991 3.173422 +v -7.214530 2.263009 0.073150 +v -5.799793 2.349546 2.204059 +v -2.844939 -0.720868 4.433130 +v -0.711452 -3.329355 5.877044 +v -0.606033 -3.924562 5.444923 +v -1.431615 -3.500953 5.496189 +v -1.914910 -3.803146 5.028930 +v -1.131043 -3.973937 5.189648 +v -1.563548 -4.082763 4.842263 +v -2.650112 -5.003649 4.188483 +v -0.427049 -1.094134 7.360529 +v -0.496396 -0.475659 7.440358 +v -5.253307 3.881582 3.363159 +v -1.718698 0.974609 4.558359 +v -1.608635 -0.942516 5.814193 +v -1.651267 -0.610868 5.581319 +v -4.765501 -0.701554 3.534632 +v -0.478306 0.295766 7.101013 +v -3.734964 4.508230 4.550454 +v -4.588603 4.302037 4.048484 +v -6.279331 6.615427 1.425850 +v -1.220941 4.142165 5.106035 +v -2.193489 3.100317 4.000575 +v -3.102642 -4.352984 4.095905 +v -6.719682 -4.788645 -1.745401 +v -1.193824 -1.306795 5.737747 +v -0.729766 -1.593712 5.833208 +v -2.456206 -4.342621 4.283884 +v -2.204823 -4.304508 4.162499 +v -4.985894 4.802461 3.751977 +v -1.592294 -1.257709 5.456949 +v -2.644548 4.524654 4.921559 +v -2.760292 5.100971 5.015990 +v -3.523964 8.005976 3.729163 +v -5.599763 5.715470 2.724259 +v -3.063932 6.566144 4.529981 +v -5.720968 4.254584 2.830852 +v -6.374393 4.785590 1.591691 +v -0.672728 -3.688016 5.737804 +v -1.262560 -3.787691 5.417779 +v -1.732553 -3.952767 5.000579 +v -1.043625 -1.464973 5.662455 +v -2.321234 -4.329069 4.258156 +v -2.056846 -4.477671 4.520883 +v -2.153084 -4.276322 4.038093 +v -0.946874 -1.035249 6.512274 +v -1.469132 -4.036351 4.604908 +v -1.024340 -3.989851 4.926693 +v -0.533422 -3.993222 5.138202 +v -0.769720 -6.095394 4.985883 +v -0.699606 -5.291850 5.448304 +v -0.669687 -4.949770 5.509612 +v -0.630947 -4.695101 5.449371 +v -0.583218 -4.517982 5.339869 +v -1.537170 -4.423206 4.745470 +v -1.615600 -4.475942 4.813632 +v -1.729053 -4.618680 4.854463 +v 
-1.838624 -4.828746 4.823737 +v -2.368250 -3.106237 4.868096 +v -7.542244 -1.049282 -2.431321 +v 0.000000 -1.724003 6.601390 +v -1.826614 -4.399531 4.399021 +v -1.929558 -4.411831 4.497052 +v -0.597442 -2.013686 5.866456 +v -1.405627 -1.714196 5.241087 +v -0.662449 -1.819321 5.863759 +v -2.342340 0.572222 4.294303 +v -3.327324 0.104863 4.113860 +v -1.726175 -0.919165 5.273355 +v -5.133204 7.485602 2.660442 +v -4.538641 6.319907 3.683424 +v -3.986562 5.109487 4.466315 +v -2.169681 -5.440433 4.455874 +v -1.395634 5.011963 5.316032 +v -1.619500 6.599217 4.921106 +v -1.891399 8.236377 4.274997 +v -4.195832 2.235205 3.375099 +v -5.733342 1.411738 2.431726 +v -1.859887 2.355757 3.843181 +v -4.988612 3.074654 3.083858 +v -1.303263 1.416453 4.831091 +v -1.305757 -0.672779 6.415959 +v -6.465170 0.937119 1.689873 +v -5.258659 0.945811 2.974312 +v -4.432338 0.722096 3.522615 +v -3.300681 0.861641 3.872784 +v -2.430178 1.131492 4.039035 +v -1.820731 1.467954 4.224124 +v -0.563221 2.307693 5.566789 +v -6.338145 -0.529279 1.881175 +v -5.587698 3.208071 2.687839 +v -0.242624 -1.462857 7.071491 +v -1.611251 0.339326 4.895421 +v -7.743095 2.364999 -2.005167 +v -1.391142 1.851048 4.448999 +v -1.785794 -0.978284 4.850470 +v -4.670959 2.664461 3.084075 +v -1.333970 -0.283761 6.097047 +v -7.270895 -2.890917 -2.252455 +v -1.856432 2.585245 3.757904 +v -0.923388 0.073076 6.671944 +v -5.000589 -6.135128 1.892523 +v -5.085276 -7.178590 0.714711 +v -7.159291 -0.811820 -0.072044 +v -5.843051 -5.248023 0.924091 +v -6.847258 3.662916 0.724695 +v -2.412942 -8.258853 4.119213 +v -0.179909 -1.689864 6.573301 +v -2.103655 -0.163946 4.566119 +v -6.407571 2.236021 1.560843 +v -3.670075 2.360153 3.635230 +v -3.177186 2.294265 3.775704 +v -2.196121 -4.598322 4.479786 +v -6.234883 -1.944430 1.663542 +v -1.292924 -9.295920 4.094063 +v -3.210651 -8.533278 2.802001 +v -4.068926 -7.993109 1.925119 +v 0.000000 6.545390 5.027311 +v 0.000000 -9.403378 4.264492 +v -2.724032 2.315802 3.777151 +v -2.288460 2.398891 3.697603 +v -1.998311 2.496547 3.689148 +v -6.130040 3.399261 2.038516 +v -2.288460 2.886504 3.775031 +v -2.724032 2.961810 3.871767 +v -3.177186 2.964136 3.876973 +v -3.670075 2.927714 3.724325 +v -4.018389 2.857357 3.482983 +v -7.555811 4.106811 -0.991917 +v -4.018389 2.483695 3.440898 +v 0.000000 -2.521945 5.932265 +v -1.776217 -2.683946 5.213116 +v -1.222237 -1.182444 5.952465 +v -0.731493 -2.536683 5.815343 +v 0.000000 3.271027 5.236015 +v -4.135272 -6.996638 2.671970 +v -3.311811 -7.660815 3.382963 +v -1.313701 -8.639995 4.702456 +v -5.940524 -6.223629 -0.631468 +v -1.998311 2.743838 3.744030 +v -0.901447 1.236992 5.754256 +v 0.000000 -8.765243 4.891441 +v -2.308977 -8.974196 3.609070 +v -6.954154 -2.439843 -0.131163 +v -1.098819 -4.458788 5.120727 +v -1.181124 -4.579996 5.189564 +v -1.255818 -4.787901 5.237051 +v -1.325085 -5.106507 5.205010 +v -1.546388 -5.819392 4.757893 +v -1.953754 -4.183892 4.431713 +v -2.117802 -4.137093 4.555096 +v -2.285339 -4.051196 4.582438 +v -2.850160 -3.665720 4.484994 +v -5.278538 -2.238942 2.861224 +v -0.946709 1.907628 5.196779 +v -1.314173 3.104912 4.231404 +v -1.780000 2.860000 3.881555 +v -1.845110 -4.098880 4.247264 +v -5.436187 -4.030482 2.109852 +v -0.766444 3.182131 4.861453 +v -1.938616 -6.614410 4.521085 +v 0.000000 1.059413 6.774605 +v -0.516573 1.583572 6.148363 +v 0.000000 1.728369 6.316750 +v -1.246815 0.230297 5.681036 +v 0.000000 -7.942194 5.181173 +v 0.000000 -6.991499 5.153478 +v -0.997827 -6.930921 4.979576 +v -3.288807 -5.382514 3.795752 +v -2.311631 -1.566237 
4.590085 +v -2.680250 -6.111567 4.096152 +v -3.832928 -1.537326 4.137731 +v -2.961860 -2.274215 4.440943 +v -4.386901 -2.683286 3.643886 +v -1.217295 -7.834465 4.969286 +v -1.542374 -0.136843 5.201008 +v -3.878377 -6.041764 3.311079 +v -3.084037 -6.809842 3.814195 +v -3.747321 -4.503545 3.726453 +v -6.094129 -3.205991 1.473482 +v -4.588995 -4.728726 2.983221 +v -6.583231 -3.941269 0.070268 +v -3.492580 -3.195820 4.130198 +v -1.255543 0.802341 5.307551 +v -1.126122 -0.933602 6.538785 +v -1.443109 -1.142774 5.905127 +v -0.923043 -0.529042 7.003423 +v -1.755386 3.529117 4.327696 +v -2.632589 3.713828 4.364629 +v -3.388062 3.721976 4.309028 +v -4.075766 3.675413 4.076063 +v -4.622910 3.474691 3.646321 +v -5.171755 2.535753 2.670867 +v -7.297331 0.763172 -0.048769 +v -4.706828 1.651000 3.109532 +v -4.071712 1.476821 3.476944 +v -3.269817 1.470659 3.731945 +v -2.527572 1.617311 3.865444 +v -1.970894 1.858505 3.961782 +v -1.579543 2.097941 4.084996 +v -7.664182 0.673132 -2.435867 +v -1.397041 -1.340139 5.630378 +v -0.884838 0.658740 6.233232 +v -0.767097 -0.968035 7.077932 +v -0.460213 -1.334106 6.787447 +v -0.748618 -1.067994 6.798303 +v -1.236408 -1.585568 5.480490 +v -0.387306 -1.409990 6.957705 +v -0.319925 -1.607931 6.508676 +v -1.639633 2.556298 3.863736 +v -1.255645 2.467144 4.203800 +v -1.031362 2.382663 4.615849 +v -4.253081 2.772296 3.315305 +v -4.530000 2.910000 3.339685 +v 0.463928 0.955357 6.633583 +v 4.253081 2.577646 3.279702 +v 0.416106 -1.466449 6.447657 +v 7.087960 5.434801 0.099620 +v 2.628639 2.035898 3.848121 +v 3.198363 1.985815 3.796952 +v 3.775151 2.039402 3.646194 +v 4.465819 2.422950 3.155168 +v 2.164289 2.189867 3.851822 +v 3.208229 3.223926 4.115822 +v 2.673803 3.205337 4.092203 +v 3.745193 3.165286 3.972409 +v 4.161018 3.059069 3.719554 +v 5.062006 1.934418 2.776093 +v 2.266659 -7.425768 4.389812 +v 4.445859 2.663991 3.173422 +v 7.214530 2.263009 0.073150 +v 5.799793 2.349546 2.204059 +v 2.844939 -0.720868 4.433130 +v 0.711452 -3.329355 5.877044 +v 0.606033 -3.924562 5.444923 +v 1.431615 -3.500953 5.496189 +v 1.914910 -3.803146 5.028930 +v 1.131043 -3.973937 5.189648 +v 1.563548 -4.082763 4.842263 +v 2.650112 -5.003649 4.188483 +v 0.427049 -1.094134 7.360529 +v 0.496396 -0.475659 7.440358 +v 5.253307 3.881582 3.363159 +v 1.718698 0.974609 4.558359 +v 1.608635 -0.942516 5.814193 +v 1.651267 -0.610868 5.581319 +v 4.765501 -0.701554 3.534632 +v 0.478306 0.295766 7.101013 +v 3.734964 4.508230 4.550454 +v 4.588603 4.302037 4.048484 +v 6.279331 6.615427 1.425850 +v 1.220941 4.142165 5.106035 +v 2.193489 3.100317 4.000575 +v 3.102642 -4.352984 4.095905 +v 6.719682 -4.788645 -1.745401 +v 1.193824 -1.306795 5.737747 +v 0.729766 -1.593712 5.833208 +v 2.456206 -4.342621 4.283884 +v 2.204823 -4.304508 4.162499 +v 4.985894 4.802461 3.751977 +v 1.592294 -1.257709 5.456949 +v 2.644548 4.524654 4.921559 +v 2.760292 5.100971 5.015990 +v 3.523964 8.005976 3.729163 +v 5.599763 5.715470 2.724259 +v 3.063932 6.566144 4.529981 +v 5.720968 4.254584 2.830852 +v 6.374393 4.785590 1.591691 +v 0.672728 -3.688016 5.737804 +v 1.262560 -3.787691 5.417779 +v 1.732553 -3.952767 5.000579 +v 1.043625 -1.464973 5.662455 +v 2.321234 -4.329069 4.258156 +v 2.056846 -4.477671 4.520883 +v 2.153084 -4.276322 4.038093 +v 0.946874 -1.035249 6.512274 +v 1.469132 -4.036351 4.604908 +v 1.024340 -3.989851 4.926693 +v 0.533422 -3.993222 5.138202 +v 0.769720 -6.095394 4.985883 +v 0.699606 -5.291850 5.448304 +v 0.669687 -4.949770 5.509612 +v 0.630947 -4.695101 5.449371 +v 0.583218 -4.517982 5.339869 +v 1.537170 
-4.423206 4.745470 +v 1.615600 -4.475942 4.813632 +v 1.729053 -4.618680 4.854463 +v 1.838624 -4.828746 4.823737 +v 2.368250 -3.106237 4.868096 +v 7.542244 -1.049282 -2.431321 +v 1.826614 -4.399531 4.399021 +v 1.929558 -4.411831 4.497052 +v 0.597442 -2.013686 5.866456 +v 1.405627 -1.714196 5.241087 +v 0.662449 -1.819321 5.863759 +v 2.342340 0.572222 4.294303 +v 3.327324 0.104863 4.113860 +v 1.726175 -0.919165 5.273355 +v 5.133204 7.485602 2.660442 +v 4.538641 6.319907 3.683424 +v 3.986562 5.109487 4.466315 +v 2.169681 -5.440433 4.455874 +v 1.395634 5.011963 5.316032 +v 1.619500 6.599217 4.921106 +v 1.891399 8.236377 4.274997 +v 4.195832 2.235205 3.375099 +v 5.733342 1.411738 2.431726 +v 1.859887 2.355757 3.843181 +v 4.988612 3.074654 3.083858 +v 1.303263 1.416453 4.831091 +v 1.305757 -0.672779 6.415959 +v 6.465170 0.937119 1.689873 +v 5.258659 0.945811 2.974312 +v 4.432338 0.722096 3.522615 +v 3.300681 0.861641 3.872784 +v 2.430178 1.131492 4.039035 +v 1.820731 1.467954 4.224124 +v 0.563221 2.307693 5.566789 +v 6.338145 -0.529279 1.881175 +v 5.587698 3.208071 2.687839 +v 0.242624 -1.462857 7.071491 +v 1.611251 0.339326 4.895421 +v 7.743095 2.364999 -2.005167 +v 1.391142 1.851048 4.448999 +v 1.785794 -0.978284 4.850470 +v 4.670959 2.664461 3.084075 +v 1.333970 -0.283761 6.097047 +v 7.270895 -2.890917 -2.252455 +v 1.856432 2.585245 3.757904 +v 0.923388 0.073076 6.671944 +v 5.000589 -6.135128 1.892523 +v 5.085276 -7.178590 0.714711 +v 7.159291 -0.811820 -0.072044 +v 5.843051 -5.248023 0.924091 +v 6.847258 3.662916 0.724695 +v 2.412942 -8.258853 4.119213 +v 0.179909 -1.689864 6.573301 +v 2.103655 -0.163946 4.566119 +v 6.407571 2.236021 1.560843 +v 3.670075 2.360153 3.635230 +v 3.177186 2.294265 3.775704 +v 2.196121 -4.598322 4.479786 +v 6.234883 -1.944430 1.663542 +v 1.292924 -9.295920 4.094063 +v 3.210651 -8.533278 2.802001 +v 4.068926 -7.993109 1.925119 +v 2.724032 2.315802 3.777151 +v 2.288460 2.398891 3.697603 +v 1.998311 2.496547 3.689148 +v 6.130040 3.399261 2.038516 +v 2.288460 2.886504 3.775031 +v 2.724032 2.961810 3.871767 +v 3.177186 2.964136 3.876973 +v 3.670075 2.927714 3.724325 +v 4.018389 2.857357 3.482983 +v 7.555811 4.106811 -0.991917 +v 4.018389 2.483695 3.440898 +v 1.776217 -2.683946 5.213116 +v 1.222237 -1.182444 5.952465 +v 0.731493 -2.536683 5.815343 +v 4.135272 -6.996638 2.671970 +v 3.311811 -7.660815 3.382963 +v 1.313701 -8.639995 4.702456 +v 5.940524 -6.223629 -0.631468 +v 1.998311 2.743838 3.744030 +v 0.901447 1.236992 5.754256 +v 2.308977 -8.974196 3.609070 +v 6.954154 -2.439843 -0.131163 +v 1.098819 -4.458788 5.120727 +v 1.181124 -4.579996 5.189564 +v 1.255818 -4.787901 5.237051 +v 1.325085 -5.106507 5.205010 +v 1.546388 -5.819392 4.757893 +v 1.953754 -4.183892 4.431713 +v 2.117802 -4.137093 4.555096 +v 2.285339 -4.051196 4.582438 +v 2.850160 -3.665720 4.484994 +v 5.278538 -2.238942 2.861224 +v 0.946709 1.907628 5.196779 +v 1.314173 3.104912 4.231404 +v 1.780000 2.860000 3.881555 +v 1.845110 -4.098880 4.247264 +v 5.436187 -4.030482 2.109852 +v 0.766444 3.182131 4.861453 +v 1.938616 -6.614410 4.521085 +v 0.516573 1.583572 6.148363 +v 1.246815 0.230297 5.681036 +v 0.997827 -6.930921 4.979576 +v 3.288807 -5.382514 3.795752 +v 2.311631 -1.566237 4.590085 +v 2.680250 -6.111567 4.096152 +v 3.832928 -1.537326 4.137731 +v 2.961860 -2.274215 4.440943 +v 4.386901 -2.683286 3.643886 +v 1.217295 -7.834465 4.969286 +v 1.542374 -0.136843 5.201008 +v 3.878377 -6.041764 3.311079 +v 3.084037 -6.809842 3.814195 +v 3.747321 -4.503545 3.726453 +v 6.094129 -3.205991 1.473482 +v 4.588995 
-4.728726 2.983221 +v 6.583231 -3.941269 0.070268 +v 3.492580 -3.195820 4.130198 +v 1.255543 0.802341 5.307551 +v 1.126122 -0.933602 6.538785 +v 1.443109 -1.142774 5.905127 +v 0.923043 -0.529042 7.003423 +v 1.755386 3.529117 4.327696 +v 2.632589 3.713828 4.364629 +v 3.388062 3.721976 4.309028 +v 4.075766 3.675413 4.076063 +v 4.622910 3.474691 3.646321 +v 5.171755 2.535753 2.670867 +v 7.297331 0.763172 -0.048769 +v 4.706828 1.651000 3.109532 +v 4.071712 1.476821 3.476944 +v 3.269817 1.470659 3.731945 +v 2.527572 1.617311 3.865444 +v 1.970894 1.858505 3.961782 +v 1.579543 2.097941 4.084996 +v 7.664182 0.673132 -2.435867 +v 1.397041 -1.340139 5.630378 +v 0.884838 0.658740 6.233232 +v 0.767097 -0.968035 7.077932 +v 0.460213 -1.334106 6.787447 +v 0.748618 -1.067994 6.798303 +v 1.236408 -1.585568 5.480490 +v 0.387306 -1.409990 6.957705 +v 0.319925 -1.607931 6.508676 +v 1.639633 2.556298 3.863736 +v 1.255645 2.467144 4.203800 +v 1.031362 2.382663 4.615849 +v 4.253081 2.772296 3.315305 +v 4.530000 2.910000 3.339685 +v -3.18175 2.635786 3.826339 +v -2.58175 2.635786 3.824459 +v -3.18175 3.235786 3.876973 +v -3.78175 2.635786 3.679778 +v -3.18175 2.035786 3.775704 +v 3.181751 2.635786 3.826339 +v 3.781751 2.635786 3.679777 +v 3.181751 3.235786 3.876973 +v 2.581751 2.635786 3.824459 +v 3.181751 2.035786 3.775704 +vt 0.427942 0.304722 +vt 0.526878 0.295374 +vt 0.444832 0.269206 +vt 0.607600 0.322297 +vt 0.377046 0.677222 +vt 0.473033 0.304722 +vt 0.526913 0.282143 +vt 0.447112 0.284192 +vt 0.599262 0.318931 +vt 0.414712 0.664780 +vt 0.473122 0.295374 +vt 0.527671 0.263774 +vt 0.448020 0.295368 +vt 0.593203 0.314324 +vt 0.467288 0.470075 +vt 0.473087 0.282143 +vt 0.534090 0.220859 +vt 0.448662 0.304722 +vt 0.569944 0.232965 +vt 0.437114 0.441104 +vt 0.472329 0.263774 +vt 0.524613 0.307634 +vt 0.114210 0.384978 +vt 0.555168 0.269206 +vt 0.455528 0.451377 +vt 0.465828 0.220810 +vt 0.547818 0.307634 +vt 0.375437 0.075808 +vt 0.552888 0.284192 +vt 0.429884 0.533478 +vt 0.475387 0.307634 +vt 0.568842 0.307634 +vt 0.499877 0.091010 +vt 0.551980 0.295368 +vt 0.336768 0.355267 +vt 0.452182 0.307634 +vt 0.539958 0.442861 +vt 0.455607 0.548199 +vt 0.551338 0.304722 +vt 0.133823 0.317299 +vt 0.431158 0.307634 +vt 0.596371 0.306047 +vt 0.408772 0.626106 +vt 0.885770 0.384971 +vt 0.279777 0.285342 +vt 0.460042 0.442861 +vt 0.596961 0.293460 +vt 0.128294 0.208059 +vt 0.624563 0.075808 +vt 0.189096 0.353700 +vt 0.403629 0.306047 +vt 0.611897 0.306039 +vt 0.440512 0.097581 +vt 0.544341 0.548416 +vt 0.324548 0.296007 +vt 0.403039 0.293460 +vt 0.554692 0.419934 +vt 0.335279 0.147180 +vt 0.591234 0.626106 +vt 0.354128 0.187447 +vt 0.388103 0.306039 +vt 0.577238 0.326110 +vt 0.288719 0.180054 +vt 0.871706 0.208059 +vt 0.445308 0.419934 +vt 0.553172 0.331473 +vt 0.499923 0.648476 +vt 0.559100 0.097368 +vt 0.422762 0.326110 +vt 0.527121 0.333802 +vt 0.465844 0.379359 +vt 0.664630 0.147129 +vt 0.446828 0.331473 +vt 0.826722 0.721245 +vt 0.445682 0.433923 +vt 0.711218 0.180025 +vt 0.472879 0.333802 +vt 0.770391 0.700444 +vt 0.415838 0.375804 +vt 0.534154 0.379360 +vt 0.173287 0.721252 +vt 0.635536 0.810751 +vt 0.499988 0.381566 +vt 0.554318 0.433923 +vt 0.229622 0.700459 +vt 0.770092 0.767979 +vt 0.301415 0.612551 +vt 0.584177 0.375893 +vt 0.364501 0.810886 +vt 0.668509 0.880086 +vt 0.058133 0.680924 +vt 0.698585 0.612551 +vt 0.229924 0.767997 +vt 0.616907 0.744114 +vt 0.301415 0.636844 +vt 0.941867 0.680924 +vt 0.331431 0.880286 +vt 0.614083 0.718613 +vt 0.318785 0.641660 +vt 0.698585 0.636844 +vt 0.383103 0.744160 +vt 
0.577414 0.436833 +vt 0.343364 0.644643 +vt 0.681215 0.641660 +vt 0.385919 0.718636 +vt 0.722943 0.728037 +vt 0.365962 0.644029 +vt 0.656636 0.644643 +vt 0.422552 0.436767 +vt 0.607591 0.305797 +vt 0.388665 0.637716 +vt 0.634038 0.644029 +vt 0.277076 0.728068 +vt 0.618026 0.305289 +vt 0.194993 0.657898 +vt 0.611335 0.637716 +vt 0.392389 0.305797 +vt 0.542902 0.415208 +vt 0.410373 0.608920 +vt 0.805016 0.657892 +vt 0.381974 0.305289 +vt 0.557261 0.427174 +vt 0.393207 0.604463 +vt 0.589660 0.608938 +vt 0.457098 0.415208 +vt 0.932695 0.269895 +vt 0.366170 0.601178 +vt 0.606793 0.604463 +vt 0.442739 0.427174 +vt 0.645429 0.303293 +vt 0.499977 0.045547 +vt 0.633830 0.601178 +vt 0.067305 0.269895 +vt 0.607610 0.646112 +vt 0.500023 0.809424 +vt 0.733752 0.130299 +vt 0.354490 0.303216 +vt 0.552386 0.697432 +vt 0.266248 0.130299 +vt 0.681008 0.101715 +vt 0.392390 0.646112 +vt 0.830705 0.806186 +vt 0.318993 0.101715 +vt 0.568013 0.055435 +vt 0.447580 0.697390 +vt 0.703624 0.706729 +vt 0.430987 0.055935 +vt 0.812086 0.411461 +vt 0.169295 0.806186 +vt 0.662801 0.717082 +vt 0.187885 0.411462 +vt 0.603900 0.289783 +vt 0.296392 0.706757 +vt 0.516446 0.500361 +vt 0.396100 0.289783 +vt 0.656636 0.599403 +vt 0.337212 0.717117 +vt 0.723330 0.636627 +vt 0.723087 0.467946 +vt 0.343364 0.599403 +vt 0.681215 0.603765 +vt 0.483370 0.500413 +vt 0.710288 0.631747 +vt 0.578632 0.466377 +vt 0.318785 0.603765 +vt 0.825608 0.602325 +vt 0.276896 0.467943 +vt 0.549756 0.600249 +vt 0.570338 0.451425 +vt 0.174399 0.602329 +vt 0.617942 0.491684 +vt 0.421352 0.466259 +vt 0.560698 0.604668 +vt 0.598631 0.545021 +vt 0.382385 0.491427 +vt 0.508953 0.420562 +vt 0.429819 0.451385 +vt 0.573595 0.610193 +vt 0.742247 0.685493 +vt 0.490967 0.420622 +vt 0.614074 0.116754 +vt 0.401223 0.544828 +vt 0.517472 0.422123 +vt 0.515097 0.472748 +vt 0.385764 0.116846 +vt 0.865595 0.666313 +vt 0.257765 0.685510 +vt 0.516311 0.436946 +vt 0.513050 0.452718 +vt 0.134410 0.666317 +vt 0.816351 0.259740 +vt 0.485301 0.472605 +vt 0.566036 0.417671 +vt 0.624852 0.271901 +vt 0.183610 0.259743 +vt 0.892441 0.459239 +vt 0.486717 0.452371 +vt 0.531529 0.444943 +vt 0.571228 0.317308 +vt 0.107550 0.459245 +vt 0.801779 0.168062 +vt 0.374971 0.272195 +vt 0.523913 0.436170 +vt 0.549626 0.319139 +vt 0.198221 0.168062 +vt 0.760966 0.220247 +vt 0.428771 0.317309 +vt 0.526564 0.453882 +vt 0.585384 0.333459 +vt 0.238979 0.220255 +vt 0.537728 0.494615 +vt 0.450374 0.319139 +vt 0.541366 0.521101 +vt 0.560215 0.342771 +vt 0.462783 0.494253 +vt 0.580985 0.612840 +vt 0.414617 0.333459 +vt 0.567192 0.430580 +vt 0.525850 0.319809 +vt 0.419054 0.612845 +vt 0.967686 0.355643 +vt 0.439785 0.342771 +vt 0.992440 0.519223 +vt 0.528249 0.349596 +vt 0.032314 0.355643 +vt 0.560611 0.480983 +vt 0.474155 0.319808 +vt 0.579658 0.590055 +vt 0.643998 0.465512 +vt 0.439121 0.481042 +vt 0.733530 0.623023 +vt 0.471751 0.349596 +vt 0.603876 0.583413 +vt 0.790082 0.608646 +vt 0.266470 0.623023 +vt 0.602995 0.451312 +vt 0.355808 0.465594 +vt 0.633505 0.573912 +vt 0.893693 0.600040 +vt 0.396993 0.451203 +vt 0.573500 0.580000 +vt 0.209925 0.608647 +vt 0.666525 0.566134 +vt 0.719902 0.624400 +vt 0.426243 0.579569 +vt 0.980531 0.598436 +vt 0.106310 0.600044 +vt 0.702114 0.566837 +vt 0.602918 0.157137 +vt 0.019469 0.598436 +vt 0.595293 0.514976 +vt 0.280098 0.624400 +vt 0.732392 0.575453 +vt 0.752212 0.589195 +vt 0.404670 0.514867 +vt 0.509127 0.437282 +vt 0.396889 0.157245 +vt 0.897013 0.531231 +vt 0.702097 0.646409 +vt 0.490726 0.437599 +vt 0.771046 0.651041 +vt 0.247792 0.589190 +vt 0.758757 
0.617213 +vt 0.680678 0.652735 +vt 0.228962 0.651049 +vt 0.810748 0.476074 +vt 0.297903 0.646409 +vt 0.716482 0.666799 +vt 0.629906 0.653924 +vt 0.189241 0.476076 +vt 0.523481 0.594373 +vt 0.319322 0.652735 +vt 0.687132 0.677654 +vt 0.654766 0.655989 +vt 0.476410 0.594194 +vt 0.600862 0.567527 +vt 0.370094 0.653924 +vt 0.655896 0.679837 +vt 0.606630 0.596295 +vt 0.398964 0.567345 +vt 0.631101 0.552846 +vt 0.345234 0.655989 +vt 0.622953 0.677221 +vt 0.725342 0.610869 +vt 0.368756 0.552793 +vt 0.667113 0.539327 +vt 0.393362 0.596294 +vt 0.585271 0.664823 +vt 0.688880 0.590540 +vt 0.332828 0.539288 +vt 0.713757 0.532373 +vt 0.274658 0.610869 +vt 0.531987 0.469860 +vt 0.661242 0.586975 +vt 0.286267 0.532325 +vt 0.752702 0.542818 +vt 0.311120 0.590540 +vt 0.562759 0.441215 +vt 0.634070 0.590424 +vt 0.247308 0.542806 +vt 0.821442 0.542444 +vt 0.313951 0.224692 +vt 0.338758 0.586975 +vt 0.544562 0.451624 +vt 0.895093 0.745859 +vt 0.178560 0.542446 +vt 0.551868 0.463430 +vt 0.410986 0.491277 +vt 0.365930 0.590424 +vt 0.570082 0.533674 +vt 0.526227 0.426090 +vt 0.448340 0.463064 +vt 0.572156 0.562348 +vt 0.447750 0.137523 +vt 0.104907 0.745859 +vt 0.663187 0.355403 +vt 0.710288 0.619236 +vt 0.427685 0.562039 +vt 0.742870 0.644554 +vt 0.295284 0.378419 +vt 0.473773 0.426090 +vt 0.866152 0.317295 +vt 0.517862 0.528052 +vt 0.257135 0.644560 +vt 0.587247 0.601068 +vt 0.357155 0.395730 +vt 0.499816 0.437019 +vt 0.720122 0.285333 +vt 0.276670 0.636627 +vt 0.412782 0.601030 +vt 0.781070 0.564595 +vt 0.319688 0.429262 +vt 0.499968 0.218629 +vt 0.810858 0.353695 +vt 0.289712 0.631747 +vt 0.218937 0.564589 +vt 0.711045 0.601048 +vt 0.374293 0.219815 +vt 0.499977 0.262981 +vt 0.675343 0.296022 +vt 0.450067 0.599566 +vt 0.288955 0.601048 +vt 0.588166 0.890956 +vt 0.378909 0.425990 +vt 0.499977 0.280615 +vt 0.645735 0.187360 +vt 0.438999 0.603505 +vt 0.412198 0.891099 +vt 0.570304 0.812129 +vt 0.344549 0.254561 +vt 0.499977 0.294066 +vt 0.685945 0.224643 +vt 0.426450 0.610201 +vt 0.429765 0.812166 +vt 0.558266 0.738328 +vt 0.456549 0.180799 +vt 0.499977 0.304722 +vt 0.589072 0.491363 +vt 0.482483 0.422151 +vt 0.441728 0.738324 +vt 0.600409 0.250995 +vt 0.499913 0.178271 +vt 0.500023 0.307652 +vt 0.552012 0.137408 +vt 0.483518 0.437016 +vt 0.399510 0.251079 +vt 0.672684 0.743419 +vt 0.499886 0.133083 +vt 0.500016 0.320776 +vt 0.704663 0.378470 +vt 0.433991 0.417638 +vt 0.327338 0.743473 +vt 0.709250 0.798492 +vt 0.432112 0.506411 +vt 0.500023 0.333766 +vt 0.642764 0.395662 +vt 0.468472 0.444943 +vt 0.290777 0.798554 +vt 0.757824 0.852324 +vt 0.499974 0.560363 +vt 0.500023 0.892950 +vt 0.680198 0.429281 +vt 0.476088 0.436170 +vt 0.242176 0.852324 +vt 0.588354 0.453138 +vt 0.479154 0.557346 +vt 0.499987 0.730081 +vt 0.625560 0.219688 +vt 0.473466 0.454256 +vt 0.411671 0.453035 +vt 0.665586 0.504049 +vt 0.499989 0.530175 +vt 0.499955 0.687602 +vt 0.621009 0.425982 +vt 0.458639 0.520911 +vt 0.334562 0.503927 +vt 0.627543 0.526648 +vt 0.411362 0.195673 +vt 0.289712 0.619236 +vt 0.655317 0.254485 +vt 0.432949 0.430482 +vt 0.372120 0.526586 +vt 0.536915 0.406214 +vt 0.468268 0.647329 +vt 0.499523 0.598938 +vt 0.543283 0.180745 +vt 0.007561 0.519223 +vt 0.463080 0.406216 +vt 0.577268 0.414065 +vt 0.228018 0.316428 +vt 0.499910 0.501747 +vt 0.567985 0.506521 +vt 0.420121 0.589772 +vt 0.422729 0.414015 +vt 0.531915 0.398463 +vt 0.413386 0.307634 +vt 0.500151 0.472844 +vt 0.520797 0.557435 +vt 0.396012 0.583304 +vt 0.468080 0.398465 +vt 0.590372 0.298177 +vt 0.416164 0.631286 +vt 0.482113 0.528021 +vt 0.588371 0.195559 +vt 
0.366427 0.573884 +vt 0.409626 0.298177 +vt 0.586800 0.304600 +vt 0.436392 0.640113 +vt 0.499974 0.397628 +vt 0.531597 0.647517 +vt 0.333434 0.566122 +vt 0.413200 0.304600 +vt 0.986046 0.439966 +vt 0.452770 0.579150 +vt 0.500026 0.452513 +vt 0.771915 0.316422 +vt 0.297879 0.566824 +vt 0.499914 0.419853 +vt 0.609945 0.360090 +vt 0.247923 0.398667 +vt 0.499977 0.347466 +vt 0.586614 0.307634 +vt 0.267612 0.575440 +vt 0.013954 0.439966 +vt 0.581691 0.279937 +vt 0.367856 0.336081 +vt 0.583841 0.631286 +vt 0.102986 0.531237 +vt 0.390095 0.360427 +vt 0.576838 0.288154 +vt 0.392400 0.322297 +vt 0.563544 0.640172 +vt 0.241246 0.617214 +vt 0.418309 0.279937 +vt 0.573521 0.296460 +vt 0.400738 0.318931 +vt 0.547226 0.579605 +vt 0.283526 0.666810 +vt 0.423162 0.288154 +vt 0.572058 0.304722 +vt 0.406787 0.314327 +vt 0.752033 0.398685 +vt 0.312876 0.677668 +vt 0.426479 0.296460 +vt 0.526967 0.304722 +vt 0.430012 0.233191 +vt 0.631938 0.336500 +vt 0.344108 0.679849 +vt 0.523494 0.653066 +vt 0.619766 0.484153 +vt 0.448126 0.441797 +vt 0.564397 0.650577 +vt 0.629396 0.4879675 +vt 0.500005 0.5319235 +vt 0.528836 0.3630495 +vt 0.601042 0.688245 +vt 0.489588 0.725148 +vt 0.626117 0.4614805 +f 174/43 156/119 134/220 +f 247/335 34/252 8/399 +f 383/124 399/59 363/216 +f 264/244 467/163 250/317 +f 309/42 416/442 325/427 +f 79/51 96/432 192/416 +f 357/246 390/96 265/239 +f 128/250 35/247 163/91 +f 369/186 265/239 390/96 +f 140/190 163/91 35/247 +f 268/224 1/441 303/70 +f 38/232 73/77 1/441 +f 12/375 303/70 1/441 +f 12/375 1/441 73/77 +f 350/281 452/238 351/276 +f 121/285 122/280 232/425 +f 453/233 351/276 452/238 +f 233/419 232/425 122/280 +f 268/224 303/70 270/214 +f 38/232 40/222 73/77 +f 304/66 270/214 303/70 +f 74/73 73/77 40/222 +f 358/241 344/313 351/276 +f 129/245 122/280 115/318 +f 278/174 351/276 344/313 +f 48/182 115/318 122/280 +f 351/276 453/233 358/241 +f 122/280 129/245 233/419 +f 454/228 358/241 453/233 +f 234/413 233/419 129/245 +f 300/82 334/373 298/90 +f 70/89 68/97 105/378 +f 333/379 298/90 334/373 +f 104/384 105/378 68/97 +f 176/33 153/131 397/68 +f 176/33 172/53 153/131 +f 378/144 397/68 153/131 +f 149/147 153/131 172/53 +f 382/128 385/116 383/124 +f 155/123 156/119 158/111 +f 399/59 383/124 385/116 +f 174/43 158/111 156/119 +f 281/159 348/291 331/391 +f 51/167 102/396 119/295 +f 349/286 331/391 348/291 +f 120/290 119/295 102/396 +f 270/214 304/66 271/209 +f 40/222 41/217 74/73 +f 305/62 271/209 304/66 +f 75/69 74/73 41/217 +f 10/387 337/355 152/135 +f 10/387 152/135 108/360 +f 338/349 152/135 337/355 +f 109/354 108/360 152/135 +f 345/307 279/169 361/226 +f 116/312 132/230 49/177 +f 280/164 361/226 279/169 +f 50/172 49/177 132/230 +f 263/249 432/346 419/424 +f 33/257 195/398 212/60 +f 425/388 419/424 432/346 +f 205/338 212/60 195/398 +f 305/62 409/9 271/209 +f 75/69 41/217 185/456 +f 410/4 271/209 409/9 +f 186/451 185/456 41/217 +f 273/199 311/32 408/14 +f 43/207 184/461 81/41 +f 416/442 408/14 311/32 +f 192/416 81/41 184/461 +f 323/439 271/209 411/467 +f 93/449 187/446 41/217 +f 410/4 411/467 271/209 +f 186/451 41/217 187/446 +f 348/291 450/248 349/286 +f 119/295 120/290 230/437 +f 451/243 349/286 450/248 +f 231/431 230/437 120/290 +f 435/328 433/340 431/352 +f 215/45 211/302 213/55 +f 423/400 431/352 433/340 +f 203/350 213/55 211/302 +f 314/17 315/12 19/333 +f 84/26 19/333 85/21 +f 18/339 19/333 315/12 +f 18/339 85/21 19/333 +f 308/47 376/152 307/52 +f 78/56 77/61 147/155 +f 292/114 307/52 376/152 +f 62/121 147/155 77/61 +f 260/264 388/104 261/259 +f 30/272 31/267 161/99 +f 389/100 261/259 
388/104 +f 162/95 161/99 31/267 +f 287/134 415/447 385/116 +f 57/141 158/111 191/422 +f 399/59 385/116 415/447 +f 174/43 191/422 158/111 +f 419/424 425/388 407/19 +f 195/398 183/466 205/338 +f 336/361 407/19 425/388 +f 107/366 205/338 183/466 +f 368/191 417/436 365/206 +f 139/195 136/210 193/410 +f 435/328 365/206 417/436 +f 215/45 193/410 136/210 +f 392/88 424/394 328/409 +f 166/79 99/414 204/344 +f 359/236 328/409 424/394 +f 130/240 204/344 99/414 +f 299/86 302/74 285/142 +f 69/93 55/149 72/81 +f 252/305 285/142 302/74 +f 22/315 72/81 55/149 +f 5/417 276/184 6/411 +f 5/417 6/411 46/192 +f 282/154 6/411 276/184 +f 52/162 46/192 6/411 +f 255/289 374/161 254/294 +f 25/297 24/303 145/165 +f 375/156 254/294 374/161 +f 146/160 145/165 24/303 +f 321/450 322/445 308/47 +f 91/459 78/56 92/454 +f 376/152 308/47 322/445 +f 147/155 92/454 78/56 +f 281/159 426/382 412/462 +f 51/167 188/440 206/332 +f 428/370 412/462 426/382 +f 208/320 206/332 188/440 +f 422/406 314/17 201/362 +f 202/356 201/362 84/26 +f 19/333 201/362 314/17 +f 19/333 84/26 201/362 +f 336/361 322/445 407/19 +f 107/366 183/466 92/454 +f 406/24 407/19 322/445 +f 182/3 92/454 183/466 +f 406/24 322/445 405/29 +f 182/3 181/8 92/454 +f 321/450 405/29 322/445 +f 91/459 92/454 181/8 +f 18/339 315/12 17/345 +f 18/339 17/345 85/21 +f 316/7 17/345 315/12 +f 86/16 85/21 17/345 +f 426/382 267/229 427/376 +f 206/332 207/326 37/237 +f 424/394 427/376 267/229 +f 204/344 37/237 207/326 +f 370/181 397/68 401/49 +f 141/185 177/28 172/53 +f 378/144 401/49 397/68 +f 149/147 172/53 177/28 +f 392/88 270/214 323/439 +f 166/79 93/449 40/222 +f 271/209 323/439 270/214 +f 41/217 40/222 93/449 +f 418/430 466/168 414/452 +f 194/404 190/428 246/341 +f 465/173 414/452 466/168 +f 245/347 246/341 190/428 +f 258/274 259/269 387/108 +f 28/282 160/103 29/277 +f 386/112 387/108 259/269 +f 159/107 29/277 160/103 +f 261/259 389/100 468/158 +f 31/267 248/329 162/95 +f 467/163 468/158 389/100 +f 247/335 162/95 248/329 +f 249/323 457/213 420/418 +f 4/423 197/386 237/395 +f 400/54 420/418 457/213 +f 175/38 237/395 197/386 +f 334/373 299/86 333/379 +f 105/378 104/384 69/93 +f 285/142 333/379 299/86 +f 55/149 69/93 104/384 +f 286/138 9/393 418/430 +f 56/145 194/404 9/393 +f 169/67 418/430 9/393 +f 169/67 9/393 194/404 +f 341/331 262/254 347/296 +f 112/336 118/300 32/262 +f 449/253 347/296 262/254 +f 229/443 32/262 118/300 +f 286/138 418/430 442/288 +f 56/145 222/10 194/404 +f 414/452 442/288 418/430 +f 190/428 194/404 222/10 +f 328/409 461/193 327/415 +f 99/414 98/420 241/371 +f 329/403 327/415 461/193 +f 100/408 241/371 98/420 +f 278/174 356/251 330/397 +f 48/182 101/402 127/255 +f 372/171 330/397 356/251 +f 143/175 127/255 101/402 +f 310/37 393/84 439/304 +f 80/46 219/25 167/75 +f 440/298 439/304 393/84 +f 220/20 167/75 219/25 +f 382/128 383/124 257/279 +f 155/123 27/287 156/119 +f 342/325 257/279 383/124 +f 113/330 156/119 27/287 +f 361/226 280/164 421/412 +f 132/230 199/374 50/172 +f 430/358 421/412 280/164 +f 210/308 50/172 199/374 +f 366/201 365/206 380/136 +f 137/205 151/139 136/210 +f 395/76 380/136 365/206 +f 170/63 136/210 151/139 +f 356/251 278/174 438/310 +f 127/255 218/30 48/182 +f 344/313 438/310 278/174 +f 115/318 48/182 218/30 +f 444/278 445/273 283/150 +f 224/468 53/157 225/463 +f 284/146 283/150 445/273 +f 54/153 225/463 53/157 +f 282/154 276/184 364/211 +f 52/162 135/215 46/192 +f 441/293 364/211 276/184 +f 221/15 46/192 135/215 +f 432/346 263/249 396/72 +f 212/60 171/58 33/257 +f 370/181 396/72 263/249 +f 141/185 33/257 171/58 +f 338/349 300/82 339/343 +f 
109/354 110/348 70/89 +f 298/90 339/343 300/82 +f 68/97 70/89 110/348 +f 336/361 274/194 322/445 +f 107/366 92/454 44/202 +f 376/152 322/445 274/194 +f 147/155 44/202 92/454 +f 349/286 451/243 350/281 +f 120/290 121/285 231/431 +f 452/238 350/281 451/243 +f 232/425 231/431 121/285 +f 468/158 360/231 343/319 +f 248/329 114/324 131/235 +f 447/263 343/319 360/231 +f 227/453 131/235 114/324 +f 283/150 284/146 335/367 +f 53/157 106/372 54/153 +f 294/106 335/367 284/146 +f 64/113 54/153 106/372 +f 251/311 459/203 463/183 +f 21/321 243/359 239/383 +f 462/188 463/183 459/203 +f 242/365 239/383 243/359 +f 277/179 354/261 301/78 +f 47/187 71/85 125/265 +f 384/120 301/78 354/261 +f 157/115 125/265 71/85 +f 326/421 293/110 325/427 +f 97/426 96/432 63/117 +f 309/42 325/427 293/110 +f 79/51 63/117 96/432 +f 284/146 277/179 294/106 +f 54/153 64/113 47/187 +f 301/78 294/106 277/179 +f 71/85 47/187 64/113 +f 448/258 265/239 346/301 +f 228/448 117/306 35/247 +f 373/166 346/301 265/239 +f 144/170 35/247 117/306 +f 353/266 346/301 347/296 +f 124/270 118/300 117/306 +f 341/331 347/296 346/301 +f 112/336 117/306 118/300 +f 2/435 20/327 275/189 +f 2/435 45/197 20/327 +f 355/256 275/189 20/327 +f 126/260 20/327 45/197 +f 249/323 282/154 457/213 +f 4/423 237/395 52/162 +f 364/211 457/213 282/154 +f 135/215 52/162 237/395 +f 426/382 427/376 428/370 +f 206/332 208/320 207/326 +f 437/316 428/370 427/376 +f 217/35 207/326 208/320 +f 381/132 382/128 253/299 +f 154/127 23/309 155/123 +f 257/279 253/299 382/128 +f 27/287 155/123 23/309 +f 392/88 394/80 270/214 +f 166/79 40/222 168/71 +f 268/224 270/214 394/80 +f 38/232 168/71 40/222 +f 200/368 429/364 201/362 +f 200/368 201/362 209/314 +f 422/406 201/362 429/364 +f 202/356 209/314 201/362 +f 331/391 330/397 267/229 +f 102/396 37/237 101/402 +f 372/171 267/229 330/397 +f 143/175 101/402 37/237 +f 423/400 433/340 274/194 +f 203/350 44/202 213/55 +f 288/130 274/194 433/340 +f 58/137 213/55 44/202 +f 291/118 251/311 329/403 +f 61/125 100/408 21/321 +f 463/183 329/403 251/311 +f 243/359 21/321 100/408 +f 259/269 287/134 386/112 +f 29/277 159/107 57/141 +f 385/116 386/112 287/134 +f 158/111 57/141 159/107 +f 343/319 447/263 354/261 +f 114/324 125/265 227/453 +f 266/234 354/261 447/263 +f 36/242 227/453 125/265 +f 258/274 387/108 260/264 +f 28/282 30/272 160/103 +f 388/104 260/264 387/108 +f 161/99 160/103 30/272 +f 431/352 423/400 432/346 +f 211/302 212/60 203/350 +f 425/388 432/346 423/400 +f 205/338 203/350 212/60 +f 446/268 343/319 277/179 +f 226/458 47/187 114/324 +f 354/261 277/179 343/319 +f 125/265 114/324 47/187 +f 425/388 423/400 336/361 +f 205/338 107/366 203/350 +f 274/194 336/361 423/400 +f 44/202 203/350 107/366 +f 307/52 293/110 308/47 +f 77/61 78/56 63/117 +f 326/421 308/47 293/110 +f 97/426 63/117 78/56 +f 367/196 448/258 353/266 +f 138/200 124/270 228/448 +f 346/301 353/266 448/258 +f 117/306 228/448 124/270 +f 303/70 269/219 304/66 +f 73/77 74/73 39/227 +f 272/204 304/66 269/219 +f 42/212 39/227 74/73 +f 372/171 359/236 267/229 +f 143/175 37/237 130/240 +f 424/394 267/229 359/236 +f 204/344 130/240 37/237 +f 328/409 295/102 461/193 +f 99/414 241/371 65/109 +f 456/218 461/193 295/102 +f 236/401 65/109 241/371 +f 295/102 332/385 279/169 +f 65/109 49/177 103/390 +f 280/164 279/169 332/385 +f 50/172 103/390 49/177 +f 304/66 272/204 305/62 +f 74/73 75/69 42/212 +f 273/199 305/62 272/204 +f 43/207 42/212 75/69 +f 428/370 437/316 435/328 +f 208/320 215/45 217/35 +f 433/340 435/328 437/316 +f 213/55 217/35 215/45 +f 305/62 273/199 409/9 +f 75/69 185/456 43/207 +f 
408/14 409/9 273/199 +f 184/461 43/207 185/456 +f 395/76 431/352 396/72 +f 170/63 171/58 211/302 +f 432/346 396/72 431/352 +f 212/60 211/302 171/58 +f 396/72 370/181 379/140 +f 171/58 150/143 141/185 +f 401/49 379/140 370/181 +f 177/28 141/185 150/143 +f 297/94 335/367 300/82 +f 67/101 70/89 106/372 +f 334/373 300/82 335/367 +f 105/378 106/372 70/89 +f 418/430 169/67 352/271 +f 194/404 123/275 169/67 +f 7/405 352/271 169/67 +f 7/405 169/67 123/275 +f 281/159 412/462 353/266 +f 51/167 124/270 188/440 +f 377/148 353/266 412/462 +f 148/151 188/440 124/270 +f 320/455 321/450 326/421 +f 90/464 97/426 91/459 +f 308/47 326/421 321/450 +f 78/56 91/459 97/426 +f 286/138 296/98 337/355 +f 56/145 108/360 66/105 +f 297/94 337/355 296/98 +f 67/101 66/105 108/360 +f 405/29 321/450 404/34 +f 181/8 180/13 91/459 +f 320/455 404/34 321/450 +f 90/464 91/459 180/13 +f 331/391 349/286 330/397 +f 102/396 101/402 120/290 +f 350/281 330/397 349/286 +f 121/285 120/290 101/402 +f 335/367 294/106 334/373 +f 106/372 105/378 64/113 +f 299/86 334/373 294/106 +f 69/93 64/113 105/378 +f 324/433 455/223 367/196 +f 94/444 138/200 235/407 +f 448/258 367/196 455/223 +f 228/448 235/407 138/200 +f 17/345 316/7 16/351 +f 17/345 16/351 86/16 +f 317/2 16/351 316/7 +f 87/11 86/16 16/351 +f 430/358 280/164 359/236 +f 210/308 130/240 50/172 +f 332/385 359/236 280/164 +f 103/390 50/172 130/240 +f 16/351 317/2 15/357 +f 16/351 15/357 87/11 +f 318/465 15/357 317/2 +f 88/6 87/11 15/357 +f 9/393 286/138 10/387 +f 9/393 10/387 56/145 +f 337/355 10/387 286/138 +f 108/360 56/145 10/387 +f 330/397 350/281 278/174 +f 101/402 48/182 121/285 +f 351/276 278/174 350/281 +f 122/280 121/285 48/182 +f 253/299 254/294 381/132 +f 23/309 154/127 24/303 +f 375/156 381/132 254/294 +f 146/160 24/303 154/127 +f 403/39 404/34 319/460 +f 179/18 89/1 180/13 +f 320/455 319/460 404/34 +f 90/464 180/13 89/1 +f 352/271 7/405 420/418 +f 123/275 197/386 7/405 +f 198/380 420/418 7/405 +f 198/380 7/405 197/386 +f 325/427 319/460 326/421 +f 96/432 97/426 89/1 +f 320/455 326/421 319/460 +f 90/464 89/1 97/426 +f 398/64 368/191 366/201 +f 173/48 137/205 139/195 +f 365/206 366/201 368/191 +f 136/210 139/195 137/205 +f 289/126 436/322 398/64 +f 59/133 173/48 216/40 +f 368/191 398/64 436/322 +f 139/195 216/40 173/48 +f 439/304 440/298 345/307 +f 219/25 116/312 220/20 +f 279/169 345/307 440/298 +f 49/177 220/20 116/312 +f 272/204 312/27 273/199 +f 42/212 43/207 82/36 +f 311/32 273/199 312/27 +f 81/41 82/36 43/207 +f 6/411 282/154 196/392 +f 6/411 196/392 52/162 +f 249/323 196/392 282/154 +f 4/423 52/162 196/392 +f 274/194 288/130 376/152 +f 44/202 147/155 58/137 +f 292/114 376/152 288/130 +f 62/121 58/137 147/155 +f 397/68 429/364 176/33 +f 172/53 176/33 209/314 +f 200/368 176/33 429/364 +f 200/368 209/314 176/33 +f 269/219 313/22 272/204 +f 39/227 42/212 83/31 +f 312/27 272/204 313/22 +f 82/36 83/31 42/212 +f 445/273 446/268 284/146 +f 225/463 54/153 226/458 +f 277/179 284/146 446/268 +f 47/187 226/458 54/153 +f 255/289 340/337 374/161 +f 25/297 145/165 111/342 +f 391/92 374/161 340/337 +f 164/87 111/342 145/165 +f 296/98 283/150 297/94 +f 66/105 67/101 53/157 +f 335/367 297/94 283/150 +f 106/372 53/157 67/101 +f 347/296 449/253 348/291 +f 118/300 119/295 229/443 +f 450/248 348/291 449/253 +f 230/437 229/443 119/295 +f 455/223 357/246 448/258 +f 235/407 228/448 128/250 +f 265/239 448/258 357/246 +f 35/247 128/250 228/448 +f 337/355 297/94 338/349 +f 108/360 109/354 67/101 +f 300/82 338/349 297/94 +f 70/89 67/101 109/354 +f 152/135 338/349 11/381 +f 152/135 11/381 109/354 +f 
339/343 11/381 338/349 +f 110/348 109/354 11/381 +f 279/169 440/298 295/102 +f 49/177 65/109 220/20 +f 456/218 295/102 440/298 +f 236/401 220/20 65/109 +f 408/14 416/442 293/110 +f 184/461 63/117 192/416 +f 309/42 293/110 416/442 +f 79/51 192/416 63/117 +f 359/236 372/171 430/358 +f 130/240 210/308 143/175 +f 356/251 430/358 372/171 +f 127/255 143/175 210/308 +f 346/301 373/166 341/331 +f 117/306 112/336 144/170 +f 266/234 341/331 373/166 +f 36/242 144/170 112/336 +f 389/100 391/92 467/163 +f 162/95 247/335 164/87 +f 250/317 467/163 391/92 +f 8/399 164/87 247/335 +f 353/266 347/296 281/159 +f 124/270 51/167 118/300 +f 348/291 281/159 347/296 +f 119/295 118/300 51/167 +f 296/98 443/283 283/150 +f 66/105 53/157 223/5 +f 444/278 283/150 443/283 +f 224/468 223/5 53/157 +f 20/327 95/438 355/256 +f 20/327 126/260 95/438 +f 371/176 355/256 95/438 +f 142/180 95/438 126/260 +f 296/98 286/138 443/283 +f 66/105 223/5 56/145 +f 442/288 443/283 286/138 +f 222/10 56/145 223/5 +f 420/418 198/380 249/323 +f 197/386 4/423 198/380 +f 196/392 249/323 198/380 +f 196/392 198/380 4/423 +f 360/231 264/244 256/284 +f 131/235 26/292 34/252 +f 250/317 256/284 264/244 +f 8/399 34/252 26/292 +f 276/184 275/189 441/293 +f 46/192 221/15 45/197 +f 458/208 441/293 275/189 +f 238/389 45/197 221/15 +f 301/78 384/120 302/74 +f 71/85 72/81 157/115 +f 369/186 302/74 384/120 +f 140/190 157/115 72/81 +f 418/430 352/271 466/168 +f 194/404 246/341 123/275 +f 413/457 466/168 352/271 +f 189/434 123/275 246/341 +f 467/163 264/244 468/158 +f 247/335 248/329 34/252 +f 360/231 468/158 264/244 +f 131/235 34/252 248/329 +f 390/96 252/305 369/186 +f 163/91 140/190 22/315 +f 302/74 369/186 252/305 +f 72/81 22/315 140/190 +f 375/156 387/108 381/132 +f 146/160 154/127 160/103 +f 386/112 381/132 387/108 +f 159/107 160/103 154/127 +f 380/136 395/76 379/140 +f 151/139 150/143 170/63 +f 396/72 379/140 395/76 +f 171/58 170/63 150/143 +f 352/271 420/418 413/457 +f 123/275 189/434 197/386 +f 400/54 413/457 420/418 +f 175/38 197/386 189/434 +f 427/376 323/439 437/316 +f 207/326 217/35 93/449 +f 411/467 437/316 323/439 +f 187/446 93/449 217/35 +f 388/104 374/161 389/100 +f 161/99 162/95 145/165 +f 391/92 389/100 374/161 +f 164/87 145/165 162/95 +f 394/80 327/415 165/83 +f 168/71 165/83 98/420 +f 3/429 165/83 327/415 +f 3/429 98/420 165/83 +f 355/256 371/176 462/188 +f 126/260 242/365 142/180 +f 463/183 462/188 371/176 +f 243/359 142/180 242/365 +f 1/441 268/224 165/83 +f 1/441 165/83 38/232 +f 394/80 165/83 268/224 +f 168/71 38/232 165/83 +f 12/375 13/369 303/70 +f 12/375 73/77 13/369 +f 269/219 303/70 13/369 +f 39/227 13/369 73/77 +f 387/108 375/156 388/104 +f 160/103 161/99 146/160 +f 374/161 388/104 375/156 +f 145/165 146/160 161/99 +f 13/369 14/363 269/219 +f 13/369 39/227 14/363 +f 313/22 269/219 14/363 +f 83/31 14/363 39/227 +f 294/106 301/78 299/86 +f 64/113 69/93 71/85 +f 302/74 299/86 301/78 +f 72/81 71/85 69/93 +f 341/331 266/234 262/254 +f 112/336 32/262 36/242 +f 447/263 262/254 266/234 +f 227/453 36/242 32/262 +f 381/132 386/112 382/128 +f 154/127 155/123 159/107 +f 385/116 382/128 386/112 +f 158/111 159/107 155/123 +f 281/159 331/391 426/382 +f 51/167 206/332 102/396 +f 267/229 426/382 331/391 +f 37/237 102/396 206/332 +f 424/394 392/88 427/376 +f 204/344 207/326 166/79 +f 323/439 427/376 392/88 +f 93/449 166/79 207/326 +f 430/358 356/251 421/412 +f 210/308 199/374 127/255 +f 438/310 421/412 356/251 +f 218/30 127/255 199/374 +f 392/88 328/409 394/80 +f 166/79 168/71 99/414 +f 327/415 394/80 328/409 +f 98/420 99/414 168/71 +f 458/208 
439/304 441/293 +f 238/389 221/15 219/25 +f 345/307 441/293 439/304 +f 116/312 219/25 221/15 +f 383/124 363/216 342/325 +f 156/119 113/330 134/220 +f 464/178 342/325 363/216 +f 244/353 134/220 113/330 +f 458/208 462/188 460/198 +f 238/389 240/377 242/365 +f 459/203 460/198 462/188 +f 239/383 242/365 240/377 +f 435/328 431/352 365/206 +f 215/45 136/210 211/302 +f 395/76 365/206 431/352 +f 170/63 211/302 136/210 +f 415/447 464/178 399/59 +f 191/422 174/43 244/353 +f 363/216 399/59 464/178 +f 134/220 244/353 174/43 +f 263/249 429/364 370/181 +f 33/257 141/185 209/314 +f 397/68 370/181 429/364 +f 172/53 209/314 141/185 +f 458/208 275/189 462/188 +f 238/389 242/365 45/197 +f 355/256 462/188 275/189 +f 126/260 45/197 242/365 +f 317/2 404/34 318/465 +f 87/11 88/6 180/13 +f 403/39 318/465 404/34 +f 179/18 180/13 88/6 +f 316/7 405/29 317/2 +f 86/16 87/11 181/8 +f 404/34 317/2 405/29 +f 180/13 181/8 87/11 +f 315/12 406/24 316/7 +f 85/21 86/16 182/3 +f 405/29 316/7 406/24 +f 181/8 182/3 86/16 +f 314/17 407/19 315/12 +f 84/26 85/21 183/466 +f 406/24 315/12 407/19 +f 182/3 183/466 85/21 +f 419/424 407/19 422/406 +f 195/398 202/356 183/466 +f 314/17 422/406 407/19 +f 84/26 183/466 202/356 +f 367/196 402/44 324/433 +f 138/200 94/444 178/23 +f 362/221 324/433 402/44 +f 133/225 178/23 94/444 +f 409/9 408/14 307/52 +f 185/456 77/61 184/461 +f 293/110 307/52 408/14 +f 63/117 184/461 77/61 +f 409/9 307/52 410/4 +f 185/456 186/451 77/61 +f 292/114 410/4 307/52 +f 62/121 77/61 186/451 +f 411/467 410/4 288/130 +f 187/446 58/137 186/451 +f 292/114 288/130 410/4 +f 62/121 186/451 58/137 +f 437/316 411/467 433/340 +f 217/35 213/55 187/446 +f 288/130 433/340 411/467 +f 58/137 187/446 213/55 +f 435/328 417/436 428/370 +f 215/45 208/320 193/410 +f 412/462 428/370 417/436 +f 188/440 193/410 208/320 +f 265/239 369/186 373/166 +f 35/247 144/170 140/190 +f 384/120 373/166 369/186 +f 157/115 140/190 144/170 +f 458/208 460/198 439/304 +f 238/389 219/25 240/377 +f 310/37 439/304 460/198 +f 80/46 240/377 219/25 +f 353/266 377/148 367/196 +f 124/270 138/200 148/151 +f 402/44 367/196 377/148 +f 178/23 148/151 138/200 +f 5/417 2/435 276/184 +f 5/417 46/192 2/435 +f 275/189 276/184 2/435 +f 45/197 2/435 46/192 +f 429/364 263/249 422/406 +f 209/314 202/356 33/257 +f 419/424 422/406 263/249 +f 195/398 33/257 202/356 +f 328/409 359/236 295/102 +f 99/414 65/109 130/240 +f 332/385 295/102 359/236 +f 103/390 130/240 65/109 +f 368/191 436/322 417/436 +f 139/195 193/410 216/40 +f 434/334 417/436 436/322 +f 214/50 216/40 193/410 +f 456/218 440/298 290/122 +f 236/401 60/129 220/20 +f 393/84 290/122 440/298 +f 167/75 220/20 60/129 +f 329/403 463/183 327/415 +f 100/408 98/420 243/359 +f 371/176 327/415 463/183 +f 142/180 243/359 98/420 +f 327/415 371/176 3/429 +f 98/420 3/429 142/180 +f 95/438 3/429 371/176 +f 95/438 142/180 3/429 +f 461/193 456/218 306/57 +f 241/371 76/65 236/401 +f 290/122 306/57 456/218 +f 60/129 236/401 76/65 +f 449/253 340/337 450/248 +f 229/443 230/437 111/342 +f 255/289 450/248 340/337 +f 25/297 111/342 230/437 +f 262/254 447/263 256/284 +f 32/262 26/292 227/453 +f 360/231 256/284 447/263 +f 131/235 227/453 26/292 +f 450/248 255/289 451/243 +f 230/437 231/431 25/297 +f 254/294 451/243 255/289 +f 24/303 25/297 231/431 +f 451/243 254/294 452/238 +f 231/431 232/425 24/303 +f 253/299 452/238 254/294 +f 23/309 24/303 232/425 +f 452/238 253/299 453/233 +f 232/425 233/419 23/309 +f 257/279 453/233 253/299 +f 27/287 23/309 233/419 +f 257/279 342/325 453/233 +f 27/287 233/419 113/330 +f 454/228 453/233 342/325 +f 234/413 
113/330 233/419 +f 414/452 465/173 415/447 +f 190/428 191/422 245/347 +f 464/178 415/447 465/173 +f 244/353 245/347 191/422 +f 442/288 414/452 287/134 +f 222/10 57/141 190/428 +f 415/447 287/134 414/452 +f 191/422 190/428 57/141 +f 442/288 287/134 443/283 +f 222/10 223/5 57/141 +f 259/269 443/283 287/134 +f 29/277 57/141 223/5 +f 443/283 259/269 444/278 +f 223/5 224/468 29/277 +f 258/274 444/278 259/269 +f 28/282 29/277 224/468 +f 445/273 444/278 260/264 +f 225/463 30/272 224/468 +f 258/274 260/264 444/278 +f 28/282 224/468 30/272 +f 260/264 261/259 445/273 +f 30/272 225/463 31/267 +f 446/268 445/273 261/259 +f 226/458 31/267 225/463 +f 261/259 468/158 446/268 +f 31/267 226/458 248/329 +f 343/319 446/268 468/158 +f 114/324 248/329 226/458 +f 251/311 310/37 459/203 +f 21/321 239/383 80/46 +f 460/198 459/203 310/37 +f 240/377 80/46 239/383 +f 291/118 306/57 393/84 +f 61/125 167/75 76/65 +f 290/122 393/84 306/57 +f 60/129 76/65 167/75 +f 461/193 306/57 329/403 +f 241/371 100/408 76/65 +f 291/118 329/403 306/57 +f 61/125 76/65 100/408 +f 377/148 434/334 402/44 +f 148/151 178/23 214/50 +f 436/322 402/44 434/334 +f 216/40 214/50 178/23 +f 251/311 291/118 310/37 +f 21/321 80/46 61/125 +f 393/84 310/37 291/118 +f 167/75 61/125 80/46 +f 412/462 417/436 377/148 +f 188/440 148/151 193/410 +f 434/334 377/148 417/436 +f 214/50 193/410 148/151 +f 342/325 464/178 454/228 +f 113/330 234/413 244/353 +f 465/173 454/228 464/178 +f 245/347 244/353 234/413 +f 454/228 465/173 358/241 +f 234/413 129/245 245/347 +f 466/168 358/241 465/173 +f 246/341 245/347 129/245 +f 413/457 344/313 466/168 +f 189/434 246/341 115/318 +f 358/241 466/168 344/313 +f 129/245 115/318 246/341 +f 438/310 344/313 400/54 +f 218/30 175/38 115/318 +f 413/457 400/54 344/313 +f 189/434 115/318 175/38 +f 364/211 441/293 361/226 +f 135/215 132/230 221/15 +f 345/307 361/226 441/293 +f 116/312 221/15 132/230 +f 457/213 421/412 400/54 +f 237/395 175/38 199/374 +f 438/310 400/54 421/412 +f 218/30 199/374 175/38 +f 457/213 364/211 421/412 +f 237/395 199/374 135/215 +f 361/226 421/412 364/211 +f 132/230 135/215 199/374 +f 362/221 402/44 289/126 +f 133/225 59/133 178/23 +f 436/322 289/126 402/44 +f 216/40 178/23 59/133 +f 354/261 266/234 384/120 +f 125/265 157/115 36/242 +f 373/166 384/120 266/234 +f 144/170 36/242 157/115 +f 256/284 250/317 340/337 +f 26/292 111/342 8/399 +f 391/92 340/337 250/317 +f 164/87 8/399 111/342 +f 262/254 256/284 449/253 +f 32/262 229/443 26/292 +f 340/337 449/253 256/284 +f 111/342 26/292 229/443 +f 15/357 318/465 14/363 +f 15/357 14/363 88/6 +f 313/22 14/363 318/465 +f 83/31 88/6 14/363 +f 318/465 403/39 313/22 +f 88/6 83/31 179/18 +f 312/27 313/22 403/39 +f 82/36 179/18 83/31 +f 403/39 319/460 312/27 +f 179/18 82/36 89/1 +f 311/32 312/27 319/460 +f 81/41 89/1 82/36 +f 319/460 325/427 311/32 +f 89/1 81/41 96/432 +f 416/442 311/32 325/427 +f 192/416 96/432 81/41 +f 469/100 472/100 473/100 +f 470/100 469/100 473/100 +f 471/100 469/100 470/100 +f 471/100 472/100 469/100 +f 474/100 477/100 478/100 +f 475/100 474/100 478/100 +f 476/100 474/100 475/100 +f 476/100 477/100 474/100 diff --git a/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_including_iris_landmarks.pbtxt b/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_including_iris_landmarks.pbtxt new file mode 100644 index 000000000..23b9a91e5 --- /dev/null +++ b/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_including_iris_landmarks.pbtxt @@ -0,0 +1,5160 @@ +# Copyright 2020 The MediaPipe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +input_source: FACE_LANDMARK_PIPELINE +procrustes_landmark_basis { landmark_id: 4 weight: 0.070909939706326 } +procrustes_landmark_basis { landmark_id: 6 weight: 0.032100144773722 } +procrustes_landmark_basis { landmark_id: 10 weight: 0.008446550928056 } +procrustes_landmark_basis { landmark_id: 33 weight: 0.058724168688059 } +procrustes_landmark_basis { landmark_id: 54 weight: 0.007667080033571 } +procrustes_landmark_basis { landmark_id: 67 weight: 0.009078059345484 } +procrustes_landmark_basis { landmark_id: 117 weight: 0.009791937656701 } +procrustes_landmark_basis { landmark_id: 119 weight: 0.014565368182957 } +procrustes_landmark_basis { landmark_id: 121 weight: 0.018591361120343 } +procrustes_landmark_basis { landmark_id: 127 weight: 0.005197994410992 } +procrustes_landmark_basis { landmark_id: 129 weight: 0.120625205338001 } +procrustes_landmark_basis { landmark_id: 132 weight: 0.005560018587857 } +procrustes_landmark_basis { landmark_id: 133 weight: 0.05328618362546 } +procrustes_landmark_basis { landmark_id: 136 weight: 0.066890455782413 } +procrustes_landmark_basis { landmark_id: 143 weight: 0.014816547743976 } +procrustes_landmark_basis { landmark_id: 147 weight: 0.014262833632529 } +procrustes_landmark_basis { landmark_id: 198 weight: 0.025462191551924 } +procrustes_landmark_basis { landmark_id: 205 weight: 0.047252278774977 } +procrustes_landmark_basis { landmark_id: 263 weight: 0.058724168688059 } +procrustes_landmark_basis { landmark_id: 284 weight: 0.007667080033571 } +procrustes_landmark_basis { landmark_id: 297 weight: 0.009078059345484 } +procrustes_landmark_basis { landmark_id: 346 weight: 0.009791937656701 } +procrustes_landmark_basis { landmark_id: 348 weight: 0.014565368182957 } +procrustes_landmark_basis { landmark_id: 350 weight: 0.018591361120343 } +procrustes_landmark_basis { landmark_id: 356 weight: 0.005197994410992 } +procrustes_landmark_basis { landmark_id: 358 weight: 0.120625205338001 } +procrustes_landmark_basis { landmark_id: 361 weight: 0.005560018587857 } +procrustes_landmark_basis { landmark_id: 362 weight: 0.05328618362546 } +procrustes_landmark_basis { landmark_id: 365 weight: 0.066890455782413 } +procrustes_landmark_basis { landmark_id: 372 weight: 0.014816547743976 } +procrustes_landmark_basis { landmark_id: 376 weight: 0.014262833632529 } +procrustes_landmark_basis { landmark_id: 420 weight: 0.025462191551924 } +procrustes_landmark_basis { landmark_id: 425 weight: 0.047252278774977 } +canonical_mesh: { + vertex_type: VERTEX_PT + primitive_type: TRIANGLE + vertex_buffer: 0.000000 + vertex_buffer: -3.406404 + vertex_buffer: 5.979507 + vertex_buffer: 0.499977 + vertex_buffer: 0.652534 + vertex_buffer: 0.000000 + vertex_buffer: -1.126865 + vertex_buffer: 7.475604 + vertex_buffer: 0.500026 + vertex_buffer: 0.547487 + vertex_buffer: 0.000000 + vertex_buffer: -2.089024 + vertex_buffer: 6.058267 + vertex_buffer: 0.499974 + vertex_buffer: 0.602372 + vertex_buffer: -0.463928 + vertex_buffer: 0.955357 + 
vertex_buffer: 6.633583 + vertex_buffer: 0.482113 + vertex_buffer: 0.471979 + vertex_buffer: 0.000000 + vertex_buffer: -0.463170 + vertex_buffer: 7.586580 + vertex_buffer: 0.500151 + vertex_buffer: 0.527156 + vertex_buffer: 0.000000 + vertex_buffer: 0.365669 + vertex_buffer: 7.242870 + vertex_buffer: 0.499910 + vertex_buffer: 0.498253 + vertex_buffer: 0.000000 + vertex_buffer: 2.473255 + vertex_buffer: 5.788627 + vertex_buffer: 0.499523 + vertex_buffer: 0.401062 + vertex_buffer: -4.253081 + vertex_buffer: 2.577646 + vertex_buffer: 3.279702 + vertex_buffer: 0.289712 + vertex_buffer: 0.380764 + vertex_buffer: 0.000000 + vertex_buffer: 4.019042 + vertex_buffer: 5.284764 + vertex_buffer: 0.499955 + vertex_buffer: 0.312398 + vertex_buffer: 0.000000 + vertex_buffer: 4.885979 + vertex_buffer: 5.385258 + vertex_buffer: 0.499987 + vertex_buffer: 0.269919 + vertex_buffer: 0.000000 + vertex_buffer: 8.261778 + vertex_buffer: 4.481535 + vertex_buffer: 0.500023 + vertex_buffer: 0.107050 + vertex_buffer: 0.000000 + vertex_buffer: -3.706811 + vertex_buffer: 5.864924 + vertex_buffer: 0.500023 + vertex_buffer: 0.666234 + vertex_buffer: 0.000000 + vertex_buffer: -3.918301 + vertex_buffer: 5.569430 + vertex_buffer: 0.500016 + vertex_buffer: 0.679224 + vertex_buffer: 0.000000 + vertex_buffer: -3.994436 + vertex_buffer: 5.219482 + vertex_buffer: 0.500023 + vertex_buffer: 0.692348 + vertex_buffer: 0.000000 + vertex_buffer: -4.542400 + vertex_buffer: 5.404754 + vertex_buffer: 0.499977 + vertex_buffer: 0.695278 + vertex_buffer: 0.000000 + vertex_buffer: -4.745577 + vertex_buffer: 5.529457 + vertex_buffer: 0.499977 + vertex_buffer: 0.705934 + vertex_buffer: 0.000000 + vertex_buffer: -5.019567 + vertex_buffer: 5.601448 + vertex_buffer: 0.499977 + vertex_buffer: 0.719385 + vertex_buffer: 0.000000 + vertex_buffer: -5.365123 + vertex_buffer: 5.535441 + vertex_buffer: 0.499977 + vertex_buffer: 0.737019 + vertex_buffer: 0.000000 + vertex_buffer: -6.149624 + vertex_buffer: 5.071372 + vertex_buffer: 0.499968 + vertex_buffer: 0.781371 + vertex_buffer: 0.000000 + vertex_buffer: -1.501095 + vertex_buffer: 7.112196 + vertex_buffer: 0.499816 + vertex_buffer: 0.562981 + vertex_buffer: -0.416106 + vertex_buffer: -1.466449 + vertex_buffer: 6.447657 + vertex_buffer: 0.473773 + vertex_buffer: 0.573910 + vertex_buffer: -7.087960 + vertex_buffer: 5.434801 + vertex_buffer: 0.099620 + vertex_buffer: 0.104907 + vertex_buffer: 0.254141 + vertex_buffer: -2.628639 + vertex_buffer: 2.035898 + vertex_buffer: 3.848121 + vertex_buffer: 0.365930 + vertex_buffer: 0.409576 + vertex_buffer: -3.198363 + vertex_buffer: 1.985815 + vertex_buffer: 3.796952 + vertex_buffer: 0.338758 + vertex_buffer: 0.413025 + vertex_buffer: -3.775151 + vertex_buffer: 2.039402 + vertex_buffer: 3.646194 + vertex_buffer: 0.311120 + vertex_buffer: 0.409460 + vertex_buffer: -4.465819 + vertex_buffer: 2.422950 + vertex_buffer: 3.155168 + vertex_buffer: 0.274658 + vertex_buffer: 0.389131 + vertex_buffer: -2.164289 + vertex_buffer: 2.189867 + vertex_buffer: 3.851822 + vertex_buffer: 0.393362 + vertex_buffer: 0.403706 + vertex_buffer: -3.208229 + vertex_buffer: 3.223926 + vertex_buffer: 4.115822 + vertex_buffer: 0.345234 + vertex_buffer: 0.344011 + vertex_buffer: -2.673803 + vertex_buffer: 3.205337 + vertex_buffer: 4.092203 + vertex_buffer: 0.370094 + vertex_buffer: 0.346076 + vertex_buffer: -3.745193 + vertex_buffer: 3.165286 + vertex_buffer: 3.972409 + vertex_buffer: 0.319322 + vertex_buffer: 0.347265 + vertex_buffer: -4.161018 + vertex_buffer: 3.059069 + vertex_buffer: 
3.719554 + vertex_buffer: 0.297903 + vertex_buffer: 0.353591 + vertex_buffer: -5.062006 + vertex_buffer: 1.934418 + vertex_buffer: 2.776093 + vertex_buffer: 0.247792 + vertex_buffer: 0.410810 + vertex_buffer: -2.266659 + vertex_buffer: -7.425768 + vertex_buffer: 4.389812 + vertex_buffer: 0.396889 + vertex_buffer: 0.842755 + vertex_buffer: -4.445859 + vertex_buffer: 2.663991 + vertex_buffer: 3.173422 + vertex_buffer: 0.280098 + vertex_buffer: 0.375600 + vertex_buffer: -7.214530 + vertex_buffer: 2.263009 + vertex_buffer: 0.073150 + vertex_buffer: 0.106310 + vertex_buffer: 0.399956 + vertex_buffer: -5.799793 + vertex_buffer: 2.349546 + vertex_buffer: 2.204059 + vertex_buffer: 0.209925 + vertex_buffer: 0.391353 + vertex_buffer: -2.844939 + vertex_buffer: -0.720868 + vertex_buffer: 4.433130 + vertex_buffer: 0.355808 + vertex_buffer: 0.534406 + vertex_buffer: -0.711452 + vertex_buffer: -3.329355 + vertex_buffer: 5.877044 + vertex_buffer: 0.471751 + vertex_buffer: 0.650404 + vertex_buffer: -0.606033 + vertex_buffer: -3.924562 + vertex_buffer: 5.444923 + vertex_buffer: 0.474155 + vertex_buffer: 0.680192 + vertex_buffer: -1.431615 + vertex_buffer: -3.500953 + vertex_buffer: 5.496189 + vertex_buffer: 0.439785 + vertex_buffer: 0.657229 + vertex_buffer: -1.914910 + vertex_buffer: -3.803146 + vertex_buffer: 5.028930 + vertex_buffer: 0.414617 + vertex_buffer: 0.666541 + vertex_buffer: -1.131043 + vertex_buffer: -3.973937 + vertex_buffer: 5.189648 + vertex_buffer: 0.450374 + vertex_buffer: 0.680861 + vertex_buffer: -1.563548 + vertex_buffer: -4.082763 + vertex_buffer: 4.842263 + vertex_buffer: 0.428771 + vertex_buffer: 0.682691 + vertex_buffer: -2.650112 + vertex_buffer: -5.003649 + vertex_buffer: 4.188483 + vertex_buffer: 0.374971 + vertex_buffer: 0.727805 + vertex_buffer: -0.427049 + vertex_buffer: -1.094134 + vertex_buffer: 7.360529 + vertex_buffer: 0.486717 + vertex_buffer: 0.547629 + vertex_buffer: -0.496396 + vertex_buffer: -0.475659 + vertex_buffer: 7.440358 + vertex_buffer: 0.485301 + vertex_buffer: 0.527395 + vertex_buffer: -5.253307 + vertex_buffer: 3.881582 + vertex_buffer: 3.363159 + vertex_buffer: 0.257765 + vertex_buffer: 0.314490 + vertex_buffer: -1.718698 + vertex_buffer: 0.974609 + vertex_buffer: 4.558359 + vertex_buffer: 0.401223 + vertex_buffer: 0.455172 + vertex_buffer: -1.608635 + vertex_buffer: -0.942516 + vertex_buffer: 5.814193 + vertex_buffer: 0.429819 + vertex_buffer: 0.548615 + vertex_buffer: -1.651267 + vertex_buffer: -0.610868 + vertex_buffer: 5.581319 + vertex_buffer: 0.421352 + vertex_buffer: 0.533741 + vertex_buffer: -4.765501 + vertex_buffer: -0.701554 + vertex_buffer: 3.534632 + vertex_buffer: 0.276896 + vertex_buffer: 0.532057 + vertex_buffer: -0.478306 + vertex_buffer: 0.295766 + vertex_buffer: 7.101013 + vertex_buffer: 0.483370 + vertex_buffer: 0.499587 + vertex_buffer: -3.734964 + vertex_buffer: 4.508230 + vertex_buffer: 4.550454 + vertex_buffer: 0.337212 + vertex_buffer: 0.282883 + vertex_buffer: -4.588603 + vertex_buffer: 4.302037 + vertex_buffer: 4.048484 + vertex_buffer: 0.296392 + vertex_buffer: 0.293243 + vertex_buffer: -6.279331 + vertex_buffer: 6.615427 + vertex_buffer: 1.425850 + vertex_buffer: 0.169295 + vertex_buffer: 0.193814 + vertex_buffer: -1.220941 + vertex_buffer: 4.142165 + vertex_buffer: 5.106035 + vertex_buffer: 0.447580 + vertex_buffer: 0.302610 + vertex_buffer: -2.193489 + vertex_buffer: 3.100317 + vertex_buffer: 4.000575 + vertex_buffer: 0.392390 + vertex_buffer: 0.353888 + vertex_buffer: -3.102642 + vertex_buffer: -4.352984 + vertex_buffer: 
4.095905 + vertex_buffer: 0.354490 + vertex_buffer: 0.696784 + vertex_buffer: -6.719682 + vertex_buffer: -4.788645 + vertex_buffer: -1.745401 + vertex_buffer: 0.067305 + vertex_buffer: 0.730105 + vertex_buffer: -1.193824 + vertex_buffer: -1.306795 + vertex_buffer: 5.737747 + vertex_buffer: 0.442739 + vertex_buffer: 0.572826 + vertex_buffer: -0.729766 + vertex_buffer: -1.593712 + vertex_buffer: 5.833208 + vertex_buffer: 0.457098 + vertex_buffer: 0.584792 + vertex_buffer: -2.456206 + vertex_buffer: -4.342621 + vertex_buffer: 4.283884 + vertex_buffer: 0.381974 + vertex_buffer: 0.694711 + vertex_buffer: -2.204823 + vertex_buffer: -4.304508 + vertex_buffer: 4.162499 + vertex_buffer: 0.392389 + vertex_buffer: 0.694203 + vertex_buffer: -4.985894 + vertex_buffer: 4.802461 + vertex_buffer: 3.751977 + vertex_buffer: 0.277076 + vertex_buffer: 0.271932 + vertex_buffer: -1.592294 + vertex_buffer: -1.257709 + vertex_buffer: 5.456949 + vertex_buffer: 0.422552 + vertex_buffer: 0.563233 + vertex_buffer: -2.644548 + vertex_buffer: 4.524654 + vertex_buffer: 4.921559 + vertex_buffer: 0.385919 + vertex_buffer: 0.281364 + vertex_buffer: -2.760292 + vertex_buffer: 5.100971 + vertex_buffer: 5.015990 + vertex_buffer: 0.383103 + vertex_buffer: 0.255840 + vertex_buffer: -3.523964 + vertex_buffer: 8.005976 + vertex_buffer: 3.729163 + vertex_buffer: 0.331431 + vertex_buffer: 0.119714 + vertex_buffer: -5.599763 + vertex_buffer: 5.715470 + vertex_buffer: 2.724259 + vertex_buffer: 0.229924 + vertex_buffer: 0.232003 + vertex_buffer: -3.063932 + vertex_buffer: 6.566144 + vertex_buffer: 4.529981 + vertex_buffer: 0.364501 + vertex_buffer: 0.189114 + vertex_buffer: -5.720968 + vertex_buffer: 4.254584 + vertex_buffer: 2.830852 + vertex_buffer: 0.229622 + vertex_buffer: 0.299541 + vertex_buffer: -6.374393 + vertex_buffer: 4.785590 + vertex_buffer: 1.591691 + vertex_buffer: 0.173287 + vertex_buffer: 0.278748 + vertex_buffer: -0.672728 + vertex_buffer: -3.688016 + vertex_buffer: 5.737804 + vertex_buffer: 0.472879 + vertex_buffer: 0.666198 + vertex_buffer: -1.262560 + vertex_buffer: -3.787691 + vertex_buffer: 5.417779 + vertex_buffer: 0.446828 + vertex_buffer: 0.668527 + vertex_buffer: -1.732553 + vertex_buffer: -3.952767 + vertex_buffer: 5.000579 + vertex_buffer: 0.422762 + vertex_buffer: 0.673890 + vertex_buffer: -1.043625 + vertex_buffer: -1.464973 + vertex_buffer: 5.662455 + vertex_buffer: 0.445308 + vertex_buffer: 0.580066 + vertex_buffer: -2.321234 + vertex_buffer: -4.329069 + vertex_buffer: 4.258156 + vertex_buffer: 0.388103 + vertex_buffer: 0.693961 + vertex_buffer: -2.056846 + vertex_buffer: -4.477671 + vertex_buffer: 4.520883 + vertex_buffer: 0.403039 + vertex_buffer: 0.706540 + vertex_buffer: -2.153084 + vertex_buffer: -4.276322 + vertex_buffer: 4.038093 + vertex_buffer: 0.403629 + vertex_buffer: 0.693953 + vertex_buffer: -0.946874 + vertex_buffer: -1.035249 + vertex_buffer: 6.512274 + vertex_buffer: 0.460042 + vertex_buffer: 0.557139 + vertex_buffer: -1.469132 + vertex_buffer: -4.036351 + vertex_buffer: 4.604908 + vertex_buffer: 0.431158 + vertex_buffer: 0.692366 + vertex_buffer: -1.024340 + vertex_buffer: -3.989851 + vertex_buffer: 4.926693 + vertex_buffer: 0.452182 + vertex_buffer: 0.692366 + vertex_buffer: -0.533422 + vertex_buffer: -3.993222 + vertex_buffer: 5.138202 + vertex_buffer: 0.475387 + vertex_buffer: 0.692366 + vertex_buffer: -0.769720 + vertex_buffer: -6.095394 + vertex_buffer: 4.985883 + vertex_buffer: 0.465828 + vertex_buffer: 0.779190 + vertex_buffer: -0.699606 + vertex_buffer: -5.291850 + 
vertex_buffer: 5.448304 + vertex_buffer: 0.472329 + vertex_buffer: 0.736226 + vertex_buffer: -0.669687 + vertex_buffer: -4.949770 + vertex_buffer: 5.509612 + vertex_buffer: 0.473087 + vertex_buffer: 0.717857 + vertex_buffer: -0.630947 + vertex_buffer: -4.695101 + vertex_buffer: 5.449371 + vertex_buffer: 0.473122 + vertex_buffer: 0.704626 + vertex_buffer: -0.583218 + vertex_buffer: -4.517982 + vertex_buffer: 5.339869 + vertex_buffer: 0.473033 + vertex_buffer: 0.695278 + vertex_buffer: -1.537170 + vertex_buffer: -4.423206 + vertex_buffer: 4.745470 + vertex_buffer: 0.427942 + vertex_buffer: 0.695278 + vertex_buffer: -1.615600 + vertex_buffer: -4.475942 + vertex_buffer: 4.813632 + vertex_buffer: 0.426479 + vertex_buffer: 0.703540 + vertex_buffer: -1.729053 + vertex_buffer: -4.618680 + vertex_buffer: 4.854463 + vertex_buffer: 0.423162 + vertex_buffer: 0.711846 + vertex_buffer: -1.838624 + vertex_buffer: -4.828746 + vertex_buffer: 4.823737 + vertex_buffer: 0.418309 + vertex_buffer: 0.720063 + vertex_buffer: -2.368250 + vertex_buffer: -3.106237 + vertex_buffer: 4.868096 + vertex_buffer: 0.390095 + vertex_buffer: 0.639573 + vertex_buffer: -7.542244 + vertex_buffer: -1.049282 + vertex_buffer: -2.431321 + vertex_buffer: 0.013954 + vertex_buffer: 0.560034 + vertex_buffer: 0.000000 + vertex_buffer: -1.724003 + vertex_buffer: 6.601390 + vertex_buffer: 0.499914 + vertex_buffer: 0.580147 + vertex_buffer: -1.826614 + vertex_buffer: -4.399531 + vertex_buffer: 4.399021 + vertex_buffer: 0.413200 + vertex_buffer: 0.695400 + vertex_buffer: -1.929558 + vertex_buffer: -4.411831 + vertex_buffer: 4.497052 + vertex_buffer: 0.409626 + vertex_buffer: 0.701823 + vertex_buffer: -0.597442 + vertex_buffer: -2.013686 + vertex_buffer: 5.866456 + vertex_buffer: 0.468080 + vertex_buffer: 0.601535 + vertex_buffer: -1.405627 + vertex_buffer: -1.714196 + vertex_buffer: 5.241087 + vertex_buffer: 0.422729 + vertex_buffer: 0.585985 + vertex_buffer: -0.662449 + vertex_buffer: -1.819321 + vertex_buffer: 5.863759 + vertex_buffer: 0.463080 + vertex_buffer: 0.593784 + vertex_buffer: -2.342340 + vertex_buffer: 0.572222 + vertex_buffer: 4.294303 + vertex_buffer: 0.372120 + vertex_buffer: 0.473414 + vertex_buffer: -3.327324 + vertex_buffer: 0.104863 + vertex_buffer: 4.113860 + vertex_buffer: 0.334562 + vertex_buffer: 0.496073 + vertex_buffer: -1.726175 + vertex_buffer: -0.919165 + vertex_buffer: 5.273355 + vertex_buffer: 0.411671 + vertex_buffer: 0.546965 + vertex_buffer: -5.133204 + vertex_buffer: 7.485602 + vertex_buffer: 2.660442 + vertex_buffer: 0.242176 + vertex_buffer: 0.147676 + vertex_buffer: -4.538641 + vertex_buffer: 6.319907 + vertex_buffer: 3.683424 + vertex_buffer: 0.290777 + vertex_buffer: 0.201446 + vertex_buffer: -3.986562 + vertex_buffer: 5.109487 + vertex_buffer: 4.466315 + vertex_buffer: 0.327338 + vertex_buffer: 0.256527 + vertex_buffer: -2.169681 + vertex_buffer: -5.440433 + vertex_buffer: 4.455874 + vertex_buffer: 0.399510 + vertex_buffer: 0.748921 + vertex_buffer: -1.395634 + vertex_buffer: 5.011963 + vertex_buffer: 5.316032 + vertex_buffer: 0.441728 + vertex_buffer: 0.261676 + vertex_buffer: -1.619500 + vertex_buffer: 6.599217 + vertex_buffer: 4.921106 + vertex_buffer: 0.429765 + vertex_buffer: 0.187834 + vertex_buffer: -1.891399 + vertex_buffer: 8.236377 + vertex_buffer: 4.274997 + vertex_buffer: 0.412198 + vertex_buffer: 0.108901 + vertex_buffer: -4.195832 + vertex_buffer: 2.235205 + vertex_buffer: 3.375099 + vertex_buffer: 0.288955 + vertex_buffer: 0.398952 + vertex_buffer: -5.733342 + vertex_buffer: 1.411738 + 
vertex_buffer: 2.431726 + vertex_buffer: 0.218937 + vertex_buffer: 0.435411 + vertex_buffer: -1.859887 + vertex_buffer: 2.355757 + vertex_buffer: 3.843181 + vertex_buffer: 0.412782 + vertex_buffer: 0.398970 + vertex_buffer: -4.988612 + vertex_buffer: 3.074654 + vertex_buffer: 3.083858 + vertex_buffer: 0.257135 + vertex_buffer: 0.355440 + vertex_buffer: -1.303263 + vertex_buffer: 1.416453 + vertex_buffer: 4.831091 + vertex_buffer: 0.427685 + vertex_buffer: 0.437961 + vertex_buffer: -1.305757 + vertex_buffer: -0.672779 + vertex_buffer: 6.415959 + vertex_buffer: 0.448340 + vertex_buffer: 0.536936 + vertex_buffer: -6.465170 + vertex_buffer: 0.937119 + vertex_buffer: 1.689873 + vertex_buffer: 0.178560 + vertex_buffer: 0.457554 + vertex_buffer: -5.258659 + vertex_buffer: 0.945811 + vertex_buffer: 2.974312 + vertex_buffer: 0.247308 + vertex_buffer: 0.457194 + vertex_buffer: -4.432338 + vertex_buffer: 0.722096 + vertex_buffer: 3.522615 + vertex_buffer: 0.286267 + vertex_buffer: 0.467675 + vertex_buffer: -3.300681 + vertex_buffer: 0.861641 + vertex_buffer: 3.872784 + vertex_buffer: 0.332828 + vertex_buffer: 0.460712 + vertex_buffer: -2.430178 + vertex_buffer: 1.131492 + vertex_buffer: 4.039035 + vertex_buffer: 0.368756 + vertex_buffer: 0.447207 + vertex_buffer: -1.820731 + vertex_buffer: 1.467954 + vertex_buffer: 4.224124 + vertex_buffer: 0.398964 + vertex_buffer: 0.432655 + vertex_buffer: -0.563221 + vertex_buffer: 2.307693 + vertex_buffer: 5.566789 + vertex_buffer: 0.476410 + vertex_buffer: 0.405806 + vertex_buffer: -6.338145 + vertex_buffer: -0.529279 + vertex_buffer: 1.881175 + vertex_buffer: 0.189241 + vertex_buffer: 0.523924 + vertex_buffer: -5.587698 + vertex_buffer: 3.208071 + vertex_buffer: 2.687839 + vertex_buffer: 0.228962 + vertex_buffer: 0.348951 + vertex_buffer: -0.242624 + vertex_buffer: -1.462857 + vertex_buffer: 7.071491 + vertex_buffer: 0.490726 + vertex_buffer: 0.562401 + vertex_buffer: -1.611251 + vertex_buffer: 0.339326 + vertex_buffer: 4.895421 + vertex_buffer: 0.404670 + vertex_buffer: 0.485133 + vertex_buffer: -7.743095 + vertex_buffer: 2.364999 + vertex_buffer: -2.005167 + vertex_buffer: 0.019469 + vertex_buffer: 0.401564 + vertex_buffer: -1.391142 + vertex_buffer: 1.851048 + vertex_buffer: 4.448999 + vertex_buffer: 0.426243 + vertex_buffer: 0.420431 + vertex_buffer: -1.785794 + vertex_buffer: -0.978284 + vertex_buffer: 4.850470 + vertex_buffer: 0.396993 + vertex_buffer: 0.548797 + vertex_buffer: -4.670959 + vertex_buffer: 2.664461 + vertex_buffer: 3.084075 + vertex_buffer: 0.266470 + vertex_buffer: 0.376977 + vertex_buffer: -1.333970 + vertex_buffer: -0.283761 + vertex_buffer: 6.097047 + vertex_buffer: 0.439121 + vertex_buffer: 0.518958 + vertex_buffer: -7.270895 + vertex_buffer: -2.890917 + vertex_buffer: -2.252455 + vertex_buffer: 0.032314 + vertex_buffer: 0.644357 + vertex_buffer: -1.856432 + vertex_buffer: 2.585245 + vertex_buffer: 3.757904 + vertex_buffer: 0.419054 + vertex_buffer: 0.387155 + vertex_buffer: -0.923388 + vertex_buffer: 0.073076 + vertex_buffer: 6.671944 + vertex_buffer: 0.462783 + vertex_buffer: 0.505747 + vertex_buffer: -5.000589 + vertex_buffer: -6.135128 + vertex_buffer: 1.892523 + vertex_buffer: 0.238979 + vertex_buffer: 0.779745 + vertex_buffer: -5.085276 + vertex_buffer: -7.178590 + vertex_buffer: 0.714711 + vertex_buffer: 0.198221 + vertex_buffer: 0.831938 + vertex_buffer: -7.159291 + vertex_buffer: -0.811820 + vertex_buffer: -0.072044 + vertex_buffer: 0.107550 + vertex_buffer: 0.540755 + vertex_buffer: -5.843051 + vertex_buffer: -5.248023 + 
vertex_buffer: 0.924091 + vertex_buffer: 0.183610 + vertex_buffer: 0.740257 + vertex_buffer: -6.847258 + vertex_buffer: 3.662916 + vertex_buffer: 0.724695 + vertex_buffer: 0.134410 + vertex_buffer: 0.333683 + vertex_buffer: -2.412942 + vertex_buffer: -8.258853 + vertex_buffer: 4.119213 + vertex_buffer: 0.385764 + vertex_buffer: 0.883154 + vertex_buffer: -0.179909 + vertex_buffer: -1.689864 + vertex_buffer: 6.573301 + vertex_buffer: 0.490967 + vertex_buffer: 0.579378 + vertex_buffer: -2.103655 + vertex_buffer: -0.163946 + vertex_buffer: 4.566119 + vertex_buffer: 0.382385 + vertex_buffer: 0.508573 + vertex_buffer: -6.407571 + vertex_buffer: 2.236021 + vertex_buffer: 1.560843 + vertex_buffer: 0.174399 + vertex_buffer: 0.397671 + vertex_buffer: -3.670075 + vertex_buffer: 2.360153 + vertex_buffer: 3.635230 + vertex_buffer: 0.318785 + vertex_buffer: 0.396235 + vertex_buffer: -3.177186 + vertex_buffer: 2.294265 + vertex_buffer: 3.775704 + vertex_buffer: 0.343364 + vertex_buffer: 0.400597 + vertex_buffer: -2.196121 + vertex_buffer: -4.598322 + vertex_buffer: 4.479786 + vertex_buffer: 0.396100 + vertex_buffer: 0.710217 + vertex_buffer: -6.234883 + vertex_buffer: -1.944430 + vertex_buffer: 1.663542 + vertex_buffer: 0.187885 + vertex_buffer: 0.588538 + vertex_buffer: -1.292924 + vertex_buffer: -9.295920 + vertex_buffer: 4.094063 + vertex_buffer: 0.430987 + vertex_buffer: 0.944065 + vertex_buffer: -3.210651 + vertex_buffer: -8.533278 + vertex_buffer: 2.802001 + vertex_buffer: 0.318993 + vertex_buffer: 0.898285 + vertex_buffer: -4.068926 + vertex_buffer: -7.993109 + vertex_buffer: 1.925119 + vertex_buffer: 0.266248 + vertex_buffer: 0.869701 + vertex_buffer: 0.000000 + vertex_buffer: 6.545390 + vertex_buffer: 5.027311 + vertex_buffer: 0.500023 + vertex_buffer: 0.190576 + vertex_buffer: 0.000000 + vertex_buffer: -9.403378 + vertex_buffer: 4.264492 + vertex_buffer: 0.499977 + vertex_buffer: 0.954453 + vertex_buffer: -2.724032 + vertex_buffer: 2.315802 + vertex_buffer: 3.777151 + vertex_buffer: 0.366170 + vertex_buffer: 0.398822 + vertex_buffer: -2.288460 + vertex_buffer: 2.398891 + vertex_buffer: 3.697603 + vertex_buffer: 0.393207 + vertex_buffer: 0.395537 + vertex_buffer: -1.998311 + vertex_buffer: 2.496547 + vertex_buffer: 3.689148 + vertex_buffer: 0.410373 + vertex_buffer: 0.391080 + vertex_buffer: -6.130040 + vertex_buffer: 3.399261 + vertex_buffer: 2.038516 + vertex_buffer: 0.194993 + vertex_buffer: 0.342102 + vertex_buffer: -2.288460 + vertex_buffer: 2.886504 + vertex_buffer: 3.775031 + vertex_buffer: 0.388665 + vertex_buffer: 0.362284 + vertex_buffer: -2.724032 + vertex_buffer: 2.961810 + vertex_buffer: 3.871767 + vertex_buffer: 0.365962 + vertex_buffer: 0.355971 + vertex_buffer: -3.177186 + vertex_buffer: 2.964136 + vertex_buffer: 3.876973 + vertex_buffer: 0.343364 + vertex_buffer: 0.355357 + vertex_buffer: -3.670075 + vertex_buffer: 2.927714 + vertex_buffer: 3.724325 + vertex_buffer: 0.318785 + vertex_buffer: 0.358340 + vertex_buffer: -4.018389 + vertex_buffer: 2.857357 + vertex_buffer: 3.482983 + vertex_buffer: 0.301415 + vertex_buffer: 0.363156 + vertex_buffer: -7.555811 + vertex_buffer: 4.106811 + vertex_buffer: -0.991917 + vertex_buffer: 0.058133 + vertex_buffer: 0.319076 + vertex_buffer: -4.018389 + vertex_buffer: 2.483695 + vertex_buffer: 3.440898 + vertex_buffer: 0.301415 + vertex_buffer: 0.387449 + vertex_buffer: 0.000000 + vertex_buffer: -2.521945 + vertex_buffer: 5.932265 + vertex_buffer: 0.499988 + vertex_buffer: 0.618434 + vertex_buffer: -1.776217 + vertex_buffer: -2.683946 + 
vertex_buffer: 5.213116 + vertex_buffer: 0.415838 + vertex_buffer: 0.624196 + vertex_buffer: -1.222237 + vertex_buffer: -1.182444 + vertex_buffer: 5.952465 + vertex_buffer: 0.445682 + vertex_buffer: 0.566077 + vertex_buffer: -0.731493 + vertex_buffer: -2.536683 + vertex_buffer: 5.815343 + vertex_buffer: 0.465844 + vertex_buffer: 0.620641 + vertex_buffer: 0.000000 + vertex_buffer: 3.271027 + vertex_buffer: 5.236015 + vertex_buffer: 0.499923 + vertex_buffer: 0.351524 + vertex_buffer: -4.135272 + vertex_buffer: -6.996638 + vertex_buffer: 2.671970 + vertex_buffer: 0.288719 + vertex_buffer: 0.819946 + vertex_buffer: -3.311811 + vertex_buffer: -7.660815 + vertex_buffer: 3.382963 + vertex_buffer: 0.335279 + vertex_buffer: 0.852820 + vertex_buffer: -1.313701 + vertex_buffer: -8.639995 + vertex_buffer: 4.702456 + vertex_buffer: 0.440512 + vertex_buffer: 0.902419 + vertex_buffer: -5.940524 + vertex_buffer: -6.223629 + vertex_buffer: -0.631468 + vertex_buffer: 0.128294 + vertex_buffer: 0.791941 + vertex_buffer: -1.998311 + vertex_buffer: 2.743838 + vertex_buffer: 3.744030 + vertex_buffer: 0.408772 + vertex_buffer: 0.373894 + vertex_buffer: -0.901447 + vertex_buffer: 1.236992 + vertex_buffer: 5.754256 + vertex_buffer: 0.455607 + vertex_buffer: 0.451801 + vertex_buffer: 0.000000 + vertex_buffer: -8.765243 + vertex_buffer: 4.891441 + vertex_buffer: 0.499877 + vertex_buffer: 0.908990 + vertex_buffer: -2.308977 + vertex_buffer: -8.974196 + vertex_buffer: 3.609070 + vertex_buffer: 0.375437 + vertex_buffer: 0.924192 + vertex_buffer: -6.954154 + vertex_buffer: -2.439843 + vertex_buffer: -0.131163 + vertex_buffer: 0.114210 + vertex_buffer: 0.615022 + vertex_buffer: -1.098819 + vertex_buffer: -4.458788 + vertex_buffer: 5.120727 + vertex_buffer: 0.448662 + vertex_buffer: 0.695278 + vertex_buffer: -1.181124 + vertex_buffer: -4.579996 + vertex_buffer: 5.189564 + vertex_buffer: 0.448020 + vertex_buffer: 0.704632 + vertex_buffer: -1.255818 + vertex_buffer: -4.787901 + vertex_buffer: 5.237051 + vertex_buffer: 0.447112 + vertex_buffer: 0.715808 + vertex_buffer: -1.325085 + vertex_buffer: -5.106507 + vertex_buffer: 5.205010 + vertex_buffer: 0.444832 + vertex_buffer: 0.730794 + vertex_buffer: -1.546388 + vertex_buffer: -5.819392 + vertex_buffer: 4.757893 + vertex_buffer: 0.430012 + vertex_buffer: 0.766809 + vertex_buffer: -1.953754 + vertex_buffer: -4.183892 + vertex_buffer: 4.431713 + vertex_buffer: 0.406787 + vertex_buffer: 0.685673 + vertex_buffer: -2.117802 + vertex_buffer: -4.137093 + vertex_buffer: 4.555096 + vertex_buffer: 0.400738 + vertex_buffer: 0.681069 + vertex_buffer: -2.285339 + vertex_buffer: -4.051196 + vertex_buffer: 4.582438 + vertex_buffer: 0.392400 + vertex_buffer: 0.677703 + vertex_buffer: -2.850160 + vertex_buffer: -3.665720 + vertex_buffer: 4.484994 + vertex_buffer: 0.367856 + vertex_buffer: 0.663919 + vertex_buffer: -5.278538 + vertex_buffer: -2.238942 + vertex_buffer: 2.861224 + vertex_buffer: 0.247923 + vertex_buffer: 0.601333 + vertex_buffer: -0.946709 + vertex_buffer: 1.907628 + vertex_buffer: 5.196779 + vertex_buffer: 0.452770 + vertex_buffer: 0.420850 + vertex_buffer: -1.314173 + vertex_buffer: 3.104912 + vertex_buffer: 4.231404 + vertex_buffer: 0.436392 + vertex_buffer: 0.359887 + vertex_buffer: -1.780000 + vertex_buffer: 2.860000 + vertex_buffer: 3.881555 + vertex_buffer: 0.416164 + vertex_buffer: 0.368714 + vertex_buffer: -1.845110 + vertex_buffer: -4.098880 + vertex_buffer: 4.247264 + vertex_buffer: 0.413386 + vertex_buffer: 0.692366 + vertex_buffer: -5.436187 + vertex_buffer: 
-4.030482 + vertex_buffer: 2.109852 + vertex_buffer: 0.228018 + vertex_buffer: 0.683572 + vertex_buffer: -0.766444 + vertex_buffer: 3.182131 + vertex_buffer: 4.861453 + vertex_buffer: 0.468268 + vertex_buffer: 0.352671 + vertex_buffer: -1.938616 + vertex_buffer: -6.614410 + vertex_buffer: 4.521085 + vertex_buffer: 0.411362 + vertex_buffer: 0.804327 + vertex_buffer: 0.000000 + vertex_buffer: 1.059413 + vertex_buffer: 6.774605 + vertex_buffer: 0.499989 + vertex_buffer: 0.469825 + vertex_buffer: -0.516573 + vertex_buffer: 1.583572 + vertex_buffer: 6.148363 + vertex_buffer: 0.479154 + vertex_buffer: 0.442654 + vertex_buffer: 0.000000 + vertex_buffer: 1.728369 + vertex_buffer: 6.316750 + vertex_buffer: 0.499974 + vertex_buffer: 0.439637 + vertex_buffer: -1.246815 + vertex_buffer: 0.230297 + vertex_buffer: 5.681036 + vertex_buffer: 0.432112 + vertex_buffer: 0.493589 + vertex_buffer: 0.000000 + vertex_buffer: -7.942194 + vertex_buffer: 5.181173 + vertex_buffer: 0.499886 + vertex_buffer: 0.866917 + vertex_buffer: 0.000000 + vertex_buffer: -6.991499 + vertex_buffer: 5.153478 + vertex_buffer: 0.499913 + vertex_buffer: 0.821729 + vertex_buffer: -0.997827 + vertex_buffer: -6.930921 + vertex_buffer: 4.979576 + vertex_buffer: 0.456549 + vertex_buffer: 0.819201 + vertex_buffer: -3.288807 + vertex_buffer: -5.382514 + vertex_buffer: 3.795752 + vertex_buffer: 0.344549 + vertex_buffer: 0.745439 + vertex_buffer: -2.311631 + vertex_buffer: -1.566237 + vertex_buffer: 4.590085 + vertex_buffer: 0.378909 + vertex_buffer: 0.574010 + vertex_buffer: -2.680250 + vertex_buffer: -6.111567 + vertex_buffer: 4.096152 + vertex_buffer: 0.374293 + vertex_buffer: 0.780185 + vertex_buffer: -3.832928 + vertex_buffer: -1.537326 + vertex_buffer: 4.137731 + vertex_buffer: 0.319688 + vertex_buffer: 0.570738 + vertex_buffer: -2.961860 + vertex_buffer: -2.274215 + vertex_buffer: 4.440943 + vertex_buffer: 0.357155 + vertex_buffer: 0.604270 + vertex_buffer: -4.386901 + vertex_buffer: -2.683286 + vertex_buffer: 3.643886 + vertex_buffer: 0.295284 + vertex_buffer: 0.621581 + vertex_buffer: -1.217295 + vertex_buffer: -7.834465 + vertex_buffer: 4.969286 + vertex_buffer: 0.447750 + vertex_buffer: 0.862477 + vertex_buffer: -1.542374 + vertex_buffer: -0.136843 + vertex_buffer: 5.201008 + vertex_buffer: 0.410986 + vertex_buffer: 0.508723 + vertex_buffer: -3.878377 + vertex_buffer: -6.041764 + vertex_buffer: 3.311079 + vertex_buffer: 0.313951 + vertex_buffer: 0.775308 + vertex_buffer: -3.084037 + vertex_buffer: -6.809842 + vertex_buffer: 3.814195 + vertex_buffer: 0.354128 + vertex_buffer: 0.812553 + vertex_buffer: -3.747321 + vertex_buffer: -4.503545 + vertex_buffer: 3.726453 + vertex_buffer: 0.324548 + vertex_buffer: 0.703993 + vertex_buffer: -6.094129 + vertex_buffer: -3.205991 + vertex_buffer: 1.473482 + vertex_buffer: 0.189096 + vertex_buffer: 0.646300 + vertex_buffer: -4.588995 + vertex_buffer: -4.728726 + vertex_buffer: 2.983221 + vertex_buffer: 0.279777 + vertex_buffer: 0.714658 + vertex_buffer: -6.583231 + vertex_buffer: -3.941269 + vertex_buffer: 0.070268 + vertex_buffer: 0.133823 + vertex_buffer: 0.682701 + vertex_buffer: -3.492580 + vertex_buffer: -3.195820 + vertex_buffer: 4.130198 + vertex_buffer: 0.336768 + vertex_buffer: 0.644733 + vertex_buffer: -1.255543 + vertex_buffer: 0.802341 + vertex_buffer: 5.307551 + vertex_buffer: 0.429884 + vertex_buffer: 0.466522 + vertex_buffer: -1.126122 + vertex_buffer: -0.933602 + vertex_buffer: 6.538785 + vertex_buffer: 0.455528 + vertex_buffer: 0.548623 + vertex_buffer: -1.443109 + vertex_buffer: 
+ [... face mesh geometry data: repeated vertex_buffer entries (five floats per vertex: x, y, z position followed by u, v texture coordinates) and repeated index_buffer entries (triangle vertex indices); the flattened numeric payload is elided here for length ...]
index_buffer: 83 + index_buffer: 84 + index_buffer: 182 + index_buffer: 405 + index_buffer: 314 + index_buffer: 406 + index_buffer: 181 + index_buffer: 182 + index_buffer: 84 + index_buffer: 418 + index_buffer: 406 + index_buffer: 421 + index_buffer: 194 + index_buffer: 201 + index_buffer: 182 + index_buffer: 313 + index_buffer: 421 + index_buffer: 406 + index_buffer: 83 + index_buffer: 182 + index_buffer: 201 + index_buffer: 366 + index_buffer: 401 + index_buffer: 323 + index_buffer: 137 + index_buffer: 93 + index_buffer: 177 + index_buffer: 361 + index_buffer: 323 + index_buffer: 401 + index_buffer: 132 + index_buffer: 177 + index_buffer: 93 + index_buffer: 408 + index_buffer: 407 + index_buffer: 306 + index_buffer: 184 + index_buffer: 76 + index_buffer: 183 + index_buffer: 292 + index_buffer: 306 + index_buffer: 407 + index_buffer: 62 + index_buffer: 183 + index_buffer: 76 + index_buffer: 408 + index_buffer: 306 + index_buffer: 409 + index_buffer: 184 + index_buffer: 185 + index_buffer: 76 + index_buffer: 291 + index_buffer: 409 + index_buffer: 306 + index_buffer: 61 + index_buffer: 76 + index_buffer: 185 + index_buffer: 410 + index_buffer: 409 + index_buffer: 287 + index_buffer: 186 + index_buffer: 57 + index_buffer: 185 + index_buffer: 291 + index_buffer: 287 + index_buffer: 409 + index_buffer: 61 + index_buffer: 185 + index_buffer: 57 + index_buffer: 436 + index_buffer: 410 + index_buffer: 432 + index_buffer: 216 + index_buffer: 212 + index_buffer: 186 + index_buffer: 287 + index_buffer: 432 + index_buffer: 410 + index_buffer: 57 + index_buffer: 186 + index_buffer: 212 + index_buffer: 434 + index_buffer: 416 + index_buffer: 427 + index_buffer: 214 + index_buffer: 207 + index_buffer: 192 + index_buffer: 411 + index_buffer: 427 + index_buffer: 416 + index_buffer: 187 + index_buffer: 192 + index_buffer: 207 + index_buffer: 264 + index_buffer: 368 + index_buffer: 372 + index_buffer: 34 + index_buffer: 143 + index_buffer: 139 + index_buffer: 383 + index_buffer: 372 + index_buffer: 368 + index_buffer: 156 + index_buffer: 139 + index_buffer: 143 + index_buffer: 457 + index_buffer: 459 + index_buffer: 438 + index_buffer: 237 + index_buffer: 218 + index_buffer: 239 + index_buffer: 309 + index_buffer: 438 + index_buffer: 459 + index_buffer: 79 + index_buffer: 239 + index_buffer: 218 + index_buffer: 352 + index_buffer: 376 + index_buffer: 366 + index_buffer: 123 + index_buffer: 137 + index_buffer: 147 + index_buffer: 401 + index_buffer: 366 + index_buffer: 376 + index_buffer: 177 + index_buffer: 147 + index_buffer: 137 + index_buffer: 4 + index_buffer: 1 + index_buffer: 275 + index_buffer: 4 + index_buffer: 45 + index_buffer: 1 + index_buffer: 274 + index_buffer: 275 + index_buffer: 1 + index_buffer: 44 + index_buffer: 1 + index_buffer: 45 + index_buffer: 428 + index_buffer: 262 + index_buffer: 421 + index_buffer: 208 + index_buffer: 201 + index_buffer: 32 + index_buffer: 418 + index_buffer: 421 + index_buffer: 262 + index_buffer: 194 + index_buffer: 32 + index_buffer: 201 + index_buffer: 327 + index_buffer: 358 + index_buffer: 294 + index_buffer: 98 + index_buffer: 64 + index_buffer: 129 + index_buffer: 331 + index_buffer: 294 + index_buffer: 358 + index_buffer: 102 + index_buffer: 129 + index_buffer: 64 + index_buffer: 367 + index_buffer: 435 + index_buffer: 416 + index_buffer: 138 + index_buffer: 192 + index_buffer: 215 + index_buffer: 433 + index_buffer: 416 + index_buffer: 435 + index_buffer: 213 + index_buffer: 215 + index_buffer: 192 + index_buffer: 455 + index_buffer: 439 + 
index_buffer: 289 + index_buffer: 235 + index_buffer: 59 + index_buffer: 219 + index_buffer: 392 + index_buffer: 289 + index_buffer: 439 + index_buffer: 166 + index_buffer: 219 + index_buffer: 59 + index_buffer: 328 + index_buffer: 462 + index_buffer: 326 + index_buffer: 99 + index_buffer: 97 + index_buffer: 242 + index_buffer: 370 + index_buffer: 326 + index_buffer: 462 + index_buffer: 141 + index_buffer: 242 + index_buffer: 97 + index_buffer: 326 + index_buffer: 370 + index_buffer: 2 + index_buffer: 97 + index_buffer: 2 + index_buffer: 141 + index_buffer: 94 + index_buffer: 2 + index_buffer: 370 + index_buffer: 94 + index_buffer: 141 + index_buffer: 2 + index_buffer: 460 + index_buffer: 455 + index_buffer: 305 + index_buffer: 240 + index_buffer: 75 + index_buffer: 235 + index_buffer: 289 + index_buffer: 305 + index_buffer: 455 + index_buffer: 59 + index_buffer: 235 + index_buffer: 75 + index_buffer: 448 + index_buffer: 339 + index_buffer: 449 + index_buffer: 228 + index_buffer: 229 + index_buffer: 110 + index_buffer: 254 + index_buffer: 449 + index_buffer: 339 + index_buffer: 24 + index_buffer: 110 + index_buffer: 229 + index_buffer: 261 + index_buffer: 446 + index_buffer: 255 + index_buffer: 31 + index_buffer: 25 + index_buffer: 226 + index_buffer: 359 + index_buffer: 255 + index_buffer: 446 + index_buffer: 130 + index_buffer: 226 + index_buffer: 25 + index_buffer: 449 + index_buffer: 254 + index_buffer: 450 + index_buffer: 229 + index_buffer: 230 + index_buffer: 24 + index_buffer: 253 + index_buffer: 450 + index_buffer: 254 + index_buffer: 23 + index_buffer: 24 + index_buffer: 230 + index_buffer: 450 + index_buffer: 253 + index_buffer: 451 + index_buffer: 230 + index_buffer: 231 + index_buffer: 23 + index_buffer: 252 + index_buffer: 451 + index_buffer: 253 + index_buffer: 22 + index_buffer: 23 + index_buffer: 231 + index_buffer: 451 + index_buffer: 252 + index_buffer: 452 + index_buffer: 231 + index_buffer: 232 + index_buffer: 22 + index_buffer: 256 + index_buffer: 452 + index_buffer: 252 + index_buffer: 26 + index_buffer: 22 + index_buffer: 232 + index_buffer: 256 + index_buffer: 341 + index_buffer: 452 + index_buffer: 26 + index_buffer: 232 + index_buffer: 112 + index_buffer: 453 + index_buffer: 452 + index_buffer: 341 + index_buffer: 233 + index_buffer: 112 + index_buffer: 232 + index_buffer: 413 + index_buffer: 464 + index_buffer: 414 + index_buffer: 189 + index_buffer: 190 + index_buffer: 244 + index_buffer: 463 + index_buffer: 414 + index_buffer: 464 + index_buffer: 243 + index_buffer: 244 + index_buffer: 190 + index_buffer: 441 + index_buffer: 413 + index_buffer: 286 + index_buffer: 221 + index_buffer: 56 + index_buffer: 189 + index_buffer: 414 + index_buffer: 286 + index_buffer: 413 + index_buffer: 190 + index_buffer: 189 + index_buffer: 56 + index_buffer: 441 + index_buffer: 286 + index_buffer: 442 + index_buffer: 221 + index_buffer: 222 + index_buffer: 56 + index_buffer: 258 + index_buffer: 442 + index_buffer: 286 + index_buffer: 28 + index_buffer: 56 + index_buffer: 222 + index_buffer: 442 + index_buffer: 258 + index_buffer: 443 + index_buffer: 222 + index_buffer: 223 + index_buffer: 28 + index_buffer: 257 + index_buffer: 443 + index_buffer: 258 + index_buffer: 27 + index_buffer: 28 + index_buffer: 223 + index_buffer: 444 + index_buffer: 443 + index_buffer: 259 + index_buffer: 224 + index_buffer: 29 + index_buffer: 223 + index_buffer: 257 + index_buffer: 259 + index_buffer: 443 + index_buffer: 27 + index_buffer: 223 + index_buffer: 29 + index_buffer: 259 + index_buffer: 260 
+ index_buffer: 444 + index_buffer: 29 + index_buffer: 224 + index_buffer: 30 + index_buffer: 445 + index_buffer: 444 + index_buffer: 260 + index_buffer: 225 + index_buffer: 30 + index_buffer: 224 + index_buffer: 260 + index_buffer: 467 + index_buffer: 445 + index_buffer: 30 + index_buffer: 225 + index_buffer: 247 + index_buffer: 342 + index_buffer: 445 + index_buffer: 467 + index_buffer: 113 + index_buffer: 247 + index_buffer: 225 + index_buffer: 250 + index_buffer: 309 + index_buffer: 458 + index_buffer: 20 + index_buffer: 238 + index_buffer: 79 + index_buffer: 459 + index_buffer: 458 + index_buffer: 309 + index_buffer: 239 + index_buffer: 79 + index_buffer: 238 + index_buffer: 290 + index_buffer: 305 + index_buffer: 392 + index_buffer: 60 + index_buffer: 166 + index_buffer: 75 + index_buffer: 289 + index_buffer: 392 + index_buffer: 305 + index_buffer: 59 + index_buffer: 75 + index_buffer: 166 + index_buffer: 460 + index_buffer: 305 + index_buffer: 328 + index_buffer: 240 + index_buffer: 99 + index_buffer: 75 + index_buffer: 290 + index_buffer: 328 + index_buffer: 305 + index_buffer: 60 + index_buffer: 75 + index_buffer: 99 + index_buffer: 376 + index_buffer: 433 + index_buffer: 401 + index_buffer: 147 + index_buffer: 177 + index_buffer: 213 + index_buffer: 435 + index_buffer: 401 + index_buffer: 433 + index_buffer: 215 + index_buffer: 213 + index_buffer: 177 + index_buffer: 250 + index_buffer: 290 + index_buffer: 309 + index_buffer: 20 + index_buffer: 79 + index_buffer: 60 + index_buffer: 392 + index_buffer: 309 + index_buffer: 290 + index_buffer: 166 + index_buffer: 60 + index_buffer: 79 + index_buffer: 411 + index_buffer: 416 + index_buffer: 376 + index_buffer: 187 + index_buffer: 147 + index_buffer: 192 + index_buffer: 433 + index_buffer: 376 + index_buffer: 416 + index_buffer: 213 + index_buffer: 192 + index_buffer: 147 + index_buffer: 341 + index_buffer: 463 + index_buffer: 453 + index_buffer: 112 + index_buffer: 233 + index_buffer: 243 + index_buffer: 464 + index_buffer: 453 + index_buffer: 463 + index_buffer: 244 + index_buffer: 243 + index_buffer: 233 + index_buffer: 453 + index_buffer: 464 + index_buffer: 357 + index_buffer: 233 + index_buffer: 128 + index_buffer: 244 + index_buffer: 465 + index_buffer: 357 + index_buffer: 464 + index_buffer: 245 + index_buffer: 244 + index_buffer: 128 + index_buffer: 412 + index_buffer: 343 + index_buffer: 465 + index_buffer: 188 + index_buffer: 245 + index_buffer: 114 + index_buffer: 357 + index_buffer: 465 + index_buffer: 343 + index_buffer: 128 + index_buffer: 114 + index_buffer: 245 + index_buffer: 437 + index_buffer: 343 + index_buffer: 399 + index_buffer: 217 + index_buffer: 174 + index_buffer: 114 + index_buffer: 412 + index_buffer: 399 + index_buffer: 343 + index_buffer: 188 + index_buffer: 114 + index_buffer: 174 + index_buffer: 363 + index_buffer: 440 + index_buffer: 360 + index_buffer: 134 + index_buffer: 131 + index_buffer: 220 + index_buffer: 344 + index_buffer: 360 + index_buffer: 440 + index_buffer: 115 + index_buffer: 220 + index_buffer: 131 + index_buffer: 456 + index_buffer: 420 + index_buffer: 399 + index_buffer: 236 + index_buffer: 174 + index_buffer: 198 + index_buffer: 437 + index_buffer: 399 + index_buffer: 420 + index_buffer: 217 + index_buffer: 198 + index_buffer: 174 + index_buffer: 456 + index_buffer: 363 + index_buffer: 420 + index_buffer: 236 + index_buffer: 198 + index_buffer: 134 + index_buffer: 360 + index_buffer: 420 + index_buffer: 363 + index_buffer: 131 + index_buffer: 134 + index_buffer: 198 + index_buffer: 
361 + index_buffer: 401 + index_buffer: 288 + index_buffer: 132 + index_buffer: 58 + index_buffer: 177 + index_buffer: 435 + index_buffer: 288 + index_buffer: 401 + index_buffer: 215 + index_buffer: 177 + index_buffer: 58 + index_buffer: 353 + index_buffer: 265 + index_buffer: 383 + index_buffer: 124 + index_buffer: 156 + index_buffer: 35 + index_buffer: 372 + index_buffer: 383 + index_buffer: 265 + index_buffer: 143 + index_buffer: 35 + index_buffer: 156 + index_buffer: 255 + index_buffer: 249 + index_buffer: 339 + index_buffer: 25 + index_buffer: 110 + index_buffer: 7 + index_buffer: 390 + index_buffer: 339 + index_buffer: 249 + index_buffer: 163 + index_buffer: 7 + index_buffer: 110 + index_buffer: 261 + index_buffer: 255 + index_buffer: 448 + index_buffer: 31 + index_buffer: 228 + index_buffer: 25 + index_buffer: 339 + index_buffer: 448 + index_buffer: 255 + index_buffer: 110 + index_buffer: 25 + index_buffer: 228 + index_buffer: 14 + index_buffer: 317 + index_buffer: 13 + index_buffer: 14 + index_buffer: 13 + index_buffer: 87 + index_buffer: 312 + index_buffer: 13 + index_buffer: 317 + index_buffer: 82 + index_buffer: 87 + index_buffer: 13 + index_buffer: 317 + index_buffer: 402 + index_buffer: 312 + index_buffer: 87 + index_buffer: 82 + index_buffer: 178 + index_buffer: 311 + index_buffer: 312 + index_buffer: 402 + index_buffer: 81 + index_buffer: 178 + index_buffer: 82 + index_buffer: 402 + index_buffer: 318 + index_buffer: 311 + index_buffer: 178 + index_buffer: 81 + index_buffer: 88 + index_buffer: 310 + index_buffer: 311 + index_buffer: 318 + index_buffer: 80 + index_buffer: 88 + index_buffer: 81 + index_buffer: 318 + index_buffer: 324 + index_buffer: 310 + index_buffer: 88 + index_buffer: 80 + index_buffer: 95 + index_buffer: 415 + index_buffer: 310 + index_buffer: 324 + index_buffer: 191 + index_buffer: 95 + index_buffer: 80 + index_buffer: 468 + index_buffer: 471 + index_buffer: 472 + index_buffer: 469 + index_buffer: 468 + index_buffer: 472 + index_buffer: 470 + index_buffer: 468 + index_buffer: 469 + index_buffer: 470 + index_buffer: 471 + index_buffer: 468 + index_buffer: 473 + index_buffer: 476 + index_buffer: 477 + index_buffer: 474 + index_buffer: 473 + index_buffer: 477 + index_buffer: 475 + index_buffer: 473 + index_buffer: 474 + index_buffer: 475 + index_buffer: 476 + index_buffer: 473 +} diff --git a/mediapipe/objc/BUILD b/mediapipe/objc/BUILD index df6c8db08..481a60bb6 100644 --- a/mediapipe/objc/BUILD +++ b/mediapipe/objc/BUILD @@ -52,9 +52,9 @@ objc_library( ) MEDIAPIPE_IOS_SRCS = [ - "MPPGraph.mm", - "MPPTimestampConverter.mm", - "NSError+util_status.mm", + "MPPGraph.cc", + "MPPTimestampConverter.cc", + "NSError+util_status.cc", ] MEDIAPIPE_IOS_HDRS = [ @@ -63,11 +63,13 @@ MEDIAPIPE_IOS_HDRS = [ "NSError+util_status.h", ] -objc_library( +cc_library( name = "mediapipe_framework_ios", srcs = MEDIAPIPE_IOS_SRCS, hdrs = MEDIAPIPE_IOS_HDRS, copts = [ + "-x objective-c++", + "-fobjc-arc", # enable reference-counting "-Wno-shorten-64-to-32", ], # This build rule is public to allow external customers to build their own iOS apps. 
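In the hunks above and below, the Objective-C++ sources keep their contents but are renamed to `.cc` and built with `cc_library`: `-x objective-c++` makes clang still parse the `.cc` inputs as Objective-C++, and `-fobjc-arc` keeps automatic reference counting enabled. The `alwayslink = 1` added in the next hunk guards against a classic static-library pitfall: object files in a `cc_library` archive are only pulled into the final binary when some symbol in them is referenced, so code reached only through the Objective-C runtime or a static registration side effect can otherwise be silently dropped. A minimal sketch of that failure mode, in plain C++ with made-up names (not code from this patch):

```cpp
// registrar.cc -- a translation unit whose only purpose is a static
// initialization side effect. Nothing outside this file references any of
// its symbols, so when it sits in an ordinary static library the linker may
// drop the whole object file; alwayslink = 1 (or --whole-archive) forces it
// to be linked in so the side effect actually runs.
#include <cstdio>

namespace {
struct Registrar {
  Registrar() { std::puts("plugin registered"); }  // runs during static init
};
const Registrar kRegistrar;  // kept only if this object file is linked
}  // namespace
```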
@@ -99,6 +101,7 @@ objc_library( "@com_google_absl//absl/synchronization", "@google_toolbox_for_mac//:GTM_Defines", ], + alwayslink = 1, ) objc_library( diff --git a/mediapipe/objc/MPPGraph.mm b/mediapipe/objc/MPPGraph.cc similarity index 74% rename from mediapipe/objc/MPPGraph.mm rename to mediapipe/objc/MPPGraph.cc index 3123eb863..df9a1ebd6 100644 --- a/mediapipe/objc/MPPGraph.mm +++ b/mediapipe/objc/MPPGraph.cc @@ -19,6 +19,7 @@ #include +#import "GTMDefines.h" #include "absl/memory/memory.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image.h" @@ -26,22 +27,22 @@ #include "mediapipe/framework/graph_service.h" #include "mediapipe/gpu/gl_base.h" #include "mediapipe/gpu/gpu_shared_data_internal.h" +#import "mediapipe/objc/NSError+util_status.h" #include "mediapipe/objc/util.h" -#import "mediapipe/objc/NSError+util_status.h" -#import "GTMDefines.h" - @implementation MPPGraph { - // Graph is wrapped in a unique_ptr because it was generating 39+KB of unnecessary ObjC runtime - // information. See https://medium.com/@dmaclach/objective-c-encoding-and-you-866624cc02de - // for details. + // Graph is wrapped in a unique_ptr because it was generating 39+KB of + // unnecessary ObjC runtime information. See + // https://medium.com/@dmaclach/objective-c-encoding-and-you-866624cc02de for + // details. std::unique_ptr _graph; /// Input side packets that will be added to the graph when it is started. std::map _inputSidePackets; /// Packet headers that will be added to the graph when it is started. std::map _streamHeaders; /// Service packets to be added to the graph when it is started. - std::map _servicePackets; + std::map + _servicePackets; /// Number of frames currently being processed by the graph. std::atomic _framesInFlight; @@ -56,7 +57,8 @@ BOOL _started; } -- (instancetype)initWithGraphConfig:(const mediapipe::CalculatorGraphConfig&)config { +- (instancetype)initWithGraphConfig: + (const mediapipe::CalculatorGraphConfig&)config { self = [super init]; if (self) { // Turn on Cocoa multithreading, since MediaPipe uses threads. @@ -76,40 +78,47 @@ return _graph->GetGraphInputStreamAddMode(); } -- (void)setPacketAddMode:(mediapipe::CalculatorGraph::GraphInputStreamAddMode)mode { +- (void)setPacketAddMode: + (mediapipe::CalculatorGraph::GraphInputStreamAddMode)mode { _graph->SetGraphInputStreamAddMode(mode); } - (void)addFrameOutputStream:(const std::string&)outputStreamName outputPacketType:(MPPPacketType)packetType { std::string callbackInputName; - mediapipe::tool::AddCallbackCalculator(outputStreamName, &_config, &callbackInputName, - /*use_std_function=*/true); - // No matter what ownership qualifiers are put on the pointer, NewPermanentCallback will - // still end up with a strong pointer to MPPGraph*. That is why we use void* instead. + mediapipe::tool::AddCallbackCalculator(outputStreamName, &_config, + &callbackInputName, + /*use_std_function=*/true); + // No matter what ownership qualifiers are put on the pointer, + // NewPermanentCallback will still end up with a strong pointer to MPPGraph*. + // That is why we use void* instead. 
void* wrapperVoid = (__bridge void*)self; _inputSidePackets[callbackInputName] = mediapipe::MakePacket>( - [wrapperVoid, outputStreamName, packetType](const mediapipe::Packet& packet) { - CallFrameDelegate(wrapperVoid, outputStreamName, packetType, packet); + [wrapperVoid, outputStreamName, + packetType](const mediapipe::Packet& packet) { + CallFrameDelegate(wrapperVoid, outputStreamName, packetType, + packet); }); } -- (NSString *)description { - return [NSString stringWithFormat:@"<%@: %p; framesInFlight = %d>", [self class], self, - _framesInFlight.load(std::memory_order_relaxed)]; +- (NSString*)description { + return [NSString + stringWithFormat:@"<%@: %p; framesInFlight = %d>", [self class], self, + _framesInFlight.load(std::memory_order_relaxed)]; } /// This is the function that gets called by the CallbackCalculator that /// receives the graph's output. void CallFrameDelegate(void* wrapperVoid, const std::string& streamName, - MPPPacketType packetType, const mediapipe::Packet& packet) { + MPPPacketType packetType, + const mediapipe::Packet& packet) { MPPGraph* wrapper = (__bridge MPPGraph*)wrapperVoid; @autoreleasepool { if (packetType == MPPPacketTypeRaw) { [wrapper.delegate mediapipeGraph:wrapper - didOutputPacket:packet - fromStream:streamName]; + didOutputPacket:packet + fromStream:streamName]; } else if (packetType == MPPPacketTypeImageFrame) { wrapper->_framesInFlight--; const auto& frame = packet.Get(); @@ -118,13 +127,16 @@ void CallFrameDelegate(void* wrapperVoid, const std::string& streamName, if (format == mediapipe::ImageFormat::SRGBA || format == mediapipe::ImageFormat::GRAY8) { CVPixelBufferRef pixelBuffer; - // If kCVPixelFormatType_32RGBA does not work, it returns kCVReturnInvalidPixelFormat. + // If kCVPixelFormatType_32RGBA does not work, it returns + // kCVReturnInvalidPixelFormat. CVReturn error = CVPixelBufferCreate( NULL, frame.Width(), frame.Height(), kCVPixelFormatType_32BGRA, GetCVPixelBufferAttributesForGlCompatibility(), &pixelBuffer); - _GTMDevAssert(error == kCVReturnSuccess, @"CVPixelBufferCreate failed: %d", error); + _GTMDevAssert(error == kCVReturnSuccess, + @"CVPixelBufferCreate failed: %d", error); error = CVPixelBufferLockBaseAddress(pixelBuffer, 0); - _GTMDevAssert(error == kCVReturnSuccess, @"CVPixelBufferLockBaseAddress failed: %d", error); + _GTMDevAssert(error == kCVReturnSuccess, + @"CVPixelBufferLockBaseAddress failed: %d", error); vImage_Buffer vDestination = vImageForCVPixelBuffer(pixelBuffer); // Note: we have to throw away const here, but we should not overwrite @@ -133,30 +145,35 @@ void CallFrameDelegate(void* wrapperVoid, const std::string& streamName, if (format == mediapipe::ImageFormat::SRGBA) { // Swap R and B channels. 
const uint8_t permuteMap[4] = {2, 1, 0, 3}; - vImage_Error __unused vError = - vImagePermuteChannels_ARGB8888(&vSource, &vDestination, permuteMap, kvImageNoFlags); - _GTMDevAssert(vError == kvImageNoError, @"vImagePermuteChannels failed: %zd", vError); + vImage_Error __unused vError = vImagePermuteChannels_ARGB8888( + &vSource, &vDestination, permuteMap, kvImageNoFlags); + _GTMDevAssert(vError == kvImageNoError, + @"vImagePermuteChannels failed: %zd", vError); } else { // Convert grayscale back to BGRA - vImage_Error __unused vError = vImageGrayToBGRA(&vSource, &vDestination); - _GTMDevAssert(vError == kvImageNoError, @"vImageGrayToBGRA failed: %zd", vError); + vImage_Error __unused vError = + vImageGrayToBGRA(&vSource, &vDestination); + _GTMDevAssert(vError == kvImageNoError, + @"vImageGrayToBGRA failed: %zd", vError); } error = CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); _GTMDevAssert(error == kCVReturnSuccess, @"CVPixelBufferUnlockBaseAddress failed: %d", error); - if ([wrapper.delegate respondsToSelector:@selector - (mediapipeGraph:didOutputPixelBuffer:fromStream:timestamp:)]) { + if ([wrapper.delegate + respondsToSelector:@selector + (mediapipeGraph:didOutputPixelBuffer:fromStream:timestamp:)]) { [wrapper.delegate mediapipeGraph:wrapper - didOutputPixelBuffer:pixelBuffer - fromStream:streamName - timestamp:packet.Timestamp()]; - } else if ([wrapper.delegate respondsToSelector:@selector - (mediapipeGraph:didOutputPixelBuffer:fromStream:)]) { + didOutputPixelBuffer:pixelBuffer + fromStream:streamName + timestamp:packet.Timestamp()]; + } else if ([wrapper.delegate + respondsToSelector:@selector + (mediapipeGraph:didOutputPixelBuffer:fromStream:)]) { [wrapper.delegate mediapipeGraph:wrapper - didOutputPixelBuffer:pixelBuffer - fromStream:streamName]; + didOutputPixelBuffer:pixelBuffer + fromStream:streamName]; } CVPixelBufferRelease(pixelBuffer); } else { @@ -168,22 +185,23 @@ void CallFrameDelegate(void* wrapperVoid, const std::string& streamName, wrapper->_framesInFlight--; CVPixelBufferRef pixelBuffer; if (packetType == MPPPacketTypePixelBuffer) - pixelBuffer = mediapipe::GetCVPixelBufferRef(packet.Get()); + pixelBuffer = + mediapipe::GetCVPixelBufferRef(packet.Get()); else pixelBuffer = packet.Get().GetCVPixelBufferRef(); -if ([wrapper.delegate + if ([wrapper.delegate respondsToSelector:@selector (mediapipeGraph:didOutputPixelBuffer:fromStream:timestamp:)]) { [wrapper.delegate mediapipeGraph:wrapper - didOutputPixelBuffer:pixelBuffer - fromStream:streamName - timestamp:packet.Timestamp()]; + didOutputPixelBuffer:pixelBuffer + fromStream:streamName + timestamp:packet.Timestamp()]; } else if ([wrapper.delegate respondsToSelector:@selector (mediapipeGraph:didOutputPixelBuffer:fromStream:)]) { [wrapper.delegate mediapipeGraph:wrapper - didOutputPixelBuffer:pixelBuffer - fromStream:streamName]; + didOutputPixelBuffer:pixelBuffer + fromStream:streamName]; } #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER } else { @@ -192,13 +210,15 @@ if ([wrapper.delegate } } -- (void)setHeaderPacket:(const mediapipe::Packet&)packet forStream:(const std::string&)streamName { +- (void)setHeaderPacket:(const mediapipe::Packet&)packet + forStream:(const std::string&)streamName { _GTMDevAssert(!_started, @"%@ must be called before the graph is started", NSStringFromSelector(_cmd)); _streamHeaders[streamName] = packet; } -- (void)setSidePacket:(const mediapipe::Packet&)packet named:(const std::string&)name { +- (void)setSidePacket:(const mediapipe::Packet&)packet + named:(const std::string&)name { 
_GTMDevAssert(!_started, @"%@ must be called before the graph is started", NSStringFromSelector(_cmd)); _inputSidePackets[name] = packet; @@ -211,7 +231,8 @@ if ([wrapper.delegate _servicePackets[&service] = std::move(packet); } -- (void)addSidePackets:(const std::map&)extraSidePackets { +- (void)addSidePackets: + (const std::map&)extraSidePackets { _GTMDevAssert(!_started, @"%@ must be called before the graph is started", NSStringFromSelector(_cmd)); _inputSidePackets.insert(extraSidePackets.begin(), extraSidePackets.end()); @@ -232,7 +253,8 @@ if ([wrapper.delegate - (absl::Status)performStart { absl::Status status; for (const auto& service_packet : _servicePackets) { - status = _graph->SetServicePacket(*service_packet.first, service_packet.second); + status = + _graph->SetServicePacket(*service_packet.first, service_packet.second); if (!status.ok()) { return status; } @@ -269,11 +291,12 @@ if ([wrapper.delegate } - (BOOL)waitUntilDoneWithError:(NSError**)error { - // Since this method blocks with no timeout, it should not be called in the main thread in - // an app. However, it's fine to allow that in a test. + // Since this method blocks with no timeout, it should not be called in the + // main thread in an app. However, it's fine to allow that in a test. // TODO: is this too heavy-handed? Maybe a warning would be fine. - _GTMDevAssert(![NSThread isMainThread] || (NSClassFromString(@"XCTest")), - @"waitUntilDoneWithError: should not be called on the main thread"); + _GTMDevAssert( + ![NSThread isMainThread] || (NSClassFromString(@"XCTest")), + @"waitUntilDoneWithError: should not be called on the main thread"); absl::Status status = _graph->WaitUntilDone(); _started = NO; if (!status.ok() && error) *error = [NSError gus_errorWithStatus:status]; @@ -289,7 +312,8 @@ if ([wrapper.delegate - (BOOL)movePacket:(mediapipe::Packet&&)packet intoStream:(const std::string&)streamName error:(NSError**)error { - absl::Status status = _graph->AddPacketToInputStream(streamName, std::move(packet)); + absl::Status status = + _graph->AddPacketToInputStream(streamName, std::move(packet)); if (!status.ok() && error) *error = [NSError gus_errorWithStatus:status]; return status.ok(); } @@ -305,15 +329,17 @@ if ([wrapper.delegate - (BOOL)setMaxQueueSize:(int)maxQueueSize forStream:(const std::string&)streamName error:(NSError**)error { - absl::Status status = _graph->SetInputStreamMaxQueueSize(streamName, maxQueueSize); + absl::Status status = + _graph->SetInputStreamMaxQueueSize(streamName, maxQueueSize); if (!status.ok() && error) *error = [NSError gus_errorWithStatus:status]; return status.ok(); } - (mediapipe::Packet)packetWithPixelBuffer:(CVPixelBufferRef)imageBuffer - packetType:(MPPPacketType)packetType { + packetType:(MPPPacketType)packetType { mediapipe::Packet packet; - if (packetType == MPPPacketTypeImageFrame || packetType == MPPPacketTypeImageFrameBGRANoSwap) { + if (packetType == MPPPacketTypeImageFrame || + packetType == MPPPacketTypeImageFrameBGRANoSwap) { auto frame = CreateImageFrameForCVPixelBuffer( imageBuffer, /* canOverwrite = */ false, /* bgrAsRgb = */ packetType == MPPPacketTypeImageFrameBGRANoSwap); @@ -328,7 +354,8 @@ if ([wrapper.delegate packet = mediapipe::MakePacket(imageBuffer); #else // CPU - auto frame = CreateImageFrameForCVPixelBuffer(imageBuffer, /* canOverwrite = */ false, + auto frame = CreateImageFrameForCVPixelBuffer(imageBuffer, + /* canOverwrite = */ false, /* bgrAsRgb = */ false); packet = mediapipe::MakePacket(std::move(frame)); #endif // 
MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER @@ -339,7 +366,8 @@ if ([wrapper.delegate } - (mediapipe::Packet)imagePacketWithPixelBuffer:(CVPixelBufferRef)pixelBuffer { - return [self packetWithPixelBuffer:(pixelBuffer) packetType:(MPPPacketTypeImage)]; + return [self packetWithPixelBuffer:(pixelBuffer) + packetType:(MPPPacketTypeImage)]; } - (BOOL)sendPixelBuffer:(CVPixelBufferRef)imageBuffer @@ -367,13 +395,16 @@ if ([wrapper.delegate allowOverwrite:(BOOL)allowOverwrite error:(NSError**)error { if (_maxFramesInFlight && _framesInFlight >= _maxFramesInFlight) return NO; - mediapipe::Packet packet = [self packetWithPixelBuffer:imageBuffer packetType:packetType]; + mediapipe::Packet packet = + [self packetWithPixelBuffer:imageBuffer packetType:packetType]; BOOL success; if (allowOverwrite) { packet = std::move(packet).At(timestamp); - success = [self movePacket:std::move(packet) intoStream:inputName error:error]; + success = + [self movePacket:std::move(packet) intoStream:inputName error:error]; } else { - success = [self sendPacket:packet.At(timestamp) intoStream:inputName error:error]; + success = + [self sendPacket:packet.At(timestamp) intoStream:inputName error:error]; } if (success) _framesInFlight++; return success; @@ -407,22 +438,24 @@ if ([wrapper.delegate } - (void)debugPrintGlInfo { - std::shared_ptr gpu_resources = _graph->GetGpuResources(); + std::shared_ptr gpu_resources = + _graph->GetGpuResources(); if (!gpu_resources) { NSLog(@"GPU not set up."); return; } NSString* extensionString; - (void)gpu_resources->gl_context()->Run([&extensionString]{ - extensionString = [NSString stringWithUTF8String:(char*)glGetString(GL_EXTENSIONS)]; + (void)gpu_resources->gl_context()->Run([&extensionString] { + extensionString = + [NSString stringWithUTF8String:(char*)glGetString(GL_EXTENSIONS)]; return absl::OkStatus(); }); - NSArray* extensions = [extensionString componentsSeparatedByCharactersInSet: - [NSCharacterSet whitespaceCharacterSet]]; - for (NSString* oneExtension in extensions) - NSLog(@"%@", oneExtension); + NSArray* extensions = [extensionString + componentsSeparatedByCharactersInSet:[NSCharacterSet + whitespaceCharacterSet]]; + for (NSString* oneExtension in extensions) NSLog(@"%@", oneExtension); } @end diff --git a/mediapipe/objc/MPPTimestampConverter.mm b/mediapipe/objc/MPPTimestampConverter.cc similarity index 81% rename from mediapipe/objc/MPPTimestampConverter.mm rename to mediapipe/objc/MPPTimestampConverter.cc index e53758d71..44857c8e9 100644 --- a/mediapipe/objc/MPPTimestampConverter.mm +++ b/mediapipe/objc/MPPTimestampConverter.cc @@ -20,8 +20,7 @@ mediapipe::TimestampDiff _timestampOffset; } -- (instancetype)init -{ +- (instancetype)init { self = [super init]; if (self) { [self reset]; @@ -36,11 +35,14 @@ } - (mediapipe::Timestamp)timestampForMediaTime:(CMTime)mediaTime { - Float64 sampleSeconds = CMTIME_IS_VALID(mediaTime) ? CMTimeGetSeconds(mediaTime) : 0; - const int64 sampleUsec = sampleSeconds * mediapipe::Timestamp::kTimestampUnitsPerSecond; + Float64 sampleSeconds = + CMTIME_IS_VALID(mediaTime) ? 
CMTimeGetSeconds(mediaTime) : 0; + const int64 sampleUsec = + sampleSeconds * mediapipe::Timestamp::kTimestampUnitsPerSecond; _mediapipeTimestamp = mediapipe::Timestamp(sampleUsec) + _timestampOffset; if (_mediapipeTimestamp <= _lastTimestamp) { - _timestampOffset = _timestampOffset + _lastTimestamp + 1 - _mediapipeTimestamp; + _timestampOffset = + _timestampOffset + _lastTimestamp + 1 - _mediapipeTimestamp; _mediapipeTimestamp = _lastTimestamp + 1; } _lastTimestamp = _mediapipeTimestamp; diff --git a/mediapipe/objc/NSError+util_status.cc b/mediapipe/objc/NSError+util_status.cc new file mode 100644 index 000000000..144ec6ed4 --- /dev/null +++ b/mediapipe/objc/NSError+util_status.cc @@ -0,0 +1,72 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/objc/NSError+util_status.h" + +@implementation GUSUtilStatusWrapper + ++ (instancetype)wrapStatus:(const absl::Status &)status { + return [[self alloc] initWithStatus:status]; +} + +- (instancetype)initWithStatus:(const absl::Status &)status { + self = [super init]; + if (self) { + _status = status; + } + return self; +} + +- (NSString *)description { + return [NSString stringWithFormat:@"<%@: %p; status = %s>", [self class], + self, _status.message().data()]; +} + +@end + +@implementation NSError (GUSGoogleUtilStatus) + +NSString *const kGUSGoogleUtilStatusErrorDomain = + @"GoogleUtilStatusErrorDomain"; +NSString *const kGUSGoogleUtilStatusErrorKey = @"GUSGoogleUtilStatusErrorKey"; + ++ (NSError *)gus_errorWithStatus:(const absl::Status &)status { + NSDictionary *userInfo = @{ + NSLocalizedDescriptionKey : @(status.message().data()), + kGUSGoogleUtilStatusErrorKey : [GUSUtilStatusWrapper wrapStatus:status], + }; + NSError *error = + [NSError errorWithDomain:kGUSGoogleUtilStatusErrorDomain + code:static_cast(status.code()) + userInfo:userInfo]; + return error; +} + +- (absl::Status)gus_status { + NSString *domain = self.domain; + if ([domain isEqual:kGUSGoogleUtilStatusErrorDomain]) { + GUSUtilStatusWrapper *wrapper = self.userInfo[kGUSGoogleUtilStatusErrorKey]; + if (wrapper) return wrapper.status; +#if 0 + // Unfortunately, util/task/posixerrorspace.h is not in portable status yet. + // TODO: fix that. 
+ } else if ([domain isEqual:NSPOSIXErrorDomain]) { + return ::util::PosixErrorToStatus(self.code, self.localizedDescription.UTF8String); +#endif + } + return absl::Status(absl::StatusCode::kUnknown, + self.localizedDescription.UTF8String); +} + +@end diff --git a/mediapipe/python/image_test.py b/mediapipe/python/image_test.py index cd9124948..3181fb5f1 100644 --- a/mediapipe/python/image_test.py +++ b/mediapipe/python/image_test.py @@ -207,8 +207,12 @@ class ImageTest(absltest.TestCase): loaded_image = Image.create_from_file(image_path) self.assertEqual(loaded_image.width, 720) self.assertEqual(loaded_image.height, 382) - self.assertEqual(loaded_image.channels, 3) - self.assertEqual(loaded_image.image_format, ImageFormat.SRGB) + # On Mac w/ GPU support, images use 4 channels (SRGBA). Otherwise, all + # images use 3 channels (SRGB). + self.assertIn(loaded_image.channels, [3, 4]) + self.assertIn( + loaded_image.image_format, [ImageFormat.SRGB, ImageFormat.SRGBA] + ) if __name__ == '__main__': absltest.main() diff --git a/mediapipe/python/pybind/image.cc b/mediapipe/python/pybind/image.cc index 800e883b4..98f162342 100644 --- a/mediapipe/python/pybind/image.cc +++ b/mediapipe/python/pybind/image.cc @@ -51,10 +51,10 @@ void ImageSubmodule(pybind11::module* module) { ```python import cv2 - cv_mat = cv2.imread(input_file)[:, :, ::-1] - rgb_frame = mp.Image(image_format=ImageFormat.SRGB, data=cv_mat) + cv_mat = cv2.imread(input_file) + rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv_mat) gray_frame = mp.Image( - image_format=ImageFormat.GRAY, + image_format=mp.ImageFormat.GRAY8, data=cv2.cvtColor(cv_mat, cv2.COLOR_RGB2GRAY)) from PIL import Image @@ -244,12 +244,26 @@ void ImageSubmodule(pybind11::module* module) { image.def_static( "create_from_file", [](const std::string& file_name) { + unsigned char* image_data = nullptr; int width; int height; int channels; - auto* image_data = - stbi_load(file_name.c_str(), &width, &height, &channels, - /*desired_channels=*/0); + +#if TARGET_OS_OSX && !MEDIAPIPE_DISABLE_GPU + // Our ObjC layer does not support 3-channel images, so we read the + // number of channels first and request RGBA if needed. + if (stbi_info(file_name.c_str(), &width, &height, &channels)) { + if (channels == 3) { + channels = 4; + } + int unused; + image_data = + stbi_load(file_name.c_str(), &width, &height, &unused, channels); + } +#else + image_data = stbi_load(file_name.c_str(), &width, &height, &channels, + /*desired_channels=*/0); +#endif // TARGET_OS_OSX && !MEDIAPIPE_DISABLE_GPU if (image_data == nullptr) { throw RaisePyError(PyExc_RuntimeError, absl::StrFormat("Image decoding failed (%s): %s", @@ -263,11 +277,13 @@ void ImageSubmodule(pybind11::module* module) { ImageFormat::GRAY8, width, height, width, image_data, stbi_image_free); break; +#if !TARGET_OS_OSX || MEDIAPIPE_DISABLE_GPU case 3: image_frame = std::make_shared( ImageFormat::SRGB, width, height, 3 * width, image_data, stbi_image_free); break; +#endif // !TARGET_OS_OSX || MEDIAPIPE_DISABLE_GPU case 4: image_frame = std::make_shared( ImageFormat::SRGBA, width, height, 4 * width, image_data, diff --git a/mediapipe/python/pybind/image_frame.cc b/mediapipe/python/pybind/image_frame.cc index 7348133eb..90db05066 100644 --- a/mediapipe/python/pybind/image_frame.cc +++ b/mediapipe/python/pybind/image_frame.cc @@ -81,17 +81,20 @@ void ImageFrameSubmodule(pybind11::module* module) { become immutable after creation. 
Creation examples: - import cv2 - cv_mat = cv2.imread(input_file)[:, :, ::-1] - rgb_frame = mp.ImageFrame(image_format=ImageFormat.SRGB, data=cv_mat) - gray_frame = mp.ImageFrame( - image_format=ImageFormat.GRAY, - data=cv2.cvtColor(cv_mat, cv2.COLOR_RGB2GRAY)) - from PIL import Image - pil_img = Image.new('RGB', (60, 30), color = 'red') - image_frame = mp.ImageFrame( - image_format=mp.ImageFormat.SRGB, data=np.asarray(pil_img)) + ```python + import cv2 + cv_mat = cv2.imread(input_file) + rgb_frame = mp.ImageFrame(image_format=ImageFormat.SRGB, data=cv_mat) + gray_frame = mp.ImageFrame( + image_format=ImageFormat.GRAY, + data=cv2.cvtColor(cv_mat, cv2.COLOR_RGB2GRAY)) + + from PIL import Image + pil_img = Image.new('RGB', (60, 30), color = 'red') + image_frame = mp.ImageFrame( + image_format=mp.ImageFormat.SRGB, data=np.asarray(pil_img)) + ``` The pixel data in an ImageFrame can be retrieved as a numpy ndarray by calling `ImageFrame.numpy_view()`. The returned numpy ndarray is a reference to the diff --git a/mediapipe/python/solutions/drawing_utils.py b/mediapipe/python/solutions/drawing_utils.py index a1acc0be2..78e931264 100644 --- a/mediapipe/python/solutions/drawing_utils.py +++ b/mediapipe/python/solutions/drawing_utils.py @@ -125,7 +125,8 @@ def draw_landmarks( color=RED_COLOR), connection_drawing_spec: Union[DrawingSpec, Mapping[Tuple[int, int], - DrawingSpec]] = DrawingSpec()): + DrawingSpec]] = DrawingSpec(), + is_drawing_landmarks: bool = True): """Draws the landmarks and the connections on the image. Args: @@ -142,6 +143,8 @@ def draw_landmarks( connections to the DrawingSpecs that specifies the connections' drawing settings such as color and line thickness. If this argument is explicitly set to None, no landmark connections will be drawn. + is_drawing_landmarks: Whether to draw landmarks. If set to False, landmark + drawing is skipped and only the connections are drawn. Raises: ValueError: If one of the followings: @@ -181,7 +184,7 @@ def draw_landmarks( drawing_spec.thickness) # Draws landmark points after finishing the connection lines, which is # aesthetically better.
- if landmark_drawing_spec: + if is_drawing_landmarks and landmark_drawing_spec: for idx, landmark_px in idx_to_coordinates.items(): drawing_spec = landmark_drawing_spec[idx] if isinstance( landmark_drawing_spec, Mapping) else landmark_drawing_spec diff --git a/mediapipe/tasks/c/components/containers/BUILD b/mediapipe/tasks/c/components/containers/BUILD index 4bb580873..3c4b557b3 100644 --- a/mediapipe/tasks/c/components/containers/BUILD +++ b/mediapipe/tasks/c/components/containers/BUILD @@ -43,6 +43,87 @@ cc_test( ], ) +cc_library( + name = "landmark", + hdrs = ["landmark.h"], +) + +cc_library( + name = "landmark_converter", + srcs = ["landmark_converter.cc"], + hdrs = ["landmark_converter.h"], + deps = [ + ":landmark", + "//mediapipe/tasks/cc/components/containers:landmark", + ], +) + +cc_test( + name = "landmark_converter_test", + srcs = ["landmark_converter_test.cc"], + deps = [ + ":landmark", + ":landmark_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/cc/components/containers:landmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "rect", + hdrs = ["rect.h"], +) + +cc_library( + name = "rect_converter", + srcs = ["rect_converter.cc"], + hdrs = ["rect_converter.h"], + deps = [ + ":rect", + "//mediapipe/tasks/cc/components/containers:rect", + ], +) + +cc_test( + name = "rect_converter_test", + srcs = ["rect_converter_test.cc"], + deps = [ + ":rect", + ":rect_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/cc/components/containers:rect", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "keypoint", + hdrs = ["keypoint.h"], +) + +cc_library( + name = "keypoint_converter", + srcs = ["keypoint_converter.cc"], + hdrs = ["keypoint_converter.h"], + deps = [ + ":keypoint", + "//mediapipe/tasks/cc/components/containers:keypoint", + ], +) + +cc_test( + name = "keypoint_converter_test", + srcs = ["keypoint_converter_test.cc"], + deps = [ + ":keypoint", + ":keypoint_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/cc/components/containers:keypoint", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "classification_result", hdrs = ["classification_result.h"], @@ -72,6 +153,39 @@ cc_test( ], ) +cc_library( + name = "detection_result", + hdrs = ["detection_result.h"], + deps = [":rect"], +) + +cc_library( + name = "detection_result_converter", + srcs = ["detection_result_converter.cc"], + hdrs = ["detection_result_converter.h"], + deps = [ + ":category", + ":category_converter", + ":detection_result", + ":keypoint", + ":keypoint_converter", + ":rect_converter", + "//mediapipe/tasks/cc/components/containers:detection_result", + ], +) + +cc_test( + name = "detection_result_converter_test", + srcs = ["detection_result_converter_test.cc"], + deps = [ + ":detection_result", + ":detection_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/cc/components/containers:detection_result", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "embedding_result", hdrs = ["embedding_result.h"], @@ -98,26 +212,3 @@ cc_test( "@com_google_googletest//:gtest_main", ], ) - -cc_library( - name = "language_detection_result_converter", - srcs = ["language_detection_result_converter.cc"], - hdrs = ["language_detection_result_converter.h"], - deps = [ - "//mediapipe/tasks/c/text/language_detector", - "//mediapipe/tasks/cc/text/language_detector", - ], -) - -cc_test( - name = "language_detection_result_converter_test", - srcs = 
["language_detection_result_converter_test.cc"], - linkstatic = 1, - deps = [ - ":language_detection_result_converter", - "//mediapipe/framework/port:gtest", - "//mediapipe/tasks/c/text/language_detector", - "//mediapipe/tasks/cc/text/language_detector", - "@com_google_googletest//:gtest_main", - ], -) diff --git a/mediapipe/tasks/c/components/containers/category.h b/mediapipe/tasks/c/components/containers/category.h index 9a47815ab..033831162 100644 --- a/mediapipe/tasks/c/components/containers/category.h +++ b/mediapipe/tasks/c/components/containers/category.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_CATEGORY_H_ #define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_CATEGORY_H_ +#include + #ifdef __cplusplus extern "C" { #endif @@ -43,6 +45,12 @@ struct Category { char* display_name; }; +// A list of categories. +struct Categories { + struct Category* categories; + uint32_t categories_count; +}; + #ifdef __cplusplus } // extern C #endif diff --git a/mediapipe/tasks/c/components/containers/category_converter.cc b/mediapipe/tasks/c/components/containers/category_converter.cc index b04f86c9a..dcc4d9d23 100644 --- a/mediapipe/tasks/c/components/containers/category_converter.cc +++ b/mediapipe/tasks/c/components/containers/category_converter.cc @@ -41,4 +41,14 @@ void CppCloseCategory(Category* in) { in->display_name = nullptr; } +void CppCloseCategories(Categories* in) { + for (int i = 0; i < in->categories_count; ++i) { + CppCloseCategory(&in->categories[i]); + } + delete[] in->categories; + + in->categories = nullptr; + in->categories_count = 0; +} + } // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/category_converter.h b/mediapipe/tasks/c/components/containers/category_converter.h index 9edf539b2..680a99f9c 100644 --- a/mediapipe/tasks/c/components/containers/category_converter.h +++ b/mediapipe/tasks/c/components/containers/category_converter.h @@ -27,6 +27,8 @@ void CppConvertToCategory( void CppCloseCategory(Category* in); +void CppCloseCategories(Categories* in); + } // namespace mediapipe::tasks::c::components::containers #endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_CATEGORY_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/detection_result.h b/mediapipe/tasks/c/components/containers/detection_result.h new file mode 100644 index 000000000..0fd7722a1 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/detection_result.h @@ -0,0 +1,63 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_DETECTION_RESULT_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_DETECTION_RESULT_H_ + +#include + +#include "mediapipe/tasks/c/components/containers/rect.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Detection for a single bounding box. +struct Detection { + // An array of detected categories. 
+ struct Category* categories; + + // The number of elements in the categories array. + uint32_t categories_count; + + // The bounding box location. + struct MPRect bounding_box; + + // Optional list of keypoints associated with the detection. Keypoints + // represent interesting points related to the detection. For example, the + // keypoints can represent the eyes, ears and mouth from a face detection + // model, or, in template matching detection (e.g. KNIFT), the feature + // points used for template matching. + // `nullptr` if keypoints are not present. + struct NormalizedKeypoint* keypoints; + + // The number of elements in the keypoints array. 0 if keypoints do not exist. + uint32_t keypoints_count; +}; + +// Detection results of a model. +struct DetectionResult { + // An array of Detections. + struct Detection* detections; + + // The number of detections in the detections array. + uint32_t detections_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_DETECTION_RESULT_H_ diff --git a/mediapipe/tasks/c/components/containers/detection_result_converter.cc b/mediapipe/tasks/c/components/containers/detection_result_converter.cc new file mode 100644 index 000000000..dc76579bc --- /dev/null +++ b/mediapipe/tasks/c/components/containers/detection_result_converter.cc @@ -0,0 +1,86 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/detection_result_converter.h" + +#include + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/category_converter.h" +#include "mediapipe/tasks/c/components/containers/detection_result.h" +#include "mediapipe/tasks/c/components/containers/keypoint.h" +#include "mediapipe/tasks/c/components/containers/keypoint_converter.h" +#include "mediapipe/tasks/c/components/containers/rect_converter.h" +#include "mediapipe/tasks/cc/components/containers/detection_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToDetection( + const mediapipe::tasks::components::containers::Detection& in, + ::Detection* out) { + out->categories_count = in.categories.size(); + out->categories = new Category[out->categories_count]; + for (size_t i = 0; i < out->categories_count; ++i) { + CppConvertToCategory(in.categories[i], &out->categories[i]); + } + + CppConvertToRect(in.bounding_box, &out->bounding_box); + + if (in.keypoints.has_value()) { + auto& keypoints = in.keypoints.value(); + out->keypoints_count = keypoints.size(); + out->keypoints = new NormalizedKeypoint[out->keypoints_count]; + for (size_t i = 0; i < out->keypoints_count; ++i) { + CppConvertToNormalizedKeypoint(keypoints[i], &out->keypoints[i]); + } + } else { + out->keypoints = nullptr; + out->keypoints_count = 0; + } +} + +void CppConvertToDetectionResult( + const mediapipe::tasks::components::containers::DetectionResult& in, + ::DetectionResult* out) { + out->detections_count = in.detections.size(); + out->detections = new ::Detection[out->detections_count]; + for (size_t i = 0; i < out->detections_count; ++i) { + CppConvertToDetection(in.detections[i], &out->detections[i]); + } +} + +// Functions to free the memory of C structures. +void CppCloseDetection(::Detection* in) { + for (size_t i = 0; i < in->categories_count; ++i) { + CppCloseCategory(&in->categories[i]); + } + delete[] in->categories; + in->categories = nullptr; + for (size_t i = 0; i < in->keypoints_count; ++i) { + CppCloseNormalizedKeypoint(&in->keypoints[i]); + } + delete[] in->keypoints; + in->keypoints = nullptr; +} + +void CppCloseDetectionResult(::DetectionResult* in) { + for (size_t i = 0; i < in->detections_count; ++i) { + CppCloseDetection(&in->detections[i]); + } + delete[] in->detections; + in->detections = nullptr; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/detection_result_converter.h b/mediapipe/tasks/c/components/containers/detection_result_converter.h new file mode 100644 index 000000000..e338e47e9 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/detection_result_converter.h @@ -0,0 +1,38 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_DETECTION_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_DETECTION_RESULT_CONVERTER_H_ + +#include "mediapipe/tasks/c/components/containers/detection_result.h" +#include "mediapipe/tasks/cc/components/containers/detection_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToDetection( + const mediapipe::tasks::components::containers::Detection& in, + Detection* out); + +void CppConvertToDetectionResult( + const mediapipe::tasks::components::containers::DetectionResult& in, + DetectionResult* out); + +void CppCloseDetection(Detection* in); + +void CppCloseDetectionResult(DetectionResult* in); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_DETECTION_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/detection_result_converter_test.cc b/mediapipe/tasks/c/components/containers/detection_result_converter_test.cc new file mode 100644 index 000000000..16c0a76c2 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/detection_result_converter_test.cc @@ -0,0 +1,74 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/detection_result_converter.h" + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/detection_result.h" +#include "mediapipe/tasks/cc/components/containers/detection_result.h" + +namespace mediapipe::tasks::c::components::containers { + +TEST(DetectionResultConverterTest, ConvertsDetectionResultCustomCategory) { + mediapipe::tasks::components::containers::DetectionResult + cpp_detection_result = {/* detections= */ { + {/* categories= */ {{/* index= */ 1, /* score= */ 0.1, + /* category_name= */ "cat", + /* display_name= */ "cat"}}, + /* bounding_box= */ {10, 11, 12, 13}, + {/* keypoints */ {{0.1, 0.1, "foo", 0.5}}}}}}; + + DetectionResult c_detection_result; + CppConvertToDetectionResult(cpp_detection_result, &c_detection_result); + EXPECT_NE(c_detection_result.detections, nullptr); + EXPECT_EQ(c_detection_result.detections_count, 1); + EXPECT_NE(c_detection_result.detections[0].categories, nullptr); + EXPECT_EQ(c_detection_result.detections[0].categories_count, 1); + EXPECT_EQ(c_detection_result.detections[0].bounding_box.left, 10); + EXPECT_EQ(c_detection_result.detections[0].bounding_box.top, 11); + EXPECT_EQ(c_detection_result.detections[0].bounding_box.right, 12); + EXPECT_EQ(c_detection_result.detections[0].bounding_box.bottom, 13); + EXPECT_NE(c_detection_result.detections[0].keypoints, nullptr); + + CppCloseDetectionResult(&c_detection_result); +} + +TEST(DetectionResultConverterTest, ConvertsDetectionResultNoCategory) { + mediapipe::tasks::components::containers::DetectionResult + cpp_detection_result = {/* detections= */ {/* categories= */ {}}}; + + DetectionResult c_detection_result; + CppConvertToDetectionResult(cpp_detection_result, &c_detection_result); + EXPECT_NE(c_detection_result.detections, nullptr); + EXPECT_EQ(c_detection_result.detections_count, 1); + EXPECT_NE(c_detection_result.detections[0].categories, nullptr); + EXPECT_EQ(c_detection_result.detections[0].categories_count, 0); + + CppCloseDetectionResult(&c_detection_result); +} + +TEST(DetectionResultConverterTest, FreesMemory) { + mediapipe::tasks::components::containers::DetectionResult + cpp_detection_result = {/* detections= */ {{/* categories= */ {}}}}; + + DetectionResult c_detection_result; + CppConvertToDetectionResult(cpp_detection_result, &c_detection_result); + EXPECT_NE(c_detection_result.detections, nullptr); + + CppCloseDetectionResult(&c_detection_result); + EXPECT_EQ(c_detection_result.detections, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/embedding_result_converter.cc b/mediapipe/tasks/c/components/containers/embedding_result_converter.cc index ba72c0994..2e552801d 100644 --- a/mediapipe/tasks/c/components/containers/embedding_result_converter.cc +++ b/mediapipe/tasks/c/components/containers/embedding_result_converter.cc @@ -66,6 +66,29 @@ void CppConvertToEmbeddingResult( } } +void CppConvertToCppEmbedding( + const Embedding& in, // C struct as input + mediapipe::tasks::components::containers::Embedding* out) { + // Handle float embeddings + if (in.float_embedding != nullptr) { + out->float_embedding.assign(in.float_embedding, + in.float_embedding + in.values_count); + } + + // Handle quantized embeddings + if (in.quantized_embedding != nullptr) { + out->quantized_embedding.assign(in.quantized_embedding, + in.quantized_embedding + 
in.values_count); + } + + out->head_index = in.head_index; + + // Copy head_name if it is present. + if (in.head_name) { + out->head_name = std::string(in.head_name); + } +} + void CppCloseEmbeddingResult(EmbeddingResult* in) { for (uint32_t i = 0; i < in->embeddings_count; ++i) { auto embedding_in = in->embeddings[i]; diff --git a/mediapipe/tasks/c/components/containers/embedding_result_converter.h b/mediapipe/tasks/c/components/containers/embedding_result_converter.h index 15bcdbdd0..0955a949d 100644 --- a/mediapipe/tasks/c/components/containers/embedding_result_converter.h +++ b/mediapipe/tasks/c/components/containers/embedding_result_converter.h @@ -29,6 +29,10 @@ void CppConvertToEmbeddingResult( const mediapipe::tasks::components::containers::EmbeddingResult& in, EmbeddingResult* out); +void CppConvertToCppEmbedding( + const Embedding& in, + mediapipe::tasks::components::containers::Embedding* out); + void CppCloseEmbedding(Embedding* in); void CppCloseEmbeddingResult(EmbeddingResult* in); diff --git a/mediapipe/tasks/c/components/containers/keypoint.h b/mediapipe/tasks/c/components/containers/keypoint.h new file mode 100644 index 000000000..e70d0325d --- /dev/null +++ b/mediapipe/tasks/c/components/containers/keypoint.h @@ -0,0 +1,46 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_KEYPOINT_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_KEYPOINT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// A keypoint, defined by the coordinates (x, y), normalized by the image +// dimensions. +struct NormalizedKeypoint { + // x in normalized image coordinates. + float x; + + // y in normalized image coordinates. + float y; + + // Optional label of the keypoint. `nullptr` if the label is not present. + char* label; + + // Optional score of the keypoint. + float score; + + // `True` if the score is valid. + bool has_score; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_KEYPOINT_H_ diff --git a/mediapipe/tasks/c/components/containers/keypoint_converter.cc b/mediapipe/tasks/c/components/containers/keypoint_converter.cc new file mode 100644 index 000000000..d7fb9aa8a --- /dev/null +++ b/mediapipe/tasks/c/components/containers/keypoint_converter.cc @@ -0,0 +1,45 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
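`CppConvertToCppEmbedding` goes in the opposite direction of the other converters, turning a C `Embedding` back into the C++ container and treating null pointers as absent fields. A small sketch under that assumption (not part of the patch):

#include "mediapipe/tasks/c/components/containers/embedding_result.h"
#include "mediapipe/tasks/c/components/containers/embedding_result_converter.h"
#include "mediapipe/tasks/cc/components/containers/embedding_result.h"

namespace containers = mediapipe::tasks::c::components::containers;

mediapipe::tasks::components::containers::Embedding ToCppEmbedding() {
  float values[] = {0.1f, 0.2f, 0.3f};

  // Zero-initialize so quantized_embedding and head_name stay nullptr; the
  // converter skips fields whose pointers are null.
  Embedding c_embedding{};
  c_embedding.float_embedding = values;
  c_embedding.values_count = 3;
  c_embedding.head_index = 0;

  mediapipe::tasks::components::containers::Embedding cpp_embedding;
  containers::CppConvertToCppEmbedding(c_embedding, &cpp_embedding);
  return cpp_embedding;  // float_embedding now holds {0.1f, 0.2f, 0.3f}.
}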
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/keypoint_converter.h" + +#include // IWYU pragma: for open source compule + +#include + +#include "mediapipe/tasks/c/components/containers/keypoint.h" +#include "mediapipe/tasks/cc/components/containers/keypoint.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToNormalizedKeypoint( + const mediapipe::tasks::components::containers::NormalizedKeypoint& in, + NormalizedKeypoint* out) { + out->x = in.x; + out->y = in.y; + + out->label = in.label.has_value() ? strdup(in.label->c_str()) : nullptr; + out->has_score = in.score.has_value(); + out->score = out->has_score ? in.score.value() : 0; +} + +void CppCloseNormalizedKeypoint(NormalizedKeypoint* keypoint) { + if (keypoint && keypoint->label) { + free(keypoint->label); + keypoint->label = nullptr; + } +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/keypoint_converter.h b/mediapipe/tasks/c/components/containers/keypoint_converter.h new file mode 100644 index 000000000..a4bd725f2 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/keypoint_converter.h @@ -0,0 +1,32 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_KEYPOINT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_KEYPOINT_CONVERTER_H_ + +#include "mediapipe/tasks/c/components/containers/keypoint.h" +#include "mediapipe/tasks/cc/components/containers/keypoint.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToNormalizedKeypoint( + const mediapipe::tasks::components::containers::NormalizedKeypoint& in, + NormalizedKeypoint* out); + +void CppCloseNormalizedKeypoint(NormalizedKeypoint* keypoint); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_KEYPOINT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/keypoint_converter_test.cc b/mediapipe/tasks/c/components/containers/keypoint_converter_test.cc new file mode 100644 index 000000000..7c9ba6fe2 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/keypoint_converter_test.cc @@ -0,0 +1,52 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
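Because the label is copied with `strdup`, a converted keypoint owns heap memory until `CppCloseNormalizedKeypoint` is called, and `score` is only meaningful when `has_score` is set. An illustrative round trip (not part of the patch):

#include <cstdio>

#include "mediapipe/tasks/c/components/containers/keypoint.h"
#include "mediapipe/tasks/c/components/containers/keypoint_converter.h"
#include "mediapipe/tasks/cc/components/containers/keypoint.h"

namespace containers = mediapipe::tasks::c::components::containers;

void RoundTripKeypoint() {
  mediapipe::tasks::components::containers::NormalizedKeypoint cpp_keypoint = {
      /* x= */ 0.25f, /* y= */ 0.75f, /* label= */ "nose", /* score= */ 0.9f};

  NormalizedKeypoint c_keypoint;
  containers::CppConvertToNormalizedKeypoint(cpp_keypoint, &c_keypoint);

  // score is only valid when has_score is set; label is a strdup'd copy.
  if (c_keypoint.has_score) {
    std::printf("%s: %f\n", c_keypoint.label ? c_keypoint.label : "?",
                c_keypoint.score);
  }

  // Releases the copied label and resets it to nullptr.
  containers::CppCloseNormalizedKeypoint(&c_keypoint);
}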
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/keypoint_converter.h" + +#include + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/keypoint.h" +#include "mediapipe/tasks/cc/components/containers/keypoint.h" + +namespace mediapipe::tasks::c::components::containers { + +constexpr float kPrecision = 1e-6; + +TEST(KeypointConverterTest, ConvertsKeypointCustomValues) { + mediapipe::tasks::components::containers::NormalizedKeypoint cpp_keypoint = { + 0.1, 0.2, "foo", 0.5}; + + NormalizedKeypoint c_keypoint; + CppConvertToNormalizedKeypoint(cpp_keypoint, &c_keypoint); + EXPECT_NEAR(c_keypoint.x, 0.1f, kPrecision); + EXPECT_NEAR(c_keypoint.y, 0.2f, kPrecision); + EXPECT_EQ(std::string(c_keypoint.label), "foo"); + EXPECT_NEAR(c_keypoint.score, 0.5f, kPrecision); + CppCloseNormalizedKeypoint(&c_keypoint); +} + +TEST(KeypointConverterTest, FreesMemory) { + mediapipe::tasks::components::containers::NormalizedKeypoint cpp_keypoint = { + 0.1, 0.2, "foo", 0.5}; + + NormalizedKeypoint c_keypoint; + CppConvertToNormalizedKeypoint(cpp_keypoint, &c_keypoint); + EXPECT_NE(c_keypoint.label, nullptr); + CppCloseNormalizedKeypoint(&c_keypoint); + EXPECT_EQ(c_keypoint.label, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/landmark.h b/mediapipe/tasks/c/components/containers/landmark.h new file mode 100644 index 000000000..de6dd9928 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark.h @@ -0,0 +1,90 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Landmark represents a point in 3D space with x, y, z coordinates. The +// landmark coordinates are in meters. z represents the landmark depth, and the +// smaller the value the closer the world landmark is to the camera. +struct Landmark { + float x; + float y; + float z; + + // For optional visibility. + bool has_visibility; + + // Landmark visibility. Should stay unset if not supported. + // Float score of whether landmark is visible or occluded by other objects. + // Landmark considered as invisible also if it is not present on the screen + // (out of scene bounds). Depending on the model, visibility value is either + // a sigmoid or an argument of sigmoid. + float visibility; + + // For optional presence. + bool has_presence; + + // Landmark presence. Should stay unset if not supported. + // Float score of whether landmark is present on the scene (located within + // scene bounds). Depending on the model, presence value is either a result + // of sigmoid or an argument of sigmoid function to get landmark presence + // probability. 
+ float presence; + + // Landmark name. Should stay unset if not supported. + // Defaults to nullptr. + char* name; +}; + +// A normalized version of above Landmark struct. All coordinates should be +// within [0, 1]. +struct NormalizedLandmark { + float x; + float y; + float z; + + bool has_visibility; + float visibility; + + bool has_presence; + float presence; + + char* name; +}; + +// A list of Landmarks. +struct Landmarks { + struct Landmark* landmarks; + uint32_t landmarks_count; +}; + +// A list of NormalizedLandmarks. +struct NormalizedLandmarks { + struct NormalizedLandmark* landmarks; + uint32_t landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_ diff --git a/mediapipe/tasks/c/components/containers/landmark_converter.cc b/mediapipe/tasks/c/components/containers/landmark_converter.cc new file mode 100644 index 000000000..4a4620574 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark_converter.cc @@ -0,0 +1,128 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" + +#include +#include +#include + +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToLandmark( + const mediapipe::tasks::components::containers::Landmark& in, + ::Landmark* out) { + out->x = in.x; + out->y = in.y; + out->z = in.z; + + if (in.visibility.has_value()) { + out->has_visibility = true; + out->visibility = in.visibility.value(); + } else { + out->has_visibility = false; + } + + if (in.presence.has_value()) { + out->has_presence = true; + out->presence = in.presence.value(); + } else { + out->has_presence = false; + } + + out->name = in.name.has_value() ? strdup(in.name->c_str()) : nullptr; +} + +void CppConvertToNormalizedLandmark( + const mediapipe::tasks::components::containers::NormalizedLandmark& in, + ::NormalizedLandmark* out) { + out->x = in.x; + out->y = in.y; + out->z = in.z; + + if (in.visibility.has_value()) { + out->has_visibility = true; + out->visibility = in.visibility.value(); + } else { + out->has_visibility = false; + } + + if (in.presence.has_value()) { + out->has_presence = true; + out->presence = in.presence.value(); + } else { + out->has_presence = false; + } + + out->name = in.name.has_value() ? 
strdup(in.name->c_str()) : nullptr; +} + +void CppConvertToLandmarks( + const std::vector& in, + ::Landmarks* out) { + out->landmarks_count = in.size(); + out->landmarks = new ::Landmark[out->landmarks_count]; + for (uint32_t i = 0; i < out->landmarks_count; ++i) { + CppConvertToLandmark(in[i], &out->landmarks[i]); + } +} + +void CppConvertToNormalizedLandmarks( + const std::vector< + mediapipe::tasks::components::containers::NormalizedLandmark>& in, + ::NormalizedLandmarks* out) { + out->landmarks_count = in.size(); + out->landmarks = new ::NormalizedLandmark[out->landmarks_count]; + for (uint32_t i = 0; i < out->landmarks_count; ++i) { + CppConvertToNormalizedLandmark(in[i], &out->landmarks[i]); + } +} + +void CppCloseLandmark(::Landmark* in) { + if (in && in->name) { + free(in->name); + in->name = nullptr; + } +} + +void CppCloseLandmarks(::Landmarks* in) { + for (uint32_t i = 0; i < in->landmarks_count; ++i) { + CppCloseLandmark(&in->landmarks[i]); + } + delete[] in->landmarks; + in->landmarks = nullptr; + in->landmarks_count = 0; +} + +void CppCloseNormalizedLandmark(::NormalizedLandmark* in) { + if (in && in->name) { + free(in->name); + in->name = nullptr; + } +} + +void CppCloseNormalizedLandmarks(::NormalizedLandmarks* in) { + for (uint32_t i = 0; i < in->landmarks_count; ++i) { + CppCloseNormalizedLandmark(&in->landmarks[i]); + } + delete[] in->landmarks; + in->landmarks = nullptr; + in->landmarks_count = 0; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/landmark_converter.h b/mediapipe/tasks/c/components/containers/landmark_converter.h new file mode 100644 index 000000000..1b3626386 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark_converter.h @@ -0,0 +1,51 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
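The optional visibility/presence values follow the same pattern: read them only when the corresponding `has_*` flag is set, and release the copied name afterwards. A short sketch mirroring the aggregate initialization used in the tests below (not part of the patch):

#include <cstdio>

#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/c/components/containers/landmark_converter.h"
#include "mediapipe/tasks/cc/components/containers/landmark.h"

namespace containers = mediapipe::tasks::c::components::containers;

void InspectLandmark() {
  // Field order: x, y, z, visibility, presence, name.
  mediapipe::tasks::components::containers::Landmark cpp_landmark = {
      0.1f, 0.2f, 0.3f, 0.9f, 0.8f, "wrist"};

  ::Landmark c_landmark;
  containers::CppConvertToLandmark(cpp_landmark, &c_landmark);

  // visibility/presence are only meaningful when the matching flag is set.
  if (c_landmark.has_visibility) {
    std::printf("visibility=%f\n", c_landmark.visibility);
  }
  if (c_landmark.has_presence) {
    std::printf("presence=%f\n", c_landmark.presence);
  }

  // Frees the strdup'd name.
  containers::CppCloseLandmark(&c_landmark);
}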
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_ + +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToLandmark( + const mediapipe::tasks::components::containers::Landmark& in, + ::Landmark* out); + +void CppConvertToNormalizedLandmark( + const mediapipe::tasks::components::containers::NormalizedLandmark& in, + ::NormalizedLandmark* out); + +void CppConvertToLandmarks( + const std::vector& in, + ::Landmarks* out); + +void CppConvertToNormalizedLandmarks( + const std::vector< + mediapipe::tasks::components::containers::NormalizedLandmark>& in, + ::NormalizedLandmarks* out); + +void CppCloseLandmark(struct ::Landmark* in); + +void CppCloseLandmarks(struct ::Landmarks* in); + +void CppCloseNormalizedLandmark(struct ::NormalizedLandmark* in); + +void CppCloseNormalizedLandmarks(struct ::NormalizedLandmarks* in); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/landmark_converter_test.cc b/mediapipe/tasks/c/components/containers/landmark_converter_test.cc new file mode 100644 index 000000000..cef15163d --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark_converter_test.cc @@ -0,0 +1,148 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" + +#include +#include + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" + +namespace mediapipe::tasks::c::components::containers { + +TEST(LandmarkConverterTest, ConvertsCustomLandmark) { + mediapipe::tasks::components::containers::Landmark cpp_landmark = {0.1f, 0.2f, + 0.3f}; + + ::Landmark c_landmark; + CppConvertToLandmark(cpp_landmark, &c_landmark); + EXPECT_FLOAT_EQ(c_landmark.x, cpp_landmark.x); + EXPECT_FLOAT_EQ(c_landmark.y, cpp_landmark.y); + EXPECT_FLOAT_EQ(c_landmark.z, cpp_landmark.z); + CppCloseLandmark(&c_landmark); +} + +TEST(LandmarksConverterTest, ConvertsCustomLandmarks) { + std::vector + cpp_landmarks = { + {0.1f, 0.2f, 0.3f}, // First Landmark + {0.4f, 0.5f, 0.6f} // Second Landmark + }; + + ::Landmarks c_landmarks; + CppConvertToLandmarks(cpp_landmarks, &c_landmarks); + + EXPECT_EQ(c_landmarks.landmarks_count, cpp_landmarks.size()); + for (size_t i = 0; i < c_landmarks.landmarks_count; ++i) { + EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].x, cpp_landmarks[i].x); + EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].y, cpp_landmarks[i].y); + EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].z, cpp_landmarks[i].z); + } + + CppCloseLandmarks(&c_landmarks); +} + +TEST(NormalizedLandmarkConverterTest, ConvertsCustomNormalizedLandmark) { + mediapipe::tasks::components::containers::NormalizedLandmark + cpp_normalized_landmark = {0.7f, 0.8f, 0.9f}; + + ::NormalizedLandmark c_normalized_landmark; + CppConvertToNormalizedLandmark(cpp_normalized_landmark, + &c_normalized_landmark); + + EXPECT_FLOAT_EQ(c_normalized_landmark.x, cpp_normalized_landmark.x); + EXPECT_FLOAT_EQ(c_normalized_landmark.y, cpp_normalized_landmark.y); + EXPECT_FLOAT_EQ(c_normalized_landmark.z, cpp_normalized_landmark.z); + + CppCloseNormalizedLandmark(&c_normalized_landmark); +} + +TEST(NormalizedLandmarksConverterTest, ConvertsCustomNormalizedLandmarks) { + std::vector + cpp_normalized_landmarks = { + {0.1f, 0.2f, 0.3f}, // First NormalizedLandmark + {0.4f, 0.5f, 0.6f} // Second NormalizedLandmark + }; + + ::NormalizedLandmarks c_normalized_landmarks; + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &c_normalized_landmarks); + + EXPECT_EQ(c_normalized_landmarks.landmarks_count, + cpp_normalized_landmarks.size()); + for (size_t i = 0; i < c_normalized_landmarks.landmarks_count; ++i) { + EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].x, + cpp_normalized_landmarks[i].x); + EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].y, + cpp_normalized_landmarks[i].y); + EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].z, + cpp_normalized_landmarks[i].z); + } + + CppCloseNormalizedLandmarks(&c_normalized_landmarks); +} + +TEST(LandmarkConverterTest, FreesMemory) { + mediapipe::tasks::components::containers::Landmark cpp_landmark = { + 0.1f, 0.2f, 0.3f, 0.0f, 0.0f, "foo"}; + + ::Landmark c_landmark; + CppConvertToLandmark(cpp_landmark, &c_landmark); + EXPECT_NE(c_landmark.name, nullptr); + + CppCloseLandmark(&c_landmark); + EXPECT_EQ(c_landmark.name, nullptr); +} + +TEST(NormalizedLandmarkConverterTest, FreesMemory) { + mediapipe::tasks::components::containers::NormalizedLandmark cpp_landmark = { + 0.1f, 0.2f, 0.3f, 0.0f, 0.0f, "foo"}; + + ::NormalizedLandmark c_landmark; + CppConvertToNormalizedLandmark(cpp_landmark, &c_landmark); + 
EXPECT_NE(c_landmark.name, nullptr); + + CppCloseNormalizedLandmark(&c_landmark); + EXPECT_EQ(c_landmark.name, nullptr); +} + +TEST(LandmarksConverterTest, FreesMemory) { + std::vector + cpp_landmarks = {{0.1f, 0.2f, 0.3f}, {0.4f, 0.5f, 0.6f}}; + + ::Landmarks c_landmarks; + CppConvertToLandmarks(cpp_landmarks, &c_landmarks); + EXPECT_NE(c_landmarks.landmarks, nullptr); + + CppCloseLandmarks(&c_landmarks); + EXPECT_EQ(c_landmarks.landmarks, nullptr); +} + +TEST(NormalizedLandmarksConverterTest, FreesMemory) { + std::vector + cpp_normalized_landmarks = {{0.1f, 0.2f, 0.3f}, {0.4f, 0.5f, 0.6f}}; + + ::NormalizedLandmarks c_normalized_landmarks; + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &c_normalized_landmarks); + EXPECT_NE(c_normalized_landmarks.landmarks, nullptr); + + CppCloseNormalizedLandmarks(&c_normalized_landmarks); + EXPECT_EQ(c_normalized_landmarks.landmarks, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/rect.h b/mediapipe/tasks/c/components/containers/rect.h new file mode 100644 index 000000000..c21857d2f --- /dev/null +++ b/mediapipe/tasks/c/components/containers/rect.h @@ -0,0 +1,46 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_RECT_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_RECT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// Defines a rectangle, used e.g. as part of detection results or as input +// region-of-interest. +struct MPRect { + int left; + int top; + int bottom; + int right; +}; + +// The coordinates are normalized wrt the image dimensions, i.e. generally in +// [0,1] but they may exceed these bounds if describing a region overlapping the +// image. The origin is on the top-left corner of the image. +struct MPRectF { + float left; + float top; + float bottom; + float right; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_RECT_H_ diff --git a/mediapipe/tasks/c/components/containers/rect_converter.cc b/mediapipe/tasks/c/components/containers/rect_converter.cc new file mode 100644 index 000000000..42c574566 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/rect_converter.cc @@ -0,0 +1,41 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
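`MPRectF` carries coordinates normalized by the image dimensions, so mapping back to pixels is a simple scale; values can land outside the image for regions overlapping its borders. A hypothetical helper illustrating this (not part of the API or the patch):

#include <cmath>

#include "mediapipe/tasks/c/components/containers/rect.h"

// Hypothetical helper: maps a normalized MPRectF back to pixel coordinates.
MPRect ToPixelRect(const MPRectF& normalized, int image_width,
                   int image_height) {
  MPRect rect;
  rect.left = static_cast<int>(std::lround(normalized.left * image_width));
  rect.top = static_cast<int>(std::lround(normalized.top * image_height));
  rect.right = static_cast<int>(std::lround(normalized.right * image_width));
  rect.bottom = static_cast<int>(std::lround(normalized.bottom * image_height));
  return rect;
}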
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/rect_converter.h" + +#include "mediapipe/tasks/c/components/containers/rect.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" + +namespace mediapipe::tasks::c::components::containers { + +// Converts a C++ Rect to a C Rect. +void CppConvertToRect(const mediapipe::tasks::components::containers::Rect& in, + struct MPRect* out) { + out->left = in.left; + out->top = in.top; + out->right = in.right; + out->bottom = in.bottom; +} + +// Converts a C++ RectF to a C RectF. +void CppConvertToRectF( + const mediapipe::tasks::components::containers::RectF& in, MPRectF* out) { + out->left = in.left; + out->top = in.top; + out->right = in.right; + out->bottom = in.bottom; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/rect_converter.h b/mediapipe/tasks/c/components/containers/rect_converter.h new file mode 100644 index 000000000..ee446a816 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/rect_converter.h @@ -0,0 +1,32 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_RECT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_RECT_CONVERTER_H_ + +#include "mediapipe/tasks/c/components/containers/rect.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToRect(const mediapipe::tasks::components::containers::Rect& in, + MPRect* out); + +void CppConvertToRectF( + const mediapipe::tasks::components::containers::RectF& in, MPRectF* out); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_RECT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/rect_converter_test.cc b/mediapipe/tasks/c/components/containers/rect_converter_test.cc new file mode 100644 index 000000000..7aa2daed3 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/rect_converter_test.cc @@ -0,0 +1,47 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/rect_converter.h" + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/rect.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" + +namespace mediapipe::tasks::c::components::containers { + +TEST(RectConverterTest, ConvertsRectCustomValues) { + mediapipe::tasks::components::containers::Rect cpp_rect = {0, 1, 2, 3}; + + MPRect c_rect; + CppConvertToRect(cpp_rect, &c_rect); + EXPECT_EQ(c_rect.left, 0); + EXPECT_EQ(c_rect.top, 1); + EXPECT_EQ(c_rect.right, 2); + EXPECT_EQ(c_rect.bottom, 3); +} + +TEST(RectFConverterTest, ConvertsRectFCustomValues) { + mediapipe::tasks::components::containers::RectF cpp_rect = {0.1, 0.2, 0.3, + 0.4}; + + MPRectF c_rect; + CppConvertToRectF(cpp_rect, &c_rect); + EXPECT_FLOAT_EQ(c_rect.left, 0.1); + EXPECT_FLOAT_EQ(c_rect.top, 0.2); + EXPECT_FLOAT_EQ(c_rect.right, 0.3); + EXPECT_FLOAT_EQ(c_rect.bottom, 0.4); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/core/base_options.h b/mediapipe/tasks/c/core/base_options.h index 78d89ce8c..20c068a87 100644 --- a/mediapipe/tasks/c/core/base_options.h +++ b/mediapipe/tasks/c/core/base_options.h @@ -22,9 +22,12 @@ extern "C" { // Base options for MediaPipe C Tasks. struct BaseOptions { - // The model asset file contents as a string. + // The model asset file contents as bytes. const char* model_asset_buffer; + // The size of the model assets buffer (or `0` if not set). + unsigned int model_asset_buffer_count; + // The path to the model asset to open and mmap in memory. const char* model_asset_path; }; diff --git a/mediapipe/tasks/c/core/base_options_converter.cc b/mediapipe/tasks/c/core/base_options_converter.cc index 3f126168b..07a9e81d0 100644 --- a/mediapipe/tasks/c/core/base_options_converter.cc +++ b/mediapipe/tasks/c/core/base_options_converter.cc @@ -27,7 +27,9 @@ void CppConvertToBaseOptions(const BaseOptions& in, mediapipe::tasks::core::BaseOptions* out) { out->model_asset_buffer = in.model_asset_buffer - ? std::make_unique(in.model_asset_buffer) + ? std::make_unique( + in.model_asset_buffer, + in.model_asset_buffer + in.model_asset_buffer_count) : nullptr; out->model_asset_path = in.model_asset_path ? std::string(in.model_asset_path) : ""; diff --git a/mediapipe/tasks/c/core/base_options_converter_test.cc b/mediapipe/tasks/c/core/base_options_converter_test.cc index 27c7fb3ec..37ab90f94 100644 --- a/mediapipe/tasks/c/core/base_options_converter_test.cc +++ b/mediapipe/tasks/c/core/base_options_converter_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
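With the new `model_asset_buffer_count` field, an in-memory model is described by a pointer plus an explicit byte count rather than a NUL-terminated string. A sketch of filling `BaseOptions` from a buffer (hypothetical helper, not part of the patch; the buffer is not copied and must outlive the options):

#include <string>

#include "mediapipe/tasks/c/core/base_options.h"

// Hypothetical example: point BaseOptions at an in-memory model blob.
BaseOptions FromModelBlob(const std::string& model_blob) {
  BaseOptions options = {
      /* model_asset_buffer= */ model_blob.data(),
      /* model_asset_buffer_count= */
      static_cast<unsigned int>(model_blob.size()),
      /* model_asset_path= */ nullptr};
  return options;  // Points into model_blob; keep the blob alive.
}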
#include "mediapipe/tasks/c/core/base_options_converter.h" +#include #include #include "mediapipe/framework/port/gtest.h" @@ -28,6 +29,8 @@ constexpr char kModelAssetPath[] = "abc.tflite"; TEST(BaseOptionsConverterTest, ConvertsBaseOptionsAssetBuffer) { BaseOptions c_base_options = {/* model_asset_buffer= */ kAssetBuffer, + /* model_asset_buffer_count= */ + static_cast(strlen(kAssetBuffer)), /* model_asset_path= */ nullptr}; mediapipe::tasks::core::BaseOptions cpp_base_options = {}; @@ -39,6 +42,7 @@ TEST(BaseOptionsConverterTest, ConvertsBaseOptionsAssetBuffer) { TEST(BaseOptionsConverterTest, ConvertsBaseOptionsAssetPath) { BaseOptions c_base_options = {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ kModelAssetPath}; mediapipe::tasks::core::BaseOptions cpp_base_options = {}; diff --git a/mediapipe/tasks/c/text/BUILD b/mediapipe/tasks/c/text/BUILD new file mode 100644 index 000000000..61fdf5969 --- /dev/null +++ b/mediapipe/tasks/c/text/BUILD @@ -0,0 +1,57 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +TEXT_LIBRARIES = [ + "//mediapipe/tasks/c/text/language_detector:language_detector_lib", + "//mediapipe/tasks/c/text/text_classifier:text_classifier_lib", + "//mediapipe/tasks/c/text/text_embedder:text_embedder_lib", +] + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/text:libtext.so +cc_binary( + name = "libtext.so", + linkopts = [ + "-Wl,-soname=libtext.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = TEXT_LIBRARIES, +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/text:libtext:.dylib +cc_binary( + name = "libtext.dylib", + linkopts = [ + "-Wl,-install_name,libtext.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = TEXT_LIBRARIES, +) diff --git a/mediapipe/tasks/c/text/language_detector/BUILD b/mediapipe/tasks/c/text/language_detector/BUILD index 9a3ce21e7..f4a674a35 100644 --- a/mediapipe/tasks/c/text/language_detector/BUILD +++ b/mediapipe/tasks/c/text/language_detector/BUILD @@ -22,7 +22,7 @@ cc_library( hdrs = ["language_detector.h"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/tasks/c/components/containers:language_detection_result_converter", + ":language_detector_result_converter", "//mediapipe/tasks/c/components/processors:classifier_options", "//mediapipe/tasks/c/components/processors:classifier_options_converter", "//mediapipe/tasks/c/core:base_options", @@ -77,6 +77,29 @@ cc_library( ], ) +cc_library( + name = "language_detector_result_converter", + srcs = ["language_detector_result_converter.cc"], + hdrs = ["language_detector_result_converter.h"], + deps = [ + ":language_detector", + 
"//mediapipe/tasks/cc/text/language_detector", + ], +) + +cc_test( + name = "language_detector_result_converter_test", + srcs = ["language_detector_result_converter_test.cc"], + linkstatic = 1, + deps = [ + ":language_detector", + ":language_detector_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/cc/text/language_detector", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "language_detector_test", srcs = ["language_detector_test.cc"], diff --git a/mediapipe/tasks/c/text/language_detector/language_detector.cc b/mediapipe/tasks/c/text/language_detector/language_detector.cc index c71433fdc..ec2aba521 100644 --- a/mediapipe/tasks/c/text/language_detector/language_detector.cc +++ b/mediapipe/tasks/c/text/language_detector/language_detector.cc @@ -20,9 +20,9 @@ limitations under the License. #include "absl/log/absl_log.h" #include "absl/status/status.h" -#include "mediapipe/tasks/c/components/containers/language_detection_result_converter.h" #include "mediapipe/tasks/c/components/processors/classifier_options_converter.h" #include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/text/language_detector/language_detector_result_converter.h" #include "mediapipe/tasks/cc/text/language_detector/language_detector.h" namespace mediapipe::tasks::c::text::language_detector { @@ -30,9 +30,9 @@ namespace mediapipe::tasks::c::text::language_detector { namespace { using ::mediapipe::tasks::c::components::containers:: - CppCloseLanguageDetectionResult; + CppCloseLanguageDetectorResult; using ::mediapipe::tasks::c::components::containers:: - CppConvertToLanguageDetectionResult; + CppConvertToLanguageDetectorResult; using ::mediapipe::tasks::c::components::processors:: CppConvertToClassifierOptions; using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; @@ -72,16 +72,16 @@ int CppLanguageDetectorDetect(void* detector, const char* utf8_str, auto cpp_detector = static_cast(detector); auto cpp_result = cpp_detector->Detect(utf8_str); if (!cpp_result.ok()) { - ABSL_LOG(ERROR) << "Language Detection failed: " << cpp_result.status(); + ABSL_LOG(ERROR) << "Language Detector failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); } - CppConvertToLanguageDetectionResult(*cpp_result, result); + CppConvertToLanguageDetectorResult(*cpp_result, result); return 0; } void CppLanguageDetectorCloseResult(LanguageDetectorResult* result) { - CppCloseLanguageDetectionResult(result); + CppCloseLanguageDetectorResult(result); } int CppLanguageDetectorClose(void* detector, char** error_msg) { diff --git a/mediapipe/tasks/c/text/language_detector/language_detector.h b/mediapipe/tasks/c/text/language_detector/language_detector.h index f1c85069f..a7f2159b6 100644 --- a/mediapipe/tasks/c/text/language_detector/language_detector.h +++ b/mediapipe/tasks/c/text/language_detector/language_detector.h @@ -60,18 +60,18 @@ struct LanguageDetectorOptions { // Creates a LanguageDetector from the provided `options`. // Returns a pointer to the language detector on success. // If an error occurs, returns `nullptr` and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. 
MP_EXPORT void* language_detector_create( - struct LanguageDetectorOptions* options, char** error_msg = nullptr); + struct LanguageDetectorOptions* options, char** error_msg); // Performs language detection on the input `text`. Returns `0` on success. // If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT int language_detector_detect(void* detector, const char* utf8_str, LanguageDetectorResult* result, - char** error_msg = nullptr); + char** error_msg); // Frees the memory allocated inside a LanguageDetectorResult result. Does not // free the result pointer itself. @@ -79,10 +79,9 @@ MP_EXPORT void language_detector_close_result(LanguageDetectorResult* result); // Shuts down the LanguageDetector when all the work is done. Frees all memory. // If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int language_detector_close(void* detector, - char** error_msg = nullptr); +MP_EXPORT int language_detector_close(void* detector, char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/components/containers/language_detection_result_converter.cc b/mediapipe/tasks/c/text/language_detector/language_detector_result_converter.cc similarity index 90% rename from mediapipe/tasks/c/components/containers/language_detection_result_converter.cc rename to mediapipe/tasks/c/text/language_detector/language_detector_result_converter.cc index 89b112e45..435c3d1dc 100644 --- a/mediapipe/tasks/c/components/containers/language_detection_result_converter.cc +++ b/mediapipe/tasks/c/text/language_detector/language_detector_result_converter.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mediapipe/tasks/c/components/containers/language_detection_result_converter.h" +#include "mediapipe/tasks/c/text/language_detector/language_detector_result_converter.h" #include #include @@ -23,7 +23,7 @@ limitations under the License. 
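Dropping the default `error_msg` arguments means every call site now passes the parameter explicitly; when a message is returned, the caller must free it. An illustrative call sequence (hypothetical wrapper, not part of the patch):

#include <cstdio>
#include <cstdlib>

#include "mediapipe/tasks/c/text/language_detector/language_detector.h"

// Hypothetical wrapper: creates a detector, runs one detection, and frees
// any error message the C API hands back.
int DetectLanguage(LanguageDetectorOptions* options, const char* text) {
  char* error_msg = nullptr;
  void* detector = language_detector_create(options, &error_msg);
  if (!detector) {
    std::fprintf(stderr, "create failed: %s\n", error_msg ? error_msg : "");
    free(error_msg);
    return -1;
  }

  LanguageDetectorResult result;
  if (language_detector_detect(detector, text, &result, &error_msg) != 0) {
    std::fprintf(stderr, "detect failed: %s\n", error_msg ? error_msg : "");
    free(error_msg);
  } else {
    language_detector_close_result(&result);
  }

  return language_detector_close(detector, /* error_msg= */ nullptr);
}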
namespace mediapipe::tasks::c::components::containers { -void CppConvertToLanguageDetectionResult( +void CppConvertToLanguageDetectorResult( const mediapipe::tasks::text::language_detector::LanguageDetectorResult& in, LanguageDetectorResult* out) { out->predictions_count = in.size(); @@ -42,7 +42,7 @@ void CppConvertToLanguageDetectionResult( } } -void CppCloseLanguageDetectionResult(LanguageDetectorResult* in) { +void CppCloseLanguageDetectorResult(LanguageDetectorResult* in) { for (uint32_t i = 0; i < in->predictions_count; ++i) { auto prediction_in = in->predictions[i]; diff --git a/mediapipe/tasks/c/components/containers/language_detection_result_converter.h b/mediapipe/tasks/c/text/language_detector/language_detector_result_converter.h similarity index 78% rename from mediapipe/tasks/c/components/containers/language_detection_result_converter.h rename to mediapipe/tasks/c/text/language_detector/language_detector_result_converter.h index c9cfd55bd..c8b0c04ed 100644 --- a/mediapipe/tasks/c/components/containers/language_detection_result_converter.h +++ b/mediapipe/tasks/c/text/language_detector/language_detector_result_converter.h @@ -13,20 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANGUAGE_DETECTION_RESULT_CONVERTER_H_ -#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANGUAGE_DETECTION_RESULT_CONVERTER_H_ +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANGUAGE_DETECTOR_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANGUAGE_DETECTOR_RESULT_CONVERTER_H_ #include "mediapipe/tasks/c/text/language_detector/language_detector.h" #include "mediapipe/tasks/cc/text/language_detector/language_detector.h" namespace mediapipe::tasks::c::components::containers { -void CppConvertToLanguageDetectionResult( +void CppConvertToLanguageDetectorResult( const mediapipe::tasks::text::language_detector::LanguageDetectorResult& in, LanguageDetectorResult* out); -void CppCloseLanguageDetectionResult(LanguageDetectorResult* in); +void CppCloseLanguageDetectorResult(LanguageDetectorResult* in); } // namespace mediapipe::tasks::c::components::containers -#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANGUAGE_DETECTION_RESULT_CONVERTER_H_ +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANGUAGE_DETECTOR_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/language_detection_result_converter_test.cc b/mediapipe/tasks/c/text/language_detector/language_detector_result_converter_test.cc similarity index 77% rename from mediapipe/tasks/c/components/containers/language_detection_result_converter_test.cc rename to mediapipe/tasks/c/text/language_detector/language_detector_result_converter_test.cc index 633b77eae..4c93aa232 100644 --- a/mediapipe/tasks/c/components/containers/language_detection_result_converter_test.cc +++ b/mediapipe/tasks/c/text/language_detector/language_detector_result_converter_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "mediapipe/tasks/c/components/containers/language_detection_result_converter.h" +#include "mediapipe/tasks/c/text/language_detector/language_detector_result_converter.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/tasks/c/text/language_detector/language_detector.h" @@ -21,8 +21,8 @@ limitations under the License. namespace mediapipe::tasks::c::components::containers { -TEST(LanguageDetectionResultConverterTest, - ConvertsLanguageDetectionResultCustomResult) { +TEST(LanguageDetectorResultConverterTest, + ConvertsLanguageDetectorResultCustomResult) { mediapipe::tasks::text::language_detector::LanguageDetectorResult cpp_detector_result = {{/* language_code= */ "fr", /* probability= */ 0.5}, @@ -30,24 +30,24 @@ TEST(LanguageDetectionResultConverterTest, /* probability= */ 0.5}}; LanguageDetectorResult c_detector_result; - CppConvertToLanguageDetectionResult(cpp_detector_result, &c_detector_result); + CppConvertToLanguageDetectorResult(cpp_detector_result, &c_detector_result); EXPECT_NE(c_detector_result.predictions, nullptr); EXPECT_EQ(c_detector_result.predictions_count, 2); EXPECT_NE(c_detector_result.predictions[0].language_code, "fr"); EXPECT_EQ(c_detector_result.predictions[0].probability, 0.5); - CppCloseLanguageDetectionResult(&c_detector_result); + CppCloseLanguageDetectorResult(&c_detector_result); } -TEST(LanguageDetectionResultConverterTest, FreesMemory) { +TEST(LanguageDetectorResultConverterTest, FreesMemory) { mediapipe::tasks::text::language_detector::LanguageDetectorResult cpp_detector_result = {{"fr", 0.5}}; LanguageDetectorResult c_detector_result; - CppConvertToLanguageDetectionResult(cpp_detector_result, &c_detector_result); + CppConvertToLanguageDetectorResult(cpp_detector_result, &c_detector_result); EXPECT_NE(c_detector_result.predictions, nullptr); - CppCloseLanguageDetectionResult(&c_detector_result); + CppCloseLanguageDetectorResult(&c_detector_result); EXPECT_EQ(c_detector_result.predictions, nullptr); } diff --git a/mediapipe/tasks/c/text/language_detector/language_detector_test.cc b/mediapipe/tasks/c/text/language_detector/language_detector_test.cc index b8653e616..47770986f 100644 --- a/mediapipe/tasks/c/text/language_detector/language_detector_test.cc +++ b/mediapipe/tasks/c/text/language_detector/language_detector_test.cc @@ -44,6 +44,7 @@ TEST(LanguageDetectorTest, SmokeTest) { std::string model_path = GetFullPath(kTestLanguageDetectorModelPath); LanguageDetectorOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ model_path.c_str()}, /* classifier_options= */ {/* display_names_locale= */ nullptr, @@ -55,22 +56,24 @@ TEST(LanguageDetectorTest, SmokeTest) { /* category_denylist_count= */ 0}, }; - void* detector = language_detector_create(&options); + void* detector = language_detector_create(&options, /* error_msg */ nullptr); EXPECT_NE(detector, nullptr); LanguageDetectorResult result; - language_detector_detect(detector, kTestString, &result); + language_detector_detect(detector, kTestString, &result, + /* error_msg */ nullptr); EXPECT_EQ(std::string(result.predictions[0].language_code), "fr"); EXPECT_NEAR(result.predictions[0].probability, 0.999781, kPrecision); language_detector_close_result(&result); - language_detector_close(detector); + language_detector_close(detector, /* error_msg */ nullptr); } TEST(LanguageDetectorTest, ErrorHandling) { // It is an error to 
set neither the asset buffer nor the path. LanguageDetectorOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ nullptr}, /* classifier_options= */ {}, }; diff --git a/mediapipe/tasks/c/text/text_classifier/text_classifier.h b/mediapipe/tasks/c/text/text_classifier/text_classifier.h index 057b00f99..55b4bc710 100644 --- a/mediapipe/tasks/c/text/text_classifier/text_classifier.h +++ b/mediapipe/tasks/c/text/text_classifier/text_classifier.h @@ -44,18 +44,18 @@ struct TextClassifierOptions { // Creates a TextClassifier from the provided `options`. // Returns a pointer to the text classifier on success. // If an error occurs, returns `nullptr` and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT void* text_classifier_create(struct TextClassifierOptions* options, - char** error_msg = nullptr); + char** error_msg); // Performs classification on the input `text`. Returns `0` on success. // If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT int text_classifier_classify(void* classifier, const char* utf8_str, TextClassifierResult* result, - char** error_msg = nullptr); + char** error_msg); // Frees the memory allocated inside a TextClassifierResult result. Does not // free the result pointer itself. @@ -63,10 +63,9 @@ MP_EXPORT void text_classifier_close_result(TextClassifierResult* result); // Shuts down the TextClassifier when all the work is done. Frees all memory. // If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. 
-MP_EXPORT int text_classifier_close(void* classifier, - char** error_msg = nullptr); +MP_EXPORT int text_classifier_close(void* classifier, char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/text/text_classifier/text_classifier_test.cc b/mediapipe/tasks/c/text/text_classifier/text_classifier_test.cc index 51232d63a..1dd3f6910 100644 --- a/mediapipe/tasks/c/text/text_classifier/text_classifier_test.cc +++ b/mediapipe/tasks/c/text/text_classifier/text_classifier_test.cc @@ -43,6 +43,7 @@ TEST(TextClassifierTest, SmokeTest) { std::string model_path = GetFullPath(kTestBertModelPath); TextClassifierOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ model_path.c_str()}, /* classifier_options= */ {/* display_names_locale= */ nullptr, @@ -54,11 +55,12 @@ TEST(TextClassifierTest, SmokeTest) { /* category_denylist_count= */ 0}, }; - void* classifier = text_classifier_create(&options); + void* classifier = text_classifier_create(&options, /* error_msg */ nullptr); EXPECT_NE(classifier, nullptr); TextClassifierResult result; - text_classifier_classify(classifier, kTestString, &result); + text_classifier_classify(classifier, kTestString, &result, + /* error_msg */ nullptr); EXPECT_EQ(result.classifications_count, 1); EXPECT_EQ(result.classifications[0].categories_count, 2); EXPECT_EQ(std::string{result.classifications[0].categories[0].category_name}, @@ -67,13 +69,14 @@ TEST(TextClassifierTest, SmokeTest) { kPrecision); text_classifier_close_result(&result); - text_classifier_close(classifier); + text_classifier_close(classifier, /* error_msg */ nullptr); } TEST(TextClassifierTest, ErrorHandling) { // It is an error to set neither the asset buffer nor the path. TextClassifierOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ nullptr}, /* classifier_options= */ {}, }; diff --git a/mediapipe/tasks/c/text/text_embedder/BUILD b/mediapipe/tasks/c/text/text_embedder/BUILD index 28a743eb8..8ced041cf 100644 --- a/mediapipe/tasks/c/text/text_embedder/BUILD +++ b/mediapipe/tasks/c/text/text_embedder/BUILD @@ -28,6 +28,7 @@ cc_library( "//mediapipe/tasks/c/components/processors:embedder_options_converter", "//mediapipe/tasks/c/core:base_options", "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/cc/components/containers:embedding_result", "//mediapipe/tasks/cc/text/text_embedder", "@com_google_absl//absl/log:absl_log", "@com_google_absl//absl/status", diff --git a/mediapipe/tasks/c/text/text_embedder/text_embedder.cc b/mediapipe/tasks/c/text/text_embedder/text_embedder.cc index c98b958f5..f32ad996b 100644 --- a/mediapipe/tasks/c/text/text_embedder/text_embedder.cc +++ b/mediapipe/tasks/c/text/text_embedder/text_embedder.cc @@ -20,9 +20,11 @@ limitations under the License. 
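For reference, a classification result is read the same way as in the smoke test above: a `classifications` array with a count, each entry holding its own `categories` array. A small sketch (hypothetical helper, not part of the patch; the `score` field on the C `Category` is assumed from the existing container):

#include <cstdio>

#include "mediapipe/tasks/c/text/text_classifier/text_classifier.h"

// Illustrative only: prints the top category of the first classification head.
void PrintTopCategory(const TextClassifierResult& result) {
  if (result.classifications_count == 0 ||
      result.classifications[0].categories_count == 0) {
    return;
  }
  const auto& top = result.classifications[0].categories[0];
  std::printf("%s (%.3f)\n", top.category_name, top.score);
}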
#include "absl/log/absl_log.h" #include "absl/status/status.h" +#include "mediapipe/tasks/c/components/containers/embedding_result.h" #include "mediapipe/tasks/c/components/containers/embedding_result_converter.h" #include "mediapipe/tasks/c/components/processors/embedder_options_converter.h" #include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/cc/components/containers/embedding_result.h" #include "mediapipe/tasks/cc/text/text_embedder/text_embedder.h" namespace mediapipe::tasks::c::text::text_embedder { @@ -30,12 +32,14 @@ namespace mediapipe::tasks::c::text::text_embedder { namespace { using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; +using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding; using ::mediapipe::tasks::c::components::containers:: CppConvertToEmbeddingResult; using ::mediapipe::tasks::c::components::processors:: CppConvertToEmbedderOptions; using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; using ::mediapipe::tasks::text::text_embedder::TextEmbedder; +typedef ::mediapipe::tasks::components::containers::Embedding CppEmbedding; int CppProcessError(absl::Status status, char** error_msg) { if (error_msg) { @@ -91,6 +95,24 @@ int CppTextEmbedderClose(void* embedder, char** error_msg) { return 0; } +int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v, + double* similarity, char** error_msg) { + CppEmbedding cpp_u; + CppConvertToCppEmbedding(u, &cpp_u); + CppEmbedding cpp_v; + CppConvertToCppEmbedding(v, &cpp_v); + auto status_or_similarity = + mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity( + cpp_u, cpp_v); + if (status_or_similarity.ok()) { + *similarity = status_or_similarity.value(); + } else { + ABSL_LOG(ERROR) << "Cannot compute cosine similarity."; + return CppProcessError(status_or_similarity.status(), error_msg); + } + return 0; +} + } // namespace mediapipe::tasks::c::text::text_embedder extern "C" { @@ -116,4 +138,10 @@ int text_embedder_close(void* embedder, char** error_ms) { embedder, error_ms); } +int text_embedder_cosine_similarity(const Embedding& u, const Embedding& v, + double* similarity, char** error_msg) { + return mediapipe::tasks::c::text::text_embedder:: + CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg); +} + } // extern "C" diff --git a/mediapipe/tasks/c/text/text_embedder/text_embedder.h b/mediapipe/tasks/c/text/text_embedder/text_embedder.h index c9ccf816b..b737f47f1 100644 --- a/mediapipe/tasks/c/text/text_embedder/text_embedder.h +++ b/mediapipe/tasks/c/text/text_embedder/text_embedder.h @@ -47,15 +47,14 @@ struct TextEmbedderOptions { // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT void* text_embedder_create(struct TextEmbedderOptions* options, - char** error_msg = nullptr); + char** error_msg); // Performs embedding extraction on the input `text`. Returns `0` on success. // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT int text_embedder_embed(void* embedder, const char* utf8_str, - TextEmbedderResult* result, - char** error_msg = nullptr); + TextEmbedderResult* result, char** error_msg); // Frees the memory allocated inside a TextEmbedderResult result. Does not // free the result pointer itself. 
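Since the default arguments have been removed from these C declarations, every caller now passes `error_msg` explicitly: `nullptr` to ignore errors, or the address of a `char*` to receive a message that the caller must `free()`. A minimal C++ sketch of the resulting calling convention for the text embedder API, including the new cosine-similarity helper; the model path and input strings here are placeholders rather than assets from this change:

#include <cstdio>
#include <cstdlib>

#include "mediapipe/tasks/c/text/text_embedder/text_embedder.h"

int main() {
  // Placeholder model path; substitute any MediaPipe text embedder model.
  TextEmbedderOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ "embedder.tflite"},
      /* embedder_options= */ {/* l2_normalize= */ false,
                               /* quantize= */ false}};

  char* error_msg = nullptr;
  void* embedder = text_embedder_create(&options, &error_msg);
  if (embedder == nullptr) {
    std::fprintf(stderr, "create failed: %s\n", error_msg);
    std::free(error_msg);  // The caller owns the returned message.
    return 1;
  }

  // Embed two strings, then compare them with the new similarity helper.
  TextEmbedderResult first, second;
  text_embedder_embed(embedder, "first sentence", &first, /* error_msg= */ nullptr);
  text_embedder_embed(embedder, "second sentence", &second, /* error_msg= */ nullptr);

  double similarity = 0.0;
  if (text_embedder_cosine_similarity(first.embeddings[0], second.embeddings[0],
                                      &similarity, &error_msg) == 0) {
    std::printf("cosine similarity: %f\n", similarity);
  } else {
    std::fprintf(stderr, "cosine similarity failed: %s\n", error_msg);
    std::free(error_msg);
  }

  text_embedder_close_result(&first);
  text_embedder_close_result(&second);
  text_embedder_close(embedder, /* error_msg= */ nullptr);
  return 0;
}

The error string is allocated with `strdup` inside `CppProcessError`, so ownership transfers to the caller on every failing call that was given a non-null `error_msg`.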
@@ -65,7 +64,18 @@ MP_EXPORT void text_embedder_close_result(TextEmbedderResult* result); // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int text_embedder_close(void* embedder, char** error_msg = nullptr); +MP_EXPORT int text_embedder_close(void* embedder, char** error_msg); + +// Utility function to compute cosine similarity [1] between two embeddings. +// May return an InvalidArgumentError if e.g. the embeddings are of different +// types (quantized vs. float), have different sizes, or have an L2-norm of +// 0. +// +// [1]: https://en.wikipedia.org/wiki/Cosine_similarity +MP_EXPORT int text_embedder_cosine_similarity(const Embedding& u, + const Embedding& v, + double* similarity, + char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc b/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc index c823e01b4..3de075578 100644 --- a/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc +++ b/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc @@ -32,7 +32,12 @@ using testing::HasSubstr; constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/text/"; constexpr char kTestBertModelPath[] = "mobilebert_embedding_with_metadata.tflite"; -constexpr char kTestString[] = "It's beautiful outside."; +constexpr char kTestString0[] = + "When you go to this restaurant, they hold the pancake upside-down " + "before they hand it to you. It's a great gimmick."; +constexpr char kTestString1[] = + "Let's make a plan to steal the declaration of independence."; +constexpr float kPrecision = 1e-3; std::string GetFullPath(absl::string_view file_name) { return JoinPath("./", kTestDataDirectory, file_name); @@ -42,27 +47,63 @@ TEST(TextEmbedderTest, SmokeTest) { std::string model_path = GetFullPath(kTestBertModelPath); TextEmbedderOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ model_path.c_str()}, /* embedder_options= */ {/* l2_normalize= */ false, /* quantize= */ true}, }; - void* embedder = text_embedder_create(&options); + void* embedder = text_embedder_create(&options, /* error_msg */ nullptr); EXPECT_NE(embedder, nullptr); TextEmbedderResult result; - text_embedder_embed(embedder, kTestString, &result); + text_embedder_embed(embedder, kTestString0, &result, /* error_msg */ nullptr); EXPECT_EQ(result.embeddings_count, 1); EXPECT_EQ(result.embeddings[0].values_count, 512); text_embedder_close_result(&result); - text_embedder_close(embedder); + text_embedder_close(embedder, /* error_msg */ nullptr); +} + +TEST(TextEmbedderTest, SucceedsWithCosineSimilarity) { + std::string model_path = GetFullPath(kTestBertModelPath); + TextEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* embedder_options= */ + {/* l2_normalize= */ false, + /* quantize= */ false}}; + + void* embedder = text_embedder_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(embedder, nullptr); + + // Extract both embeddings. + TextEmbedderResult result0; + text_embedder_embed(embedder, kTestString0, &result0, + /* error_msg */ nullptr); + TextEmbedderResult result1; + text_embedder_embed(embedder, kTestString1, &result1, + /* error_msg */ nullptr); + + // Check cosine similarity.
+ double similarity; + text_embedder_cosine_similarity(result0.embeddings[0], result1.embeddings[0], + &similarity, nullptr); + double expected_similarity = 0.98077; + EXPECT_LE(abs(similarity - expected_similarity), kPrecision); + + text_embedder_close_result(&result0); + text_embedder_close_result(&result1); + text_embedder_close(embedder, /* error_msg */ nullptr); } TEST(TextEmbedderTest, ErrorHandling) { // It is an error to set neither the asset buffer nor the path. TextEmbedderOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ nullptr}, /* embedder_options= */ {}, }; diff --git a/mediapipe/tasks/c/vision/BUILD b/mediapipe/tasks/c/vision/BUILD new file mode 100644 index 000000000..1b3f6fa19 --- /dev/null +++ b/mediapipe/tasks/c/vision/BUILD @@ -0,0 +1,57 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +VISION_LIBRARIES = [ + "//mediapipe/tasks/c/vision/image_classifier:image_classifier_lib", + "//mediapipe/tasks/c/vision/image_embedder:image_embedder_lib", + "//mediapipe/tasks/c/vision/object_detector:object_detector_lib", +] + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision:libvision.so +cc_binary( + name = "libvision.so", + linkopts = [ + "-Wl,-soname=libvision.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = VISION_LIBRARIES, +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision:libvision:.dylib +cc_binary( + name = "libvision.dylib", + linkopts = [ + "-Wl,-install_name,libvision.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = VISION_LIBRARIES, +) diff --git a/mediapipe/tasks/c/vision/core/BUILD b/mediapipe/tasks/c/vision/core/BUILD new file mode 100644 index 000000000..7d3b0f9a9 --- /dev/null +++ b/mediapipe/tasks/c/vision/core/BUILD @@ -0,0 +1,22 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "common", + hdrs = ["common.h"], +) diff --git a/mediapipe/tasks/c/vision/core/common.h b/mediapipe/tasks/c/vision/core/common.h new file mode 100644 index 000000000..6b1639fba --- /dev/null +++ b/mediapipe/tasks/c/vision/core/common.h @@ -0,0 +1,68 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_ +#define MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Supported image formats. +enum ImageFormat { + UNKNOWN = 0, + SRGB = 1, + SRGBA = 2, + GRAY8 = 3, + SBGRA = 11 // compatible with Flutter `bgra8888` format. +}; + +// Supported processing modes. +enum RunningMode { + IMAGE = 1, + VIDEO = 2, + LIVE_STREAM = 3, +}; + +// Structure to hold image frame. +struct ImageFrame { + enum ImageFormat format; + const uint8_t* image_buffer; + int width; + int height; +}; + +// TODO: Add GPU buffer declaration and processing logic for it. +struct GpuBuffer { + int width; + int height; +}; + +// The object to contain an image, realizes `OneOf` concept. +struct MpImage { + enum { IMAGE_FRAME, GPU_BUFFER } type; + union { + struct ImageFrame image_frame; + struct GpuBuffer gpu_buffer; + }; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_ diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/BUILD b/mediapipe/tasks/c/vision/gesture_recognizer/BUILD new file mode 100644 index 000000000..68d9f5ba1 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/BUILD @@ -0,0 +1,143 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
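The `MpImage` struct in `common.h` above is the image type shared by all of the vision C APIs added in this change; only the `IMAGE_FRAME` variant is currently processed (the GPU path returns an error, as seen in the recognizer implementation below). A hypothetical helper, mirroring how the tests later in this change populate the struct, that wraps a caller-owned interleaved 8-bit RGB buffer without copying:

#include <cstdint>

#include "mediapipe/tasks/c/vision/core/common.h"

// Wraps an existing RGB pixel buffer in an MpImage. No copy is made, so the
// buffer must stay valid until the call that consumes the MpImage returns.
MpImage WrapRgbBuffer(const uint8_t* pixels, int width, int height) {
  const MpImage image = {.type = MpImage::IMAGE_FRAME,
                         .image_frame = {/* format= */ SRGB,
                                         /* image_buffer= */ pixels,
                                         /* width= */ width,
                                         /* height= */ height}};
  return image;
}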
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "gesture_recognizer_result", + hdrs = ["gesture_recognizer_result.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:landmark", + ], +) + +cc_library( + name = "gesture_recognizer_result_converter", + srcs = ["gesture_recognizer_result_converter.cc"], + hdrs = ["gesture_recognizer_result_converter.h"], + deps = [ + ":gesture_recognizer_result", + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:category_converter", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/components/containers:landmark_converter", + "//mediapipe/tasks/cc/components/containers:category", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result", + ], +) + +cc_test( + name = "gesture_recognizer_result_converter_test", + srcs = ["gesture_recognizer_result_converter_test.cc"], + linkstatic = 1, + deps = [ + ":gesture_recognizer_result", + ":gesture_recognizer_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "gesture_recognizer_lib", + srcs = ["gesture_recognizer.cc"], + hdrs = ["gesture_recognizer.h"], + visibility = ["//visibility:public"], + deps = [ + ":gesture_recognizer_result", + ":gesture_recognizer_result_converter", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/components/processors:classifier_options", + "//mediapipe/tasks/c/components/processors:classifier_options_converter", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/gesture_recognizer", + "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "gesture_recognizer_test", + srcs = ["gesture_recognizer_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":gesture_recognizer_lib", + ":gesture_recognizer_result", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/gesture_recognizer:libgesture_recognizer.so +cc_binary( + name = "libgesture_recognizer.so", + linkopts = [ + "-Wl,-soname=libgesture_recognizer.so", 
+ "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":gesture_recognizer_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/gesture_recognizer:libgesture_recognizer.dylib +cc_binary( + name = "libgesture_recognizer.dylib", + linkopts = [ + "-Wl,-install_name,libgesture_recognizer.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":gesture_recognizer_lib"], +) diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc new file mode 100644 index 000000000..d3b0868f8 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc @@ -0,0 +1,297 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h" + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/components/processors/classifier_options_converter.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::gesture_recognizer { + +namespace { + +using ::mediapipe::tasks::c::components::containers:: + CppCloseGestureRecognizerResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToGestureRecognizerResult; +using ::mediapipe::tasks::c::components::processors:: + CppConvertToClassifierOptions; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizer; +typedef ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + CppGestureRecognizerResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToGestureRecognizerOptions( + const GestureRecognizerOptions& in, + 
mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerOptions* + out) { + out->num_hands = in.num_hands; + out->min_hand_detection_confidence = in.min_hand_detection_confidence; + out->min_hand_presence_confidence = in.min_hand_presence_confidence; + out->min_tracking_confidence = in.min_tracking_confidence; + CppConvertToClassifierOptions(in.canned_gestures_classifier_options, + &out->canned_gestures_classifier_options); + CppConvertToClassifierOptions(in.custom_gestures_classifier_options, + &out->custom_gestures_classifier_options); +} + +GestureRecognizer* CppGestureRecognizerCreate( + const GestureRecognizerOptions& options, char** error_msg) { + auto cpp_options = + std::make_unique<::mediapipe::tasks::vision::gesture_recognizer:: + GestureRecognizerOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToGestureRecognizerOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. + if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create GestureRecognizer: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + GestureRecognizerOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. 
+ GestureRecognizerResult result; + CppConvertToGestureRecognizerResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseGestureRecognizerResult(&result); + }; + } + + auto recognizer = GestureRecognizer::Create(std::move(cpp_options)); + if (!recognizer.ok()) { + ABSL_LOG(ERROR) << "Failed to create GestureRecognizer: " + << recognizer.status(); + CppProcessError(recognizer.status(), error_msg); + return nullptr; + } + return recognizer->release(); +} + +int CppGestureRecognizerRecognize(void* recognizer, const MpImage& image, + GestureRecognizerResult* result, + char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_recognizer = static_cast(recognizer); + auto cpp_result = cpp_recognizer->Recognize(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToGestureRecognizerResult(*cpp_result, result); + return 0; +} + +int CppGestureRecognizerRecognizeForVideo(void* recognizer, + const MpImage& image, + int64_t timestamp_ms, + GestureRecognizerResult* result, + char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_recognizer = static_cast(recognizer); + auto cpp_result = cpp_recognizer->RecognizeForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToGestureRecognizerResult(*cpp_result, result); + return 0; +} + +int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed 
to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_recognizer = static_cast(recognizer); + auto cpp_result = cpp_recognizer->RecognizeAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the image classification failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppGestureRecognizerCloseResult(GestureRecognizerResult* result) { + CppCloseGestureRecognizerResult(result); +} + +int CppGestureRecognizerClose(void* recognizer, char** error_msg) { + auto cpp_recognizer = static_cast(recognizer); + auto result = cpp_recognizer->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close GestureRecognizer: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_recognizer; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::gesture_recognizer + +extern "C" { + +void* gesture_recognizer_create(struct GestureRecognizerOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerCreate(*options, error_msg); +} + +int gesture_recognizer_recognize_image(void* recognizer, const MpImage& image, + GestureRecognizerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerRecognize(recognizer, image, result, error_msg); +} + +int gesture_recognizer_recognize_for_video(void* recognizer, + const MpImage& image, + int64_t timestamp_ms, + GestureRecognizerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerRecognizeForVideo(recognizer, image, timestamp_ms, + result, error_msg); +} + +int gesture_recognizer_recognize_async(void* recognizer, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerRecognizeAsync(recognizer, image, timestamp_ms, + error_msg); +} + +void gesture_recognizer_close_result(GestureRecognizerResult* result) { + mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerCloseResult(result); +} + +int gesture_recognizer_close(void* recognizer, char** error_ms) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerClose(recognizer, error_ms); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h new file mode 100644 index 000000000..6d9102c66 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h @@ -0,0 +1,154 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_ +#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_ + +#include "mediapipe/tasks/c/components/processors/classifier_options.h" +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The options for configuring a MediaPipe gesture recognizer task. +struct GestureRecognizerOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // GestureRecognizer has three running modes: + // 1) The image mode for recognizing hand gestures on single image inputs. + // 2) The video mode for recognizing hand gestures on the decoded frames of a + // video. + // 3) The live stream mode for recognizing hand gestures on the live stream of + // input data, such as from camera. In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. + RunningMode running_mode; + + // The maximum number of hands can be detected by the GestureRecognizer. + int num_hands = 1; + + // The minimum confidence score for the hand detection to be considered + // successful. + float min_hand_detection_confidence = 0.5; + + // The minimum confidence score of hand presence score in the hand landmark + // detection. + float min_hand_presence_confidence = 0.5; + + // The minimum confidence score for the hand tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // Options for configuring the canned gestures classifier, such as score + // threshold, allow list and deny list of gestures. The categories for canned + // gesture classifier are: ["None", "Closed_Fist", "Open_Palm", + // "Pointing_Up", "Thumb_Down", "Thumb_Up", "Victory", "ILoveYou"] + struct ClassifierOptions canned_gestures_classifier_options; + + // Options for configuring the custom gestures classifier, such as score + // threshold, allow list and deny list of gestures. + struct ClassifierOptions custom_gestures_classifier_options; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. Arguments of the callback function include: + // the pointer to recognition result, the image that result was obtained + // on, the timestamp relevant to recognition results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. + // + // A caller is responsible for closing gesture recognizer result. + typedef void (*result_callback_fn)(GestureRecognizerResult* result, + const MpImage& image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates an GestureRecognizer from the provided `options`. +// Returns a pointer to the gesture recognizer on success. +// If an error occurs, returns `nullptr` and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). 
You must free the memory +// allocated for the error message. +MP_EXPORT void* gesture_recognizer_create( + struct GestureRecognizerOptions* options, char** error_msg); + +// Performs gesture recognition on the input `image`. Returns `0` on success. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_recognize_image( + void* recognizer, const MpImage& image, GestureRecognizerResult* result, + char** error_msg); + +// Performs gesture recognition on the provided video frame. +// Only use this method when the GestureRecognizer is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_recognize_for_video( + void* recognizer, const MpImage& image, int64_t timestamp_ms, + GestureRecognizerResult* result, char** error_msg); + +// Sends live image data to gesture recognition, and the results will be +// available via the `result_callback` provided in the GestureRecognizerOptions. +// Only use this method when the GestureRecognizer is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the gesture recognizer. The input timestamps must be monotonically +// increasing. +// The `result_callback` provides: +// - The recognition results as an GestureRecognizerResult object. +// - The const reference to the corresponding input image that the gesture +// recognizer runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_recognize_async(void* recognizer, + const MpImage& image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a GestureRecognizerResult result. +// Does not free the result pointer itself. +MP_EXPORT void gesture_recognizer_close_result(GestureRecognizerResult* result); + +// Frees gesture recognizer. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. 
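Together with the `gesture_recognizer_close` declaration that follows immediately below, the image-mode half of this header can be exercised as in this sketch. The model bundle name matches the test asset used later in this change; the pixel buffer and the `RecognizeOnce` wrapper are illustrative only:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

#include "mediapipe/tasks/c/vision/core/common.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h"

// Runs one image-mode recognition over a caller-provided RGB frame.
// Decoding the pixels into `rgb_pixels` is left to the caller.
int RecognizeOnce(const uint8_t* rgb_pixels, int width, int height) {
  GestureRecognizerOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ "gesture_recognizer.task"},
      /* running_mode= */ RunningMode::IMAGE,
      /* num_hands= */ 1,
      /* min_hand_detection_confidence= */ 0.5,
      /* min_hand_presence_confidence= */ 0.5,
      /* min_tracking_confidence= */ 0.5,
      /* canned_gestures_classifier_options= */
      {/* display_names_locale= */ nullptr, /* max_results= */ -1,
       /* score_threshold= */ 0.0, /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0, /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      /* custom_gestures_classifier_options= */
      {/* display_names_locale= */ nullptr, /* max_results= */ -1,
       /* score_threshold= */ 0.0, /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0, /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      /* result_callback= */ nullptr};

  char* error_msg = nullptr;
  void* recognizer = gesture_recognizer_create(&options, &error_msg);
  if (recognizer == nullptr) {
    std::fprintf(stderr, "create failed: %s\n", error_msg);
    std::free(error_msg);  // The caller owns the error message.
    return -1;
  }

  const MpImage image = {.type = MpImage::IMAGE_FRAME,
                         .image_frame = {/* format= */ SRGB,
                                         /* image_buffer= */ rgb_pixels,
                                         /* width= */ width,
                                         /* height= */ height}};

  GestureRecognizerResult result;
  if (gesture_recognizer_recognize_image(recognizer, image, &result,
                                         /* error_msg= */ nullptr) == 0) {
    if (result.gestures_count > 0 && result.gestures[0].categories_count > 0) {
      std::printf("top gesture: %s\n",
                  result.gestures[0].categories[0].category_name);
    }
    gesture_recognizer_close_result(&result);
  }
  return gesture_recognizer_close(recognizer, /* error_msg= */ nullptr);
}

Video mode follows the same shape via `gesture_recognizer_recognize_for_video` with a monotonically increasing timestamp, and live-stream mode replaces the synchronous result with the `result_callback` configured in the options.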
+MP_EXPORT int gesture_recognizer_close(void* recognizer, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_ diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h new file mode 100644 index 000000000..542b2eb62 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h @@ -0,0 +1,65 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_ +#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_ + +#include + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The gesture recognition result from GestureRecognizer, where each vector +// element represents a single hand detected in the image. +struct GestureRecognizerResult { + // Recognized hand gestures with sorted order such that the winning label is + // the first item in the list. + struct Categories* gestures; + + // The number of elements in the gestures array. + uint32_t gestures_count; + + // Classification of handedness. + struct Categories* handedness; + + // The number of elements in the handedness array. + uint32_t handedness_count; + + // Detected hand landmarks in normalized image coordinates. + struct NormalizedLandmarks* hand_landmarks; + + // The number of elements in the hand_landmarks array. + uint32_t hand_landmarks_count; + + // Detected hand landmarks in world coordinates. + struct Landmarks* hand_world_landmarks; + + // The number of elements in the hand_world_landmarks array. + uint32_t hand_world_landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_ diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.cc new file mode 100644 index 000000000..96b250800 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.cc @@ -0,0 +1,169 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h" + +#include +#include + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/category_converter.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/components/containers/category.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" + +namespace mediapipe::tasks::c::components::containers { + +using CppCategory = ::mediapipe::tasks::components::containers::Category; +using CppLandmark = ::mediapipe::tasks::components::containers::Landmark; +using CppNormalizedLandmark = + ::mediapipe::tasks::components::containers::NormalizedLandmark; + +void CppConvertToGestureRecognizerResult( + const mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult& + in, + GestureRecognizerResult* out) { + out->gestures_count = in.gestures.size(); + out->gestures = new Categories[out->gestures_count]; + + for (uint32_t i = 0; i < out->gestures_count; ++i) { + uint32_t categories_count = in.gestures[i].classification_size(); + out->gestures[i].categories_count = categories_count; + out->gestures[i].categories = new Category[categories_count]; + + for (uint32_t j = 0; j < categories_count; ++j) { + const auto& classification = in.gestures[i].classification(j); + + CppCategory cpp_category; + // Set fields from the Classification protobuf + cpp_category.index = classification.index(); + cpp_category.score = classification.score(); + if (classification.has_label()) { + cpp_category.category_name = classification.label(); + } + if (classification.has_display_name()) { + cpp_category.display_name = classification.display_name(); + } + + CppConvertToCategory(cpp_category, &out->gestures[i].categories[j]); + } + } + + out->handedness_count = in.handedness.size(); + out->handedness = new Categories[out->handedness_count]; + + for (uint32_t i = 0; i < out->handedness_count; ++i) { + uint32_t categories_count = in.handedness[i].classification_size(); + out->handedness[i].categories_count = categories_count; + out->handedness[i].categories = new Category[categories_count]; + + for (uint32_t j = 0; j < categories_count; ++j) { + const auto& classification = in.handedness[i].classification(j); + + CppCategory cpp_category; + // Set fields from the Classification protobuf + cpp_category.index = classification.index(); + cpp_category.score = classification.score(); + if (classification.has_label()) { + cpp_category.category_name = classification.label(); + } + if (classification.has_display_name()) { + cpp_category.display_name = classification.display_name(); + } + + CppConvertToCategory(cpp_category, &out->handedness[i].categories[j]); + } + } + + out->hand_landmarks_count = 
in.hand_landmarks.size(); + out->hand_landmarks = new NormalizedLandmarks[out->hand_landmarks_count]; + for (uint32_t i = 0; i < out->hand_landmarks_count; ++i) { + std::vector cpp_normalized_landmarks; + for (uint32_t j = 0; j < in.hand_landmarks[i].landmark_size(); ++j) { + const auto& landmark = in.hand_landmarks[i].landmark(j); + CppNormalizedLandmark cpp_landmark; + cpp_landmark.x = landmark.x(); + cpp_landmark.y = landmark.y(); + cpp_landmark.z = landmark.z(); + if (landmark.has_presence()) { + cpp_landmark.presence = landmark.presence(); + } + if (landmark.has_visibility()) { + cpp_landmark.visibility = landmark.visibility(); + } + cpp_normalized_landmarks.push_back(cpp_landmark); + } + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &out->hand_landmarks[i]); + } + + out->hand_world_landmarks_count = in.hand_world_landmarks.size(); + out->hand_world_landmarks = new Landmarks[out->hand_world_landmarks_count]; + for (uint32_t i = 0; i < out->hand_world_landmarks_count; ++i) { + std::vector cpp_landmarks; + for (uint32_t j = 0; j < in.hand_world_landmarks[i].landmark_size(); ++j) { + const auto& landmark = in.hand_world_landmarks[i].landmark(j); + CppLandmark cpp_landmark; + cpp_landmark.x = landmark.x(); + cpp_landmark.y = landmark.y(); + cpp_landmark.z = landmark.z(); + if (landmark.has_presence()) { + cpp_landmark.presence = landmark.presence(); + } + if (landmark.has_visibility()) { + cpp_landmark.visibility = landmark.visibility(); + } + cpp_landmarks.push_back(cpp_landmark); + } + CppConvertToLandmarks(cpp_landmarks, &out->hand_world_landmarks[i]); + } +} + +void CppCloseGestureRecognizerResult(GestureRecognizerResult* result) { + for (uint32_t i = 0; i < result->gestures_count; ++i) { + CppCloseCategories(&result->gestures[i]); + } + delete[] result->gestures; + + for (uint32_t i = 0; i < result->handedness_count; ++i) { + CppCloseCategories(&result->handedness[i]); + } + delete[] result->handedness; + + for (uint32_t i = 0; i < result->hand_landmarks_count; ++i) { + CppCloseNormalizedLandmarks(&result->hand_landmarks[i]); + } + delete[] result->hand_landmarks; + + for (uint32_t i = 0; i < result->hand_world_landmarks_count; ++i) { + CppCloseLandmarks(&result->hand_world_landmarks[i]); + } + delete[] result->hand_world_landmarks; + + result->gestures = nullptr; + result->handedness = nullptr; + result->hand_landmarks = nullptr; + result->hand_world_landmarks = nullptr; + + result->gestures_count = 0; + result->handedness_count = 0; + result->hand_landmarks_count = 0; + result->hand_world_landmarks_count = 0; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h new file mode 100644 index 000000000..d5105acf6 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h @@ -0,0 +1,33 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToGestureRecognizerResult( + const mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult& + in, + GestureRecognizerResult* out); + +void CppCloseGestureRecognizerResult(GestureRecognizerResult* result); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc new file mode 100644 index 000000000..603e5ed7d --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc @@ -0,0 +1,156 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h" + +#include +#include + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" + +namespace mediapipe::tasks::c::components::containers { + +using mediapipe::ClassificationList; +using mediapipe::LandmarkList; +using mediapipe::NormalizedLandmarkList; + +void InitGestureRecognizerResult( + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult* + cpp_result) { + // Initialize gestures + mediapipe::Classification classification_for_gestures; + classification_for_gestures.set_index(0); + classification_for_gestures.set_score(0.9f); + classification_for_gestures.set_label("gesture_label_1"); + classification_for_gestures.set_display_name("gesture_display_name_1"); + ClassificationList gestures_list; + *gestures_list.add_classification() = classification_for_gestures; + cpp_result->gestures.push_back(gestures_list); + + // Initialize handedness + mediapipe::Classification classification_for_handedness; + classification_for_handedness.set_index(1); + classification_for_handedness.set_score(0.8f); + classification_for_handedness.set_label("handeness_label_1"); + classification_for_handedness.set_display_name("handeness_display_name_1"); + ClassificationList handedness_list; + *handedness_list.add_classification() = classification_for_handedness; + cpp_result->handedness.push_back(handedness_list); + + // Initialize hand_landmarks + NormalizedLandmarkList normalized_landmark_list; + auto& normalized_landmark = *normalized_landmark_list.add_landmark(); + normalized_landmark.set_x(0.1f); + normalized_landmark.set_y(0.2f); + normalized_landmark.set_z(0.3f); + + cpp_result->hand_landmarks.push_back(normalized_landmark_list); + + // Initialize hand_world_landmarks + LandmarkList landmark_list; + auto& landmark = *landmark_list.add_landmark(); + landmark.set_x(1.0f); + landmark.set_y(1.1f); + landmark.set_z(1.2f); + + cpp_result->hand_world_landmarks.push_back(landmark_list); +} + +TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + cpp_result; + InitGestureRecognizerResult(&cpp_result); + + GestureRecognizerResult c_result; + CppConvertToGestureRecognizerResult(cpp_result, &c_result); + + // Verify conversion of gestures + EXPECT_NE(c_result.gestures, nullptr); + EXPECT_EQ(c_result.gestures_count, cpp_result.gestures.size()); + + for (uint32_t i = 0; i < c_result.gestures_count; ++i) { + EXPECT_EQ(c_result.gestures[i].categories_count, + cpp_result.gestures[i].classification_size()); + for (uint32_t j = 0; j < c_result.gestures[i].categories_count; ++j) { + auto gesture = cpp_result.gestures[i].classification(j); + EXPECT_EQ(std::string(c_result.gestures[i].categories[j].category_name), + gesture.label()); + EXPECT_FLOAT_EQ(c_result.gestures[i].categories[j].score, + gesture.score()); + } + } + + // Verify conversion of hand_landmarks + EXPECT_NE(c_result.hand_landmarks, nullptr); + EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size()); + + for (uint32_t i = 0; i < c_result.hand_landmarks_count; ++i) { + EXPECT_EQ(c_result.hand_landmarks[i].landmarks_count, + 
cpp_result.hand_landmarks[i].landmark_size()); + for (uint32_t j = 0; j < c_result.hand_landmarks[i].landmarks_count; ++j) { + const auto& landmark = cpp_result.hand_landmarks[i].landmark(j); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].x, landmark.x()); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].y, landmark.y()); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].z, landmark.z()); + } + } + + // Verify conversion of hand_world_landmarks + EXPECT_NE(c_result.hand_world_landmarks, nullptr); + EXPECT_EQ(c_result.hand_world_landmarks_count, + cpp_result.hand_world_landmarks.size()); + for (uint32_t i = 0; i < c_result.hand_world_landmarks_count; ++i) { + EXPECT_EQ(c_result.hand_world_landmarks[i].landmarks_count, + cpp_result.hand_world_landmarks[i].landmark_size()); + for (uint32_t j = 0; j < c_result.hand_world_landmarks[i].landmarks_count; + ++j) { + const auto& landmark = cpp_result.hand_world_landmarks[i].landmark(j); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].x, + landmark.x()); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].y, + landmark.y()); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].z, + landmark.z()); + } + } + + CppCloseGestureRecognizerResult(&c_result); +} + +TEST(GestureRecognizerResultConverterTest, FreesMemory) { + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + cpp_result; + InitGestureRecognizerResult(&cpp_result); + + GestureRecognizerResult c_result; + CppConvertToGestureRecognizerResult(cpp_result, &c_result); + + EXPECT_NE(c_result.gestures, nullptr); + EXPECT_NE(c_result.handedness, nullptr); + EXPECT_NE(c_result.hand_landmarks, nullptr); + EXPECT_NE(c_result.hand_world_landmarks, nullptr); + + CppCloseGestureRecognizerResult(&c_result); + + EXPECT_EQ(c_result.gestures, nullptr); + EXPECT_EQ(c_result.handedness, nullptr); + EXPECT_EQ(c_result.hand_landmarks, nullptr); + EXPECT_EQ(c_result.hand_world_landmarks, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc new file mode 100644 index 000000000..06a46b219 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc @@ -0,0 +1,325 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h" + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kModelName[] = "gesture_recognizer.task"; +constexpr char kImageFile[] = "fist.jpg"; +constexpr float kScorePrecision = 1e-2; +constexpr float kLandmarkPrecision = 1e-1; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, file_name); +} + +void MatchesGestureRecognizerResult(GestureRecognizerResult* result, + const float score_precision, + const float landmark_precision) { + // Expects to have the same number of hands detected. + EXPECT_EQ(result->gestures_count, 1); + EXPECT_EQ(result->handedness_count, 1); + // Actual gesture with top score matches expected gesture. + EXPECT_EQ(std::string{result->gestures[0].categories[0].category_name}, + "Closed_Fist"); + EXPECT_NEAR(result->gestures[0].categories[0].score, 0.91f, score_precision); + + // Actual handedness matches expected handedness. + EXPECT_EQ(std::string{result->handedness[0].categories[0].category_name}, + "Right"); + EXPECT_NEAR(result->handedness[0].categories[0].score, 0.9893f, + score_precision); + + // Actual landmarks match expected landmarks. 
+ EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].x, 0.477f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].y, 0.661f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].z, 0.0f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].x, -0.009f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].y, 0.082f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].z, 0.006f, + landmark_precision); +} + +TEST(GestureRecognizerTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}}; + + void* recognizer = + gesture_recognizer_create(&options, /* error_msg */ nullptr); + EXPECT_NE(recognizer, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + GestureRecognizerResult result; + gesture_recognizer_recognize_image(recognizer, mp_image, &result, + /* error_msg */ nullptr); + MatchesGestureRecognizerResult(&result, kScorePrecision, kLandmarkPrecision); + gesture_recognizer_close_result(&result); + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); +} + +TEST(GestureRecognizerTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}}; + + void* recognizer = + gesture_recognizer_create(&options, /* error_msg */ nullptr); + EXPECT_NE(recognizer, nullptr); + + const auto& image_frame = 
image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + GestureRecognizerResult result; + gesture_recognizer_recognize_for_video(recognizer, mp_image, i, &result, + /* error_msg */ nullptr); + + MatchesGestureRecognizerResult(&result, kScorePrecision, + kLandmarkPrecision); + gesture_recognizer_close_result(&result); + } + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. +struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(GestureRecognizerResult* recognizer_result, + const MpImage& image, int64_t timestamp, char* error_msg) { + ASSERT_NE(recognizer_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + MatchesGestureRecognizerResult(recognizer_result, kScorePrecision, + kLandmarkPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(GestureRecognizerTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* recognizer = + gesture_recognizer_create(&options, /* error_msg */ nullptr); + EXPECT_NE(recognizer, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(gesture_recognizer_recognize_async(recognizer, mp_image, i, + /* error_msg */ nullptr), + 0); + } + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); + + // Due to the flow limiter, the total of outputs might be smaller than the + // number of iterations. 
+ EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(GestureRecognizerTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {}, + {}}; + + char* error_msg; + void* recognizer = gesture_recognizer_create(&options, &error_msg); + EXPECT_EQ(recognizer, nullptr); + + EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); + + free(error_msg); +} + +TEST(GestureRecognizerTest, FailedRecognitionHandling) { + const std::string model_path = GetFullPath(kModelName); + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + }; + + void* recognizer = gesture_recognizer_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(recognizer, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + GestureRecognizerResult result; + char* error_msg; + gesture_recognizer_recognize_image(recognizer, mp_image, &result, &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); + free(error_msg); + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); +} + +} // namespace diff --git a/mediapipe/tasks/c/vision/image_classifier/BUILD b/mediapipe/tasks/c/vision/image_classifier/BUILD index df0e636c5..b1930fb0e 100644 --- a/mediapipe/tasks/c/vision/image_classifier/BUILD +++ b/mediapipe/tasks/c/vision/image_classifier/BUILD @@ -30,13 +30,13 @@ cc_library( "//mediapipe/tasks/c/components/processors:classifier_options_converter", "//mediapipe/tasks/c/core:base_options", "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/image_classifier", "//mediapipe/tasks/cc/vision/utils:image_utils", "@com_google_absl//absl/log:absl_log", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/time", ], alwayslink = 1, ) @@ -56,11 +56,7 @@ cc_test( ":image_classifier_lib", "//mediapipe/framework/deps:file_path", "//mediapipe/framework/formats:image", - "//mediapipe/framework/formats:image_frame", - "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/port:gtest", - "//mediapipe/framework/port:opencv_core", - "//mediapipe/framework/port:opencv_imgproc", 
"//mediapipe/tasks/c/components/containers:category", "//mediapipe/tasks/cc/vision/utils:image_utils", "@com_google_absl//absl/flags:flag", @@ -68,3 +64,37 @@ cc_test( "@com_google_googletest//:gtest_main", ], ) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/image_classifier:libimage_classifier.so +cc_binary( + name = "libimage_classifier.so", + linkopts = [ + "-Wl,-soname=libimage_classifier.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":image_classifier_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/image_classifier:libimage_classifier.dylib +cc_binary( + name = "libimage_classifier.dylib", + linkopts = [ + "-Wl,-install_name,libimage_classifier.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":image_classifier_lib"], +) diff --git a/mediapipe/tasks/c/vision/image_classifier/image_classifier.cc b/mediapipe/tasks/c/vision/image_classifier/image_classifier.cc index 4245ca4cd..ff6f5bdfc 100644 --- a/mediapipe/tasks/c/vision/image_classifier/image_classifier.cc +++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier.cc @@ -15,6 +15,8 @@ limitations under the License. #include "mediapipe/tasks/c/vision/image_classifier/image_classifier.h" +#include +#include #include #include @@ -26,6 +28,7 @@ limitations under the License. #include "mediapipe/tasks/c/components/containers/classification_result_converter.h" #include "mediapipe/tasks/c/components/processors/classifier_options_converter.h" #include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/image_classifier/image_classifier.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" @@ -41,7 +44,10 @@ using ::mediapipe::tasks::c::components::processors:: CppConvertToClassifierOptions; using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; using ::mediapipe::tasks::vision::image_classifier::ImageClassifier; +typedef ::mediapipe::tasks::vision::image_classifier::ImageClassifierResult + CppImageClassifierResult; int CppProcessError(absl::Status status, char** error_msg) { if (error_msg) { @@ -60,6 +66,53 @@ ImageClassifier* CppImageClassifierCreate(const ImageClassifierOptions& options, CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); CppConvertToClassifierOptions(options.classifier_options, &cpp_options->classifier_options); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. 
+  if (cpp_options->running_mode == RunningMode::LIVE_STREAM) {
+    if (options.result_callback == nullptr) {
+      const absl::Status status = absl::InvalidArgumentError(
+          "Provided null pointer to callback function.");
+      ABSL_LOG(ERROR) << "Failed to create ImageClassifier: " << status;
+      CppProcessError(status, error_msg);
+      return nullptr;
+    }
+
+    ImageClassifierOptions::result_callback_fn result_callback =
+        options.result_callback;
+    cpp_options->result_callback =
+        [result_callback](absl::StatusOr<CppImageClassifierResult> cpp_result,
+                          const Image& image, int64_t timestamp) {
+          char* error_msg = nullptr;
+
+          if (!cpp_result.ok()) {
+            ABSL_LOG(ERROR) << "Classification failed: " << cpp_result.status();
+            CppProcessError(cpp_result.status(), &error_msg);
+            result_callback(nullptr, MpImage(), timestamp, error_msg);
+            free(error_msg);
+            return;
+          }
+
+          // Result is valid for the lifetime of the callback function.
+          ImageClassifierResult result;
+          CppConvertToClassificationResult(*cpp_result, &result);
+
+          const auto& image_frame = image.GetImageFrameSharedPtr();
+          const MpImage mp_image = {
+              .type = MpImage::IMAGE_FRAME,
+              .image_frame = {
+                  .format = static_cast<::ImageFormat>(image_frame->Format()),
+                  .image_buffer = image_frame->PixelData(),
+                  .width = image_frame->Width(),
+                  .height = image_frame->Height()}};
+
+          result_callback(&result, mp_image, timestamp,
+                          /* error_msg= */ nullptr);
+
+          CppCloseClassificationResult(&result);
+        };
+  }
  auto classifier = ImageClassifier::Create(std::move(cpp_options));
  if (!classifier.ok()) {
@@ -75,8 +128,8 @@ int CppImageClassifierClassify(void* classifier, const MpImage* image,
                               ImageClassifierResult* result,
                               char** error_msg) {
  if (image->type == MpImage::GPU_BUFFER) {
-    absl::Status status =
-        absl::InvalidArgumentError("gpu buffer not supported yet");
+    const absl::Status status =
+        absl::InvalidArgumentError("GPU Buffer not supported yet.");

    ABSL_LOG(ERROR) << "Classification failed: " << status.message();
    return CppProcessError(status, error_msg);
@@ -102,6 +155,68 @@ int CppImageClassifierClassify(void* classifier, const MpImage* image,
  return 0;
}

+int CppImageClassifierClassifyForVideo(void* classifier, const MpImage* image,
+                                       int64_t timestamp_ms,
+                                       ImageClassifierResult* result,
+                                       char** error_msg) {
+  if (image->type == MpImage::GPU_BUFFER) {
+    absl::Status status =
+        absl::InvalidArgumentError("GPU Buffer not supported yet");
+
+    ABSL_LOG(ERROR) << "Classification failed: " << status.message();
+    return CppProcessError(status, error_msg);
+  }
+
+  const auto img = CreateImageFromBuffer(
+      static_cast<ImageFormat::Format>(image->image_frame.format),
+      image->image_frame.image_buffer, image->image_frame.width,
+      image->image_frame.height);
+
+  if (!img.ok()) {
+    ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
+    return CppProcessError(img.status(), error_msg);
+  }
+
+  auto cpp_classifier = static_cast<ImageClassifier*>(classifier);
+  auto cpp_result = cpp_classifier->ClassifyForVideo(*img, timestamp_ms);
+  if (!cpp_result.ok()) {
+    ABSL_LOG(ERROR) << "Classification failed: " << cpp_result.status();
+    return CppProcessError(cpp_result.status(), error_msg);
+  }
+  CppConvertToClassificationResult(*cpp_result, result);
+  return 0;
+}
+
+int CppImageClassifierClassifyAsync(void* classifier, const MpImage* image,
+                                    int64_t timestamp_ms, char** error_msg) {
+  if (image->type == MpImage::GPU_BUFFER) {
+    absl::Status status =
+        absl::InvalidArgumentError("GPU Buffer not supported yet");
+
+    ABSL_LOG(ERROR) << "Classification failed: " << status.message();
+    return CppProcessError(status, error_msg);
+  }
+
+  const auto img = CreateImageFromBuffer(
+      static_cast<ImageFormat::Format>(image->image_frame.format),
+      image->image_frame.image_buffer, image->image_frame.width,
+      image->image_frame.height);
+
+  if (!img.ok()) {
+    ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
+    return CppProcessError(img.status(), error_msg);
+  }
+
+  auto cpp_classifier = static_cast<ImageClassifier*>(classifier);
+  auto cpp_result = cpp_classifier->ClassifyAsync(*img, timestamp_ms);
+  if (!cpp_result.ok()) {
+    ABSL_LOG(ERROR) << "Data preparation for the image classification failed: "
+                    << cpp_result;
+    return CppProcessError(cpp_result, error_msg);
+  }
+  return 0;
+}
+
 void CppImageClassifierCloseResult(ImageClassifierResult* result) {
   CppCloseClassificationResult(result);
 }
@@ -134,6 +249,22 @@ int image_classifier_classify_image(void* classifier, const MpImage* image,
      CppImageClassifierClassify(classifier, image, result, error_msg);
}

+int image_classifier_classify_for_video(void* classifier, const MpImage* image,
+                                        int64_t timestamp_ms,
+                                        ImageClassifierResult* result,
+                                        char** error_msg) {
+  return mediapipe::tasks::c::vision::image_classifier::
+      CppImageClassifierClassifyForVideo(classifier, image, timestamp_ms,
+                                         result, error_msg);
+}
+
+int image_classifier_classify_async(void* classifier, const MpImage* image,
+                                    int64_t timestamp_ms, char** error_msg) {
+  return mediapipe::tasks::c::vision::image_classifier::
+      CppImageClassifierClassifyAsync(classifier, image, timestamp_ms,
+                                      error_msg);
+}
+
 void image_classifier_close_result(ImageClassifierResult* result) {
   mediapipe::tasks::c::vision::image_classifier::CppImageClassifierCloseResult(
       result);
diff --git a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h
index 9b59e4127..2a1691d3c 100644
--- a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h
+++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h
@@ -16,11 +16,10 @@ limitations under the License.
 #ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
 #define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_

-#include
-
 #include "mediapipe/tasks/c/components/containers/classification_result.h"
 #include "mediapipe/tasks/c/components/processors/classifier_options.h"
 #include "mediapipe/tasks/c/core/base_options.h"
+#include "mediapipe/tasks/c/vision/core/common.h"

 #ifndef MP_EXPORT
 #define MP_EXPORT __attribute__((visibility("default")))
@@ -32,43 +31,7 @@ extern "C" {

 typedef ClassificationResult ImageClassifierResult;

-// Supported image formats.
-enum ImageFormat {
-  UNKNOWN = 0,
-  SRGB = 1,
-  SRGBA = 2,
-  GRAY8 = 3,
-  SBGRA = 11  // compatible with Flutter `bgra8888` format.
-};
-
-// Supported processing modes.
-enum RunningMode {
-  IMAGE = 1,
-  VIDEO = 2,
-  LIVE_STREAM = 3,
-};
-
-// Structure to hold image frame.
-struct ImageFrame {
-  enum ImageFormat format;
-  const uint8_t* image_buffer;
-  int width;
-  int height;
-};
-
-// TODO: Add GPU buffer declaration and proccessing logic for it.
-struct GpuBuffer {};
-
-// The object to contain an image, realizes `OneOf` concept.
-struct MpImage {
-  enum { IMAGE_FRAME, GPU_BUFFER } type;
-  union {
-    ImageFrame image_frame;
-    GpuBuffer gpu_buffer;
-  };
-};
-
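For orientation, the image-mode flow through the C API declared in this header can be sketched in a few calls. This is a minimal sketch only: the model path and the pixel source are hypothetical placeholders, the `SRGB` enumerator is assumed to be provided by vision/core/common.h (where the enums removed above now live), and the option field names are inferred from the option comments further down in this header.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

#include "mediapipe/tasks/c/vision/image_classifier/image_classifier.h"

// Hypothetical source of a decoded 224x224 SRGB frame.
const uint8_t* GetRgbPixels();

int RunImageMode() {
  ImageClassifierOptions options = {};
  // Hypothetical model path; field names follow the option comments below.
  options.base_options.model_asset_path = "/path/to/classifier.tflite";
  options.running_mode = RunningMode::IMAGE;
  options.classifier_options.max_results = 3;

  char* error_msg = nullptr;
  void* classifier = image_classifier_create(&options, &error_msg);
  if (classifier == nullptr) {
    std::printf("create failed: %s\n", error_msg);
    free(error_msg);  // The caller owns the error message.
    return -1;
  }

  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = ImageFormat::SRGB,  // assumed enumerator
                      .image_buffer = GetRgbPixels(),
                      .width = 224,
                      .height = 224}};

  ImageClassifierResult result;
  if (image_classifier_classify_image(classifier, &mp_image, &result,
                                      /* error_msg= */ nullptr) == 0) {
    // Inspect result.classifications[...] here, then release it.
    image_classifier_close_result(&result);
  }
  image_classifier_close(classifier, /* error_msg= */ nullptr);
  return 0;
}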
-// The options for configuring a Mediapipe image classifier task.
+// The options for configuring a MediaPipe image classifier task.
 struct ImageClassifierOptions {
   // Base options for configuring MediaPipe Tasks, such as specifying the model
   // file with metadata, accelerator options, op resolver, etc.
   struct BaseOptions base_options;
@@ -89,30 +52,73 @@ struct ImageClassifierOptions {
   // The user-defined result callback for processing live stream data.
   // The result callback should only be specified when the running mode is set
-  // to RunningMode::LIVE_STREAM.
-  typedef void (*result_callback_fn)(ImageClassifierResult*, const MpImage*,
-                                     int64_t);
+  // to RunningMode::LIVE_STREAM. The callback function receives: a pointer to
+  // the classification result, the image that the result was obtained on, the
+  // timestamp of the classification result, and a pointer to an error message
+  // in case of failure. The arguments are only valid for the lifetime of the
+  // callback invocation.
+  //
+  // The caller is responsible for closing the image classifier result.
+  typedef void (*result_callback_fn)(ImageClassifierResult* result,
+                                     const MpImage& image, int64_t timestamp_ms,
+                                     char* error_msg);
   result_callback_fn result_callback;
 };

-// Creates an ImageClassifier from provided `options`.
+// Creates an ImageClassifier from the provided `options`.
 // Returns a pointer to the image classifier on success.
 // If an error occurs, returns `nullptr` and sets the error parameter to an
-// an error message (if `error_msg` is not nullptr). You must free the memory
+// error message (if `error_msg` is not `nullptr`). You must free the memory
 // allocated for the error message.
 MP_EXPORT void* image_classifier_create(struct ImageClassifierOptions* options,
-                                        char** error_msg = nullptr);
+                                        char** error_msg);

 // Performs image classification on the input `image`. Returns `0` on success.
 // If an error occurs, returns an error code and sets the error parameter to an
-// an error message (if `error_msg` is not nullptr). You must free the memory
+// error message (if `error_msg` is not `nullptr`). You must free the memory
 // allocated for the error message.
-//
-// TODO: Add API for video and live stream processing.
 MP_EXPORT int image_classifier_classify_image(void* classifier,
                                               const MpImage* image,
                                               ImageClassifierResult* result,
-                                              char** error_msg = nullptr);
+                                              char** error_msg);
+
+// Performs image classification on the provided video frame.
+// Only use this method when the ImageClassifier is created with the video
+// running mode.
+// The image can be of any size with format RGB or RGBA. It's required to
+// provide the video frame's timestamp (in milliseconds). The input timestamps
+// must be monotonically increasing.
+// If an error occurs, returns an error code and sets the error parameter to
+// an error message (if `error_msg` is not `nullptr`). You must free the memory
+// allocated for the error message.
+MP_EXPORT int image_classifier_classify_for_video(void* classifier,
+                                                  const MpImage* image,
+                                                  int64_t timestamp_ms,
+                                                  ImageClassifierResult* result,
+                                                  char** error_msg);
+
+// Sends live image data to the image classifier, and the results will be
+// available via the `result_callback` provided in the ImageClassifierOptions.
+// Only use this method when the ImageClassifier is created with the live
+// stream running mode.
+// The image can be of any size with format RGB or RGBA. It's required to
+// provide a timestamp (in milliseconds) to indicate when the input image is
+// sent to the image classifier. The input timestamps must be monotonically
+// increasing.
+// The `result_callback` provides: +// - The classification results as an ImageClassifierResult object. +// - The const reference to the corresponding input image that the image +// classifier runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int image_classifier_classify_async(void* classifier, + const MpImage* image, + int64_t timestamp_ms, + char** error_msg); // Frees the memory allocated inside a ImageClassifierResult result. // Does not free the result pointer itself. @@ -120,10 +126,9 @@ MP_EXPORT void image_classifier_close_result(ImageClassifierResult* result); // Frees image classifier. // If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not nullptr). You must free the memory +// an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int image_classifier_close(void* classifier, - char** error_msg = nullptr); +MP_EXPORT int image_classifier_close(void* classifier, char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc b/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc index e8e84d864..2b0114dc6 100644 --- a/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc +++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "mediapipe/tasks/c/vision/image_classifier/image_classifier.h" +#include #include #include @@ -36,18 +37,20 @@ using testing::HasSubstr; constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; constexpr char kModelName[] = "mobilenet_v2_1.0_224.tflite"; constexpr float kPrecision = 1e-4; +constexpr int kIterations = 100; std::string GetFullPath(absl::string_view file_name) { return JoinPath("./", kTestDataDirectory, file_name); } -TEST(ImageClassifierTest, SmokeTest) { +TEST(ImageClassifierTest, ImageModeTest) { const auto image = DecodeImageFromFile(GetFullPath("burger.jpg")); ASSERT_TRUE(image.ok()); const std::string model_path = GetFullPath(kModelName); ImageClassifierOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ model_path.c_str()}, /* running_mode= */ RunningMode::IMAGE, /* classifier_options= */ @@ -60,20 +63,20 @@ TEST(ImageClassifierTest, SmokeTest) { /* category_denylist_count= */ 0}, }; - void* classifier = image_classifier_create(&options); + void* classifier = image_classifier_create(&options, /* error_msg */ nullptr); EXPECT_NE(classifier, nullptr); + const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { .type = MpImage::IMAGE_FRAME, - .image_frame = { - .format = static_cast( - image->GetImageFrameSharedPtr()->Format()), - .image_buffer = image->GetImageFrameSharedPtr()->PixelData(), - .width = image->GetImageFrameSharedPtr()->Width(), - .height = image->GetImageFrameSharedPtr()->Height()}}; + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; ImageClassifierResult result; - image_classifier_classify_image(classifier, &mp_image, &result); + image_classifier_classify_image(classifier, &mp_image, &result, + /* error_msg */ nullptr); EXPECT_EQ(result.classifications_count, 1); EXPECT_EQ(result.classifications[0].categories_count, 1001); EXPECT_EQ(std::string{result.classifications[0].categories[0].category_name}, @@ -81,13 +84,133 @@ TEST(ImageClassifierTest, SmokeTest) { EXPECT_NEAR(result.classifications[0].categories[0].score, 0.7939f, kPrecision); image_classifier_close_result(&result); - image_classifier_close(classifier); + image_classifier_close(classifier, /* error_msg */ nullptr); +} + +TEST(ImageClassifierTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath("burger.jpg")); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + ImageClassifierOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* classifier_options= */ + {/* display_names_locale= */ nullptr, + /* max_results= */ 3, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + /* result_callback= */ nullptr, + }; + + void* classifier = image_classifier_create(&options, /* error_msg */ nullptr); + EXPECT_NE(classifier, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = 
image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + ImageClassifierResult result; + image_classifier_classify_for_video(classifier, &mp_image, i, &result, + /* error_msg */ nullptr); + EXPECT_EQ(result.classifications_count, 1); + EXPECT_EQ(result.classifications[0].categories_count, 3); + EXPECT_EQ( + std::string{result.classifications[0].categories[0].category_name}, + "cheeseburger"); + EXPECT_NEAR(result.classifications[0].categories[0].score, 0.7939f, + kPrecision); + image_classifier_close_result(&result); + } + image_classifier_close(classifier, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. +struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(ImageClassifierResult* classifier_result, const MpImage& image, + int64_t timestamp, char* error_msg) { + ASSERT_NE(classifier_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + EXPECT_EQ( + std::string{ + classifier_result->classifications[0].categories[0].category_name}, + "cheeseburger"); + EXPECT_NEAR(classifier_result->classifications[0].categories[0].score, + 0.7939f, kPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(ImageClassifierTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath("burger.jpg")); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + ImageClassifierOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* classifier_options= */ + {/* display_names_locale= */ nullptr, + /* max_results= */ 3, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* classifier = image_classifier_create(&options, /* error_msg */ nullptr); + EXPECT_NE(classifier, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(image_classifier_classify_async(classifier, &mp_image, i, + /* error_msg */ nullptr), + 0); + } + image_classifier_close(classifier, /* error_msg */ nullptr); + + // Due to the flow limiter, the total of outputs might be smaller than the + // number of iterations. + EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); } TEST(ImageClassifierTest, InvalidArgumentHandling) { // It is an error to set neither the asset buffer nor the path. 
ImageClassifierOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ nullptr}, /* classifier_options= */ {}, }; @@ -105,6 +228,7 @@ TEST(ImageClassifierTest, FailedClassificationHandling) { const std::string model_path = GetFullPath(kModelName); ImageClassifierOptions options = { /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, /* model_asset_path= */ model_path.c_str()}, /* running_mode= */ RunningMode::IMAGE, /* classifier_options= */ @@ -117,16 +241,16 @@ TEST(ImageClassifierTest, FailedClassificationHandling) { /* category_denylist_count= */ 0}, }; - void* classifier = image_classifier_create(&options); + void* classifier = image_classifier_create(&options, /* error_msg */ nullptr); EXPECT_NE(classifier, nullptr); const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; ImageClassifierResult result; char* error_msg; image_classifier_classify_image(classifier, &mp_image, &result, &error_msg); - EXPECT_THAT(error_msg, HasSubstr("gpu buffer not supported yet")); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); free(error_msg); - image_classifier_close(classifier); + image_classifier_close(classifier, /* error_msg */ nullptr); } } // namespace diff --git a/mediapipe/tasks/c/vision/image_embedder/BUILD b/mediapipe/tasks/c/vision/image_embedder/BUILD new file mode 100644 index 000000000..5d96d90e8 --- /dev/null +++ b/mediapipe/tasks/c/vision/image_embedder/BUILD @@ -0,0 +1,101 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
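The targets defined in this new BUILD file mirror the image_classifier packaging: the cc_binary rules below produce libimage_embedder.so / libimage_embedder.dylib with only MP_EXPORT symbols visible. As a rough, hedged sketch of consuming such a prebuilt artifact at runtime via dlopen (the install path is hypothetical; the exported symbol name comes from image_embedder.h):

#include <dlfcn.h>

#include <cstdio>

#include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h"

int LoadEmbedderLibrary() {
  // Hypothetical install location of the artifact built by the cc_binary below.
  void* handle =
      dlopen("/opt/mediapipe/libimage_embedder.so", RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr) {
    std::printf("dlopen failed: %s\n", dlerror());
    return -1;
  }

  // Resolve one of the MP_EXPORT entry points declared in image_embedder.h.
  using CreateFn = void* (*)(struct ImageEmbedderOptions*, char**);
  auto create =
      reinterpret_cast<CreateFn>(dlsym(handle, "image_embedder_create"));
  if (create == nullptr) {
    std::printf("dlsym failed: %s\n", dlerror());
    dlclose(handle);
    return -1;
  }

  // ... fill an ImageEmbedderOptions and call create(...) as documented in the
  // header, then dlclose once the embedder has been closed.
  dlclose(handle);
  return 0;
}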
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "image_embedder_lib", + srcs = ["image_embedder.cc"], + hdrs = ["image_embedder.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/components/containers:embedding_result", + "//mediapipe/tasks/c/components/containers:embedding_result_converter", + "//mediapipe/tasks/c/components/processors:embedder_options", + "//mediapipe/tasks/c/components/processors:embedder_options_converter", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/components/containers:embedding_result", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/image_embedder", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "image_embedder_test", + srcs = ["image_embedder_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":image_embedder_lib", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/image_embedder:libimage_embedder.so +cc_binary( + name = "libimage_embedder.so", + linkopts = [ + "-Wl,-soname=libimage_embedder.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":image_embedder_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/image_embedder:libimage_embedder.dylib +cc_binary( + name = "libimage_embedder.dylib", + linkopts = [ + "-Wl,-install_name,libimage_embedder.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":image_embedder_lib"], +) diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc b/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc new file mode 100644 index 000000000..48b4d15ac --- /dev/null +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc @@ -0,0 +1,303 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h" + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/components/containers/embedding_result.h" +#include "mediapipe/tasks/c/components/containers/embedding_result_converter.h" +#include "mediapipe/tasks/c/components/processors/embedder_options_converter.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/cc/components/containers/embedding_result.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/image_embedder/image_embedder.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::image_embedder { + +namespace { + +using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; +using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToEmbeddingResult; +using ::mediapipe::tasks::c::components::processors:: + CppConvertToEmbedderOptions; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::image_embedder::ImageEmbedder; +typedef ::mediapipe::tasks::components::containers::Embedding CppEmbedding; +typedef ::mediapipe::tasks::vision::image_embedder::ImageEmbedderResult + CppImageEmbedderResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +ImageEmbedder* CppImageEmbedderCreate(const ImageEmbedderOptions& options, + char** error_msg) { + auto cpp_options = std::make_unique< + ::mediapipe::tasks::vision::image_embedder::ImageEmbedderOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToEmbedderOptions(options.embedder_options, + &cpp_options->embedder_options); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. + if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create ImageEmbedder: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + ImageEmbedderOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) + << "Embedding extraction failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. 
+ ImageEmbedderResult result; + CppConvertToEmbeddingResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseEmbeddingResult(&result); + }; + } + + auto embedder = ImageEmbedder::Create(std::move(cpp_options)); + if (!embedder.ok()) { + ABSL_LOG(ERROR) << "Failed to create ImageEmbedder: " << embedder.status(); + CppProcessError(embedder.status(), error_msg); + return nullptr; + } + return embedder->release(); +} + +int CppImageEmbedderEmbed(void* embedder, const MpImage* image, + ImageEmbedderResult* result, char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Embedding extraction failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_embedder = static_cast(embedder); + auto cpp_result = cpp_embedder->Embed(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Embedding extraction failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToEmbeddingResult(*cpp_result, result); + return 0; +} + +int CppImageEmbedderEmbedForVideo(void* embedder, const MpImage* image, + int64_t timestamp_ms, + ImageEmbedderResult* result, + char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Embedding extraction failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_embedder = static_cast(embedder); + auto cpp_result = cpp_embedder->EmbedForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Embedding extraction failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToEmbeddingResult(*cpp_result, result); + return 0; +} + +int CppImageEmbedderEmbedAsync(void* embedder, const MpImage* image, + int64_t timestamp_ms, char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Embedding extraction failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return 
CppProcessError(img.status(), error_msg); + } + + auto cpp_embedder = static_cast(embedder); + auto cpp_result = cpp_embedder->EmbedAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the embedding extraction failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppImageEmbedderCloseResult(ImageEmbedderResult* result) { + CppCloseEmbeddingResult(result); +} + +int CppImageEmbedderClose(void* embedder, char** error_msg) { + auto cpp_embedder = static_cast(embedder); + auto result = cpp_embedder->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close ImageEmbedder: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_embedder; + return 0; +} + +int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v, + double* similarity, char** error_msg) { + CppEmbedding cpp_u; + CppConvertToCppEmbedding(u, &cpp_u); + CppEmbedding cpp_v; + CppConvertToCppEmbedding(v, &cpp_v); + auto status_or_similarity = + mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity( + cpp_u, cpp_v); + if (status_or_similarity.ok()) { + *similarity = status_or_similarity.value(); + } else { + ABSL_LOG(ERROR) << "Cannot compute cosine similarity."; + return CppProcessError(status_or_similarity.status(), error_msg); + } + return 0; +} + +} // namespace mediapipe::tasks::c::vision::image_embedder + +extern "C" { + +void* image_embedder_create(struct ImageEmbedderOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderCreate( + *options, error_msg); +} + +int image_embedder_embed_image(void* embedder, const MpImage* image, + ImageEmbedderResult* result, char** error_msg) { + return mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderEmbed( + embedder, image, result, error_msg); +} + +int image_embedder_embed_for_video(void* embedder, const MpImage* image, + int64_t timestamp_ms, + ImageEmbedderResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::image_embedder:: + CppImageEmbedderEmbedForVideo(embedder, image, timestamp_ms, result, + error_msg); +} + +int image_embedder_embed_async(void* embedder, const MpImage* image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::image_embedder:: + CppImageEmbedderEmbedAsync(embedder, image, timestamp_ms, error_msg); +} + +void image_embedder_close_result(ImageEmbedderResult* result) { + mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderCloseResult( + result); +} + +int image_embedder_close(void* embedder, char** error_msg) { + return mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderClose( + embedder, error_msg); +} + +int image_embedder_cosine_similarity(const Embedding& u, const Embedding& v, + double* similarity, char** error_msg) { + return mediapipe::tasks::c::vision::image_embedder:: + CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder.h b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h new file mode 100644 index 000000000..809c7f2f8 --- /dev/null +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h @@ -0,0 +1,148 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_EMBEDDER_IMAGE_EMBEDDER_H_
+#define MEDIAPIPE_TASKS_C_VISION_IMAGE_EMBEDDER_IMAGE_EMBEDDER_H_
+
+#include
+
+#include "mediapipe/tasks/c/components/containers/embedding_result.h"
+#include "mediapipe/tasks/c/components/processors/embedder_options.h"
+#include "mediapipe/tasks/c/core/base_options.h"
+#include "mediapipe/tasks/c/vision/core/common.h"
+
+#ifndef MP_EXPORT
+#define MP_EXPORT __attribute__((visibility("default")))
+#endif  // MP_EXPORT
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef EmbeddingResult ImageEmbedderResult;
+
+// The options for configuring a MediaPipe image embedder task.
+struct ImageEmbedderOptions {
+  // Base options for configuring MediaPipe Tasks, such as specifying the model
+  // file with metadata, accelerator options, op resolver, etc.
+  struct BaseOptions base_options;
+
+  // The running mode of the task. Defaults to the image mode.
+  // Image embedder has three running modes:
+  // 1) The image mode for extracting embeddings from single image inputs.
+  // 2) The video mode for extracting embeddings from the decoded frames of a
+  // video.
+  // 3) The live stream mode for extracting embeddings from a live stream of
+  // input data, such as from a camera. In this mode, the "result_callback"
+  // below must be specified to receive the embedding results asynchronously.
+  RunningMode running_mode;
+
+  // Options for configuring the embedder behavior, such as l2_normalize and
+  // quantize.
+  struct EmbedderOptions embedder_options;
+
+  // The user-defined result callback for processing live stream data.
+  // The result callback should only be specified when the running mode is set
+  // to RunningMode::LIVE_STREAM. The callback function receives: a pointer to
+  // the embedding result, the image that the result was obtained on, the
+  // timestamp of the embedding result, and a pointer to an error message in
+  // case of failure. The arguments are only valid for the lifetime of the
+  // callback invocation.
+  //
+  // The caller is responsible for closing the image embedder result.
+  typedef void (*result_callback_fn)(ImageEmbedderResult* result,
+                                     const MpImage& image, int64_t timestamp_ms,
+                                     char* error_msg);
+  result_callback_fn result_callback;
+};
+
+// Creates an ImageEmbedder from the provided `options`.
+// Returns a pointer to the image embedder on success.
+// If an error occurs, returns `nullptr` and sets the error parameter to
+// an error message (if `error_msg` is not `nullptr`). You must free the memory
+// allocated for the error message.
+MP_EXPORT void* image_embedder_create(struct ImageEmbedderOptions* options,
+                                      char** error_msg);
+
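Putting the pieces together, a condensed sketch of the live-stream flow: a callback matching the `result_callback_fn` typedef above, plus `image_embedder_embed_async` declared further below. The model path and the frame source are hypothetical placeholders; the timestamps only need to be monotonically increasing.

#include <cstdint>
#include <cstdio>

#include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h"

// Hypothetical frame source; a real caller would feed camera frames here.
MpImage NextCameraFrame();

static void OnEmbedding(ImageEmbedderResult* result, const MpImage& image,
                        int64_t timestamp_ms, char* error_msg) {
  if (error_msg != nullptr) {
    std::printf("embedding failed at %lld: %s\n",
                static_cast<long long>(timestamp_ms), error_msg);
    return;
  }
  // `result` and `image` are only valid inside this callback; copy anything
  // that has to outlive it (e.g. result->embeddings[0].float_embedding).
}

int RunLiveStream() {
  ImageEmbedderOptions options = {};
  // Hypothetical model path; field names follow the option comments above.
  options.base_options.model_asset_path = "/path/to/embedder.tflite";
  options.running_mode = RunningMode::LIVE_STREAM;
  options.embedder_options.l2_normalize = true;
  options.result_callback = OnEmbedding;

  void* embedder = image_embedder_create(&options, /* error_msg= */ nullptr);
  if (embedder == nullptr) return -1;

  for (int64_t ts = 0; ts < 100; ++ts) {
    const MpImage frame = NextCameraFrame();
    if (image_embedder_embed_async(embedder, &frame, ts,
                                   /* error_msg= */ nullptr) != 0) {
      break;  // Non-zero return codes indicate failure.
    }
  }
  image_embedder_close(embedder, /* error_msg= */ nullptr);
  return 0;
}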
+// Performs embedding extraction on the input `image`. Returns `0` on success.
+// If an error occurs, returns an error code and sets the error parameter to
+// an error message (if `error_msg` is not `nullptr`). You must free the memory
+// allocated for the error message.
+MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image,
+                                         ImageEmbedderResult* result,
+                                         char** error_msg);
+
+// Performs embedding extraction on the provided video frame.
+// Only use this method when the ImageEmbedder is created with the video
+// running mode.
+// The image can be of any size with format RGB or RGBA. It's required to
+// provide the video frame's timestamp (in milliseconds). The input timestamps
+// must be monotonically increasing.
+// If an error occurs, returns an error code and sets the error parameter to
+// an error message (if `error_msg` is not `nullptr`). You must free the memory
+// allocated for the error message.
+MP_EXPORT int image_embedder_embed_for_video(void* embedder,
+                                             const MpImage* image,
+                                             int64_t timestamp_ms,
+                                             ImageEmbedderResult* result,
+                                             char** error_msg);
+
+// Sends live image data to the embedder, and the results will be available via
+// the `result_callback` provided in the ImageEmbedderOptions.
+// Only use this method when the ImageEmbedder is created with the live
+// stream running mode.
+// The image can be of any size with format RGB or RGBA. It's required to
+// provide a timestamp (in milliseconds) to indicate when the input image is
+// sent to the image embedder. The input timestamps must be monotonically
+// increasing.
+// The `result_callback` provides:
+// - The embedding results as an `ImageEmbedderResult` object.
+// - The const reference to the corresponding input image that the image
+//   embedder runs on. Note that the const reference to the image will no
+//   longer be valid when the callback returns. To access the image data
+//   outside of the callback, callers need to make a copy of the image.
+// - The input timestamp in milliseconds.
+// If an error occurs, returns an error code and sets the error parameter to
+// an error message (if `error_msg` is not `nullptr`). You must free the memory
+// allocated for the error message.
+MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image,
+                                         int64_t timestamp_ms,
+                                         char** error_msg);
+
+// Frees the memory allocated inside an ImageEmbedderResult result.
+// Does not free the result pointer itself.
+MP_EXPORT void image_embedder_close_result(ImageEmbedderResult* result);
+
+// Frees the image embedder.
+// If an error occurs, returns an error code and sets the error parameter to
+// an error message (if `error_msg` is not `nullptr`). You must free the memory
+// allocated for the error message.
+MP_EXPORT int image_embedder_close(void* embedder, char** error_msg);
+
+// Utility function to compute cosine similarity [1] between two embeddings.
+// May return an InvalidArgumentError if e.g. the embeddings are of different
+// types (quantized vs. float), have different sizes, or have an L2-norm of
+// 0.
+//
+// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
+MP_EXPORT int image_embedder_cosine_similarity(const Embedding& u,
+                                               const Embedding& v,
+                                               double* similarity,
+                                               char** error_msg);
+
+#ifdef __cplusplus
+}  // extern C
+#endif
+
+#endif  // MEDIAPIPE_TASKS_C_VISION_IMAGE_EMBEDDER_IMAGE_EMBEDDER_H_
diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc
new file mode 100644
index 000000000..5daeac949
--- /dev/null
+++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc
@@ -0,0 +1,302 @@
+/* Copyright 2023 The MediaPipe Authors.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h" + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kModelName[] = "mobilenet_v3_small_100_224_embedder.tflite"; +constexpr char kImageFile[] = "burger.jpg"; +constexpr float kPrecision = 1e-6; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, file_name); +} + +// Utility function to check the sizes, head_index and head_names of a result +// produced by kMobileNetV3Embedder. +void CheckMobileNetV3Result(const ImageEmbedderResult& result, bool quantized) { + EXPECT_EQ(result.embeddings_count, 1); + EXPECT_EQ(result.embeddings[0].head_index, 0); + EXPECT_EQ(std::string{result.embeddings[0].head_name}, "feature"); + if (quantized) { + EXPECT_EQ(result.embeddings[0].values_count, 1024); + } else { + EXPECT_EQ(result.embeddings[0].values_count, 1024); + } +} + +TEST(ImageEmbedderTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + ImageEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* embedder_options= */ + {/* l2_normalize= */ true, + /* quantize= */ false}}; + + void* embedder = image_embedder_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(embedder, nullptr); + + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast( + image->GetImageFrameSharedPtr()->Format()), + .image_buffer = image->GetImageFrameSharedPtr()->PixelData(), + .width = image->GetImageFrameSharedPtr()->Width(), + .height = image->GetImageFrameSharedPtr()->Height()}}; + + ImageEmbedderResult result; + image_embedder_embed_image(embedder, &mp_image, &result, + /* error_msg */ nullptr); + CheckMobileNetV3Result(result, false); + EXPECT_NEAR(result.embeddings[0].float_embedding[0], -0.0142344, kPrecision); + image_embedder_close_result(&result); + image_embedder_close(embedder, /* error_msg */ nullptr); +} + +TEST(ImageEmbedderTest, SucceedsWithCosineSimilarity) { + const auto image = DecodeImageFromFile(GetFullPath("burger.jpg")); + ASSERT_TRUE(image.ok()); + 
const auto crop = DecodeImageFromFile(GetFullPath("burger_crop.jpg")); + ASSERT_TRUE(crop.ok()); + + const std::string model_path = GetFullPath(kModelName); + ImageEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* embedder_options= */ + {/* l2_normalize= */ true, + /* quantize= */ false}}; + + void* embedder = image_embedder_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(embedder, nullptr); + + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast( + image->GetImageFrameSharedPtr()->Format()), + .image_buffer = image->GetImageFrameSharedPtr()->PixelData(), + .width = image->GetImageFrameSharedPtr()->Width(), + .height = image->GetImageFrameSharedPtr()->Height()}}; + + const MpImage mp_crop = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast( + crop->GetImageFrameSharedPtr()->Format()), + .image_buffer = crop->GetImageFrameSharedPtr()->PixelData(), + .width = crop->GetImageFrameSharedPtr()->Width(), + .height = crop->GetImageFrameSharedPtr()->Height()}}; + + // Extract both embeddings. + ImageEmbedderResult image_result; + image_embedder_embed_image(embedder, &mp_image, &image_result, + /* error_msg */ nullptr); + ImageEmbedderResult crop_result; + image_embedder_embed_image(embedder, &mp_crop, &crop_result, + /* error_msg */ nullptr); + + // Check results. + CheckMobileNetV3Result(image_result, false); + CheckMobileNetV3Result(crop_result, false); + // Check cosine similarity. + double similarity; + image_embedder_cosine_similarity(image_result.embeddings[0], + crop_result.embeddings[0], &similarity, + /* error_msg */ nullptr); + double expected_similarity = 0.925519; + EXPECT_LE(abs(similarity - expected_similarity), kPrecision); + image_embedder_close_result(&image_result); + image_embedder_close_result(&crop_result); + image_embedder_close(embedder, /* error_msg */ nullptr); +} + +TEST(ImageEmbedderTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + ImageEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* embedder_options= */ + {/* l2_normalize= */ true, + /* quantize= */ false}}; + + void* embedder = image_embedder_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(embedder, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + ImageEmbedderResult result; + image_embedder_embed_for_video(embedder, &mp_image, i, &result, + /* error_msg */ nullptr); + CheckMobileNetV3Result(result, false); + EXPECT_NEAR(result.embeddings[0].float_embedding[0], -0.0142344, + kPrecision); + image_embedder_close_result(&result); + } + image_embedder_close(embedder, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. 
A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. +struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(ImageEmbedderResult* embedder_result, const MpImage& image, + int64_t timestamp, char* error_msg) { + ASSERT_NE(embedder_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + CheckMobileNetV3Result(*embedder_result, false); + EXPECT_NEAR(embedder_result->embeddings[0].float_embedding[0], -0.0142344, + kPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(ImageEmbedderTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + ImageEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* embedder_options= */ + {/* l2_normalize= */ true, + /* quantize= */ false}, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* embedder = image_embedder_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(embedder, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(image_embedder_embed_async(embedder, &mp_image, i, + /* error_msg */ nullptr), + 0); + } + image_embedder_close(embedder, /* error_msg */ nullptr); + + // Due to the flow limiter, the total of outputs might be smaller than the + // number of iterations. + EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(ImageEmbedderTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. 
+ ImageEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + /* embedder_options= */ {}, + }; + + char* error_msg; + void* embedder = image_embedder_create(&options, &error_msg); + EXPECT_EQ(embedder, nullptr); + + EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); + + free(error_msg); +} + +TEST(ImageEmbedderTest, FailedEmbeddingHandling) { + const std::string model_path = GetFullPath(kModelName); + ImageEmbedderOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* embedder_options= */ + {/* l2_normalize= */ false, + /* quantize= */ false}, + }; + + void* embedder = image_embedder_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(embedder, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + ImageEmbedderResult result; + char* error_msg; + image_embedder_embed_image(embedder, &mp_image, &result, &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet.")); + free(error_msg); + image_embedder_close(embedder, /* error_msg */ nullptr); +} + +} // namespace diff --git a/mediapipe/tasks/c/vision/object_detector/BUILD b/mediapipe/tasks/c/vision/object_detector/BUILD new file mode 100644 index 000000000..01c6d772d --- /dev/null +++ b/mediapipe/tasks/c/vision/object_detector/BUILD @@ -0,0 +1,99 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
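
The image_embedder_test.cc file above doubles as the reference for how a client drives the C image embedder API. Below is a minimal C++ sketch of that flow, assuming the caller has already loaded a model and filled in two MpImage structs; the helper name and error handling are illustrative and not part of the patch itself.

#include <cstdio>
#include <cstdlib>

#include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h"

int EmbedAndCompare(const char* model_path, const MpImage& a, const MpImage& b) {
  ImageEmbedderOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ model_path},
      /* running_mode= */ RunningMode::IMAGE,
      /* embedder_options= */
      {/* l2_normalize= */ true,
       /* quantize= */ false}};

  char* error_msg = nullptr;
  void* embedder = image_embedder_create(&options, &error_msg);
  if (embedder == nullptr) {
    std::fprintf(stderr, "image_embedder_create failed: %s\n", error_msg);
    free(error_msg);  // Error strings are allocated by the C layer; caller frees.
    return -1;
  }

  ImageEmbedderResult result_a;
  ImageEmbedderResult result_b;
  image_embedder_embed_image(embedder, &a, &result_a, /* error_msg= */ nullptr);
  image_embedder_embed_image(embedder, &b, &result_b, /* error_msg= */ nullptr);

  double similarity = 0.0;
  image_embedder_cosine_similarity(result_a.embeddings[0],
                                   result_b.embeddings[0], &similarity,
                                   /* error_msg= */ nullptr);
  std::printf("cosine similarity: %f\n", similarity);

  // Results and the embedder itself must be released explicitly.
  image_embedder_close_result(&result_a);
  image_embedder_close_result(&result_b);
  image_embedder_close(embedder, /* error_msg= */ nullptr);
  return 0;
}
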
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "object_detector_lib", + srcs = ["object_detector.cc"], + hdrs = ["object_detector.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/components/containers:detection_result", + "//mediapipe/tasks/c/components/containers:detection_result_converter", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/object_detector", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "object_detector_test", + srcs = ["object_detector_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":object_detector_lib", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/object_detector:libobject_detector.so +cc_binary( + name = "libobject_detector.so", + linkopts = [ + "-Wl,-soname=libobject_detector.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":object_detector_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/object_detector:libobject_detector.dylib +cc_binary( + name = "libobject_detector.dylib", + linkopts = [ + "-Wl,-install_name,libobject_detector.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":object_detector_lib"], +) diff --git a/mediapipe/tasks/c/vision/object_detector/object_detector.cc b/mediapipe/tasks/c/vision/object_detector/object_detector.cc new file mode 100644 index 000000000..70f35ec95 --- /dev/null +++ b/mediapipe/tasks/c/vision/object_detector/object_detector.cc @@ -0,0 +1,290 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/object_detector/object_detector.h" + +#include +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/components/containers/detection_result_converter.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/object_detector/object_detector.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::object_detector { + +namespace { + +using ::mediapipe::tasks::c::components::containers::CppCloseDetectionResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToDetectionResult; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::ObjectDetector; +using ::mediapipe::tasks::vision::core::RunningMode; +typedef ::mediapipe::tasks::vision::ObjectDetectorResult + CppObjectDetectorResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToDetectorOptions( + const ObjectDetectorOptions& in, + mediapipe::tasks::vision::ObjectDetectorOptions* out) { + out->display_names_locale = + in.display_names_locale ? std::string(in.display_names_locale) : "en"; + out->max_results = in.max_results; + out->score_threshold = in.score_threshold; + out->category_allowlist = + std::vector(in.category_allowlist_count); + for (uint32_t i = 0; i < in.category_allowlist_count; ++i) { + out->category_allowlist[i] = in.category_allowlist[i]; + } + out->category_denylist = std::vector(in.category_denylist_count); + for (uint32_t i = 0; i < in.category_denylist_count; ++i) { + out->category_denylist[i] = in.category_denylist[i]; + } +} + +ObjectDetector* CppObjectDetectorCreate(const ObjectDetectorOptions& options, + char** error_msg) { + auto cpp_options = + std::make_unique<::mediapipe::tasks::vision::ObjectDetectorOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToDetectorOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. 
+ if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create ObjectDetector: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + ObjectDetectorOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. + ObjectDetectorResult result; + CppConvertToDetectionResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseDetectionResult(&result); + }; + } + + auto detector = ObjectDetector::Create(std::move(cpp_options)); + if (!detector.ok()) { + ABSL_LOG(ERROR) << "Failed to create ObjectDetector: " << detector.status(); + CppProcessError(detector.status(), error_msg); + return nullptr; + } + return detector->release(); +} + +int CppObjectDetectorDetect(void* detector, const MpImage* image, + ObjectDetectorResult* result, char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast(detector); + auto cpp_result = cpp_detector->Detect(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToDetectionResult(*cpp_result, result); + return 0; +} + +int CppObjectDetectorDetectForVideo(void* detector, const MpImage* image, + int64_t timestamp_ms, + ObjectDetectorResult* result, + char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast(detector); + auto cpp_result = cpp_detector->DetectForVideo(*img, timestamp_ms); + if 
(!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToDetectionResult(*cpp_result, result); + return 0; +} + +int CppObjectDetectorDetectAsync(void* detector, const MpImage* image, + int64_t timestamp_ms, char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast(detector); + auto cpp_result = cpp_detector->DetectAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the object detection failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppObjectDetectorCloseResult(ObjectDetectorResult* result) { + CppCloseDetectionResult(result); +} + +int CppObjectDetectorClose(void* detector, char** error_msg) { + auto cpp_detector = static_cast(detector); + auto result = cpp_detector->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close ObjectDetector: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_detector; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::object_detector + +extern "C" { + +void* object_detector_create(struct ObjectDetectorOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::object_detector::CppObjectDetectorCreate( + *options, error_msg); +} + +int object_detector_detect_image(void* detector, const MpImage* image, + ObjectDetectorResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::object_detector::CppObjectDetectorDetect( + detector, image, result, error_msg); +} + +int object_detector_detect_for_video(void* detector, const MpImage* image, + int64_t timestamp_ms, + ObjectDetectorResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::object_detector:: + CppObjectDetectorDetectForVideo(detector, image, timestamp_ms, result, + error_msg); +} + +int object_detector_detect_async(void* detector, const MpImage* image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::object_detector:: + CppObjectDetectorDetectAsync(detector, image, timestamp_ms, error_msg); +} + +void object_detector_close_result(ObjectDetectorResult* result) { + mediapipe::tasks::c::vision::object_detector::CppObjectDetectorCloseResult( + result); +} + +int object_detector_close(void* detector, char** error_ms) { + return mediapipe::tasks::c::vision::object_detector::CppObjectDetectorClose( + detector, error_ms); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/object_detector/object_detector.h b/mediapipe/tasks/c/vision/object_detector/object_detector.h new file mode 100644 index 000000000..e14523a49 --- /dev/null +++ b/mediapipe/tasks/c/vision/object_detector/object_detector.h @@ -0,0 +1,157 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_OBJECT_DETECTOR_OBJECT_DETECTOR_H_ +#define MEDIAPIPE_TASKS_C_VISION_OBJECT_DETECTOR_OBJECT_DETECTOR_H_ + +#include "mediapipe/tasks/c/components/containers/detection_result.h" +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +typedef DetectionResult ObjectDetectorResult; + +// The options for configuring a MediaPipe object detector task. +struct ObjectDetectorOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // Object detector has three running modes: + // 1) The image mode for detecting objects on single image inputs. + // 2) The video mode for detecting objects on the decoded frames of a video. + // 3) The live stream mode for detecting objects on the live stream of input + // data, such as from camera. In this mode, the "result_callback" below must + // be specified to receive the detection results asynchronously. + RunningMode running_mode; + + // The locale to use for display names specified through the TFLite Model + // Metadata, if any. Defaults to English. + const char* display_names_locale; + + // The maximum number of top-scored detection results to return. If < 0, + // all available results will be returned. If 0, an invalid argument error is + // returned. + int max_results; + + // Score threshold to override the one provided in the model metadata (if + // any). Results below this value are rejected. + float score_threshold; + + // The allowlist of category names. If non-empty, detection results whose + // category name is not in this set will be filtered out. Duplicate or unknown + // category names are ignored. Mutually exclusive with category_denylist. + const char** category_allowlist; + // The number of elements in the category allowlist. + uint32_t category_allowlist_count; + + // The denylist of category names. If non-empty, detection results whose + // category name is in this set will be filtered out. Duplicate or unknown + // category names are ignored. Mutually exclusive with category_allowlist. + const char** category_denylist; + // The number of elements in the category denylist. + uint32_t category_denylist_count; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. Arguments of the callback function include: + // the pointer to detection result, the image that result was obtained + // on, the timestamp relevant to detection results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. 
+ // + // A caller is responsible for closing object detector result. + typedef void (*result_callback_fn)(ObjectDetectorResult* result, + const MpImage& image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates an ObjectDetector from the provided `options`. +// Returns a pointer to the image detector on success. +// If an error occurs, returns `nullptr` and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT void* object_detector_create(struct ObjectDetectorOptions* options, + char** error_msg); + +// Performs image detection on the input `image`. Returns `0` on success. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int object_detector_detect_image(void* detector, const MpImage* image, + ObjectDetectorResult* result, + char** error_msg); + +// Performs image detection on the provided video frame. +// Only use this method when the ObjectDetector is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int object_detector_detect_for_video(void* detector, + const MpImage* image, + int64_t timestamp_ms, + ObjectDetectorResult* result, + char** error_msg); + +// Sends live image data to image detection, and the results will be +// available via the `result_callback` provided in the ObjectDetectorOptions. +// Only use this method when the ObjectDetector is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the object detector. The input timestamps must be monotonically +// increasing. +// The `result_callback` provides: +// - The detection results as an ObjectDetectorResult object. +// - The const reference to the corresponding input image that the image +// detector runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int object_detector_detect_async(void* detector, const MpImage* image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a ObjectDetectorResult result. +// Does not free the result pointer itself. +MP_EXPORT void object_detector_close_result(ObjectDetectorResult* result); + +// Frees object detector. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. 
+MP_EXPORT int object_detector_close(void* detector, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_OBJECT_DETECTOR_OBJECT_DETECTOR_H_ diff --git a/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc b/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc new file mode 100644 index 000000000..8e53fa5c9 --- /dev/null +++ b/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc @@ -0,0 +1,253 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/object_detector/object_detector.h" + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kImageFile[] = "cats_and_dogs.jpg"; +constexpr char kModelName[] = + "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.tflite"; +constexpr float kPrecision = 1e-4; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, file_name); +} + +TEST(ObjectDetectorTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + ObjectDetectorOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0, + }; + + void* detector = object_detector_create(&options, /* error_msg */ nullptr); + EXPECT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + ObjectDetectorResult result; + object_detector_detect_image(detector, &mp_image, &result, + /* error_msg */ nullptr); + EXPECT_EQ(result.detections_count, 10); + EXPECT_EQ(result.detections[0].categories_count, 1); + 
EXPECT_EQ(std::string{result.detections[0].categories[0].category_name}, + "cat"); + EXPECT_NEAR(result.detections[0].categories[0].score, 0.6992f, kPrecision); + object_detector_close_result(&result); + object_detector_close(detector, /* error_msg */ nullptr); +} + +TEST(ObjectDetectorTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + ObjectDetectorOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* display_names_locale= */ nullptr, + /* max_results= */ 3, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0, + }; + + void* detector = object_detector_create(&options, /* error_msg */ nullptr); + EXPECT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + ObjectDetectorResult result; + object_detector_detect_for_video(detector, &mp_image, i, &result, + /* error_msg */ nullptr); + EXPECT_EQ(result.detections_count, 3); + EXPECT_EQ(result.detections[0].categories_count, 1); + EXPECT_EQ(std::string{result.detections[0].categories[0].category_name}, + "cat"); + EXPECT_NEAR(result.detections[0].categories[0].score, 0.6992f, kPrecision); + object_detector_close_result(&result); + } + object_detector_close(detector, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. 
+struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(ObjectDetectorResult* detector_result, const MpImage& image, + int64_t timestamp, char* error_msg) { + ASSERT_NE(detector_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + EXPECT_EQ(detector_result->detections_count, 3); + EXPECT_EQ(detector_result->detections[0].categories_count, 1); + EXPECT_EQ( + std::string{detector_result->detections[0].categories[0].category_name}, + "cat"); + EXPECT_NEAR(detector_result->detections[0].categories[0].score, 0.6992f, + kPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(ObjectDetectorTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + ObjectDetectorOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* display_names_locale= */ nullptr, + /* max_results= */ 3, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* detector = object_detector_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(object_detector_detect_async(detector, &mp_image, i, + /* error_msg */ nullptr), + 0); + } + object_detector_close(detector, /* error_msg */ nullptr); + + // Due to the flow limiter, the total of outputs might be smaller than the + // number of iterations. + EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(ObjectDetectorTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. 
+ ObjectDetectorOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + }; + + char* error_msg; + void* detector = object_detector_create(&options, &error_msg); + EXPECT_EQ(detector, nullptr); + + EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); + + free(error_msg); +} + +TEST(ObjectDetectorTest, FailedDetectionHandling) { + const std::string model_path = GetFullPath(kModelName); + ObjectDetectorOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0, + }; + + void* detector = object_detector_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(detector, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + ObjectDetectorResult result; + char* error_msg; + object_detector_detect_image(detector, &mp_image, &result, &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); + free(error_msg); + object_detector_close(detector, /* error_msg */ nullptr); +} + +} // namespace diff --git a/mediapipe/tasks/cc/components/processors/BUILD b/mediapipe/tasks/cc/components/processors/BUILD index 1a1e75d41..f02c6cd04 100644 --- a/mediapipe/tasks/cc/components/processors/BUILD +++ b/mediapipe/tasks/cc/components/processors/BUILD @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
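
The two failure-path tests above rely on the error-reporting contract of the C API: each entry point accepts an optional char** that, on failure, receives a message allocated by the implementation (via strdup) which the caller must free. A small sketch of that contract in use; the helper name and logging are illustrative assumptions.

#include <cstdio>
#include <cstdlib>

#include "mediapipe/tasks/c/vision/object_detector/object_detector.h"

// Hypothetical helper: returns true if the detector was created, otherwise
// prints and releases the error string the C layer allocated for the caller.
bool TryCreateDetector(const char* model_path, void** out_detector) {
  ObjectDetectorOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ model_path},
      /* running_mode= */ RunningMode::IMAGE,
      /* display_names_locale= */ nullptr,
      /* max_results= */ -1,
      /* score_threshold= */ 0.0,
      /* category_allowlist= */ nullptr,
      /* category_allowlist_count= */ 0,
      /* category_denylist= */ nullptr,
      /* category_denylist_count= */ 0,
  };

  char* error_msg = nullptr;
  *out_detector = object_detector_create(&options, &error_msg);
  if (*out_detector == nullptr) {
    std::fprintf(stderr, "object_detector_create failed: %s\n", error_msg);
    free(error_msg);  // Ownership of the message is transferred to the caller.
    return false;
  }
  return true;
}
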
+load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_library_with_tflite") + package(default_visibility = ["//mediapipe/tasks:internal"]) licenses(["notice"]) @@ -99,10 +101,14 @@ cc_library( alwayslink = 1, ) -cc_library( +cc_library_with_tflite( name = "image_preprocessing_graph", srcs = ["image_preprocessing_graph.cc"], hdrs = ["image_preprocessing_graph.h"], + tflite_deps = [ + "//mediapipe/tasks/cc/core:model_resources", + "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", + ], deps = [ "//mediapipe/calculators/core:pass_through_calculator", "//mediapipe/calculators/image:image_clone_calculator", @@ -120,10 +126,8 @@ cc_library( "//mediapipe/gpu:gpu_origin_cc_proto", "//mediapipe/tasks/cc:common", "//mediapipe/tasks/cc/components/processors/proto:image_preprocessing_graph_options_cc_proto", - "//mediapipe/tasks/cc/core:model_resources", "//mediapipe/tasks/cc/core/proto:acceleration_cc_proto", "//mediapipe/tasks/cc/core/proto:base_options_cc_proto", - "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@org_tensorflow//tensorflow/lite/schema:schema_fbs", diff --git a/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc b/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc index 1604dfbd5..da9d66c71 100644 --- a/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc +++ b/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc @@ -271,8 +271,9 @@ class ImagePreprocessingGraph : public Subgraph { }; } }; + REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::components::processors::ImagePreprocessingGraph); + ::mediapipe::tasks::components::processors::ImagePreprocessingGraph) } // namespace processors } // namespace components diff --git a/mediapipe/tasks/cc/components/processors/proto/BUILD b/mediapipe/tasks/cc/components/processors/proto/BUILD index a45c91633..82d4ea21b 100644 --- a/mediapipe/tasks/cc/components/processors/proto/BUILD +++ b/mediapipe/tasks/cc/components/processors/proto/BUILD @@ -93,8 +93,3 @@ mediapipe_proto_library( "//mediapipe/framework:calculator_proto", ], ) - -mediapipe_proto_library( - name = "transformer_params_proto", - srcs = ["transformer_params.proto"], -) diff --git a/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto b/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto deleted file mode 100644 index b2d13c3a2..000000000 --- a/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright 2023 The MediaPipe Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -syntax = "proto3"; - -package mediapipe.tasks.components.processors.proto; - -option java_package = "com.google.mediapipe.tasks.components.processors.proto"; -option java_outer_classname = "TransformerParametersProto"; - -// The parameters of transformer (https://arxiv.org/pdf/1706.03762.pdf) -message TransformerParameters { - // Batch size of tensors. - int32 batch_size = 1; - - // Maximum sequence length of the input/output tensor. - int32 max_seq_length = 2; - - // Embedding dimension (or model dimension), `d_model` in the paper. - // `d_k` == `d_v` == `d_model`/`h`. - int32 embedding_dim = 3; - - // Hidden dimension used in the feedforward layer, `d_ff` in the paper. - int32 hidden_dimension = 4; - - // Head dimension, `d_k` or `d_v` in the paper. - int32 head_dimension = 5; - - // Number of heads, `h` in the paper. - int32 num_heads = 6; - - // Number of stacked transformers, `N` in the paper. - int32 num_stacks = 7; - - // Whether to use Multi-Query-Attention (MQA). - bool use_mqa = 8; -} diff --git a/mediapipe/tasks/cc/core/BUILD b/mediapipe/tasks/cc/core/BUILD index fa61feb9d..9185d0a97 100644 --- a/mediapipe/tasks/cc/core/BUILD +++ b/mediapipe/tasks/cc/core/BUILD @@ -91,16 +91,16 @@ cc_library( ], ) -# TODO: Switch to use cc_library_with_tflite after the MediaPipe InferenceCalculator -# supports TFLite-in-GMSCore. -cc_library( +cc_library_with_tflite( name = "model_task_graph", srcs = ["model_task_graph.cc"], hdrs = ["model_task_graph.h"], - deps = [ - ":model_asset_bundle_resources", + tflite_deps = [ ":model_resources", ":model_resources_cache", + ], + deps = [ + ":model_asset_bundle_resources", ":model_resources_calculator", "//mediapipe/calculators/tensor:inference_calculator_cc_proto", "//mediapipe/framework:calculator_cc_proto", @@ -224,19 +224,16 @@ cc_library_with_tflite( alwayslink = 1, ) -cc_test_with_tflite( +cc_test( name = "model_resources_calculator_test", srcs = ["model_resources_calculator_test.cc"], data = [ "//mediapipe/tasks/testdata/core:test_models", ], - tflite_deps = [ + deps = [ ":model_resources", ":model_resources_cache", ":model_resources_calculator", - "@org_tensorflow//tensorflow/lite:test_util", - ], - deps = [ "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:parse_text_proto", "//mediapipe/tasks/cc/core/proto:external_file_cc_proto", @@ -245,6 +242,7 @@ cc_test_with_tflite( "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@org_tensorflow//tensorflow/lite:test_util", "@org_tensorflow//tensorflow/lite/core/api:op_resolver", ], ) @@ -264,6 +262,7 @@ cc_library_with_tflite( "//mediapipe/framework:executor", "//mediapipe/framework/port:status", "//mediapipe/framework/tool:name_util", + "//mediapipe/gpu:gpu_shared_data_internal", "//mediapipe/tasks/cc:common", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", @@ -302,22 +301,26 @@ cc_test_with_tflite( ], ) -cc_library( +cc_library_with_tflite( name = "base_task_api", hdrs = ["base_task_api.h"], - deps = [ + tflite_deps = [ ":task_runner", + ], + deps = [ "//mediapipe/calculators/core:flow_limiter_calculator", ], ) -cc_library( +cc_library_with_tflite( name = "task_api_factory", hdrs = ["task_api_factory.h"], - deps = [ + tflite_deps = [ ":base_task_api", ":model_resources", ":task_runner", + ], + deps = [ ":utils", "//mediapipe/framework:calculator_cc_proto", "//mediapipe/framework:executor", diff 
--git a/mediapipe/tasks/cc/core/model_resources.h b/mediapipe/tasks/cc/core/model_resources.h index d8e8dada0..ab3897015 100644 --- a/mediapipe/tasks/cc/core/model_resources.h +++ b/mediapipe/tasks/cc/core/model_resources.h @@ -78,10 +78,10 @@ class ModelResources { ModelResources& operator=(const ModelResources&) = delete; // Returns the model resources tag. - std::string GetTag() const { return tag_; } + const std::string& GetTag() const { return tag_; } - // Returns a copy of the model file proto. - proto::ExternalFile GetModelFile() const { return *model_file_; } + // Returns the model file proto. + const proto::ExternalFile& GetModelFile() const { return *model_file_; } // Returns a pointer to tflite::model. const tflite::Model* GetTfLiteModel() const; diff --git a/mediapipe/tasks/cc/core/model_task_graph.cc b/mediapipe/tasks/cc/core/model_task_graph.cc index b82a69718..57bb25bf8 100644 --- a/mediapipe/tasks/cc/core/model_task_graph.cc +++ b/mediapipe/tasks/cc/core/model_task_graph.cc @@ -31,6 +31,7 @@ limitations under the License. #include "mediapipe/framework/api2/builder.h" #include "mediapipe/framework/api2/port.h" #include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" #include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/core/model_asset_bundle_resources.h" #include "mediapipe/tasks/cc/core/model_resources.h" @@ -147,7 +148,8 @@ class InferenceSubgraph : public Subgraph { return delegate; } }; -REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::core::InferenceSubgraph); + +REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::core::InferenceSubgraph) absl::StatusOr ModelTaskGraph::GetConfig( SubgraphContext* sc) { diff --git a/mediapipe/tasks/cc/core/task_runner.cc b/mediapipe/tasks/cc/core/task_runner.cc index 88c91bcdb..e3862ddd7 100644 --- a/mediapipe/tasks/cc/core/task_runner.cc +++ b/mediapipe/tasks/cc/core/task_runner.cc @@ -39,6 +39,10 @@ limitations under the License. 
#include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/core/model_resources_cache.h" +#if !MEDIAPIPE_DISABLE_GPU +#include "mediapipe/gpu/gpu_shared_data_internal.h" +#endif // !MEDIAPIPE_DISABLE_GPU + namespace mediapipe { namespace tasks { namespace core { @@ -88,16 +92,34 @@ absl::StatusOr GenerateOutputPacketMap( } // namespace /* static */ +#if !MEDIAPIPE_DISABLE_GPU +absl::StatusOr> TaskRunner::Create( + CalculatorGraphConfig config, + std::unique_ptr op_resolver, + PacketsCallback packets_callback, + std::shared_ptr default_executor, + std::optional input_side_packets, + std::shared_ptr<::mediapipe::GpuResources> resources) { +#else absl::StatusOr> TaskRunner::Create( CalculatorGraphConfig config, std::unique_ptr op_resolver, PacketsCallback packets_callback, std::shared_ptr default_executor, std::optional input_side_packets) { +#endif // !MEDIAPIPE_DISABLE_GPU auto task_runner = absl::WrapUnique(new TaskRunner(packets_callback)); MP_RETURN_IF_ERROR(task_runner->Initialize( std::move(config), std::move(op_resolver), std::move(default_executor), std::move(input_side_packets))); + +#if !MEDIAPIPE_DISABLE_GPU + if (resources) { + MP_RETURN_IF_ERROR( + task_runner->graph_.SetGpuResources(std::move(resources))); + } +#endif // !MEDIAPIPE_DISABLE_GPU + MP_RETURN_IF_ERROR(task_runner->Start()); return task_runner; } diff --git a/mediapipe/tasks/cc/core/task_runner.h b/mediapipe/tasks/cc/core/task_runner.h index 810063d4b..ef48bef55 100644 --- a/mediapipe/tasks/cc/core/task_runner.h +++ b/mediapipe/tasks/cc/core/task_runner.h @@ -42,6 +42,11 @@ limitations under the License. #include "tensorflow/lite/core/api/op_resolver.h" namespace mediapipe { + +#if !MEDIAPIPE_DISABLE_GPU +class GpuResources; +#endif // !MEDIAPIPE_DISABLE_GPU + namespace tasks { namespace core { @@ -72,12 +77,22 @@ class TaskRunner { // asynchronous method, Send(), to provide the input packets. If the packets // callback is absent, clients must use the synchronous method, Process(), to // provide the input packets and receive the output packets. +#if !MEDIAPIPE_DISABLE_GPU + static absl::StatusOr> Create( + CalculatorGraphConfig config, + std::unique_ptr op_resolver = nullptr, + PacketsCallback packets_callback = nullptr, + std::shared_ptr default_executor = nullptr, + std::optional input_side_packets = std::nullopt, + std::shared_ptr<::mediapipe::GpuResources> resources = nullptr); +#else static absl::StatusOr> Create( CalculatorGraphConfig config, std::unique_ptr op_resolver = nullptr, PacketsCallback packets_callback = nullptr, std::shared_ptr default_executor = nullptr, std::optional input_side_packets = std::nullopt); +#endif // !MEDIAPIPE_DISABLE_GPU // TaskRunner is neither copyable nor movable. TaskRunner(const TaskRunner&) = delete; diff --git a/mediapipe/tasks/cc/vision/core/BUILD b/mediapipe/tasks/cc/vision/core/BUILD index 6bcf2f5d6..d7ebfec68 100644 --- a/mediapipe/tasks/cc/vision/core/BUILD +++ b/mediapipe/tasks/cc/vision/core/BUILD @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_library_with_tflite") + licenses(["notice"]) package(default_visibility = ["//mediapipe/tasks:internal"]) @@ -31,9 +33,15 @@ cc_library( ], ) -cc_library( +cc_library_with_tflite( name = "base_vision_task_api", hdrs = ["base_vision_task_api.h"], + tflite_deps = [ + "//mediapipe/tasks/cc/core:base_task_api", + "//mediapipe/tasks/cc/core:task_api_factory", + "//mediapipe/tasks/cc/core:task_runner", + "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", + ], deps = [ ":image_processing_options", ":running_mode", @@ -42,24 +50,22 @@ cc_library( "//mediapipe/framework/formats:image", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/tasks/cc/components/containers:rect", - "//mediapipe/tasks/cc/core:base_task_api", - "//mediapipe/tasks/cc/core:task_api_factory", - "//mediapipe/tasks/cc/core:task_runner", - "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) -cc_library( +cc_library_with_tflite( name = "vision_task_api_factory", hdrs = ["vision_task_api_factory.h"], - deps = [ + tflite_deps = [ ":base_vision_task_api", + "//mediapipe/tasks/cc/core:task_api_factory", + ], + deps = [ "//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/framework:calculator_cc_proto", - "//mediapipe/tasks/cc/core:task_api_factory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", diff --git a/mediapipe/tasks/cc/vision/face_detector/BUILD b/mediapipe/tasks/cc/vision/face_detector/BUILD index fbfd94628..bdad3bd06 100644 --- a/mediapipe/tasks/cc/vision/face_detector/BUILD +++ b/mediapipe/tasks/cc/vision/face_detector/BUILD @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_library_with_tflite") package(default_visibility = [ "//mediapipe/tasks:internal", @@ -18,9 +19,15 @@ package(default_visibility = [ licenses(["notice"]) -cc_library( +cc_library_with_tflite( name = "face_detector_graph", srcs = ["face_detector_graph.cc"], + tflite_deps = [ + "//mediapipe/tasks/cc/components/processors:image_preprocessing_graph", + "//mediapipe/tasks/cc/core:model_task_graph", + "//mediapipe/tasks/cc/core:model_resources", + "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", + ], deps = [ "//mediapipe/calculators/core:clip_vector_size_calculator", "//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto", @@ -38,6 +45,7 @@ cc_library( "//mediapipe/calculators/util:non_max_suppression_calculator_cc_proto", "//mediapipe/calculators/util:rect_transformation_calculator", "//mediapipe/calculators/util:rect_transformation_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", "//mediapipe/framework/api2:builder", "//mediapipe/framework/api2:port", "//mediapipe/framework/formats:detection_cc_proto", @@ -45,36 +53,34 @@ cc_library( "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/formats:tensor", "//mediapipe/tasks/cc:common", - "//mediapipe/tasks/cc/components/processors:image_preprocessing_graph", - "//mediapipe/tasks/cc/core:model_resources", - "//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_cc_proto", - "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", ], alwayslink = 1, ) -cc_library( +cc_library_with_tflite( name = "face_detector", srcs = ["face_detector.cc"], hdrs = ["face_detector.h"], + tflite_deps = [ + ":face_detector_graph", + "//mediapipe/tasks/cc/vision/core:base_vision_task_api", + "//mediapipe/tasks/cc/vision/core:vision_task_api_factory", + ], visibility = ["//visibility:public"], deps = [ - ":face_detector_graph", "//mediapipe/framework/api2:builder", "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework/formats:image", "//mediapipe/tasks/cc/components/containers:detection_result", "//mediapipe/tasks/cc/core:base_options", "//mediapipe/tasks/cc/core:utils", - "//mediapipe/tasks/cc/vision/core:base_vision_task_api", "//mediapipe/tasks/cc/vision/core:image_processing_options", "//mediapipe/tasks/cc/vision/core:running_mode", - "//mediapipe/tasks/cc/vision/core:vision_task_api_factory", "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_cc_proto", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/mediapipe/tasks/cc/vision/face_detector/face_detector_graph.cc b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph.cc index 8586a7ebd..5a8a60101 100644 --- a/mediapipe/tasks/cc/vision/face_detector/face_detector_graph.cc +++ b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include "absl/status/status.h" @@ -26,6 +27,7 @@ limitations under the License. 
#include "mediapipe/calculators/util/rect_transformation_calculator.pb.h" #include "mediapipe/framework/api2/builder.h" #include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/rect.pb.h" @@ -213,14 +215,16 @@ class FaceDetectorGraph : public core::ModelTaskGraph { } private: + std::string GetImagePreprocessingGraphName() { + return "mediapipe.tasks.components.processors.ImagePreprocessingGraph"; + } absl::StatusOr BuildFaceDetectionSubgraph( const FaceDetectorGraphOptions& subgraph_options, const core::ModelResources& model_resources, Source image_in, Source norm_rect_in, Graph& graph) { // Image preprocessing subgraph to convert image to tensor for the tflite // model. - auto& preprocessing = graph.AddNode( - "mediapipe.tasks.components.processors.ImagePreprocessingGraph"); + auto& preprocessing = graph.AddNode(GetImagePreprocessingGraphName()); bool use_gpu = components::processors::DetermineImagePreprocessingGpuBackend( subgraph_options.base_options().acceleration()); @@ -337,7 +341,7 @@ class FaceDetectorGraph : public core::ModelTaskGraph { }; REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::face_detector::FaceDetectorGraph); + ::mediapipe::tasks::vision::face_detector::FaceDetectorGraph) } // namespace face_detector } // namespace vision diff --git a/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test.cc b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test.cc index 651ad722d..768b92cfd 100644 --- a/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test.cc +++ b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "absl/flags/flag.h" #include "absl/log/absl_check.h" #include "absl/status/statusor.h" +#include "absl/strings/match.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "mediapipe/framework/api2/builder.h" @@ -92,11 +93,10 @@ constexpr float kFaceDetectionMaxDiff = 0.01; // Helper function to create a TaskRunner. absl::StatusOr> CreateTaskRunner( - absl::string_view model_name) { + absl::string_view model_name, std::string graph_name) { Graph graph; - auto& face_detector_graph = - graph.AddNode("mediapipe.tasks.vision.face_detector.FaceDetectorGraph"); + auto& face_detector_graph = graph.AddNode(graph_name); auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( @@ -136,6 +136,8 @@ struct TestParams { std::string test_image_name; // Expected face detection results. std::vector expected_result; + // The name of the mediapipe graph to run. 
+ std::string graph_name; }; class FaceDetectorGraphTest : public testing::TestWithParam {}; @@ -149,8 +151,9 @@ TEST_P(FaceDetectorGraphTest, Succeed) { input_norm_rect.set_y_center(0.5); input_norm_rect.set_width(1.0); input_norm_rect.set_height(1.0); - MP_ASSERT_OK_AND_ASSIGN( - auto task_runner, CreateTaskRunner(GetParam().face_detection_model_name)); + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, + CreateTaskRunner(GetParam().face_detection_model_name, + GetParam().graph_name)); auto output_packets = task_runner->Process( {{kImageName, MakePacket(std::move(image))}, {kNormRectName, @@ -165,11 +168,15 @@ TEST_P(FaceDetectorGraphTest, Succeed) { INSTANTIATE_TEST_SUITE_P( FaceDetectorGraphTest, FaceDetectorGraphTest, - Values(TestParams{.test_name = "ShortRange", - .face_detection_model_name = kShortRangeBlazeFaceModel, - .test_image_name = kPortraitImage, - .expected_result = {GetExpectedFaceDetectionResult( - kPortraitExpectedDetection)}}), + Values( + TestParams{ + .test_name = "ShortRange", + .face_detection_model_name = kShortRangeBlazeFaceModel, + .test_image_name = kPortraitImage, + .expected_result = {GetExpectedFaceDetectionResult( + kPortraitExpectedDetection)}, + .graph_name = + "mediapipe.tasks.vision.face_detector.FaceDetectorGraph"}, ), [](const TestParamInfo& info) { return info.param.test_name; }); diff --git a/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.cc b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.cc new file mode 100644 index 000000000..a8b82a2ac --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.cc @@ -0,0 +1,28 @@ +#include "mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.h" + +#include +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "tensorflow/lite/core/api/op_resolver.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace face_detector { +namespace test_util { + +absl::StatusOr> +CreateTaskRunnerGms(mediapipe::CalculatorGraphConfig config, + std::unique_ptr op_resolver) { + return mediapipe::tasks::core::TaskRunner::Create(std::move(config), + std::move(op_resolver)); +} + +} // namespace test_util +} // namespace face_detector +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.h b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.h new file mode 100644 index 000000000..c34464c9e --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_detector/face_detector_graph_test_task_runner_gms.h @@ -0,0 +1,27 @@ +#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_DETECTOR_FACE_DETECTOR_GRAPH_TEST_TASK_RUNNER_GMS_H_ +#define MEDIAPIPE_TASKS_CC_VISION_FACE_DETECTOR_FACE_DETECTOR_GRAPH_TEST_TASK_RUNNER_GMS_H_ + +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "tensorflow/lite/core/api/op_resolver.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace face_detector { +namespace test_util { + +absl::StatusOr> +CreateTaskRunnerGms(mediapipe::CalculatorGraphConfig config, + std::unique_ptr op_resolver = nullptr); + +} // namespace test_util +} // namespace face_detector +} // namespace vision +} // namespace tasks +} // namespace 
mediapipe
+
+#endif  // MEDIAPIPE_TASKS_CC_VISION_FACE_DETECTOR_FACE_DETECTOR_GRAPH_TEST_TASK_RUNNER_GMS_H_
diff --git a/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc b/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc
index 6b67e43aa..1a99cb88c 100644
--- a/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc
+++ b/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc
@@ -33,6 +33,7 @@
 #include "mediapipe/framework/port/opencv_imgproc_inc.h"
 #include "mediapipe/framework/port/status.h"
 #include "mediapipe/gpu/gpu_origin.pb.h"
+#include "mediapipe/gpu/gpu_service.h"
 #include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"
 #if !MEDIAPIPE_DISABLE_GPU
@@ -145,7 +146,8 @@ absl::Status TensorsToImageCalculator::UpdateContract(CalculatorContract* cc) {
 #if MEDIAPIPE_METAL_ENABLED
   MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
 #else
-  return GlCalculatorHelper::UpdateContract(cc);
+  return GlCalculatorHelper::UpdateContract(cc,
+                                            /*request_gpu_as_optional=*/true);
 #endif  // MEDIAPIPE_METAL_ENABLED
 #endif  // !MEDIAPIPE_DISABLE_GPU
   return absl::OkStatus();
@@ -153,16 +155,7 @@ absl::Status TensorsToImageCalculator::Open(CalculatorContext* cc) {
   options_ = cc->Options();
-  if (CanUseGpu()) {
-#if !MEDIAPIPE_DISABLE_GPU
-#if MEDIAPIPE_METAL_ENABLED
-    gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
-    RET_CHECK(gpu_helper_);
-#else
-    MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
-#endif  // MEDIAPIPE_METAL_ENABLED
-#endif  // !MEDIAPIPE_DISABLE_GPU
-  } else {
+  if (!CanUseGpu()) {
     ABSL_CHECK(options_.has_input_tensor_float_range() ^
                options_.has_input_tensor_uint_range())
         << "Must specify either `input_tensor_float_range` or "
@@ -179,7 +172,9 @@ absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) {
 #if MEDIAPIPE_METAL_ENABLED
   return MetalProcess(cc);
 #else
-  return GlProcess(cc);
+  if (cc->Service(kGpuService).IsAvailable()) {
+    return GlProcess(cc);
+  }
 #endif  // MEDIAPIPE_METAL_ENABLED
 #endif  // !MEDIAPIPE_DISABLE_GPU
 }
@@ -188,14 +183,16 @@ absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) {
 absl::Status TensorsToImageCalculator::Close(CalculatorContext* cc) {
 #if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
-  gl_helper_.RunInGlContext([this] {
+  if (gl_initialized_) {
+    gl_helper_.RunInGlContext([this] {
 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
-    gl_compute_program_.reset();
+      gl_compute_program_.reset();
 #else
-    if (program_) glDeleteProgram(program_);
-    program_ = 0;
+      if (program_) glDeleteProgram(program_);
+      program_ = 0;
 #endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
-  });
+    });
+  }
 #endif  // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
   return absl::OkStatus();
 }
@@ -315,6 +312,9 @@ absl::Status TensorsToImageCalculator::MetalProcess(CalculatorContext* cc) {
 }
 absl::Status TensorsToImageCalculator::MetalSetup(CalculatorContext* cc) {
+  gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
+  RET_CHECK(gpu_helper_);
+
   id<MTLDevice> device = gpu_helper_.mtlDevice;
   const std::string shader_source = R"(
@@ -450,6 +450,10 @@ absl::Status TensorsToImageCalculator::GlSetup(CalculatorContext* cc) {
 }
 absl::Status TensorsToImageCalculator::GlProcess(CalculatorContext* cc) {
+  if (!gl_initialized_) {
+    MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
+  }
+
return gl_helper_.RunInGlContext([this, cc]() -> absl::Status { if (!gl_initialized_) { MP_RETURN_IF_ERROR(GlSetup(cc)); diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD index 5b75ef8fc..6db49c668 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD @@ -155,6 +155,37 @@ cc_library( # TODO: open source hand joints graph +cc_library( + name = "hand_roi_refinement_graph", + srcs = ["hand_roi_refinement_graph.cc"], + deps = [ + "//mediapipe/calculators/tensor:image_to_tensor_calculator_cc_proto", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2/stream:detections_to_rects", + "//mediapipe/framework/api2/stream:landmarks_projection", + "//mediapipe/framework/api2/stream:landmarks_to_detection", + "//mediapipe/framework/api2/stream:rect_transformation", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/tasks/cc/components/processors:image_preprocessing_graph", + "//mediapipe/tasks/cc/components/processors/proto:image_preprocessing_graph_options_cc_proto", + "//mediapipe/tasks/cc/core:model_task_graph", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + cc_library( name = "hand_landmarker_result", srcs = ["hand_landmarker_result.cc"], diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_roi_refinement_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_roi_refinement_graph.cc new file mode 100644 index 000000000..e7e9b94d0 --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_roi_refinement_graph.cc @@ -0,0 +1,154 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include "absl/status/statusor.h" +#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h" +#include "mediapipe/calculators/tensor/tensors_to_landmarks_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/detections_to_rects.h" +#include "mediapipe/framework/api2/stream/landmarks_projection.h" +#include "mediapipe/framework/api2/stream/landmarks_to_detection.h" +#include "mediapipe/framework/api2/stream/rect_transformation.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/tasks/cc/components/processors/image_preprocessing_graph.h" +#include "mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options.pb.h" +#include "mediapipe/tasks/cc/core/model_task_graph.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace hand_landmarker { + +using ::mediapipe::api2::builder::ConvertAlignmentPointsDetectionToRect; +using ::mediapipe::api2::builder::ConvertLandmarksToDetection; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::ProjectLandmarks; +using ::mediapipe::api2::builder::ScaleAndShiftAndMakeSquareLong; +using ::mediapipe::api2::builder::Stream; + +// Refine the input hand RoI with hand_roi_refinement model. +// +// Inputs: +// IMAGE - Image +// The image to preprocess. +// NORM_RECT - NormalizedRect +// Coarse RoI of hand. +// Outputs: +// NORM_RECT - NormalizedRect +// Refined RoI of hand. 
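A minimal usage sketch of the subgraph documented above (illustrative only, not part of the patch): it mirrors the wiring used by RefineHandRoi() in holistic_hand_tracking.cc and the options setup in holistic_hand_tracking_test.cc later in this change. The model file name is the one that test uses; the surrounding includes and usings are assumed to match holistic_hand_tracking.cc.

using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Stream;

Graph graph;
Stream<Image> image = graph.In("IMAGE").Cast<Image>();
Stream<NormalizedRect> coarse_roi =
    graph.In("NORM_RECT").Cast<NormalizedRect>();

// Add the subgraph by its registered name and point it at a re-crop model.
auto& refiner = graph.AddNode(
    "mediapipe.tasks.vision.hand_landmarker.HandRoiRefinementGraph");
refiner.GetOptions<hand_landmarker::proto::HandRoiRefinementGraphOptions>()
    .mutable_base_options()
    ->mutable_model_asset()
    ->set_file_name("handrecrop_2020_07_21_v0.f16.tflite");

// Feed the image and the coarse RoI in; read the refined RoI back out.
image >> refiner.In("IMAGE");
coarse_roi >> refiner.In("NORM_RECT");
Stream<NormalizedRect> refined_roi =
    refiner.Out("NORM_RECT").Cast<NormalizedRect>();
refined_roi >> graph.Out("NORM_RECT");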
+class HandRoiRefinementGraph : public core::ModelTaskGraph { + public: + absl::StatusOr GetConfig( + mediapipe::SubgraphContext* context) override { + Graph graph; + Stream image_in = graph.In("IMAGE").Cast(); + Stream roi_in = + graph.In("NORM_RECT").Cast(); + + auto& graph_options = + *context->MutableOptions(); + + MP_ASSIGN_OR_RETURN( + const auto* model_resources, + GetOrCreateModelResources( + context)); + + auto& preprocessing = graph.AddNode( + "mediapipe.tasks.components.processors.ImagePreprocessingGraph"); + bool use_gpu = + components::processors::DetermineImagePreprocessingGpuBackend( + graph_options.base_options().acceleration()); + auto& image_to_tensor_options = + *preprocessing + .GetOptions() + .mutable_image_to_tensor_options(); + image_to_tensor_options.set_keep_aspect_ratio(true); + image_to_tensor_options.set_border_mode( + mediapipe::ImageToTensorCalculatorOptions::BORDER_REPLICATE); + MP_RETURN_IF_ERROR(components::processors::ConfigureImagePreprocessingGraph( + *model_resources, use_gpu, graph_options.base_options().gpu_origin(), + &preprocessing.GetOptions())); + image_in >> preprocessing.In("IMAGE"); + roi_in >> preprocessing.In("NORM_RECT"); + auto tensors_in = preprocessing.Out("TENSORS"); + auto matrix = preprocessing.Out("MATRIX").Cast>(); + auto image_size = + preprocessing.Out("IMAGE_SIZE").Cast>(); + + auto& inference = AddInference( + *model_resources, graph_options.base_options().acceleration(), graph); + tensors_in >> inference.In("TENSORS"); + auto tensors_out = inference.Out("TENSORS").Cast>(); + + MP_ASSIGN_OR_RETURN(auto image_tensor_specs, + BuildInputImageTensorSpecs(*model_resources)); + + // Convert tensors to landmarks. Recrop model outputs two points, + // center point and guide point. + auto& to_landmarks = graph.AddNode("TensorsToLandmarksCalculator"); + auto& to_landmarks_opts = + to_landmarks + .GetOptions(); + to_landmarks_opts.set_num_landmarks(/*num_landmarks=*/2); + to_landmarks_opts.set_input_image_width(image_tensor_specs.image_width); + to_landmarks_opts.set_input_image_height(image_tensor_specs.image_height); + to_landmarks_opts.set_normalize_z(/*z_norm_factor=*/1.0f); + tensors_out.ConnectTo(to_landmarks.In("TENSORS")); + auto recrop_landmarks = to_landmarks.Out("NORM_LANDMARKS") + .Cast(); + + // Project landmarks. + auto projected_recrop_landmarks = + ProjectLandmarks(recrop_landmarks, matrix, graph); + + // Convert re-crop landmarks to detection. + auto recrop_detection = + ConvertLandmarksToDetection(projected_recrop_landmarks, graph); + + // Convert re-crop detection to rect. + auto recrop_rect = ConvertAlignmentPointsDetectionToRect( + recrop_detection, image_size, /*start_keypoint_index=*/0, + /*end_keypoint_index=*/1, /*target_angle=*/-90, graph); + + auto refined_roi = + ScaleAndShiftAndMakeSquareLong(recrop_rect, image_size, + /*scale_x_factor=*/1.0, + /*scale_y_factor=*/1.0, /*shift_x=*/0, + /*shift_y=*/-0.1, graph); + refined_roi >> graph.Out("NORM_RECT").Cast(); + return graph.GetConfig(); + } +}; + +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_landmarker::HandRoiRefinementGraph); + +} // namespace hand_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/BUILD b/mediapipe/tasks/cc/vision/holistic_landmarker/BUILD new file mode 100644 index 000000000..446cf1e09 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/BUILD @@ -0,0 +1,152 @@ +# Copyright 2023 The MediaPipe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package( + default_visibility = ["//mediapipe/tasks:internal"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "holistic_face_tracking", + srcs = ["holistic_face_tracking.cc"], + hdrs = ["holistic_face_tracking.h"], + deps = [ + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2/stream:detections_to_rects", + "//mediapipe/framework/api2/stream:image_size", + "//mediapipe/framework/api2/stream:landmarks_to_detection", + "//mediapipe/framework/api2/stream:loopback", + "//mediapipe/framework/api2/stream:rect_transformation", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:status", + "//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator", + "//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator_cc_proto", + "//mediapipe/tasks/cc/vision/face_detector:face_detector_graph", + "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/face_landmarker:face_blendshapes_graph", + "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_graph", + "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarks_detector_graph", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_blendshapes_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_cc_proto", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "holistic_hand_tracking", + srcs = ["holistic_hand_tracking.cc"], + hdrs = ["holistic_hand_tracking.h"], + deps = [ + "//mediapipe/calculators/util:align_hand_to_pose_in_world_calculator", + "//mediapipe/calculators/util:align_hand_to_pose_in_world_calculator_cc_proto", + "//mediapipe/calculators/util:landmark_visibility_calculator", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2/stream:image_size", + "//mediapipe/framework/api2/stream:landmarks_to_detection", + "//mediapipe/framework/api2/stream:loopback", + "//mediapipe/framework/api2/stream:rect_transformation", + "//mediapipe/framework/api2/stream:split", + "//mediapipe/framework/api2/stream:threshold", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:status", + "//mediapipe/modules/holistic_landmark/calculators:hand_detections_from_pose_to_rects_calculator", + "//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator", + "//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator_cc_proto", + "//mediapipe/tasks/cc/components/utils:gate", + 
"//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_graph", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_roi_refinement_graph", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_cc_proto", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "holistic_pose_tracking", + srcs = ["holistic_pose_tracking.cc"], + hdrs = ["holistic_pose_tracking.h"], + deps = [ + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2/stream:detections_to_rects", + "//mediapipe/framework/api2/stream:image_size", + "//mediapipe/framework/api2/stream:landmarks_to_detection", + "//mediapipe/framework/api2/stream:loopback", + "//mediapipe/framework/api2/stream:merge", + "//mediapipe/framework/api2/stream:presence", + "//mediapipe/framework/api2/stream:rect_transformation", + "//mediapipe/framework/api2/stream:segmentation_smoothing", + "//mediapipe/framework/api2/stream:smoothing", + "//mediapipe/framework/api2/stream:split", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/tasks/cc/components/utils:gate", + "//mediapipe/tasks/cc/vision/pose_detector:pose_detector_graph", + "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarks_detector_graph", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_cc_proto", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "holistic_landmarker_graph", + srcs = ["holistic_landmarker_graph.cc"], + deps = [ + ":holistic_face_tracking", + ":holistic_hand_tracking", + ":holistic_pose_tracking", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2/stream:split", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/tasks/cc/core:model_asset_bundle_resources", + "//mediapipe/tasks/cc/core:model_resources_cache", + "//mediapipe/tasks/cc/core:model_task_graph", + "//mediapipe/tasks/cc/core:utils", + "//mediapipe/tasks/cc/metadata/utils:zip_utils", + "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_blendshapes_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_cc_proto", + 
"//mediapipe/tasks/cc/vision/pose_landmarker:pose_topology", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_cc_proto", + "//mediapipe/util:graph_builder_utils", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.cc new file mode 100644 index 000000000..1116cda21 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.cc @@ -0,0 +1,260 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.h" + +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_format.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/detections_to_rects.h" +#include "mediapipe/framework/api2/stream/image_size.h" +#include "mediapipe/framework/api2/stream/landmarks_to_detection.h" +#include "mediapipe/framework/api2/stream/loopback.h" +#include "mediapipe/framework/api2/stream/rect_transformation.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +namespace { + +using ::mediapipe::NormalizedRect; +using ::mediapipe::api2::builder::ConvertDetectionsToRectUsingKeypoints; +using ::mediapipe::api2::builder::ConvertDetectionToRect; +using ::mediapipe::api2::builder::ConvertLandmarksToDetection; +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::GetLoopbackData; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Scale; +using ::mediapipe::api2::builder::ScaleAndMakeSquare; +using ::mediapipe::api2::builder::Stream; + +struct FaceLandmarksResult { + std::optional> landmarks; + std::optional> classifications; +}; + +absl::Status ValidateGraphOptions( + const face_detector::proto::FaceDetectorGraphOptions& + face_detector_graph_options, + const face_landmarker::proto::FaceLandmarksDetectorGraphOptions& + 
face_landmarks_detector_graph_options, + const HolisticFaceTrackingRequest& request) { + if (face_detector_graph_options.num_faces() != 1) { + return absl::InvalidArgumentError(absl::StrFormat( + "Only support num_faces to be 1, but got num_faces = %d.", + face_detector_graph_options.num_faces())); + } + if (request.classifications && !face_landmarks_detector_graph_options + .has_face_blendshapes_graph_options()) { + return absl::InvalidArgumentError( + "Blendshapes detection is requested, but " + "face_blendshapes_graph_options is not configured."); + } + return absl::OkStatus(); +} + +Stream GetFaceRoiFromPoseFaceLandmarks( + Stream pose_face_landmarks, + Stream> image_size, Graph& graph) { + Stream detection = + ConvertLandmarksToDetection(pose_face_landmarks, graph); + + // Refer the pose face landmarks indices here: + // https://developers.google.com/mediapipe/solutions/vision/pose_landmarker#pose_landmarker_model + Stream rect = ConvertDetectionToRect( + detection, image_size, /*start_keypoint_index=*/5, + /*end_keypoint_index=*/2, /*target_angle=*/0, graph); + + // Scale the face RoI from a tight rect enclosing the pose face landmarks, to + // a larger square so that the whole face is within the RoI. + return ScaleAndMakeSquare(rect, image_size, + /*scale_x_factor=*/3.0, + /*scale_y_factor=*/3.0, graph); +} + +Stream GetFaceRoiFromFaceLandmarks( + Stream face_landmarks, + Stream> image_size, Graph& graph) { + Stream detection = + ConvertLandmarksToDetection(face_landmarks, graph); + + Stream rect = ConvertDetectionToRect( + detection, image_size, /*start_keypoint_index=*/33, + /*end_keypoint_index=*/263, /*target_angle=*/0, graph); + + return Scale(rect, image_size, + /*scale_x_factor=*/1.5, + /*scale_y_factor=*/1.5, graph); +} + +Stream> GetFaceDetections( + Stream image, Stream roi, + const face_detector::proto::FaceDetectorGraphOptions& + face_detector_graph_options, + Graph& graph) { + auto& face_detector_graph = + graph.AddNode("mediapipe.tasks.vision.face_detector.FaceDetectorGraph"); + face_detector_graph + .GetOptions() = + face_detector_graph_options; + image >> face_detector_graph.In("IMAGE"); + roi >> face_detector_graph.In("NORM_RECT"); + return face_detector_graph.Out("DETECTIONS").Cast>(); +} + +Stream GetFaceRoiFromFaceDetections( + Stream> face_detections, + Stream> image_size, Graph& graph) { + // Convert detection to rect. + Stream rect = ConvertDetectionsToRectUsingKeypoints( + face_detections, image_size, /*start_keypoint_index=*/0, + /*end_keypoint_index=*/1, /*target_angle=*/0, graph); + + return ScaleAndMakeSquare(rect, image_size, + /*scale_x_factor=*/2.0, + /*scale_y_factor=*/2.0, graph); +} + +Stream TrackFaceRoi( + Stream prev_landmarks, Stream roi, + Stream> image_size, Graph& graph) { + // Gets face ROI from previous frame face landmarks. 
+ Stream prev_roi = + GetFaceRoiFromFaceLandmarks(prev_landmarks, image_size, graph); + + auto& tracking_node = graph.AddNode("RoiTrackingCalculator"); + auto& tracking_node_opts = + tracking_node.GetOptions(); + auto* rect_requirements = tracking_node_opts.mutable_rect_requirements(); + rect_requirements->set_rotation_degrees(15.0); + rect_requirements->set_translation(0.1); + rect_requirements->set_scale(0.3); + auto* landmarks_requirements = + tracking_node_opts.mutable_landmarks_requirements(); + landmarks_requirements->set_recrop_rect_margin(-0.2); + prev_landmarks.ConnectTo(tracking_node.In("PREV_LANDMARKS")); + prev_roi.ConnectTo(tracking_node.In("PREV_LANDMARKS_RECT")); + roi.ConnectTo(tracking_node.In("RECROP_RECT")); + image_size.ConnectTo(tracking_node.In("IMAGE_SIZE")); + return tracking_node.Out("TRACKING_RECT").Cast(); +} + +FaceLandmarksResult GetFaceLandmarksDetection( + Stream image, Stream roi, + Stream> image_size, + const face_landmarker::proto::FaceLandmarksDetectorGraphOptions& + face_landmarks_detector_graph_options, + const HolisticFaceTrackingRequest& request, Graph& graph) { + FaceLandmarksResult result; + auto& face_landmarks_detector_graph = graph.AddNode( + "mediapipe.tasks.vision.face_landmarker." + "SingleFaceLandmarksDetectorGraph"); + face_landmarks_detector_graph + .GetOptions() = + face_landmarks_detector_graph_options; + image >> face_landmarks_detector_graph.In("IMAGE"); + roi >> face_landmarks_detector_graph.In("NORM_RECT"); + auto landmarks = face_landmarks_detector_graph.Out("NORM_LANDMARKS") + .Cast(); + result.landmarks = landmarks; + if (request.classifications) { + auto& blendshapes_graph = graph.AddNode( + "mediapipe.tasks.vision.face_landmarker.FaceBlendshapesGraph"); + blendshapes_graph + .GetOptions() = + face_landmarks_detector_graph_options.face_blendshapes_graph_options(); + landmarks >> blendshapes_graph.In("LANDMARKS"); + image_size >> blendshapes_graph.In("IMAGE_SIZE"); + result.classifications = + blendshapes_graph.Out("BLENDSHAPES").Cast(); + } + return result; +} + +} // namespace + +absl::StatusOr TrackHolisticFace( + Stream image, Stream pose_face_landmarks, + const face_detector::proto::FaceDetectorGraphOptions& + face_detector_graph_options, + const face_landmarker::proto::FaceLandmarksDetectorGraphOptions& + face_landmarks_detector_graph_options, + const HolisticFaceTrackingRequest& request, Graph& graph) { + MP_RETURN_IF_ERROR(ValidateGraphOptions(face_detector_graph_options, + face_landmarks_detector_graph_options, + request)); + + // Extracts image size from the input images. + Stream> image_size = GetImageSize(image, graph); + + // Gets face ROI from pose face landmarks. + Stream roi_from_pose = + GetFaceRoiFromPoseFaceLandmarks(pose_face_landmarks, image_size, graph); + + // Detects faces within ROI of pose face. + Stream> face_detections = GetFaceDetections( + image, roi_from_pose, face_detector_graph_options, graph); + + // Gets face ROI from face detector. + Stream roi_from_detection = + GetFaceRoiFromFaceDetections(face_detections, image_size, graph); + + // Loop for previous frame landmarks. + auto [prev_landmarks, set_prev_landmarks_fn] = + GetLoopbackData(/*tick=*/image_size, graph); + + // Tracks face ROI. + auto tracking_roi = + TrackFaceRoi(prev_landmarks, roi_from_detection, image_size, graph); + + // Predicts face landmarks. 
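+  // Landmarks are detected within the tracked RoI; blendshape classifications
+  // are only produced when request.classifications is set (see
+  // GetFaceLandmarksDetection above).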
+ auto landmarks_detection_result = GetFaceLandmarksDetection( + image, tracking_roi, image_size, face_landmarks_detector_graph_options, + request, graph); + + // Sets previous landmarks for ROI tracking. + set_prev_landmarks_fn(landmarks_detection_result.landmarks.value()); + + return {{.landmarks = landmarks_detection_result.landmarks, + .classifications = landmarks_detection_result.classifications, + .debug_output = { + .roi_from_pose = roi_from_pose, + .roi_from_detection = roi_from_detection, + .tracking_roi = tracking_roi, + }}}; +} + +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.h b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.h new file mode 100644 index 000000000..835767ebc --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.h @@ -0,0 +1,89 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_FACE_TRACKING_H_ +#define MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_FACE_TRACKING_H_ + +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +struct HolisticFaceTrackingRequest { + bool classifications = false; +}; + +struct HolisticFaceTrackingOutput { + std::optional> + landmarks; + std::optional> + classifications; + + struct DebugOutput { + api2::builder::Stream roi_from_pose; + api2::builder::Stream roi_from_detection; + api2::builder::Stream tracking_roi; + }; + + DebugOutput debug_output; +}; + +// Updates @graph to track a single face in @image based on pose landmarks. +// +// To track single face this subgraph uses pose face landmarks to obtain +// approximate face location, refines it with face detector model and then runs +// face landmarks model. It can also reuse face ROI from the previous frame if +// face hasn't moved too much. +// +// @image - Image to track a single face in. +// @pose_face_landmarks - Pose face landmarks to derive initial face location +// from. +// @face_detector_graph_options - face detector graph options used to detect the +// face within the RoI constructed from the pose face landmarks. 
+// @face_landmarks_detector_graph_options - face landmarks detector graph
+//   options used to detect face landmarks within the RoI given by the face
+//   detector graph.
+// @request - object to request specific face tracking outputs.
+// NOTE: Outputs that were not requested won't be returned and corresponding
+// parts of the graph won't be generated.
+// @graph - graph to update.
+absl::StatusOr<HolisticFaceTrackingOutput> TrackHolisticFace(
+    api2::builder::Stream<Image> image,
+    api2::builder::Stream<mediapipe::NormalizedLandmarkList>
+        pose_face_landmarks,
+    const face_detector::proto::FaceDetectorGraphOptions&
+        face_detector_graph_options,
+    const face_landmarker::proto::FaceLandmarksDetectorGraphOptions&
+        face_landmarks_detector_graph_options,
+    const HolisticFaceTrackingRequest& request,
+    mediapipe::api2::builder::Graph& graph);
+
+}  // namespace holistic_landmarker
+}  // namespace vision
+}  // namespace tasks
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_FACE_TRACKING_H_
diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking_test.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking_test.cc
new file mode 100644
index 000000000..314c330b3
--- /dev/null
+++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking_test.cc
@@ -0,0 +1,227 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.h" + +#include +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h" +#include "mediapipe/calculators/util/rect_to_render_data_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/image_size.h" +#include "mediapipe/framework/api2/stream/split.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/port/status_matchers.h" +#include "mediapipe/framework/tool/test_util.h" +#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h" +#include "mediapipe/tasks/cc/core/model_asset_bundle_resources.h" +#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" +#include "mediapipe/tasks/cc/core/proto/external_file.pb.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_connections.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.pb.h" +#include "mediapipe/tasks/cc/vision/utils/data_renderer.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +namespace { + +using ::mediapipe::Image; +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::SplitToRanges; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::core::ModelAssetBundleResources; +using ::mediapipe::tasks::core::TaskRunner; +using ::mediapipe::tasks::core::proto::ExternalFile; +using ::testing::proto::Approximately; +using ::testing::proto::Partially; + +constexpr float kAbsMargin = 0.015; +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kTestImageFile[] = "male_full_height_hands.jpg"; +constexpr char kHolisticResultFile[] = + "male_full_height_hands_result_cpu.pbtxt"; +constexpr char kImageInStream[] = "image_in"; +constexpr char kPoseLandmarksInStream[] = "pose_landmarks_in"; +constexpr char kFaceLandmarksOutStream[] = "face_landmarks_out"; +constexpr char kRenderedImageOutStream[] = "rendered_image_out"; +constexpr char kFaceDetectorTFLiteName[] = "face_detector.tflite"; +constexpr char kFaceLandmarksDetectorTFLiteName[] = + "face_landmarks_detector.tflite"; + +std::string GetFilePath(absl::string_view filename) { + return file::JoinPath("./", kTestDataDirectory, filename); +} + +mediapipe::LandmarksToRenderDataCalculatorOptions GetFaceRendererOptions() { + 
mediapipe::LandmarksToRenderDataCalculatorOptions render_options; + for (const auto& connection : + face_landmarker::FaceLandmarksConnections::kFaceLandmarksConnectors) { + render_options.add_landmark_connections(connection[0]); + render_options.add_landmark_connections(connection[1]); + } + render_options.mutable_landmark_color()->set_r(255); + render_options.mutable_landmark_color()->set_g(255); + render_options.mutable_landmark_color()->set_b(255); + render_options.mutable_connection_color()->set_r(255); + render_options.mutable_connection_color()->set_g(255); + render_options.mutable_connection_color()->set_b(255); + render_options.set_thickness(0.5); + render_options.set_visualize_landmark_depth(false); + return render_options; +} + +absl::StatusOr> +CreateModelAssetBundleResources(const std::string& model_asset_filename) { + auto external_model_bundle = std::make_unique(); + external_model_bundle->set_file_name(model_asset_filename); + return ModelAssetBundleResources::Create("", + std::move(external_model_bundle)); +} + +// Helper function to create a TaskRunner. +absl::StatusOr> CreateTaskRunner() { + Graph graph; + Stream image = graph.In("IMAGE").Cast().SetName(kImageInStream); + Stream pose_landmarks = + graph.In("POSE_LANDMARKS") + .Cast() + .SetName(kPoseLandmarksInStream); + Stream face_landmarks_from_pose = + SplitToRanges(pose_landmarks, {{0, 11}}, graph)[0]; + // Create face landmarker model bundle. + MP_ASSIGN_OR_RETURN( + auto model_bundle, + CreateModelAssetBundleResources(GetFilePath("face_landmarker_v2.task"))); + face_detector::proto::FaceDetectorGraphOptions detector_options; + face_landmarker::proto::FaceLandmarksDetectorGraphOptions + landmarks_detector_options; + + // Set face detection model. + MP_ASSIGN_OR_RETURN(auto face_detector_model_file, + model_bundle->GetFile(kFaceDetectorTFLiteName)); + core::proto::FilePointerMeta face_detection_file_pointer; + face_detection_file_pointer.set_pointer( + reinterpret_cast(face_detector_model_file.data())); + face_detection_file_pointer.set_length(face_detector_model_file.size()); + detector_options.mutable_base_options() + ->mutable_model_asset() + ->mutable_file_pointer_meta() + ->Swap(&face_detection_file_pointer); + detector_options.set_num_faces(1); + + // Set face landmarks model. + MP_ASSIGN_OR_RETURN(auto face_landmarks_model_file, + model_bundle->GetFile(kFaceLandmarksDetectorTFLiteName)); + core::proto::FilePointerMeta face_landmarks_detector_file_pointer; + face_landmarks_detector_file_pointer.set_pointer( + reinterpret_cast(face_landmarks_model_file.data())); + face_landmarks_detector_file_pointer.set_length( + face_landmarks_model_file.size()); + landmarks_detector_options.mutable_base_options() + ->mutable_model_asset() + ->mutable_file_pointer_meta() + ->Swap(&face_landmarks_detector_file_pointer); + + // Track holistic face. 
+ HolisticFaceTrackingRequest request; + MP_ASSIGN_OR_RETURN( + HolisticFaceTrackingOutput result, + TrackHolisticFace(image, face_landmarks_from_pose, detector_options, + landmarks_detector_options, request, graph)); + auto face_landmarks = + result.landmarks.value().SetName(kFaceLandmarksOutStream); + + auto image_size = GetImageSize(image, graph); + auto render_scale = utils::GetRenderScale( + image_size, result.debug_output.roi_from_pose, 0.0001, graph); + + auto face_landmarks_render_data = utils::RenderLandmarks( + face_landmarks, render_scale, GetFaceRendererOptions(), graph); + std::vector> render_list = { + face_landmarks_render_data}; + + auto rendered_image = + utils::Render( + image, absl::Span>(render_list), graph) + .SetName(kRenderedImageOutStream); + face_landmarks >> graph.Out("FACE_LANDMARKS"); + rendered_image >> graph.Out("RENDERED_IMAGE"); + + auto config = graph.GetConfig(); + core::FixGraphBackEdges(config); + return TaskRunner::Create( + config, std::make_unique()); +} + +class HolisticFaceTrackingTest : public ::testing::Test {}; + +TEST_F(HolisticFaceTrackingTest, SmokeTest) { + MP_ASSERT_OK_AND_ASSIGN(Image image, + DecodeImageFromFile(GetFilePath(kTestImageFile))); + + proto::HolisticResult holistic_result; + MP_ASSERT_OK(GetTextProto(GetFilePath(kHolisticResultFile), &holistic_result, + ::file::Defaults())); + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner()); + MP_ASSERT_OK_AND_ASSIGN( + auto output_packets, + task_runner->Process( + {{kImageInStream, MakePacket(image)}, + {kPoseLandmarksInStream, MakePacket( + holistic_result.pose_landmarks())}})); + ASSERT_TRUE(output_packets.find(kFaceLandmarksOutStream) != + output_packets.end()); + auto face_landmarks = output_packets.find(kFaceLandmarksOutStream) + ->second.Get(); + EXPECT_THAT( + face_landmarks, + Approximately(Partially(EqualsProto(holistic_result.face_landmarks())), + /*margin=*/kAbsMargin)); + auto rendered_image = output_packets.at(kRenderedImageOutStream).Get(); + MP_EXPECT_OK(SavePngTestOutput(*rendered_image.GetImageFrameSharedPtr(), + "holistic_face_landmarks")); +} + +} // namespace +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.cc new file mode 100644 index 000000000..2c57aa059 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.cc @@ -0,0 +1,272 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.h" + +#include +#include +#include + +#include "absl/status/statusor.h" +#include "mediapipe/calculators/util/align_hand_to_pose_in_world_calculator.h" +#include "mediapipe/calculators/util/align_hand_to_pose_in_world_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/image_size.h" +#include "mediapipe/framework/api2/stream/landmarks_to_detection.h" +#include "mediapipe/framework/api2/stream/loopback.h" +#include "mediapipe/framework/api2/stream/rect_transformation.h" +#include "mediapipe/framework/api2/stream/split.h" +#include "mediapipe/framework/api2/stream/threshold.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.pb.h" +#include "mediapipe/tasks/cc/components/utils/gate.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +namespace { + +using ::mediapipe::NormalizedRect; +using ::mediapipe::api2::AlignHandToPoseInWorldCalculator; +using ::mediapipe::api2::builder::ConvertLandmarksToDetection; +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::GetLoopbackData; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::IsOverThreshold; +using ::mediapipe::api2::builder::ScaleAndShiftAndMakeSquareLong; +using ::mediapipe::api2::builder::SplitAndCombine; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::components::utils::AllowIf; + +struct HandLandmarksResult { + std::optional> landmarks; + std::optional> world_landmarks; +}; + +Stream AlignHandToPoseInWorldCalculator( + Stream hand_world_landmarks, + Stream pose_world_landmarks, int pose_wrist_idx, + Graph& graph) { + auto& node = graph.AddNode("AlignHandToPoseInWorldCalculator"); + auto& opts = node.GetOptions(); + opts.set_hand_wrist_idx(0); + opts.set_pose_wrist_idx(pose_wrist_idx); + hand_world_landmarks.ConnectTo( + node[AlignHandToPoseInWorldCalculator::kInHandLandmarks]); + pose_world_landmarks.ConnectTo( + node[AlignHandToPoseInWorldCalculator::kInPoseLandmarks]); + return node[AlignHandToPoseInWorldCalculator::kOutHandLandmarks]; +} + +Stream GetPosePalmVisibility( + Stream pose_palm_landmarks, Graph& graph) { + // Get wrist landmark. + auto pose_wrist = SplitAndCombine(pose_palm_landmarks, {0}, graph); + + // Get visibility score. + auto& score_node = graph.AddNode("LandmarkVisibilityCalculator"); + pose_wrist.ConnectTo(score_node.In("NORM_LANDMARKS")); + Stream score = score_node.Out("VISIBILITY").Cast(); + + // Convert score into flag. + return IsOverThreshold(score, /*threshold=*/0.1, graph); +} + +Stream GetHandRoiFromPosePalmLandmarks( + Stream pose_palm_landmarks, + Stream> image_size, Graph& graph) { + // Convert pose palm landmarks to detection. + auto detection = ConvertLandmarksToDetection(pose_palm_landmarks, graph); + + // Convert detection to rect. 
+ auto& rect_node = graph.AddNode("HandDetectionsFromPoseToRectsCalculator"); + detection.ConnectTo(rect_node.In("DETECTION")); + image_size.ConnectTo(rect_node.In("IMAGE_SIZE")); + Stream rect = + rect_node.Out("NORM_RECT").Cast(); + + return ScaleAndShiftAndMakeSquareLong(rect, image_size, + /*scale_x_factor=*/2.7, + /*scale_y_factor=*/2.7, /*shift_x=*/0, + /*shift_y=*/-0.1, graph); +} + +absl::StatusOr> RefineHandRoi( + Stream image, Stream roi, + const hand_landmarker::proto::HandRoiRefinementGraphOptions& + hand_roi_refinenement_graph_options, + Graph& graph) { + auto& hand_roi_refinement = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.HandRoiRefinementGraph"); + hand_roi_refinement + .GetOptions() = + hand_roi_refinenement_graph_options; + image >> hand_roi_refinement.In("IMAGE"); + roi >> hand_roi_refinement.In("NORM_RECT"); + return hand_roi_refinement.Out("NORM_RECT").Cast(); +} + +Stream TrackHandRoi( + Stream prev_landmarks, Stream roi, + Stream> image_size, Graph& graph) { + // Convert hand landmarks to tight rect. + auto& prev_rect_node = graph.AddNode("HandLandmarksToRectCalculator"); + prev_landmarks.ConnectTo(prev_rect_node.In("NORM_LANDMARKS")); + image_size.ConnectTo(prev_rect_node.In("IMAGE_SIZE")); + Stream prev_rect = + prev_rect_node.Out("NORM_RECT").Cast(); + + // Convert tight hand rect to hand roi. + Stream prev_roi = + ScaleAndShiftAndMakeSquareLong(prev_rect, image_size, + /*scale_x_factor=*/2.0, + /*scale_y_factor=*/2.0, /*shift_x=*/0, + /*shift_y=*/-0.1, graph); + + auto& tracking_node = graph.AddNode("RoiTrackingCalculator"); + auto& tracking_node_opts = + tracking_node.GetOptions(); + auto* rect_requirements = tracking_node_opts.mutable_rect_requirements(); + rect_requirements->set_rotation_degrees(40.0); + rect_requirements->set_translation(0.2); + rect_requirements->set_scale(0.4); + auto* landmarks_requirements = + tracking_node_opts.mutable_landmarks_requirements(); + landmarks_requirements->set_recrop_rect_margin(-0.1); + prev_landmarks.ConnectTo(tracking_node.In("PREV_LANDMARKS")); + prev_roi.ConnectTo(tracking_node.In("PREV_LANDMARKS_RECT")); + roi.ConnectTo(tracking_node.In("RECROP_RECT")); + image_size.ConnectTo(tracking_node.In("IMAGE_SIZE")); + return tracking_node.Out("TRACKING_RECT").Cast(); +} + +HandLandmarksResult GetHandLandmarksDetection( + Stream image, Stream roi, + const hand_landmarker::proto::HandLandmarksDetectorGraphOptions& + hand_landmarks_detector_graph_options, + const HolisticHandTrackingRequest& request, Graph& graph) { + HandLandmarksResult result; + auto& hand_landmarks_detector_graph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker." 
+ "SingleHandLandmarksDetectorGraph"); + hand_landmarks_detector_graph + .GetOptions() = + hand_landmarks_detector_graph_options; + + image >> hand_landmarks_detector_graph.In("IMAGE"); + roi >> hand_landmarks_detector_graph.In("HAND_RECT"); + + if (request.landmarks) { + result.landmarks = hand_landmarks_detector_graph.Out("LANDMARKS") + .Cast(); + } + if (request.world_landmarks) { + result.world_landmarks = + hand_landmarks_detector_graph.Out("WORLD_LANDMARKS") + .Cast(); + } + return result; +} + +} // namespace + +absl::StatusOr TrackHolisticHand( + Stream image, Stream pose_landmarks, + Stream pose_world_landmarks, + const hand_landmarker::proto::HandLandmarksDetectorGraphOptions& + hand_landmarks_detector_graph_options, + const hand_landmarker::proto::HandRoiRefinementGraphOptions& + hand_roi_refinement_graph_options, + const PoseIndices& pose_indices, const HolisticHandTrackingRequest& request, + Graph& graph) { + // Extracts pose palm landmarks. + Stream pose_palm_landmarks = SplitAndCombine( + pose_landmarks, + {pose_indices.wrist_idx, pose_indices.pinky_idx, pose_indices.index_idx}, + graph); + + // Get pose palm visibility. + Stream is_pose_palm_visible = + GetPosePalmVisibility(pose_palm_landmarks, graph); + + // Drop pose palm landmarks if pose palm is invisible. + pose_palm_landmarks = + AllowIf(pose_palm_landmarks, is_pose_palm_visible, graph); + + // Extracts image size from the input images. + Stream> image_size = GetImageSize(image, graph); + + // Get hand ROI from pose palm landmarks. + Stream roi_from_pose = + GetHandRoiFromPosePalmLandmarks(pose_palm_landmarks, image_size, graph); + + // Refine hand ROI with re-crop model. + MP_ASSIGN_OR_RETURN(Stream roi_from_recrop, + RefineHandRoi(image, roi_from_pose, + hand_roi_refinement_graph_options, graph)); + + // Loop for previous frame landmarks. + auto [prev_landmarks, set_prev_landmarks_fn] = + GetLoopbackData(/*tick=*/image_size, graph); + + // Track hand ROI. + auto tracking_roi = + TrackHandRoi(prev_landmarks, roi_from_recrop, image_size, graph); + + // Predict hand landmarks. + auto landmarks_detection_result = GetHandLandmarksDetection( + image, tracking_roi, hand_landmarks_detector_graph_options, request, + graph); + + // Set previous landmarks for ROI tracking. + set_prev_landmarks_fn(landmarks_detection_result.landmarks.value()); + + // Output landmarks. + std::optional> hand_landmarks; + if (request.landmarks) { + hand_landmarks = landmarks_detection_result.landmarks; + } + + // Output world landmarks. + std::optional> hand_world_landmarks; + if (request.world_landmarks) { + hand_world_landmarks = landmarks_detection_result.world_landmarks; + + // Align hand world landmarks with pose world landmarks. 
+ hand_world_landmarks = AlignHandToPoseInWorldCalculator( + hand_world_landmarks.value(), pose_world_landmarks, + pose_indices.wrist_idx, graph); + } + + return {{.landmarks = hand_landmarks, + .world_landmarks = hand_world_landmarks, + .debug_output = { + .roi_from_pose = roi_from_pose, + .roi_from_recrop = roi_from_recrop, + .tracking_roi = tracking_roi, + }}}; +} + +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.h b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.h new file mode 100644 index 000000000..463f4979b --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.h @@ -0,0 +1,94 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_HAND_TRACKING_H_ +#define MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_HAND_TRACKING_H_ + +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +struct PoseIndices { + int wrist_idx; + int pinky_idx; + int index_idx; +}; + +struct HolisticHandTrackingRequest { + bool landmarks = false; + bool world_landmarks = false; +}; + +struct HolisticHandTrackingOutput { + std::optional> + landmarks; + std::optional> world_landmarks; + + struct DebugOutput { + api2::builder::Stream roi_from_pose; + api2::builder::Stream roi_from_recrop; + api2::builder::Stream tracking_roi; + }; + + DebugOutput debug_output; +}; + +// Updates @graph to track a single hand in @image based on pose landmarks. +// +// To track single hand this subgraph uses pose palm landmarks to obtain +// approximate hand location, refines it with re-crop model and then runs hand +// landmarks model. It can also reuse hand ROI from the previous frame if hand +// hasn't moved too much. +// +// @image - ImageFrame/GpuBuffer to track a single hand in. +// @pose_landmarks - Pose landmarks to derive initial hand location from. +// @pose_world_landmarks - Pose world landmarks to align hand world landmarks +// wrist with. +// @ hand_landmarks_detector_graph_options - Options of the +// HandLandmarksDetectorGraph used to detect the hand landmarks. +// @ hand_roi_refinement_graph_options - Options of HandRoiRefinementGraph used +// to refine the hand RoIs got from Pose landmarks. +// @request - object to request specific hand tracking outputs. 
+// NOTE: Outputs that were not requested won't be returned and corresponding
+// parts of the graph won't be generated.
+// @graph - graph to update.
+absl::StatusOr<HolisticHandTrackingOutput> TrackHolisticHand(
+    api2::builder::Stream<Image> image,
+    api2::builder::Stream<mediapipe::NormalizedLandmarkList> pose_landmarks,
+    api2::builder::Stream<mediapipe::LandmarkList> pose_world_landmarks,
+    const hand_landmarker::proto::HandLandmarksDetectorGraphOptions&
+        hand_landmarks_detector_graph_options,
+    const hand_landmarker::proto::HandRoiRefinementGraphOptions&
+        hand_roi_refinement_graph_options,
+    const PoseIndices& pose_indices, const HolisticHandTrackingRequest& request,
+    mediapipe::api2::builder::Graph& graph);
+
+}  // namespace holistic_landmarker
+}  // namespace vision
+}  // namespace tasks
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_HAND_TRACKING_H_
diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking_test.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking_test.cc
new file mode 100644
index 000000000..4ae4a37ed
--- /dev/null
+++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking_test.cc
@@ -0,0 +1,303 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.h"
+
+#include
+#include
+#include
+
+#include "absl/flags/flag.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/substitute.h"
+#include "absl/types/span.h"
+#include "file/base/helpers.h"
+#include "file/base/options.h"
+#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h"
+#include "mediapipe/framework/api2/builder.h"
+#include "mediapipe/framework/api2/stream/image_size.h"
+#include "mediapipe/framework/calculator.pb.h"
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/formats/image.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/packet.h"
+#include "mediapipe/framework/port/file_helpers.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/status_macros.h"
+#include "mediapipe/framework/tool/test_util.h"
+#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h"
+#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
+#include "mediapipe/tasks/cc/core/task_runner.h"
+#include "mediapipe/tasks/cc/core/utils.h"
+#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_connections.h"
+#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.pb.h"
+#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_topology.h"
+#include
"mediapipe/tasks/cc/vision/utils/data_renderer.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +namespace { + +using ::file::Defaults; +using ::file::GetTextProto; +using ::mediapipe::Image; +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::core::TaskRunner; +using ::testing::proto::Approximately; +using ::testing::proto::Partially; + +constexpr float kAbsMargin = 0.018; +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kHolisticHandTrackingLeft[] = + "holistic_hand_tracking_left_hand_graph.pbtxt"; +constexpr char kTestImageFile[] = "male_full_height_hands.jpg"; +constexpr char kHolisticResultFile[] = + "male_full_height_hands_result_cpu.pbtxt"; +constexpr char kImageInStream[] = "image_in"; +constexpr char kPoseLandmarksInStream[] = "pose_landmarks_in"; +constexpr char kPoseWorldLandmarksInStream[] = "pose_world_landmarks_in"; +constexpr char kLeftHandLandmarksOutStream[] = "left_hand_landmarks_out"; +constexpr char kLeftHandWorldLandmarksOutStream[] = + "left_hand_world_landmarks_out"; +constexpr char kRightHandLandmarksOutStream[] = "right_hand_landmarks_out"; +constexpr char kRenderedImageOutStream[] = "rendered_image_out"; +constexpr char kHandLandmarksModelFile[] = "hand_landmark_full.tflite"; +constexpr char kHandRoiRefinementModelFile[] = + "handrecrop_2020_07_21_v0.f16.tflite"; + +std::string GetFilePath(const std::string& filename) { + return file::JoinPath("./", kTestDataDirectory, filename); +} + +mediapipe::LandmarksToRenderDataCalculatorOptions GetHandRendererOptions() { + mediapipe::LandmarksToRenderDataCalculatorOptions renderer_options; + for (const auto& connection : hand_landmarker::kHandConnections) { + renderer_options.add_landmark_connections(connection[0]); + renderer_options.add_landmark_connections(connection[1]); + } + renderer_options.mutable_landmark_color()->set_r(255); + renderer_options.mutable_landmark_color()->set_g(255); + renderer_options.mutable_landmark_color()->set_b(255); + renderer_options.mutable_connection_color()->set_r(255); + renderer_options.mutable_connection_color()->set_g(255); + renderer_options.mutable_connection_color()->set_b(255); + renderer_options.set_thickness(0.5); + renderer_options.set_visualize_landmark_depth(false); + return renderer_options; +} + +void ConfigHandTrackingModelsOptions( + hand_landmarker::proto::HandLandmarksDetectorGraphOptions& + hand_landmarks_detector_graph_options, + hand_landmarker::proto::HandRoiRefinementGraphOptions& + hand_roi_refinement_options) { + hand_landmarks_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kHandLandmarksModelFile)); + + hand_roi_refinement_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kHandRoiRefinementModelFile)); +} + +// Helper function to create a TaskRunner. 
+absl::StatusOr> CreateTaskRunner() { + Graph graph; + Stream image = graph.In("IMAGE").Cast().SetName(kImageInStream); + Stream pose_landmarks = + graph.In("POSE_LANDMARKS") + .Cast() + .SetName(kPoseLandmarksInStream); + Stream pose_world_landmarks = + graph.In("POSE_WORLD_LANDMARKS") + .Cast() + .SetName(kPoseWorldLandmarksInStream); + hand_landmarker::proto::HandLandmarksDetectorGraphOptions + hand_landmarks_detector_options; + hand_landmarker::proto::HandRoiRefinementGraphOptions + hand_roi_refinement_options; + ConfigHandTrackingModelsOptions(hand_landmarks_detector_options, + hand_roi_refinement_options); + HolisticHandTrackingRequest request; + request.landmarks = true; + MP_ASSIGN_OR_RETURN( + HolisticHandTrackingOutput left_hand_result, + TrackHolisticHand( + image, pose_landmarks, pose_world_landmarks, + hand_landmarks_detector_options, hand_roi_refinement_options, + PoseIndices{ + /*wrist_idx=*/static_cast( + pose_landmarker::PoseLandmarkName::kLeftWrist), + /*pinky_idx=*/ + static_cast(pose_landmarker::PoseLandmarkName::kLeftPinky1), + /*index_idx=*/ + static_cast(pose_landmarker::PoseLandmarkName::kLeftIndex1)}, + request, graph)); + MP_ASSIGN_OR_RETURN( + HolisticHandTrackingOutput right_hand_result, + TrackHolisticHand( + image, pose_landmarks, pose_world_landmarks, + hand_landmarks_detector_options, hand_roi_refinement_options, + PoseIndices{ + /*wrist_idx=*/static_cast( + pose_landmarker::PoseLandmarkName::kRightWrist), + /*pinky_idx=*/ + static_cast(pose_landmarker::PoseLandmarkName::kRightPinky1), + /*index_idx=*/ + static_cast( + pose_landmarker::PoseLandmarkName::kRightIndex1)}, + request, graph)); + + auto image_size = GetImageSize(image, graph); + auto left_hand_landmarks_render_data = utils::RenderLandmarks( + *left_hand_result.landmarks, + utils::GetRenderScale(image_size, + left_hand_result.debug_output.roi_from_pose, 0.0001, + graph), + GetHandRendererOptions(), graph); + auto right_hand_landmarks_render_data = utils::RenderLandmarks( + *right_hand_result.landmarks, + utils::GetRenderScale(image_size, + right_hand_result.debug_output.roi_from_pose, + 0.0001, graph), + GetHandRendererOptions(), graph); + std::vector> render_list = { + left_hand_landmarks_render_data, right_hand_landmarks_render_data}; + auto rendered_image = + utils::Render( + image, absl::Span>(render_list), graph) + .SetName(kRenderedImageOutStream); + left_hand_result.landmarks->SetName(kLeftHandLandmarksOutStream) >> + graph.Out("LEFT_HAND_LANDMARKS"); + right_hand_result.landmarks->SetName(kRightHandLandmarksOutStream) >> + graph.Out("RIGHT_HAND_LANDMARKS"); + rendered_image >> graph.Out("RENDERED_IMAGE"); + + auto config = graph.GetConfig(); + core::FixGraphBackEdges(config); + + return TaskRunner::Create( + config, std::make_unique()); +} + +class HolisticHandTrackingTest : public ::testing::Test {}; + +TEST_F(HolisticHandTrackingTest, VerifyGraph) { + Graph graph; + Stream image = graph.In("IMAGE").Cast().SetName(kImageInStream); + Stream pose_landmarks = + graph.In("POSE_LANDMARKS") + .Cast() + .SetName(kPoseLandmarksInStream); + Stream pose_world_landmarks = + graph.In("POSE_WORLD_LANDMARKS") + .Cast() + .SetName(kPoseWorldLandmarksInStream); + hand_landmarker::proto::HandLandmarksDetectorGraphOptions + hand_landmarks_detector_options; + hand_landmarker::proto::HandRoiRefinementGraphOptions + hand_roi_refinement_options; + ConfigHandTrackingModelsOptions(hand_landmarks_detector_options, + hand_roi_refinement_options); + HolisticHandTrackingRequest request; + request.landmarks = true; + 
request.world_landmarks = true; + MP_ASSERT_OK_AND_ASSIGN( + HolisticHandTrackingOutput left_hand_result, + TrackHolisticHand( + image, pose_landmarks, pose_world_landmarks, + hand_landmarks_detector_options, hand_roi_refinement_options, + PoseIndices{ + /*wrist_idx=*/static_cast( + pose_landmarker::PoseLandmarkName::kLeftWrist), + /*pinky_idx=*/ + static_cast(pose_landmarker::PoseLandmarkName::kLeftPinky1), + /*index_idx=*/ + static_cast(pose_landmarker::PoseLandmarkName::kLeftIndex1)}, + request, graph)); + left_hand_result.landmarks->SetName(kLeftHandLandmarksOutStream) >> + graph.Out("LEFT_HAND_LANDMARKS"); + left_hand_result.world_landmarks->SetName(kLeftHandWorldLandmarksOutStream) >> + graph.Out("LEFT_HAND_WORLD_LANDMARKS"); + + // Read the expected graph config. + std::string expected_graph_contents; + MP_ASSERT_OK(file::GetContents( + file::JoinPath("./", kTestDataDirectory, kHolisticHandTrackingLeft), + &expected_graph_contents)); + + // Need to replace the expected graph config with the test srcdir, because + // each run has different test dir on TAP. + expected_graph_contents = absl::Substitute( + expected_graph_contents, FLAGS_test_srcdir, FLAGS_test_srcdir); + CalculatorGraphConfig expected_graph = + ParseTextProtoOrDie(expected_graph_contents); + + EXPECT_THAT(graph.GetConfig(), testing::proto::IgnoringRepeatedFieldOrdering( + testing::EqualsProto(expected_graph))); +} + +TEST_F(HolisticHandTrackingTest, SmokeTest) { + MP_ASSERT_OK_AND_ASSIGN(Image image, + DecodeImageFromFile(GetFilePath(kTestImageFile))); + + proto::HolisticResult holistic_result; + MP_ASSERT_OK(GetTextProto(GetFilePath(kHolisticResultFile), &holistic_result, + Defaults())); + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner()); + MP_ASSERT_OK_AND_ASSIGN( + auto output_packets, + task_runner->Process( + {{kImageInStream, MakePacket(image)}, + {kPoseLandmarksInStream, MakePacket( + holistic_result.pose_landmarks())}, + {kPoseWorldLandmarksInStream, + MakePacket( + holistic_result.pose_world_landmarks())}})); + auto left_hand_landmarks = output_packets.at(kLeftHandLandmarksOutStream) + .Get(); + auto right_hand_landmarks = output_packets.at(kRightHandLandmarksOutStream) + .Get(); + EXPECT_THAT(left_hand_landmarks, + Approximately( + Partially(EqualsProto(holistic_result.left_hand_landmarks())), + /*margin=*/kAbsMargin)); + EXPECT_THAT( + right_hand_landmarks, + Approximately( + Partially(EqualsProto(holistic_result.right_hand_landmarks())), + /*margin=*/kAbsMargin)); + auto rendered_image = output_packets.at(kRenderedImageOutStream).Get(); + MP_EXPECT_OK(SavePngTestOutput(*rendered_image.GetImageFrameSharedPtr(), + "holistic_hand_landmarks")); +} + +} // namespace +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_landmarker_graph.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_landmarker_graph.cc new file mode 100644 index 000000000..2de358a6c --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_landmarker_graph.cc @@ -0,0 +1,521 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/split.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/tasks/cc/core/model_asset_bundle_resources.h" +#include "mediapipe/tasks/cc/core/model_resources_cache.h" +#include "mediapipe/tasks/cc/core/model_task_graph.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/metadata/utils/zip_utils.h" +#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_face_tracking.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_hand_tracking.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_topology.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.pb.h" +#include "mediapipe/util/graph_builder_utils.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { +namespace { + +using ::mediapipe::api2::Output; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::metadata::SetExternalFile; + +constexpr absl::string_view kHandLandmarksDetectorModelName = + "hand_landmarks_detector.tflite"; +constexpr absl::string_view kHandRoiRefinementModelName = + "hand_roi_refinement.tflite"; +constexpr absl::string_view kFaceDetectorModelName = "face_detector.tflite"; +constexpr absl::string_view kFaceLandmarksDetectorModelName = + "face_landmarks_detector.tflite"; +constexpr absl::string_view kFaceBlendshapesModelName = + "face_blendshapes.tflite"; +constexpr absl::string_view kPoseDetectorModelName = "pose_detector.tflite"; +constexpr absl::string_view kPoseLandmarksDetectorModelName = + "pose_landmarks_detector.tflite"; + +absl::Status SetGraphPoseOutputs( + const HolisticPoseTrackingRequest& pose_request, + const CalculatorGraphConfig::Node& 
node, + HolisticPoseTrackingOutput& pose_output, Graph& graph) { + // Main outputs. + if (pose_request.landmarks) { + RET_CHECK(pose_output.landmarks.has_value()) + << "POSE_LANDMARKS output is not supported."; + pose_output.landmarks->ConnectTo(graph.Out("POSE_LANDMARKS")); + } + if (pose_request.world_landmarks) { + RET_CHECK(pose_output.world_landmarks.has_value()) + << "POSE_WORLD_LANDMARKS output is not supported."; + pose_output.world_landmarks->ConnectTo(graph.Out("POSE_WORLD_LANDMARKS")); + } + if (pose_request.segmentation_mask) { + RET_CHECK(pose_output.segmentation_mask.has_value()) + << "POSE_SEGMENTATION_MASK output is not supported."; + pose_output.segmentation_mask->ConnectTo( + graph.Out("POSE_SEGMENTATION_MASK")); + } + + // Debug outputs. + if (HasOutput(node, "POSE_AUXILIARY_LANDMARKS")) { + pose_output.debug_output.auxiliary_landmarks.ConnectTo( + graph.Out("POSE_AUXILIARY_LANDMARKS")); + } + if (HasOutput(node, "POSE_LANDMARKS_ROI")) { + pose_output.debug_output.roi_from_landmarks.ConnectTo( + graph.Out("POSE_LANDMARKS_ROI")); + } + + return absl::OkStatus(); +} + +// Sets the base options in the sub tasks. +template +absl::Status SetSubTaskBaseOptions( + const core::ModelAssetBundleResources* resources, + proto::HolisticLandmarkerGraphOptions* options, T* sub_task_options, + absl::string_view model_name, bool is_copy) { + if (!sub_task_options->base_options().has_model_asset()) { + MP_ASSIGN_OR_RETURN(const auto model_file_content, + resources->GetFile(std::string(model_name))); + SetExternalFile( + model_file_content, + sub_task_options->mutable_base_options()->mutable_model_asset(), + is_copy); + } + sub_task_options->mutable_base_options()->mutable_acceleration()->CopyFrom( + options->base_options().acceleration()); + sub_task_options->mutable_base_options()->set_use_stream_mode( + options->base_options().use_stream_mode()); + sub_task_options->mutable_base_options()->set_gpu_origin( + options->base_options().gpu_origin()); + return absl::OkStatus(); +} + +void SetGraphHandOutputs(bool is_left, const CalculatorGraphConfig::Node& node, + HolisticHandTrackingOutput& hand_output, + Graph& graph) { + const std::string hand_side = is_left ? "LEFT" : "RIGHT"; + + if (hand_output.landmarks) { + hand_output.landmarks->ConnectTo(graph.Out(hand_side + "_HAND_LANDMARKS")); + } + if (hand_output.world_landmarks) { + hand_output.world_landmarks->ConnectTo( + graph.Out(hand_side + "_HAND_WORLD_LANDMARKS")); + } + + // Debug outputs. 
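+  // The hand ROI debug streams are surfaced as graph outputs only when the
+  // corresponding output tag is present on the enclosing node.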
+ if (HasOutput(node, hand_side + "_HAND_ROI_FROM_POSE")) { + hand_output.debug_output.roi_from_pose.ConnectTo( + graph.Out(hand_side + "_HAND_ROI_FROM_POSE")); + } + if (HasOutput(node, hand_side + "_HAND_ROI_FROM_RECROP")) { + hand_output.debug_output.roi_from_recrop.ConnectTo( + graph.Out(hand_side + "_HAND_ROI_FROM_RECROP")); + } + if (HasOutput(node, hand_side + "_HAND_TRACKING_ROI")) { + hand_output.debug_output.tracking_roi.ConnectTo( + graph.Out(hand_side + "_HAND_TRACKING_ROI")); + } +} + +void SetGraphFaceOutputs(const CalculatorGraphConfig::Node& node, + HolisticFaceTrackingOutput& face_output, + Graph& graph) { + if (face_output.landmarks) { + face_output.landmarks->ConnectTo(graph.Out("FACE_LANDMARKS")); + } + if (face_output.classifications) { + face_output.classifications->ConnectTo(graph.Out("FACE_BLENDSHAPES")); + } + + // Face detection debug outputs + if (HasOutput(node, "FACE_ROI_FROM_POSE")) { + face_output.debug_output.roi_from_pose.ConnectTo( + graph.Out("FACE_ROI_FROM_POSE")); + } + if (HasOutput(node, "FACE_ROI_FROM_DETECTION")) { + face_output.debug_output.roi_from_detection.ConnectTo( + graph.Out("FACE_ROI_FROM_DETECTION")); + } + if (HasOutput(node, "FACE_TRACKING_ROI")) { + face_output.debug_output.tracking_roi.ConnectTo( + graph.Out("FACE_TRACKING_ROI")); + } +} + +} // namespace + +// Tracks pose and detects hands and face. +// +// NOTE: for GPU works only with image having GpuOrigin::TOP_LEFT +// +// Inputs: +// IMAGE - Image +// Image to perform detection on. +// +// Outputs: +// POSE_LANDMARKS - NormalizedLandmarkList +// 33 landmarks (see pose_landmarker/pose_topology.h) +// 0 - nose +// 1 - left eye (inner) +// 2 - left eye +// 3 - left eye (outer) +// 4 - right eye (inner) +// 5 - right eye +// 6 - right eye (outer) +// 7 - left ear +// 8 - right ear +// 9 - mouth (left) +// 10 - mouth (right) +// 11 - left shoulder +// 12 - right shoulder +// 13 - left elbow +// 14 - right elbow +// 15 - left wrist +// 16 - right wrist +// 17 - left pinky +// 18 - right pinky +// 19 - left index +// 20 - right index +// 21 - left thumb +// 22 - right thumb +// 23 - left hip +// 24 - right hip +// 25 - left knee +// 26 - right knee +// 27 - left ankle +// 28 - right ankle +// 29 - left heel +// 30 - right heel +// 31 - left foot index +// 32 - right foot index +// POSE_WORLD_LANDMARKS - LandmarkList +// World landmarks are real world 3D coordinates with origin in hips center +// and coordinates in meters. To understand the difference: POSE_LANDMARKS +// stream provides coordinates (in pixels) of 3D object projected on a 2D +// surface of the image (check on how perspective projection works), while +// POSE_WORLD_LANDMARKS stream provides coordinates (in meters) of the 3D +// object itself. POSE_WORLD_LANDMARKS has the same landmarks topology, +// visibility and presence as POSE_LANDMARKS. +// POSE_SEGMENTATION_MASK - Image +// Separates person from background. Mask is stored as gray float32 image +// with [0.0, 1.0] range for pixels (1 for person and 0 for background) on +// CPU and, on GPU - RGBA texture with R channel indicating person vs. +// background probability. +// LEFT_HAND_LANDMARKS - NormalizedLandmarkList +// 21 left hand landmarks. +// RIGHT_HAND_LANDMARKS - NormalizedLandmarkList +// 21 right hand landmarks. +// FACE_LANDMARKS - NormalizedLandmarkList +// 468 face landmarks. +// FACE_BLENDSHAPES - ClassificationList +// Supplementary blendshape coefficients that are predicted directly from +// the input image. 
+// LEFT_HAND_WORLD_LANDMARKS - LandmarkList
+//   21 left hand world 3D landmarks.
+//   Hand landmarks are aligned with pose landmarks: translated so that the
+//   wrist from the hand matches the wrist from the pose in the pose
+//   coordinate system.
+// RIGHT_HAND_WORLD_LANDMARKS - LandmarkList
+//   21 right hand world 3D landmarks.
+//   Hand landmarks are aligned with pose landmarks: translated so that the
+//   wrist from the hand matches the wrist from the pose in the pose
+//   coordinate system.
+// IMAGE - Image
+//   The input image that the holistic landmarker runs on, with the pixel
+//   data stored on the target storage (CPU vs. GPU).
+//
+// Debug outputs:
+// POSE_AUXILIARY_LANDMARKS - NormalizedLandmarkList
+//   TODO: Return ROI rather than auxiliary landmarks
+//   Auxiliary landmarks for deriving the ROI in the subsequent image.
+//     0 - hidden center point
+//     1 - hidden scale point
+// POSE_LANDMARKS_ROI - NormalizedRect
+//   Region of interest calculated based on landmarks.
+// LEFT_HAND_ROI_FROM_POSE - NormalizedRect
+// LEFT_HAND_ROI_FROM_RECROP - NormalizedRect
+// LEFT_HAND_TRACKING_ROI - NormalizedRect
+// RIGHT_HAND_ROI_FROM_POSE - NormalizedRect
+// RIGHT_HAND_ROI_FROM_RECROP - NormalizedRect
+// RIGHT_HAND_TRACKING_ROI - NormalizedRect
+// FACE_ROI_FROM_POSE - NormalizedRect
+// FACE_ROI_FROM_DETECTION - NormalizedRect
+// FACE_TRACKING_ROI - NormalizedRect
+//
+// NOTE: failure is reported if some output has been requested but the
+// specified model doesn't support it.
+//
+// NOTE: there will not be an output packet in an output stream for a
+// particular timestamp if nothing is detected. However, the MediaPipe
+// framework will internally inform the downstream calculators of the
+// absence of this packet so that they don't wait for it unnecessarily.
+// +// Example: +// node { +// calculator: +// "mediapipe.tasks.vision.holistic_landmarker.HolisticLandmarkerGraph" +// input_stream: "IMAGE:input_frames_image" +// output_stream: "POSE_LANDMARKS:pose_landmarks" +// output_stream: "POSE_WORLD_LANDMARKS:pose_world_landmarks" +// output_stream: "FACE_LANDMARKS:face_landmarks" +// output_stream: "FACE_BLENDSHAPES:extra_blendshapes" +// output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +// output_stream: "LEFT_HAND_WORLD_LANDMARKS:left_hand_world_landmarks" +// output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +// output_stream: "RIGHT_HAND_WORLD_LANDMARKS:right_hand_world_landmarks" +// node_options { +// [type.googleapis.com/mediapipe.tasks.vision.holistic_landmarker.proto.HolisticLandmarkerGraphOptions] +// { +// base_options { +// model_asset { +// file_name: +// "mediapipe/tasks/testdata/vision/holistic_landmarker.task" +// } +// } +// face_detector_graph_options: { +// num_faces: 1 +// } +// pose_detector_graph_options: { +// num_poses: 1 +// } +// } +// } +// } +class HolisticLandmarkerGraph : public core::ModelTaskGraph { + public: + absl::StatusOr GetConfig( + SubgraphContext* sc) override { + Graph graph; + const auto& holistic_node = sc->OriginalNode(); + proto::HolisticLandmarkerGraphOptions* holistic_options = + sc->MutableOptions(); + const core::ModelAssetBundleResources* model_asset_bundle_resources; + if (holistic_options->base_options().has_model_asset()) { + MP_ASSIGN_OR_RETURN(model_asset_bundle_resources, + CreateModelAssetBundleResources< + proto::HolisticLandmarkerGraphOptions>(sc)); + } + // Copies the file content instead of passing the pointer of file in + // memory if the subgraph model resource service is not available. + bool create_copy = + !sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService) + .IsAvailable(); + + Stream image = graph.In("IMAGE").Cast(); + + // Check whether Hand requested + const bool is_left_hand_requested = + HasOutput(holistic_node, "LEFT_HAND_LANDMARKS"); + const bool is_right_hand_requested = + HasOutput(holistic_node, "RIGHT_HAND_LANDMARKS"); + const bool is_left_hand_world_requested = + HasOutput(holistic_node, "LEFT_HAND_WORLD_LANDMARKS"); + const bool is_right_hand_world_requested = + HasOutput(holistic_node, "RIGHT_HAND_WORLD_LANDMARKS"); + const bool hands_requested = + is_left_hand_requested || is_right_hand_requested || + is_left_hand_world_requested || is_right_hand_world_requested; + if (hands_requested) { + MP_RETURN_IF_ERROR(SetSubTaskBaseOptions( + model_asset_bundle_resources, holistic_options, + holistic_options->mutable_hand_landmarks_detector_graph_options(), + kHandLandmarksDetectorModelName, create_copy)); + MP_RETURN_IF_ERROR(SetSubTaskBaseOptions( + model_asset_bundle_resources, holistic_options, + holistic_options->mutable_hand_roi_refinement_graph_options(), + kHandRoiRefinementModelName, create_copy)); + } + + // Check whether Face requested + const bool is_face_requested = HasOutput(holistic_node, "FACE_LANDMARKS"); + const bool is_face_blendshapes_requested = + HasOutput(holistic_node, "FACE_BLENDSHAPES"); + const bool face_requested = + is_face_requested || is_face_blendshapes_requested; + if (face_requested) { + MP_RETURN_IF_ERROR(SetSubTaskBaseOptions( + model_asset_bundle_resources, holistic_options, + holistic_options->mutable_face_detector_graph_options(), + kFaceDetectorModelName, create_copy)); + // Forcely set num_faces to 1, because holistic landmarker only supports a + // single subject for now. 
+      holistic_options->mutable_face_detector_graph_options()->set_num_faces(1);
+      MP_RETURN_IF_ERROR(SetSubTaskBaseOptions(
+          model_asset_bundle_resources, holistic_options,
+          holistic_options->mutable_face_landmarks_detector_graph_options(),
+          kFaceLandmarksDetectorModelName, create_copy));
+      if (is_face_blendshapes_requested) {
+        MP_RETURN_IF_ERROR(SetSubTaskBaseOptions(
+            model_asset_bundle_resources, holistic_options,
+            holistic_options->mutable_face_landmarks_detector_graph_options()
+                ->mutable_face_blendshapes_graph_options(),
+            kFaceBlendshapesModelName, create_copy));
+      }
+    }
+
+    MP_RETURN_IF_ERROR(SetSubTaskBaseOptions(
+        model_asset_bundle_resources, holistic_options,
+        holistic_options->mutable_pose_detector_graph_options(),
+        kPoseDetectorModelName, create_copy));
+    // Forcibly set num_poses to 1, because the holistic landmarker only
+    // supports a single subject for now.
+    holistic_options->mutable_pose_detector_graph_options()->set_num_poses(1);
+    MP_RETURN_IF_ERROR(SetSubTaskBaseOptions(
+        model_asset_bundle_resources, holistic_options,
+        holistic_options->mutable_pose_landmarks_detector_graph_options(),
+        kPoseLandmarksDetectorModelName, create_copy));
+
+    HolisticPoseTrackingRequest pose_request = {
+        .landmarks = HasOutput(holistic_node, "POSE_LANDMARKS") ||
+                     hands_requested || face_requested,
+        .world_landmarks =
+            HasOutput(holistic_node, "POSE_WORLD_LANDMARKS") || hands_requested,
+        .segmentation_mask =
+            HasOutput(holistic_node, "POSE_SEGMENTATION_MASK")};
+
+    // Detect and track pose.
+    MP_ASSIGN_OR_RETURN(
+        HolisticPoseTrackingOutput pose_output,
+        TrackHolisticPose(
+            image, holistic_options->pose_detector_graph_options(),
+            holistic_options->pose_landmarks_detector_graph_options(),
+            pose_request, graph));
+    MP_RETURN_IF_ERROR(
+        SetGraphPoseOutputs(pose_request, holistic_node, pose_output, graph));
+
+    // Detect and track hand.
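+    // Each requested hand side reuses the pose landmarks computed above: the
+    // PoseIndices below select the wrist/pinky/index pose landmarks for that
+    // side so that TrackHolisticHand can derive the initial hand ROI, and only
+    // the outputs named in HolisticHandTrackingRequest are wired into the
+    // graph.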
+ if (hands_requested) { + if (is_left_hand_requested || is_left_hand_world_requested) { + RET_CHECK(pose_output.landmarks.has_value()); + RET_CHECK(pose_output.world_landmarks.has_value()); + + PoseIndices pose_indices = { + .wrist_idx = + static_cast(pose_landmarker::PoseLandmarkName::kLeftWrist), + .pinky_idx = static_cast( + pose_landmarker::PoseLandmarkName::kLeftPinky1), + .index_idx = static_cast( + pose_landmarker::PoseLandmarkName::kLeftIndex1), + }; + HolisticHandTrackingRequest hand_request = { + .landmarks = is_left_hand_requested, + .world_landmarks = is_left_hand_world_requested, + }; + MP_ASSIGN_OR_RETURN( + HolisticHandTrackingOutput hand_output, + TrackHolisticHand( + image, *pose_output.landmarks, *pose_output.world_landmarks, + holistic_options->hand_landmarks_detector_graph_options(), + holistic_options->hand_roi_refinement_graph_options(), + pose_indices, hand_request, graph + + )); + SetGraphHandOutputs(/*is_left=*/true, holistic_node, hand_output, + graph); + } + + if (is_right_hand_requested || is_right_hand_world_requested) { + RET_CHECK(pose_output.landmarks.has_value()); + RET_CHECK(pose_output.world_landmarks.has_value()); + + PoseIndices pose_indices = { + .wrist_idx = static_cast( + pose_landmarker::PoseLandmarkName::kRightWrist), + .pinky_idx = static_cast( + pose_landmarker::PoseLandmarkName::kRightPinky1), + .index_idx = static_cast( + pose_landmarker::PoseLandmarkName::kRightIndex1), + }; + HolisticHandTrackingRequest hand_request = { + .landmarks = is_right_hand_requested, + .world_landmarks = is_right_hand_world_requested, + }; + MP_ASSIGN_OR_RETURN( + HolisticHandTrackingOutput hand_output, + TrackHolisticHand( + image, *pose_output.landmarks, *pose_output.world_landmarks, + holistic_options->hand_landmarks_detector_graph_options(), + holistic_options->hand_roi_refinement_graph_options(), + pose_indices, hand_request, graph + + )); + SetGraphHandOutputs(/*is_left=*/false, holistic_node, hand_output, + graph); + } + } + + // Detect and track face. + if (face_requested) { + RET_CHECK(pose_output.landmarks.has_value()); + + Stream face_landmarks_from_pose = + api2::builder::SplitToRanges(*pose_output.landmarks, {{0, 11}}, + graph)[0]; + + HolisticFaceTrackingRequest face_request = { + .classifications = is_face_blendshapes_requested, + }; + MP_ASSIGN_OR_RETURN( + HolisticFaceTrackingOutput face_output, + TrackHolisticFace( + image, face_landmarks_from_pose, + holistic_options->face_detector_graph_options(), + holistic_options->face_landmarks_detector_graph_options(), + face_request, graph)); + SetGraphFaceOutputs(holistic_node, face_output, graph); + } + + auto& pass_through = graph.AddNode("PassThroughCalculator"); + image >> pass_through.In(""); + pass_through.Out("") >> graph.Out("IMAGE"); + + auto config = graph.GetConfig(); + core::FixGraphBackEdges(config); + return config; + } +}; + +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::holistic_landmarker::HolisticLandmarkerGraph); + +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_landmarker_graph_test.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_landmarker_graph_test.cc new file mode 100644 index 000000000..c549a022b --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_landmarker_graph_test.cc @@ -0,0 +1,595 @@ +/* Copyright 2023 The MediaPipe Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "file/base/helpers.h" +#include "file/base/options.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/image_size.h" +#include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/tool/test_util.h" +#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h" +#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" +#include "mediapipe/tasks/cc/core/proto/external_file.pb.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_connections.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_connections.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.pb.h" +#include "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarks_connections.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/data_renderer.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" +#include "testing/base/public/gmock.h" +#include "testing/base/public/googletest.h" +#include "testing/base/public/gunit.h" + +namespace mediapipe { 
+namespace tasks { +namespace vision { +namespace holistic_landmarker { +namespace { + +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::core::TaskRunner; +using ::testing::TestParamInfo; +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::proto::Approximately; +using ::testing::proto::Partially; + +constexpr float kAbsMargin = 0.025; +constexpr absl::string_view kTestDataDirectory = + "/mediapipe/tasks/testdata/vision/"; +constexpr char kHolisticResultFile[] = + "male_full_height_hands_result_cpu.pbtxt"; +constexpr absl::string_view kTestImageFile = "male_full_height_hands.jpg"; +constexpr absl::string_view kImageInStream = "image_in"; +constexpr absl::string_view kLeftHandLandmarksStream = "left_hand_landmarks"; +constexpr absl::string_view kRightHandLandmarksStream = "right_hand_landmarks"; +constexpr absl::string_view kFaceLandmarksStream = "face_landmarks"; +constexpr absl::string_view kFaceBlendshapesStream = "face_blendshapes"; +constexpr absl::string_view kPoseLandmarksStream = "pose_landmarks"; +constexpr absl::string_view kRenderedImageOutStream = "rendered_image_out"; +constexpr absl::string_view kPoseSegmentationMaskStream = + "pose_segmentation_mask"; +constexpr absl::string_view kHolisticLandmarkerModelBundleFile = + "holistic_landmarker.task"; +constexpr absl::string_view kHandLandmarksModelFile = + "hand_landmark_full.tflite"; +constexpr absl::string_view kHandRoiRefinementModelFile = + "handrecrop_2020_07_21_v0.f16.tflite"; +constexpr absl::string_view kPoseDetectionModelFile = "pose_detection.tflite"; +constexpr absl::string_view kPoseLandmarksModelFile = + "pose_landmark_lite.tflite"; +constexpr absl::string_view kFaceDetectionModelFile = + "face_detection_short_range.tflite"; +constexpr absl::string_view kFaceLandmarksModelFile = + "facemesh2_lite_iris_faceflag_2023_02_14.tflite"; +constexpr absl::string_view kFaceBlendshapesModelFile = + "face_blendshapes.tflite"; + +enum RenderPart { + HAND = 0, + POSE = 1, + FACE = 2, +}; + +mediapipe::Color GetColor(RenderPart render_part) { + mediapipe::Color color; + switch (render_part) { + case HAND: + color.set_b(255); + color.set_g(255); + color.set_r(255); + break; + case POSE: + color.set_b(0); + color.set_g(255); + color.set_r(0); + break; + case FACE: + color.set_b(0); + color.set_g(0); + color.set_r(255); + break; + } + return color; +} + +std::string GetFilePath(absl::string_view filename) { + return file::JoinPath("./", kTestDataDirectory, filename); +} + +template +mediapipe::LandmarksToRenderDataCalculatorOptions GetRendererOptions( + const std::array, N>& connections, + mediapipe::Color color) { + mediapipe::LandmarksToRenderDataCalculatorOptions renderer_options; + for (const auto& connection : connections) { + renderer_options.add_landmark_connections(connection[0]); + renderer_options.add_landmark_connections(connection[1]); + } + *renderer_options.mutable_landmark_color() = color; + *renderer_options.mutable_connection_color() = color; + renderer_options.set_thickness(0.5); + renderer_options.set_visualize_landmark_depth(false); + return renderer_options; +} + +void ConfigureHandProtoOptions(proto::HolisticLandmarkerGraphOptions& options) { + options.mutable_hand_landmarks_detector_graph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kHandLandmarksModelFile)); + + options.mutable_hand_roi_refinement_graph_options() + 
->mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kHandRoiRefinementModelFile)); +} + +void ConfigureFaceProtoOptions(proto::HolisticLandmarkerGraphOptions& options) { + // Set face detection model. + face_detector::proto::FaceDetectorGraphOptions& face_detector_graph_options = + *options.mutable_face_detector_graph_options(); + face_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kFaceDetectionModelFile)); + face_detector_graph_options.set_num_faces(1); + + // Set face landmarks model. + face_landmarker::proto::FaceLandmarksDetectorGraphOptions& + face_landmarks_graph_options = + *options.mutable_face_landmarks_detector_graph_options(); + face_landmarks_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kFaceLandmarksModelFile)); + face_landmarks_graph_options.mutable_face_blendshapes_graph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kFaceBlendshapesModelFile)); +} + +void ConfigurePoseProtoOptions(proto::HolisticLandmarkerGraphOptions& options) { + pose_detector::proto::PoseDetectorGraphOptions& pose_detector_graph_options = + *options.mutable_pose_detector_graph_options(); + pose_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kPoseDetectionModelFile)); + pose_detector_graph_options.set_num_poses(1); + options.mutable_pose_landmarks_detector_graph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath(kPoseLandmarksModelFile)); +} + +struct HolisticRequest { + bool is_left_hand_requested = false; + bool is_right_hand_requested = false; + bool is_face_requested = false; + bool is_face_blendshapes_requested = false; +}; + +// Helper function to create a TaskRunner. 
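+// The graph assembled here wraps HolisticLandmarkerGraph, configured either
+// from the bundled .task asset or from the individual model files, connects
+// hand, face and blendshape outputs only when the corresponding HolisticRequest
+// flag is set, always exports pose landmarks and the pose segmentation mask,
+// and renders the requested landmarks over the input image for inspection.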
+absl::StatusOr> CreateTaskRunner( + bool use_model_bundle, HolisticRequest holistic_request) { + Graph graph; + + Stream image = graph.In("IMAEG").Cast().SetName(kImageInStream); + + auto& holistic_graph = graph.AddNode( + "mediapipe.tasks.vision.holistic_landmarker.HolisticLandmarkerGraph"); + proto::HolisticLandmarkerGraphOptions& options = + holistic_graph.GetOptions(); + if (use_model_bundle) { + options.mutable_base_options()->mutable_model_asset()->set_file_name( + GetFilePath(kHolisticLandmarkerModelBundleFile)); + } else { + ConfigureHandProtoOptions(options); + ConfigurePoseProtoOptions(options); + ConfigureFaceProtoOptions(options); + } + + std::vector> render_list; + image >> holistic_graph.In("IMAGE"); + Stream> image_size = GetImageSize(image, graph); + + if (holistic_request.is_left_hand_requested) { + Stream left_hand_landmarks = + holistic_graph.Out("LEFT_HAND_LANDMARKS") + .Cast() + .SetName(kLeftHandLandmarksStream); + Stream left_hand_tracking_roi = + holistic_graph.Out("LEFT_HAND_TRACKING_ROI").Cast(); + auto left_hand_landmarks_render_data = utils::RenderLandmarks( + left_hand_landmarks, + utils::GetRenderScale(image_size, left_hand_tracking_roi, 0.0001, + graph), + GetRendererOptions(hand_landmarker::kHandConnections, + GetColor(RenderPart::HAND)), + graph); + render_list.push_back(left_hand_landmarks_render_data); + left_hand_landmarks >> graph.Out("LEFT_HAND_LANDMARKS"); + } + if (holistic_request.is_right_hand_requested) { + Stream right_hand_landmarks = + holistic_graph.Out("RIGHT_HAND_LANDMARKS") + .Cast() + .SetName(kRightHandLandmarksStream); + Stream right_hand_tracking_roi = + holistic_graph.Out("RIGHT_HAND_TRACKING_ROI").Cast(); + auto right_hand_landmarks_render_data = utils::RenderLandmarks( + right_hand_landmarks, + utils::GetRenderScale(image_size, right_hand_tracking_roi, 0.0001, + graph), + GetRendererOptions(hand_landmarker::kHandConnections, + GetColor(RenderPart::HAND)), + graph); + render_list.push_back(right_hand_landmarks_render_data); + right_hand_landmarks >> graph.Out("RIGHT_HAND_LANDMARKS"); + } + if (holistic_request.is_face_requested) { + Stream face_landmarks = + holistic_graph.Out("FACE_LANDMARKS") + .Cast() + .SetName(kFaceLandmarksStream); + Stream face_tracking_roi = + holistic_graph.Out("FACE_TRACKING_ROI").Cast(); + auto face_landmarks_render_data = utils::RenderLandmarks( + face_landmarks, + utils::GetRenderScale(image_size, face_tracking_roi, 0.0001, graph), + GetRendererOptions( + face_landmarker::FaceLandmarksConnections::kFaceLandmarksConnectors, + GetColor(RenderPart::FACE)), + graph); + render_list.push_back(face_landmarks_render_data); + face_landmarks >> graph.Out("FACE_LANDMARKS"); + } + if (holistic_request.is_face_blendshapes_requested) { + Stream face_blendshapes = + holistic_graph.Out("FACE_BLENDSHAPES") + .Cast() + .SetName(kFaceBlendshapesStream); + face_blendshapes >> graph.Out("FACE_BLENDSHAPES"); + } + Stream pose_landmarks = + holistic_graph.Out("POSE_LANDMARKS") + .Cast() + .SetName(kPoseLandmarksStream); + Stream pose_tracking_roi = + holistic_graph.Out("POSE_LANDMARKS_ROI").Cast(); + Stream pose_segmentation_mask = + holistic_graph.Out("POSE_SEGMENTATION_MASK") + .Cast() + .SetName(kPoseSegmentationMaskStream); + + auto pose_landmarks_render_data = utils::RenderLandmarks( + pose_landmarks, + utils::GetRenderScale(image_size, pose_tracking_roi, 0.0001, graph), + GetRendererOptions(pose_landmarker::kPoseLandmarksConnections, + GetColor(RenderPart::POSE)), + graph); + 
render_list.push_back(pose_landmarks_render_data); + auto rendered_image = + utils::Render( + image, absl::Span>(render_list), graph) + .SetName(kRenderedImageOutStream); + + pose_landmarks >> graph.Out("POSE_LANDMARKS"); + pose_segmentation_mask >> graph.Out("POSE_SEGMENTATION_MASK"); + rendered_image >> graph.Out("RENDERED_IMAGE"); + + auto config = graph.GetConfig(); + core::FixGraphBackEdges(config); + + return TaskRunner::Create( + config, std::make_unique()); +} + +template +absl::StatusOr FetchResult(const core::PacketMap& output_packets, + absl::string_view stream_name) { + auto it = output_packets.find(std::string(stream_name)); + RET_CHECK(it != output_packets.end()); + return it->second.Get(); +} + +// Remove fields not to be checked in the result, since the model +// generating expected result is different from the testing model. +void RemoveUncheckedResult(proto::HolisticResult& holistic_result) { + for (auto& landmark : + *holistic_result.mutable_pose_landmarks()->mutable_landmark()) { + landmark.clear_z(); + landmark.clear_visibility(); + landmark.clear_presence(); + } + for (auto& landmark : + *holistic_result.mutable_face_landmarks()->mutable_landmark()) { + landmark.clear_z(); + landmark.clear_visibility(); + landmark.clear_presence(); + } + for (auto& landmark : + *holistic_result.mutable_left_hand_landmarks()->mutable_landmark()) { + landmark.clear_z(); + landmark.clear_visibility(); + landmark.clear_presence(); + } + for (auto& landmark : + *holistic_result.mutable_right_hand_landmarks()->mutable_landmark()) { + landmark.clear_z(); + landmark.clear_visibility(); + landmark.clear_presence(); + } +} + +std::string RequestToString(HolisticRequest request) { + return absl::StrFormat( + "%s_%s_%s_%s", + request.is_left_hand_requested ? "left_hand" : "no_left_hand", + request.is_right_hand_requested ? "right_hand" : "no_right_hand", + request.is_face_requested ? "face" : "no_face", + request.is_face_blendshapes_requested ? "face_blendshapes" + : "no_face_blendshapes"); +} + +struct TestParams { + // The name of this test, for convenience when displaying test results. + std::string test_name; + // The filename of test image. + std::string test_image_name; + // Whether to use holistic model bundle to test. + bool use_model_bundle; + // Requests of holistic parts. 
+ HolisticRequest holistic_request; +}; + +class SmokeTest : public testing::TestWithParam {}; + +TEST_P(SmokeTest, Succeeds) { + MP_ASSERT_OK_AND_ASSIGN( + Image image, + DecodeImageFromFile(GetFilePath(GetParam().test_image_name))); + + proto::HolisticResult holistic_result; + MP_ASSERT_OK(GetTextProto(GetFilePath(kHolisticResultFile), &holistic_result, + ::file::Defaults())); + RemoveUncheckedResult(holistic_result); + + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, + CreateTaskRunner(GetParam().use_model_bundle, + GetParam().holistic_request)); + MP_ASSERT_OK_AND_ASSIGN(auto output_packets, + task_runner->Process({{std::string(kImageInStream), + MakePacket(image)}})); + + // Check face landmarks + if (GetParam().holistic_request.is_face_requested) { + MP_ASSERT_OK_AND_ASSIGN(auto face_landmarks, + FetchResult( + output_packets, kFaceLandmarksStream)); + EXPECT_THAT( + face_landmarks, + Approximately(Partially(EqualsProto(holistic_result.face_landmarks())), + /*margin=*/kAbsMargin)); + } else { + ASSERT_FALSE(output_packets.contains(std::string(kFaceLandmarksStream))); + } + + if (GetParam().holistic_request.is_face_blendshapes_requested) { + MP_ASSERT_OK_AND_ASSIGN(auto face_blendshapes, + FetchResult( + output_packets, kFaceBlendshapesStream)); + EXPECT_THAT(face_blendshapes, + Approximately( + Partially(EqualsProto(holistic_result.face_blendshapes())), + /*margin=*/kAbsMargin)); + } else { + ASSERT_FALSE(output_packets.contains(std::string(kFaceBlendshapesStream))); + } + + // Check Pose landmarks + MP_ASSERT_OK_AND_ASSIGN(auto pose_landmarks, + FetchResult( + output_packets, kPoseLandmarksStream)); + EXPECT_THAT( + pose_landmarks, + Approximately(Partially(EqualsProto(holistic_result.pose_landmarks())), + /*margin=*/kAbsMargin)); + + // Check Hand landmarks + if (GetParam().holistic_request.is_left_hand_requested) { + MP_ASSERT_OK_AND_ASSIGN(auto left_hand_landmarks, + FetchResult( + output_packets, kLeftHandLandmarksStream)); + EXPECT_THAT( + left_hand_landmarks, + Approximately( + Partially(EqualsProto(holistic_result.left_hand_landmarks())), + /*margin=*/kAbsMargin)); + } else { + ASSERT_FALSE( + output_packets.contains(std::string(kLeftHandLandmarksStream))); + } + + if (GetParam().holistic_request.is_right_hand_requested) { + MP_ASSERT_OK_AND_ASSIGN(auto right_hand_landmarks, + FetchResult( + output_packets, kRightHandLandmarksStream)); + EXPECT_THAT( + right_hand_landmarks, + Approximately( + Partially(EqualsProto(holistic_result.right_hand_landmarks())), + /*margin=*/kAbsMargin)); + } else { + ASSERT_FALSE( + output_packets.contains(std::string(kRightHandLandmarksStream))); + } + + auto rendered_image = + output_packets.at(std::string(kRenderedImageOutStream)).Get(); + MP_EXPECT_OK(SavePngTestOutput( + *rendered_image.GetImageFrameSharedPtr(), + absl::StrCat("holistic_landmark_", + RequestToString(GetParam().holistic_request)))); + + auto pose_segmentation_mask = + output_packets.at(std::string(kPoseSegmentationMaskStream)).Get(); + + cv::Mat matting_mask = mediapipe::formats::MatView( + pose_segmentation_mask.GetImageFrameSharedPtr().get()); + cv::Mat visualized_mask; + matting_mask.convertTo(visualized_mask, CV_8UC1, 255); + ImageFrame visualized_image(mediapipe::ImageFormat::GRAY8, + visualized_mask.cols, visualized_mask.rows, + visualized_mask.step, visualized_mask.data, + [visualized_mask](uint8_t[]) {}); + + MP_EXPECT_OK( + SavePngTestOutput(visualized_image, "holistic_pose_segmentation_mask")); +} + +INSTANTIATE_TEST_SUITE_P( + HolisticLandmarkerGraphTest, SmokeTest, + 
Values(TestParams{ + /* test_name= */ "UseModelBundle", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ true, + /* holistic_request= */ + { + /*is_left_hand_requested= */ true, + /*is_right_hand_requested= */ true, + /*is_face_requested= */ true, + /*is_face_blendshapes_requested= */ true, + }, + }, + TestParams{ + /* test_name= */ "UseSeparateModelFiles", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ false, + /* holistic_request= */ + { + /*is_left_hand_requested= */ true, + /*is_right_hand_requested= */ true, + /*is_face_requested= */ true, + /*is_face_blendshapes_requested= */ true, + }, + }, + TestParams{ + /* test_name= */ "ModelBundleNoLeftHand", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ true, + /* holistic_request= */ + { + /*is_left_hand_requested= */ false, + /*is_right_hand_requested= */ true, + /*is_face_requested= */ true, + /*is_face_blendshapes_requested= */ true, + }, + }, + TestParams{ + /* test_name= */ "ModelBundleNoRightHand", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ true, + /* holistic_request= */ + { + /*is_left_hand_requested= */ true, + /*is_right_hand_requested= */ false, + /*is_face_requested= */ true, + /*is_face_blendshapes_requested= */ true, + }, + }, + TestParams{ + /* test_name= */ "ModelBundleNoHand", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ true, + /* holistic_request= */ + { + /*is_left_hand_requested= */ false, + /*is_right_hand_requested= */ false, + /*is_face_requested= */ true, + /*is_face_blendshapes_requested= */ true, + }, + }, + TestParams{ + /* test_name= */ "ModelBundleNoFace", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ true, + /* holistic_request= */ + { + /*is_left_hand_requested= */ true, + /*is_right_hand_requested= */ true, + /*is_face_requested= */ false, + /*is_face_blendshapes_requested= */ false, + }, + }, + TestParams{ + /* test_name= */ "ModelBundleNoFaceBlendshapes", + /* test_image_name= */ std::string(kTestImageFile), + /* use_model_bundle= */ true, + /* holistic_request= */ + { + /*is_left_hand_requested= */ true, + /*is_right_hand_requested= */ true, + /*is_face_requested= */ true, + /*is_face_blendshapes_requested= */ false, + }, + }), + [](const TestParamInfo& info) { + return info.param.test_name; + }); + +} // namespace +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.cc new file mode 100644 index 000000000..860035ad0 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.cc @@ -0,0 +1,307 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.h" + +#include +#include +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/detections_to_rects.h" +#include "mediapipe/framework/api2/stream/image_size.h" +#include "mediapipe/framework/api2/stream/landmarks_to_detection.h" +#include "mediapipe/framework/api2/stream/loopback.h" +#include "mediapipe/framework/api2/stream/merge.h" +#include "mediapipe/framework/api2/stream/presence.h" +#include "mediapipe/framework/api2/stream/rect_transformation.h" +#include "mediapipe/framework/api2/stream/segmentation_smoothing.h" +#include "mediapipe/framework/api2/stream/smoothing.h" +#include "mediapipe/framework/api2/stream/split.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/tasks/cc/components/utils/gate.h" +#include "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +namespace { + +using ::mediapipe::NormalizedRect; +using ::mediapipe::api2::builder::ConvertAlignmentPointsDetectionsToRect; +using ::mediapipe::api2::builder::ConvertAlignmentPointsDetectionToRect; +using ::mediapipe::api2::builder::ConvertLandmarksToDetection; +using ::mediapipe::api2::builder::GenericNode; +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::GetLoopbackData; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::IsPresent; +using ::mediapipe::api2::builder::Merge; +using ::mediapipe::api2::builder::ScaleAndMakeSquare; +using ::mediapipe::api2::builder::SmoothLandmarks; +using ::mediapipe::api2::builder::SmoothLandmarksVisibility; +using ::mediapipe::api2::builder::SmoothSegmentationMask; +using ::mediapipe::api2::builder::SplitToRanges; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::components::utils::DisallowIf; +using Size = std::pair; + +constexpr int kAuxLandmarksStartKeypointIndex = 0; +constexpr int kAuxLandmarksEndKeypointIndex = 1; +constexpr float kAuxLandmarksTargetAngle = 90; +constexpr float kRoiFromDetectionScaleFactor = 1.25f; +constexpr float kRoiFromLandmarksScaleFactor = 1.25f; + +Stream CalculateRoiFromDetections( + Stream> detections, Stream image_size, + Graph& graph) { + auto roi = ConvertAlignmentPointsDetectionsToRect(detections, image_size, + /*start_keypoint_index=*/0, + /*end_keypoint_index=*/1, + /*target_angle=*/90, graph); + return ScaleAndMakeSquare( + roi, image_size, /*scale_x_factor=*/kRoiFromDetectionScaleFactor, + /*scale_y_factor=*/kRoiFromDetectionScaleFactor, graph); +} + +Stream CalculateScaleRoiFromAuxiliaryLandmarks( + Stream landmarks, Stream image_size, + Graph& graph) { + // TODO: consider calculating ROI directly from landmarks. 
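+  // The two auxiliary landmarks (hidden center and scale points) are first
+  // converted into an alignment-points detection and then into a rotated rect
+  // with a 90-degree target angle; CalculateRoiFromAuxiliaryLandmarks below
+  // additionally scales that rect by 1.25 and makes it square.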
+ auto detection = ConvertLandmarksToDetection(landmarks, graph); + return ConvertAlignmentPointsDetectionToRect( + detection, image_size, kAuxLandmarksStartKeypointIndex, + kAuxLandmarksEndKeypointIndex, kAuxLandmarksTargetAngle, graph); +} + +Stream CalculateRoiFromAuxiliaryLandmarks( + Stream landmarks, Stream image_size, + Graph& graph) { + // TODO: consider calculating ROI directly from landmarks. + auto detection = ConvertLandmarksToDetection(landmarks, graph); + auto roi = ConvertAlignmentPointsDetectionToRect( + detection, image_size, kAuxLandmarksStartKeypointIndex, + kAuxLandmarksEndKeypointIndex, kAuxLandmarksTargetAngle, graph); + return ScaleAndMakeSquare( + roi, image_size, /*scale_x_factor=*/kRoiFromLandmarksScaleFactor, + /*scale_y_factor=*/kRoiFromLandmarksScaleFactor, graph); +} + +struct PoseLandmarksResult { + std::optional> landmarks; + std::optional> world_landmarks; + std::optional> auxiliary_landmarks; + std::optional> segmentation_mask; +}; + +PoseLandmarksResult RunLandmarksDetection( + Stream image, Stream roi, + const pose_landmarker::proto::PoseLandmarksDetectorGraphOptions& + pose_landmarks_detector_graph_options, + const HolisticPoseTrackingRequest& request, Graph& graph) { + GenericNode& landmarks_graph = graph.AddNode( + "mediapipe.tasks.vision.pose_landmarker." + "SinglePoseLandmarksDetectorGraph"); + landmarks_graph + .GetOptions() = + pose_landmarks_detector_graph_options; + image >> landmarks_graph.In("IMAGE"); + roi >> landmarks_graph.In("NORM_RECT"); + + PoseLandmarksResult result; + if (request.landmarks) { + result.landmarks = + landmarks_graph.Out("LANDMARKS").Cast(); + result.auxiliary_landmarks = landmarks_graph.Out("AUXILIARY_LANDMARKS") + .Cast(); + } + if (request.world_landmarks) { + result.world_landmarks = + landmarks_graph.Out("WORLD_LANDMARKS").Cast(); + } + if (request.segmentation_mask) { + result.segmentation_mask = + landmarks_graph.Out("SEGMENTATION_MASK").Cast(); + } + return result; +} + +} // namespace + +absl::StatusOr +TrackHolisticPoseUsingCustomPoseDetection( + Stream image, PoseDetectionFn pose_detection_fn, + const pose_landmarker::proto::PoseLandmarksDetectorGraphOptions& + pose_landmarks_detector_graph_options, + const HolisticPoseTrackingRequest& request, Graph& graph) { + // Calculate ROI from scratch (pose detection) or reuse one from the + // previous run if available. + auto [previous_roi, set_previous_roi_fn] = + GetLoopbackData(/*tick=*/image, graph); + auto is_previous_roi_available = IsPresent(previous_roi, graph); + auto image_for_detection = + DisallowIf(image, is_previous_roi_available, graph); + MP_ASSIGN_OR_RETURN(auto pose_detections, + pose_detection_fn(image_for_detection, graph)); + auto roi_from_detections = CalculateRoiFromDetections( + pose_detections, GetImageSize(image_for_detection, graph), graph); + // Take first non-empty. + auto roi = Merge(roi_from_detections, previous_roi, graph); + + // Calculate landmarks and other outputs (if requested) in the specified ROI. + auto landmarks_detection_result = RunLandmarksDetection( + image, roi, pose_landmarks_detector_graph_options, + { + // Landmarks are required for tracking, hence force-requesting them. 
+ .landmarks = true, + .world_landmarks = request.world_landmarks, + .segmentation_mask = request.segmentation_mask, + }, + graph); + RET_CHECK(landmarks_detection_result.landmarks.has_value() && + landmarks_detection_result.auxiliary_landmarks.has_value()) + << "Failed to calculate landmarks required for tracking."; + + // Split landmarks to pose landmarks and auxiliary landmarks. + auto pose_landmarks_raw = *landmarks_detection_result.landmarks; + auto auxiliary_landmarks = *landmarks_detection_result.auxiliary_landmarks; + + auto image_size = GetImageSize(image, graph); + + // TODO: b/305750053 - Apply adaptive crop by adding AdaptiveCropCalculator. + + // Calculate ROI from smoothed auxiliary landmarks. + auto scale_roi = CalculateScaleRoiFromAuxiliaryLandmarks(auxiliary_landmarks, + image_size, graph); + auto auxiliary_landmarks_smoothed = SmoothLandmarks( + auxiliary_landmarks, image_size, scale_roi, + {// Min cutoff 0.01 results into ~0.002 alpha in landmark EMA filter when + // landmark is static. + .min_cutoff = 0.01, + // Beta 10.0 in combintation with min_cutoff 0.01 results into ~0.68 + // alpha in landmark EMA filter when landmark is moving fast. + .beta = 10.0, + // Derivative cutoff 1.0 results into ~0.17 alpha in landmark velocity + // EMA filter. + .derivate_cutoff = 1.0}, + graph); + auto roi_from_auxiliary_landmarks = CalculateRoiFromAuxiliaryLandmarks( + auxiliary_landmarks_smoothed, image_size, graph); + + // Make ROI from auxiliary landmarks to be used as "previous" ROI for a + // subsequent run. + set_previous_roi_fn(roi_from_auxiliary_landmarks); + + // Populate and smooth pose landmarks if corresponding output has been + // requested. + std::optional> pose_landmarks; + if (request.landmarks) { + pose_landmarks = SmoothLandmarksVisibility( + pose_landmarks_raw, /*low_pass_filter_alpha=*/0.1f, graph); + pose_landmarks = SmoothLandmarks( + *pose_landmarks, image_size, scale_roi, + {// Min cutoff 0.05 results into ~0.01 alpha in landmark EMA filter when + // landmark is static. + .min_cutoff = 0.05f, + // Beta 80.0 in combination with min_cutoff 0.05 results into ~0.94 + // alpha in landmark EMA filter when landmark is moving fast. + .beta = 80.0f, + // Derivative cutoff 1.0 results into ~0.17 alpha in landmark velocity + // EMA filter. + .derivate_cutoff = 1.0f}, + graph); + } + + // Populate and smooth world landmarks if available. + std::optional> world_landmarks; + if (landmarks_detection_result.world_landmarks) { + world_landmarks = SplitToRanges(*landmarks_detection_result.world_landmarks, + /*ranges*/ {{0, 33}}, graph)[0]; + world_landmarks = SmoothLandmarksVisibility( + *world_landmarks, /*low_pass_filter_alpha=*/0.1f, graph); + world_landmarks = SmoothLandmarks( + *world_landmarks, + /*scale_roi=*/std::nullopt, + {// Min cutoff 0.1 results into ~ 0.02 alpha in landmark EMA filter when + // landmark is static. + .min_cutoff = 0.1f, + // Beta 40.0 in combination with min_cutoff 0.1 results into ~0.8 + // alpha in landmark EMA filter when landmark is moving fast. + .beta = 40.0f, + // Derivative cutoff 1.0 results into ~0.17 alpha in landmark velocity + // EMA filter. + .derivate_cutoff = 1.0f}, + graph); + } + + // Populate and smooth segmentation mask if available. 
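  // For reference, how the alpha values quoted in the smoothing comments above
  // fall out of the one-euro filter. This is a sketch and assumes ~30 FPS
  // input, which is not stated anywhere in this file:
  //
  //   fc    = min_cutoff + beta * |velocity|    // effective cutoff frequency, Hz
  //   tau   = 1 / (2 * pi * fc)
  //   alpha = 1 / (1 + tau * fps)               // per-frame EMA weight
  //
  //   min_cutoff 0.01, static landmark (velocity ~ 0):
  //     tau ~= 15.9 s,  alpha ~= 1 / (1 + 15.9 * 30) ~= 0.002
  //   beta 10, fast landmark (normalized velocity ~ 1 unit/s):
  //     fc ~= 10 Hz, tau ~= 0.016 s, alpha ~= 1 / (1 + 0.48) ~= 0.68
  //   derivate_cutoff 1.0 (velocity EMA):
  //     tau ~= 0.16 s,  alpha ~= 1 / (1 + 4.8) ~= 0.17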
+ std::optional> segmentation_mask; + if (landmarks_detection_result.segmentation_mask) { + auto mask = *landmarks_detection_result.segmentation_mask; + auto [prev_mask_as_img, set_prev_mask_as_img_fn] = + GetLoopbackData( + /*tick=*/*landmarks_detection_result.segmentation_mask, graph); + auto mask_smoothed = + SmoothSegmentationMask(mask, prev_mask_as_img, + /*combine_with_previous_ratio=*/0.7f, graph); + set_prev_mask_as_img_fn(mask_smoothed); + segmentation_mask = mask_smoothed; + } + + return {{/*landmarks=*/pose_landmarks, + /*world_landmarks=*/world_landmarks, + /*segmentation_mask=*/segmentation_mask, + /*debug_output=*/ + {/*auxiliary_landmarks=*/auxiliary_landmarks_smoothed, + /*roi_from_landmarks=*/roi_from_auxiliary_landmarks, + /*detections*/ pose_detections}}}; +} + +absl::StatusOr TrackHolisticPose( + Stream image, + const pose_detector::proto::PoseDetectorGraphOptions& + pose_detector_graph_options, + const pose_landmarker::proto::PoseLandmarksDetectorGraphOptions& + pose_landmarks_detector_graph_options, + const HolisticPoseTrackingRequest& request, Graph& graph) { + PoseDetectionFn pose_detection_fn = [&pose_detector_graph_options]( + Stream image, Graph& graph) + -> absl::StatusOr>> { + GenericNode& pose_detector = + graph.AddNode("mediapipe.tasks.vision.pose_detector.PoseDetectorGraph"); + pose_detector.GetOptions() = + pose_detector_graph_options; + image >> pose_detector.In("IMAGE"); + return pose_detector.Out("DETECTIONS") + .Cast>(); + }; + return TrackHolisticPoseUsingCustomPoseDetection( + image, pose_detection_fn, pose_landmarks_detector_graph_options, request, + graph); +} + +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.h b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.h new file mode 100644 index 000000000..f51ccc283 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.h @@ -0,0 +1,110 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#ifndef MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_POSE_TRACKING_H_
+#define MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_POSE_TRACKING_H_
+
+#include <functional>
+#include <optional>
+#include <vector>
+
+#include "absl/status/statusor.h"
+#include "mediapipe/framework/api2/builder.h"
+#include "mediapipe/framework/formats/detection.pb.h"
+#include "mediapipe/framework/formats/image.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.pb.h"
+
+namespace mediapipe {
+namespace tasks {
+namespace vision {
+namespace holistic_landmarker {
+
+// Type of pose detection function that can be used to customize pose tracking
+// by supplying the function to `TrackHolisticPoseUsingCustomPoseDetection`.
+//
+// The function should update the provided graph with a node (or nodes) that
+// accepts an image stream and produces a stream of detections.
+using PoseDetectionFn = std::function<
+    absl::StatusOr<api2::builder::Stream<std::vector<mediapipe::Detection>>>(
+        api2::builder::Stream<Image>, api2::builder::Graph&)>;
+
+struct HolisticPoseTrackingRequest {
+  bool landmarks = false;
+  bool world_landmarks = false;
+  bool segmentation_mask = false;
+};
+
+struct HolisticPoseTrackingOutput {
+  std::optional<api2::builder::Stream<mediapipe::NormalizedLandmarkList>>
+      landmarks;
+  std::optional<api2::builder::Stream<mediapipe::LandmarkList>> world_landmarks;
+  std::optional<api2::builder::Stream<Image>> segmentation_mask;
+
+  struct DebugOutput {
+    api2::builder::Stream<mediapipe::NormalizedLandmarkList>
+        auxiliary_landmarks;
+    api2::builder::Stream<mediapipe::NormalizedRect> roi_from_landmarks;
+    api2::builder::Stream<std::vector<mediapipe::Detection>> detections;
+  };
+
+  DebugOutput debug_output;
+};
+
+// Updates @graph to track pose in @image.
+//
+// @image - ImageFrame/GpuBuffer to track pose in.
+// @pose_detection_fn - pose detection function that takes @image as input and
+// produces a stream of pose detections.
+// @pose_landmarks_detector_graph_options - options of the
+// PoseLandmarksDetectorGraph used to detect the pose landmarks.
+// @request - object to request specific pose tracking outputs.
+// NOTE: Outputs that were not requested won't be returned and corresponding
+// parts of the graph won't be generated at all.
+// @graph - graph to update.
+absl::StatusOr<HolisticPoseTrackingOutput>
+TrackHolisticPoseUsingCustomPoseDetection(
+    api2::builder::Stream<Image> image, PoseDetectionFn pose_detection_fn,
+    const pose_landmarker::proto::PoseLandmarksDetectorGraphOptions&
+        pose_landmarks_detector_graph_options,
+    const HolisticPoseTrackingRequest& request, api2::builder::Graph& graph);
+
+// Updates @graph to track pose in @image.
+//
+// @image - ImageFrame/GpuBuffer to track pose in.
+// @pose_detector_graph_options - options of the PoseDetectorGraph used to
+// detect the pose.
+// @pose_landmarks_detector_graph_options - options of the
+// PoseLandmarksDetectorGraph used to detect the pose landmarks.
+// @request - object to request specific pose tracking outputs.
+// NOTE: Outputs that were not requested won't be returned and corresponding
+// parts of the graph won't be generated at all.
+// @graph - graph to update.
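//
// A minimal usage sketch for the overload declared below, mirroring what the
// accompanying unit test does (the model file names are example values):
//
//   api2::builder::Graph graph;
//   api2::builder::Stream<Image> image = graph.In("IMAGE").Cast<Image>();
//
//   pose_detector::proto::PoseDetectorGraphOptions detector_options;
//   detector_options.mutable_base_options()->mutable_model_asset()
//       ->set_file_name("pose_detection.tflite");
//   pose_landmarker::proto::PoseLandmarksDetectorGraphOptions landmarker_options;
//   landmarker_options.mutable_base_options()->mutable_model_asset()
//       ->set_file_name("pose_landmark_lite.tflite");
//
//   HolisticPoseTrackingRequest request;
//   request.landmarks = true;
//
//   MP_ASSIGN_OR_RETURN(
//       HolisticPoseTrackingOutput output,
//       TrackHolisticPose(image, detector_options, landmarker_options, request,
//                         graph));
//   output.landmarks->SetName("pose_landmarks") >> graph.Out("POSE_LANDMARKS");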
+absl::StatusOr TrackHolisticPose( + api2::builder::Stream image, + const pose_detector::proto::PoseDetectorGraphOptions& + pose_detector_graph_options, + const pose_landmarker::proto::PoseLandmarksDetectorGraphOptions& + pose_landmarks_detector_graph_options, + const HolisticPoseTrackingRequest& request, api2::builder::Graph& graph); + +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe + +#endif // MEDIAPIPE_TASKS_CC_VISION_HOLISTIC_LANDMARKER_HOLISTIC_POSE_TRACKING_H_ diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking_test.cc b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking_test.cc new file mode 100644 index 000000000..0bf7259e8 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking_test.cc @@ -0,0 +1,243 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/holistic_landmarker/holistic_pose_tracking.h" + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/strings/substitute.h" +#include "absl/types/span.h" +#include "file/base/helpers.h" +#include "file/base/options.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/stream/image_size.h" +#include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/tool/test_util.h" +#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h" +#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.pb.h" +#include "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarks_connections.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/data_renderer.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" +#include "testing/base/public/googletest.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace holistic_landmarker { + +namespace { + +using 
::file::Defaults; +using ::file::GetTextProto; +using ::mediapipe::Image; +using ::mediapipe::api2::builder::GetImageSize; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Stream; +using ::mediapipe::tasks::core::TaskRunner; +using ::testing::proto::Approximately; +using ::testing::proto::Partially; + +constexpr float kAbsMargin = 0.025; +constexpr absl::string_view kTestDataDirectory = + "/mediapipe/tasks/testdata/vision/"; +constexpr absl::string_view kTestImageFile = "male_full_height_hands.jpg"; +constexpr absl::string_view kImageInStream = "image_in"; +constexpr absl::string_view kPoseLandmarksOutStream = "pose_landmarks_out"; +constexpr absl::string_view kPoseWorldLandmarksOutStream = + "pose_world_landmarks_out"; +constexpr absl::string_view kRenderedImageOutStream = "rendered_image_out"; +constexpr absl::string_view kHolisticResultFile = + "male_full_height_hands_result_cpu.pbtxt"; +constexpr absl::string_view kHolisticPoseTrackingGraph = + "holistic_pose_tracking_graph.pbtxt"; + +std::string GetFilePath(absl::string_view filename) { + return file::JoinPath("./", kTestDataDirectory, filename); +} + +mediapipe::LandmarksToRenderDataCalculatorOptions GetPoseRendererOptions() { + mediapipe::LandmarksToRenderDataCalculatorOptions renderer_options; + for (const auto& connection : pose_landmarker::kPoseLandmarksConnections) { + renderer_options.add_landmark_connections(connection[0]); + renderer_options.add_landmark_connections(connection[1]); + } + renderer_options.mutable_landmark_color()->set_r(255); + renderer_options.mutable_landmark_color()->set_g(255); + renderer_options.mutable_landmark_color()->set_b(255); + renderer_options.mutable_connection_color()->set_r(255); + renderer_options.mutable_connection_color()->set_g(255); + renderer_options.mutable_connection_color()->set_b(255); + renderer_options.set_thickness(0.5); + renderer_options.set_visualize_landmark_depth(false); + return renderer_options; +} + +// Helper function to create a TaskRunner. 
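// The graph built below exposes a single "IMAGE" input and three outputs
// (POSE_LANDMARKS, POSE_WORLD_LANDMARKS, RENDERED_IMAGE). Driving the resulting
// TaskRunner looks roughly like this (see SmokeTest further down):
//
//   MP_ASSIGN_OR_RETURN(auto runner, CreateTaskRunner());
//   MP_ASSIGN_OR_RETURN(
//       auto outputs,
//       runner->Process({{"image_in", MakePacket<Image>(image)}}));
//   const auto& landmarks =
//       outputs.at("pose_landmarks_out").Get<NormalizedLandmarkList>();
//
// core::FixGraphBackEdges() is applied to the config, which appears to be
// needed because the ROI loopback in holistic pose tracking introduces back
// edges into the graph.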
+absl::StatusOr> CreateTaskRunner() { + Graph graph; + Stream image = graph.In("IMAGE").Cast().SetName(kImageInStream); + pose_detector::proto::PoseDetectorGraphOptions pose_detector_graph_options; + pose_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath("pose_detection.tflite")); + pose_detector_graph_options.set_num_poses(1); + pose_landmarker::proto::PoseLandmarksDetectorGraphOptions + pose_landmarks_detector_graph_options; + pose_landmarks_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath("pose_landmark_lite.tflite")); + + HolisticPoseTrackingRequest request; + request.landmarks = true; + request.world_landmarks = true; + MP_ASSIGN_OR_RETURN( + HolisticPoseTrackingOutput result, + TrackHolisticPose(image, pose_detector_graph_options, + pose_landmarks_detector_graph_options, request, graph)); + + auto image_size = GetImageSize(image, graph); + auto render_data = utils::RenderLandmarks( + *result.landmarks, + utils::GetRenderScale(image_size, result.debug_output.roi_from_landmarks, + 0.0001, graph), + GetPoseRendererOptions(), graph); + std::vector> render_list = {render_data}; + auto rendered_image = + utils::Render( + image, absl::Span>(render_list), graph) + .SetName(kRenderedImageOutStream); + + rendered_image >> graph.Out("RENDERED_IMAGE"); + result.landmarks->SetName(kPoseLandmarksOutStream) >> + graph.Out("POSE_LANDMARKS"); + result.world_landmarks->SetName(kPoseWorldLandmarksOutStream) >> + graph.Out("POSE_WORLD_LANDMARKS"); + + auto config = graph.GetConfig(); + core::FixGraphBackEdges(config); + + return TaskRunner::Create( + config, std::make_unique()); +} + +// Remove fields not to be checked in the result, since the model +// generating expected result is different from the testing model. +void RemoveUncheckedResult(proto::HolisticResult& holistic_result) { + for (auto& landmark : + *holistic_result.mutable_pose_landmarks()->mutable_landmark()) { + landmark.clear_z(); + landmark.clear_visibility(); + landmark.clear_presence(); + } +} + +class HolisticPoseTrackingTest : public testing::Test {}; + +TEST_F(HolisticPoseTrackingTest, VerifyGraph) { + Graph graph; + Stream image = graph.In("IMAGE").Cast().SetName(kImageInStream); + pose_detector::proto::PoseDetectorGraphOptions pose_detector_graph_options; + pose_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath("pose_detection.tflite")); + pose_detector_graph_options.set_num_poses(1); + pose_landmarker::proto::PoseLandmarksDetectorGraphOptions + pose_landmarks_detector_graph_options; + pose_landmarks_detector_graph_options.mutable_base_options() + ->mutable_model_asset() + ->set_file_name(GetFilePath("pose_landmark_lite.tflite")); + HolisticPoseTrackingRequest request; + request.landmarks = true; + request.world_landmarks = true; + MP_ASSERT_OK_AND_ASSIGN( + HolisticPoseTrackingOutput result, + TrackHolisticPose(image, pose_detector_graph_options, + pose_landmarks_detector_graph_options, request, graph)); + result.landmarks->SetName(kPoseLandmarksOutStream) >> + graph.Out("POSE_LANDMARKS"); + result.world_landmarks->SetName(kPoseWorldLandmarksOutStream) >> + graph.Out("POSE_WORLD_LANDMARKS"); + + auto config = graph.GetConfig(); + core::FixGraphBackEdges(config); + + // Read the expected graph config. 
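  // The golden file is a text-format CalculatorGraphConfig with
  // absl::Substitute placeholders ($0, $1) that are filled in with the test
  // source directory below, after which the configs are compared ignoring
  // repeated-field ordering:
  //
  //   expected = Substitute(contents, FLAGS_test_srcdir, FLAGS_test_srcdir);
  //   EXPECT_THAT(config, IgnoringRepeatedFieldOrdering(EqualsProto(expected)));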
+ std::string expected_graph_contents; + MP_ASSERT_OK(file::GetContents( + file::JoinPath("./", kTestDataDirectory, kHolisticPoseTrackingGraph), + &expected_graph_contents)); + + // Need to replace the expected graph config with the test srcdir, because + // each run has different test dir on TAP. + expected_graph_contents = absl::Substitute( + expected_graph_contents, FLAGS_test_srcdir, FLAGS_test_srcdir); + CalculatorGraphConfig expected_graph = + ParseTextProtoOrDie(expected_graph_contents); + + EXPECT_THAT(config, testing::proto::IgnoringRepeatedFieldOrdering( + testing::EqualsProto(expected_graph))); +} + +TEST_F(HolisticPoseTrackingTest, SmokeTest) { + MP_ASSERT_OK_AND_ASSIGN(Image image, + DecodeImageFromFile(GetFilePath(kTestImageFile))); + + proto::HolisticResult holistic_result; + MP_ASSERT_OK(GetTextProto(GetFilePath(kHolisticResultFile), &holistic_result, + Defaults())); + RemoveUncheckedResult(holistic_result); + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner()); + MP_ASSERT_OK_AND_ASSIGN(auto output_packets, + task_runner->Process({{std::string(kImageInStream), + MakePacket(image)}})); + auto pose_landmarks = output_packets.at(std::string(kPoseLandmarksOutStream)) + .Get(); + EXPECT_THAT( + pose_landmarks, + Approximately(Partially(EqualsProto(holistic_result.pose_landmarks())), + /*margin=*/kAbsMargin)); + auto rendered_image = + output_packets.at(std::string(kRenderedImageOutStream)).Get(); + MP_EXPECT_OK(SavePngTestOutput(*rendered_image.GetImageFrameSharedPtr(), + "pose_landmarks")); +} + +} // namespace +} // namespace holistic_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/proto/BUILD b/mediapipe/tasks/cc/vision/holistic_landmarker/proto/BUILD new file mode 100644 index 000000000..147f3cc86 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/proto/BUILD @@ -0,0 +1,44 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
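# Note on the two proto targets below: holistic_result_proto is the container
# used for golden results in the C++ tests (e.g. the
# male_full_height_hands_result_cpu.pbtxt expectation is parsed into a
# HolisticResult), while holistic_landmarker_graph_options_proto bundles the
# per-subgraph options (face/hand/pose detectors and landmarkers) for the
# holistic landmarker graph.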
+ +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + +package(default_visibility = [ + "//mediapipe/tasks:internal", +]) + +licenses(["notice"]) + +mediapipe_proto_library( + name = "holistic_result_proto", + srcs = ["holistic_result.proto"], + deps = [ + "//mediapipe/framework/formats:classification_proto", + "//mediapipe/framework/formats:landmark_proto", + ], +) + +mediapipe_proto_library( + name = "holistic_landmarker_graph_options_proto", + srcs = ["holistic_landmarker_graph_options.proto"], + deps = [ + "//mediapipe/tasks/cc/core/proto:base_options_proto", + "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_proto", + "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_proto", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_proto", + ], +) diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options.proto b/mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options.proto new file mode 100644 index 000000000..86aba8887 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options.proto @@ -0,0 +1,57 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +syntax = "proto3"; + +package mediapipe.tasks.vision.holistic_landmarker.proto; + +import "mediapipe/tasks/cc/core/proto/base_options.proto"; +import "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.proto"; +import "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.proto"; +import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto"; +import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto"; +import "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.proto"; +import "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.proto"; + +option java_package = "com.google.mediapipe.tasks.vision.holisticlandmarker.proto"; +option java_outer_classname = "HolisticLandmarkerGraphOptionsProto"; + +message HolisticLandmarkerGraphOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // asset bundle file with metadata, accelerator options, etc. + core.proto.BaseOptions base_options = 1; + + // Options for hand landmarks graph. + hand_landmarker.proto.HandLandmarksDetectorGraphOptions + hand_landmarks_detector_graph_options = 2; + + // Options for hand roi refinement graph. 
+ hand_landmarker.proto.HandRoiRefinementGraphOptions + hand_roi_refinement_graph_options = 3; + + // Options for face detector graph. + face_detector.proto.FaceDetectorGraphOptions face_detector_graph_options = 4; + + // Options for face landmarks detector graph. + face_landmarker.proto.FaceLandmarksDetectorGraphOptions + face_landmarks_detector_graph_options = 5; + + // Options for pose detector graph. + pose_detector.proto.PoseDetectorGraphOptions pose_detector_graph_options = 6; + + // Options for pose landmarks detector graph. + pose_landmarker.proto.PoseLandmarksDetectorGraphOptions + pose_landmarks_detector_graph_options = 7; +} diff --git a/mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.proto b/mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.proto new file mode 100644 index 000000000..356da45d9 --- /dev/null +++ b/mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result.proto @@ -0,0 +1,34 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +syntax = "proto3"; + +package mediapipe.tasks.vision.holistic_landmarker.proto; + +import "mediapipe/framework/formats/classification.proto"; +import "mediapipe/framework/formats/landmark.proto"; + +option java_package = "com.google.mediapipe.tasks.vision.holisticlandmarker"; +option java_outer_classname = "HolisticResultProto"; + +message HolisticResult { + mediapipe.NormalizedLandmarkList pose_landmarks = 1; + mediapipe.LandmarkList pose_world_landmarks = 7; + mediapipe.NormalizedLandmarkList left_hand_landmarks = 2; + mediapipe.NormalizedLandmarkList right_hand_landmarks = 3; + mediapipe.NormalizedLandmarkList face_landmarks = 4; + mediapipe.ClassificationList face_blendshapes = 6; + mediapipe.NormalizedLandmarkList auxiliary_landmarks = 5; +} diff --git a/mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator.cc b/mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator.cc index d449bb123..69f74b469 100644 --- a/mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator.cc +++ b/mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator.cc @@ -45,7 +45,7 @@ limitations under the License. 
#ifdef __EMSCRIPTEN__ #define TASK_SEGMENTATION_USE_GL_POSTPROCESSING 1 #elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 && \ - !MEDIAPIPE_USING_SWIFTSHADER && defined(MEDIAPIPE_ANDROID) + !MEDIAPIPE_USING_LEGACY_SWIFTSHADER && defined(MEDIAPIPE_ANDROID) #define TASK_SEGMENTATION_USE_GL_POSTPROCESSING 1 #else #undef TASK_SEGMENTATION_USE_GL_POSTPROCESSING diff --git a/mediapipe/tasks/ios/BUILD b/mediapipe/tasks/ios/BUILD index 7f3db7f7a..88b99ffec 100644 --- a/mediapipe/tasks/ios/BUILD +++ b/mediapipe/tasks/ios/BUILD @@ -57,6 +57,7 @@ CALCULATORS_AND_GRAPHS = [ "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_graph", "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_graph", "//mediapipe/tasks/cc/vision/image_classifier:image_classifier_graph", + "//mediapipe/tasks/cc/vision/image_segmenter:image_segmenter_graph", "//mediapipe/tasks/cc/vision/object_detector:object_detector_graph", ] @@ -83,6 +84,7 @@ strip_api_include_path_prefix( "//mediapipe/tasks/ios/text/text_embedder:sources/MPPTextEmbedderResult.h", "//mediapipe/tasks/ios/vision/core:sources/MPPRunningMode.h", "//mediapipe/tasks/ios/vision/core:sources/MPPImage.h", + "//mediapipe/tasks/ios/vision/core:sources/MPPMask.h", "//mediapipe/tasks/ios/vision/face_detector:sources/MPPFaceDetector.h", "//mediapipe/tasks/ios/vision/face_detector:sources/MPPFaceDetectorOptions.h", "//mediapipe/tasks/ios/vision/face_detector:sources/MPPFaceDetectorResult.h", @@ -98,6 +100,9 @@ strip_api_include_path_prefix( "//mediapipe/tasks/ios/vision/image_classifier:sources/MPPImageClassifier.h", "//mediapipe/tasks/ios/vision/image_classifier:sources/MPPImageClassifierOptions.h", "//mediapipe/tasks/ios/vision/image_classifier:sources/MPPImageClassifierResult.h", + "//mediapipe/tasks/ios/vision/image_segmenter:sources/MPPImageSegmenter.h", + "//mediapipe/tasks/ios/vision/image_segmenter:sources/MPPImageSegmenterOptions.h", + "//mediapipe/tasks/ios/vision/image_segmenter:sources/MPPImageSegmenterResult.h", "//mediapipe/tasks/ios/vision/object_detector:sources/MPPObjectDetector.h", "//mediapipe/tasks/ios/vision/object_detector:sources/MPPObjectDetectorOptions.h", "//mediapipe/tasks/ios/vision/object_detector:sources/MPPObjectDetectorResult.h", @@ -178,6 +183,7 @@ apple_static_xcframework( ":MPPTaskOptions.h", ":MPPTaskResult.h", ":MPPImage.h", + ":MPPMask.h", ":MPPRunningMode.h", ":MPPFaceDetector.h", ":MPPFaceDetectorOptions.h", @@ -188,6 +194,9 @@ apple_static_xcframework( ":MPPImageClassifier.h", ":MPPImageClassifierOptions.h", ":MPPImageClassifierResult.h", + ":MPPImageSegmenter.h", + ":MPPImageSegmenterOptions.h", + ":MPPImageSegmenterResult.h", ":MPPHandLandmarker.h", ":MPPHandLandmarkerOptions.h", ":MPPHandLandmarkerResult.h", @@ -204,6 +213,7 @@ apple_static_xcframework( "//mediapipe/tasks/ios/vision/gesture_recognizer:MPPGestureRecognizer", "//mediapipe/tasks/ios/vision/hand_landmarker:MPPHandLandmarker", "//mediapipe/tasks/ios/vision/image_classifier:MPPImageClassifier", + "//mediapipe/tasks/ios/vision/image_segmenter:MPPImageSegmenter", "//mediapipe/tasks/ios/vision/object_detector:MPPObjectDetector", ], ) diff --git a/mediapipe/tasks/ios/components/containers/BUILD b/mediapipe/tasks/ios/components/containers/BUILD index 0477d288a..4effb74b2 100644 --- a/mediapipe/tasks/ios/components/containers/BUILD +++ b/mediapipe/tasks/ios/components/containers/BUILD @@ -66,3 +66,10 @@ objc_library( srcs = ["sources/MPPConnection.m"], hdrs = ["sources/MPPConnection.h"], ) + +objc_library( + name = "MPPRegionOfInterest", + 
srcs = ["sources/MPPRegionOfInterest.m"], + hdrs = ["sources/MPPRegionOfInterest.h"], + deps = [":MPPDetection"], +) diff --git a/mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h b/mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h new file mode 100644 index 000000000..67b86fbc7 --- /dev/null +++ b/mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h @@ -0,0 +1,73 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import "mediapipe/tasks/ios/components/containers/sources/MPPDetection.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * The Region-Of-Interest (ROI) to interact with in an interactive segmentation inference. + * + * An instance can contain erither contain a single normalized point pointing to the object that the + * user wants to segment or array of normalized key points that make up scribbles over the object + * that the user wants to segment.*/ +NS_SWIFT_NAME(RegionOfInterest) +@interface MPPRegionOfInterest : NSObject + +/** + * The normalized point pointing to the object that the user wants to segment. `nil` if `scribbles` + * is not `nil`. + */ +@property(nonatomic, readonly, nullable) MPPNormalizedKeypoint *keypoint; + +/** + * The array of normalized key points that make up scribbles over the object that the user wants to + * segment. `nil` if `keypoint` is not `nil`. + */ +@property(nonatomic, readonly, nullable) NSArray *scribbles; + +/** + * Initializes a new `RegionOfInterest` that represents a single normalized point pointing to the + * object that the user wants to segment. + * + * @param normalizedKeypoint The normalized key point pointing to the object that the user wants to + * segment. + * + * @return An instance of `RegionOfInterest` initialized with the given normalized key point + * pointing to the object that the user wants to segment. + */ +- (instancetype)initWithNormalizedKeyPoint:(MPPNormalizedKeypoint *)normalizedKeypoint + NS_DESIGNATED_INITIALIZER; + +/** + * Initializes a new `RegionOfInterest` that represents scribbles over the object that the user + * wants to segment. + * + * @param scribbles The array of normalized key points that make up scribbles over the object that + * the user wants to segment. + * + * @return An instance of `RegionOfInterest` initialized with the given normalized key points that + * make up scribbles over the object that the user wants to segment. + */ +- (instancetype)initWitScribbles:(NSArray *)scribbles + NS_DESIGNATED_INITIALIZER; + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.m b/mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.m new file mode 100644 index 000000000..0dfa0e5b4 --- /dev/null +++ b/mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.m @@ -0,0 +1,35 @@ +// Copyright 2023 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h" + +@implementation MPPRegionOfInterest + +- (instancetype)initWithNormalizedKeyPoint:(MPPNormalizedKeypoint *)normalizedKeypoint { + self = [super init]; + if (self) { + _keypoint = normalizedKeypoint; + } + return self; +} + +- (instancetype)initWitScribbles:(NSArray *)scribbles { + self = [super init]; + if (self) { + _scribbles = scribbles; + } + return self; +} + +@end diff --git a/mediapipe/tasks/ios/components/containers/utils/BUILD b/mediapipe/tasks/ios/components/containers/utils/BUILD index 5f0311f51..88366df0b 100644 --- a/mediapipe/tasks/ios/components/containers/utils/BUILD +++ b/mediapipe/tasks/ios/components/containers/utils/BUILD @@ -84,3 +84,16 @@ objc_library( "//mediapipe/tasks/ios/components/containers:MPPLandmark", ], ) + +objc_library( + name = "MPPRegionOfInterestHelpers", + srcs = ["sources/MPPRegionOfInterest+Helpers.mm"], + hdrs = ["sources/MPPRegionOfInterest+Helpers.h"], + deps = [ + "//mediapipe/tasks/ios/common:MPPCommon", + "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", + "//mediapipe/tasks/ios/components/containers:MPPRegionOfInterest", + "//mediapipe/util:color_cc_proto", + "//mediapipe/util:render_data_cc_proto", + ], +) diff --git a/mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.h b/mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.h new file mode 100644 index 000000000..026d4f2e9 --- /dev/null +++ b/mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.h @@ -0,0 +1,35 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/render_data.pb.h" + +#import "mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MPPRegionOfInterest (Helpers) + +/** + * Creates a `RenderData` from the region of interest. + * + * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no + * error will be saved. + * + * @return A `RenderData1 proto created from the region of interest. 
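 *
 * For a keypoint-based region of interest the returned proto looks roughly like
 * the following (see MPPRegionOfInterest+Helpers.mm; the x/y values come from
 * `keypoint.location`):
 *
 *   render_annotations {
 *     color { r: 255 }
 *     point { normalized: true x: 0.5 y: 0.5 }
 *   }
 *
 * Scribble input instead produces a single annotation whose `scribble` field
 * holds one normalized point per key point.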
+ */ +- (std::optional)getRenderDataWithError:(NSError **)error; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.mm b/mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.mm new file mode 100644 index 000000000..3df127439 --- /dev/null +++ b/mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.mm @@ -0,0 +1,57 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/common/sources/MPPCommon.h" +#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h" +#import "mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.h" +#include "mediapipe/util/color.pb.h" + +namespace { +using RenderData = ::mediapipe::RenderData; +using RenderAnnotation = ::mediapipe::RenderAnnotation; + +} // namespace + +@implementation MPPRegionOfInterest (Helpers) + +- (std::optional)getRenderDataWithError:(NSError**)error { + RenderData result; + if (self.keypoint) { + auto* annotation = result.add_render_annotations(); + annotation->mutable_color()->set_r(255); + auto* point = annotation->mutable_point(); + point->set_normalized(true); + point->set_x(self.keypoint.location.x); + point->set_y(self.keypoint.location.y); + return result; + } else if (self.scribbles) { + auto* annotation = result.add_render_annotations(); + annotation->mutable_color()->set_r(255); + for (MPPNormalizedKeypoint* keypoint in self.scribbles) { + auto* point = annotation->mutable_scribble()->add_point(); + point->set_normalized(true); + point->set_x(keypoint.location.x); + point->set_y(keypoint.location.y); + } + return result; + } + + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInvalidArgumentError + description:@"RegionOfInterest does not include a valid user interaction."]; + + return std::nullopt; +} + +@end diff --git a/mediapipe/tasks/ios/core/sources/MPPBaseOptions.h b/mediapipe/tasks/ios/core/sources/MPPBaseOptions.h index bef6bb9ee..eecb5e14e 100644 --- a/mediapipe/tasks/ios/core/sources/MPPBaseOptions.h +++ b/mediapipe/tasks/ios/core/sources/MPPBaseOptions.h @@ -14,6 +14,15 @@ #import +/** + * The delegate to run MediaPipe. If the delegate is not set, the default + * delegate CPU is used. + */ +typedef NS_ENUM(NSUInteger, MPPDelegate) { + MPPDelegateCPU, + MPPDelegateGPU, +} NS_SWIFT_NAME(Delegate); + NS_ASSUME_NONNULL_BEGIN /** @@ -26,6 +35,9 @@ NS_SWIFT_NAME(BaseOptions) /** The path to the model asset to open and mmap in memory. */ @property(nonatomic, copy) NSString *modelAssetPath; +/** Overrides the default backend to use for the provided model. 
*/ +@property(nonatomic) MPPDelegate delegate; + @end NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/core/sources/MPPBaseOptions.m b/mediapipe/tasks/ios/core/sources/MPPBaseOptions.m index a43119ad8..fac1b94c0 100644 --- a/mediapipe/tasks/ios/core/sources/MPPBaseOptions.m +++ b/mediapipe/tasks/ios/core/sources/MPPBaseOptions.m @@ -20,6 +20,7 @@ self = [super init]; if (self) { self.modelAssetPath = [[NSString alloc] init]; + self.delegate = MPPDelegateCPU; } return self; } @@ -28,6 +29,7 @@ MPPBaseOptions *baseOptions = [[MPPBaseOptions alloc] init]; baseOptions.modelAssetPath = self.modelAssetPath; + baseOptions.delegate = self.delegate; return baseOptions; } diff --git a/mediapipe/tasks/ios/core/sources/MPPTaskRunner.h b/mediapipe/tasks/ios/core/sources/MPPTaskRunner.h index 41515571a..247dce875 100644 --- a/mediapipe/tasks/ios/core/sources/MPPTaskRunner.h +++ b/mediapipe/tasks/ios/core/sources/MPPTaskRunner.h @@ -36,6 +36,12 @@ NS_ASSUME_NONNULL_BEGIN */ @interface MPPTaskRunner : NSObject +/** + * The canonicalized `CalculatorGraphConfig` of the underlying graph managed by the C++ task + * runner. + */ +@property(nonatomic, readonly) const mediapipe::CalculatorGraphConfig &graphConfig; + /** * Initializes a new `MPPTaskRunner` with the MediaPipe calculator configuration proto and an * optional C++ packets callback. diff --git a/mediapipe/tasks/ios/core/sources/MPPTaskRunner.mm b/mediapipe/tasks/ios/core/sources/MPPTaskRunner.mm index 0813760c2..3e9fb61ea 100644 --- a/mediapipe/tasks/ios/core/sources/MPPTaskRunner.mm +++ b/mediapipe/tasks/ios/core/sources/MPPTaskRunner.mm @@ -33,6 +33,10 @@ using TaskRunnerCpp = ::mediapipe::tasks::core::TaskRunner; @implementation MPPTaskRunner +- (const CalculatorGraphConfig &)graphConfig { + return _cppTaskRunner->GetGraphConfig(); +} + - (instancetype)initWithCalculatorGraphConfig:(CalculatorGraphConfig)graphConfig packetsCallback:(PacketsCallback)packetsCallback error:(NSError **)error { diff --git a/mediapipe/tasks/ios/core/utils/BUILD b/mediapipe/tasks/ios/core/utils/BUILD index 3cd8bf231..d5a166eb3 100644 --- a/mediapipe/tasks/ios/core/utils/BUILD +++ b/mediapipe/tasks/ios/core/utils/BUILD @@ -21,6 +21,7 @@ objc_library( srcs = ["sources/MPPBaseOptions+Helpers.mm"], hdrs = ["sources/MPPBaseOptions+Helpers.h"], deps = [ + "//mediapipe/calculators/tensor:inference_calculator_cc_proto", "//mediapipe/tasks/cc/core/proto:acceleration_cc_proto", "//mediapipe/tasks/cc/core/proto:base_options_cc_proto", "//mediapipe/tasks/cc/core/proto:external_file_cc_proto", diff --git a/mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.mm b/mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.mm index 73bcac49d..9b2307c7e 100644 --- a/mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.mm +++ b/mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.mm @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
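// The delegate plumbing added below is intentionally small: when the
// Objective-C `delegate` property is `MPPDelegateGPU`, the generated
// BaseOptions proto simply gains a GPU acceleration entry, roughly
//
//   acceleration {
//     gpu {}
//   }
//
// and the actual delegate selection is left to the inference calculator.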
+#include "mediapipe/calculators/tensor/inference_calculator.pb.h" #include "mediapipe/tasks/cc/core/proto/acceleration.pb.h" #include "mediapipe/tasks/cc/core/proto/external_file.pb.h" #import "mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.h" namespace { using BaseOptionsProto = ::mediapipe::tasks::core::proto::BaseOptions; +using InferenceCalculatorOptionsProto = ::mediapipe::InferenceCalculatorOptions; } @implementation MPPBaseOptions (Helpers) @@ -33,6 +35,11 @@ using BaseOptionsProto = ::mediapipe::tasks::core::proto::BaseOptions; if (self.modelAssetPath) { baseOptionsProto->mutable_model_asset()->set_file_name(self.modelAssetPath.UTF8String); } + + if (self.delegate == MPPDelegateGPU) { + baseOptionsProto->mutable_acceleration()->mutable_gpu()->MergeFrom( + InferenceCalculatorOptionsProto::Delegate::Gpu()); + } } @end diff --git a/mediapipe/tasks/ios/test/text/language_detector/BUILD b/mediapipe/tasks/ios/test/text/language_detector/BUILD new file mode 100644 index 000000000..379c1b056 --- /dev/null +++ b/mediapipe/tasks/ios/test/text/language_detector/BUILD @@ -0,0 +1,57 @@ +load( + "//mediapipe/framework/tool:ios.bzl", + "MPP_TASK_MINIMUM_OS_VERSION", +) +load( + "@build_bazel_rules_apple//apple:ios.bzl", + "ios_unit_test", +) +load( + "@org_tensorflow//tensorflow/lite:special_rules.bzl", + "tflite_ios_lab_runner", +) + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +# Default tags for filtering iOS targets. Targets are restricted to Apple platforms. +TFL_DEFAULT_TAGS = [ + "apple", +] + +# Following sanitizer tests are not supported by iOS test targets. +TFL_DISABLED_SANITIZER_TAGS = [ + "noasan", + "nomsan", + "notsan", +] + +objc_library( + name = "MPPLanguageDetectorObjcTestLibrary", + testonly = 1, + srcs = ["MPPLanguageDetectorTests.mm"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + data = [ + "//mediapipe/tasks/testdata/text:language_detector", + ], + deps = [ + "//mediapipe/tasks/ios/common:MPPCommon", + "//mediapipe/tasks/ios/test/utils:MPPFileInfo", + "//mediapipe/tasks/ios/text/language_detector:MPPLanguageDetector", + ], +) + +ios_unit_test( + name = "MPPLanguageDetectorObjcTest", + minimum_os_version = MPP_TASK_MINIMUM_OS_VERSION, + runner = tflite_ios_lab_runner("IOS_LATEST"), + tags = TFL_DEFAULT_TAGS + TFL_DISABLED_SANITIZER_TAGS, + deps = [ + ":MPPLanguageDetectorObjcTestLibrary", + ], +) diff --git a/mediapipe/tasks/ios/test/text/language_detector/MPPLanguageDetectorTests.mm b/mediapipe/tasks/ios/test/text/language_detector/MPPLanguageDetectorTests.mm new file mode 100644 index 000000000..28d2ea5c0 --- /dev/null +++ b/mediapipe/tasks/ios/test/text/language_detector/MPPLanguageDetectorTests.mm @@ -0,0 +1,162 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import + +#import "mediapipe/tasks/ios/common/sources/MPPCommon.h" +#import "mediapipe/tasks/ios/test/utils/sources/MPPFileInfo.h" +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.h" + +static MPPFileInfo *const kLanguageDetectorModelFileInfo = + [[MPPFileInfo alloc] initWithName:@"language_detector" type:@"tflite"]; + +static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks"; + +#define AssertEqualErrors(error, expectedError) \ + XCTAssertNotNil(error); \ + XCTAssertEqualObjects(error.domain, expectedError.domain); \ + XCTAssertEqual(error.code, expectedError.code); \ + XCTAssertEqualObjects(error.localizedDescription, expectedError.localizedDescription) + +@interface MPPLanguageDetectorTests : XCTestCase +@end + +@implementation MPPLanguageDetectorTests + +- (void)testCreateLanguageDetectorFailsWithMissingModelPath { + MPPFileInfo *fileInfo = [[MPPFileInfo alloc] initWithName:@"" type:@""]; + + NSError *error = nil; + MPPLanguageDetector *languageDetector = + [[MPPLanguageDetector alloc] initWithModelPath:fileInfo.path error:&error]; + XCTAssertNil(languageDetector); + + NSError *expectedError = [NSError + errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : + @"INVALID_ARGUMENT: ExternalFile must specify at least one of 'file_content', " + @"'file_name', 'file_pointer_meta' or 'file_descriptor_meta'." + }]; + AssertEqualErrors(error, expectedError); +} + +- (void)testCreateLanguageDetectorFailsWithBothAllowlistAndDenylist { + MPPLanguageDetectorOptions *options = + [self languageDetectorOptionsWithModelFileInfo:kLanguageDetectorModelFileInfo]; + options.categoryAllowlist = @[ @"en" ]; + options.categoryDenylist = @[ @"en" ]; + + [self assertCreateLanguageDetectorWithOptions:options + failsWithExpectedError: + [NSError + errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : + @"INVALID_ARGUMENT: `category_allowlist` and " + @"`category_denylist` are mutually exclusive options." + }]]; +} + +- (void)testCreateLanguageDetectorFailsWithInvalidMaxResults { + MPPLanguageDetectorOptions *options = + [self languageDetectorOptionsWithModelFileInfo:kLanguageDetectorModelFileInfo]; + options.maxResults = 0; + + [self + assertCreateLanguageDetectorWithOptions:options + failsWithExpectedError: + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : + @"INVALID_ARGUMENT: Invalid `max_results` option: " + @"value must be != 0." 
+ }]]; +} + +- (void)testClassifyWithL2CModelSucceeds { + MPPLanguageDetectorOptions *options = + [self languageDetectorOptionsWithModelFileInfo:kLanguageDetectorModelFileInfo]; + + MPPLanguageDetector *languageDetector = [self createLanguageDetectorWithOptionsSucceeds:options]; + NSString *enText = @"To be, or not to be, that is the question"; + NSArray *expectedEnLanguagePredictions = + @[ [[MPPLanguagePrediction alloc] initWithLanguageCode:@"en" probability:0.9998559f] ]; + + [self assertResultsOfDetectLanguageOfText:enText + usingLanguageDetector:languageDetector + approximatelyEqualsExpectedLanguagePredictions:expectedEnLanguagePredictions]; + + NSString *frText = @"Il y a beaucoup de bouches qui parlent et fort peu de têtes qui pensent."; + NSArray *expectedFrLanguagePredictions = + @[ [[MPPLanguagePrediction alloc] initWithLanguageCode:@"fr" probability:0.9997813f] ]; + + [self assertResultsOfDetectLanguageOfText:frText + usingLanguageDetector:languageDetector + approximatelyEqualsExpectedLanguagePredictions:expectedFrLanguagePredictions]; + + NSString *ruText = @"это какой-то английский язык"; + NSArray *expectedRuLanguagePredictions = + @[ [[MPPLanguagePrediction alloc] initWithLanguageCode:@"ru" probability:0.9933616f] ]; + + [self assertResultsOfDetectLanguageOfText:ruText + usingLanguageDetector:languageDetector + approximatelyEqualsExpectedLanguagePredictions:expectedRuLanguagePredictions]; +} + +#pragma mark Assert Segmenter Results +- (void)assertResultsOfDetectLanguageOfText:(NSString *)text + usingLanguageDetector:(MPPLanguageDetector *)languageDetector + approximatelyEqualsExpectedLanguagePredictions: + (NSArray *)expectedLanguagePredictions { + MPPLanguageDetectorResult *result = [languageDetector detectText:text error:nil]; + XCTAssertNotNil(result); + XCTAssertEqualWithAccuracy(result.languagePredictions[0].probability, + expectedLanguagePredictions[0].probability, 1e-3); + XCTAssertEqualObjects(result.languagePredictions[0].languageCode, + expectedLanguagePredictions[0].languageCode); +} + +#pragma mark Language Detector Initializers + +- (MPPLanguageDetectorOptions *)languageDetectorOptionsWithModelFileInfo:(MPPFileInfo *)fileInfo { + MPPLanguageDetectorOptions *options = [[MPPLanguageDetectorOptions alloc] init]; + options.baseOptions.modelAssetPath = fileInfo.path; + return options; +} + +- (MPPLanguageDetector *)createLanguageDetectorWithOptionsSucceeds: + (MPPLanguageDetectorOptions *)options { + NSError *error; + MPPLanguageDetector *languageDetector = [[MPPLanguageDetector alloc] initWithOptions:options + error:&error]; + XCTAssertNotNil(languageDetector); + XCTAssertNil(error); + + return languageDetector; +} + +- (void)assertCreateLanguageDetectorWithOptions:(MPPLanguageDetectorOptions *)options + failsWithExpectedError:(NSError *)expectedError { + NSError *error = nil; + MPPLanguageDetector *languageDetector = [[MPPLanguageDetector alloc] initWithOptions:options + error:&error]; + XCTAssertNil(languageDetector); + AssertEqualErrors(error, expectedError); +} + +@end diff --git a/mediapipe/tasks/ios/test/vision/image_segmenter/MPPImageSegmenterTests.mm b/mediapipe/tasks/ios/test/vision/image_segmenter/MPPImageSegmenterTests.mm index 4954555e5..6d11edaa7 100644 --- a/mediapipe/tasks/ios/test/vision/image_segmenter/MPPImageSegmenterTests.mm +++ b/mediapipe/tasks/ios/test/vision/image_segmenter/MPPImageSegmenterTests.mm @@ -507,6 +507,23 @@ double softIOU(const float *mask1, const float *mask2, size_t size) { }]; } +#pragma mark GetLabelsTest + +- 
(void)testGetLabelsSucceeds { + MPPImageSegmenterOptions *options = + [self imageSegmenterOptionsWithModelFileInfo:kImageSegmenterModelFileInfo]; + + MPPImageSegmenter *imageSegmenter = [self createImageSegmenterWithOptionsSucceeds:options]; + + NSArray *expectedLabels = @[ + @"background", @"aeroplane", @"bicycle", @"bird", @"boat", @"bottle", @"bus", + @"car", @"cat", @"chair", @"cow", @"dining table", @"dog", @"horse", + @"motorbike", @"person", @"potted plant", @"sheep", @"sofa", @"train", @"tv" + ]; + + XCTAssertEqualObjects(imageSegmenter.labels, expectedLabels); +} + #pragma mark - Image Segmenter Initializers - (MPPImageSegmenterOptions *)imageSegmenterOptionsWithModelFileInfo:(MPPFileInfo *)fileInfo { diff --git a/mediapipe/tasks/ios/test/vision/pose_landmarker/BUILD b/mediapipe/tasks/ios/test/vision/pose_landmarker/BUILD new file mode 100644 index 000000000..64bbcd7d1 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/pose_landmarker/BUILD @@ -0,0 +1,64 @@ +load( + "//mediapipe/framework/tool:ios.bzl", + "MPP_TASK_MINIMUM_OS_VERSION", +) +load( + "@org_tensorflow//tensorflow/lite:special_rules.bzl", + "tflite_ios_lab_runner", +) +load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test") + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +# Default tags for filtering iOS targets. Targets are restricted to Apple platforms. +TFL_DEFAULT_TAGS = [ + "apple", +] + +# Following sanitizer tests are not supported by iOS test targets. +TFL_DISABLED_SANITIZER_TAGS = [ + "noasan", + "nomsan", + "notsan", +] + +objc_library( + name = "MPPPoseLandmarkerObjcTestLibrary", + testonly = 1, + srcs = ["MPPPoseLandmarkerTests.mm"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + data = [ + "//mediapipe/tasks/testdata/vision:pose_landmarker.task", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_protos", + ], + deps = [ + "//mediapipe/tasks/ios/common:MPPCommon", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/test/vision/pose_landmarker/utils:MPPPoseLandmarkerResultProtobufHelpers", + "//mediapipe/tasks/ios/test/vision/utils:MPPImageTestUtils", + "//mediapipe/tasks/ios/test/vision/utils:MPPMaskTestUtils", + "//mediapipe/tasks/ios/vision/pose_landmarker:MPPPoseLandmarker", + ] + select({ + "//third_party:opencv_ios_sim_arm64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//third_party:opencv_ios_arm64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//third_party:opencv_ios_x86_64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//conditions:default": ["@ios_opencv//:OpencvFramework"], + }), +) + +ios_unit_test( + name = "MPPPoseLandmarkerObjcTest", + minimum_os_version = MPP_TASK_MINIMUM_OS_VERSION, + runner = tflite_ios_lab_runner("IOS_LATEST"), + tags = TFL_DEFAULT_TAGS + TFL_DISABLED_SANITIZER_TAGS, + deps = [ + ":MPPPoseLandmarkerObjcTestLibrary", + ], +) diff --git a/mediapipe/tasks/ios/test/vision/pose_landmarker/MPPPoseLandmarkerTests.mm b/mediapipe/tasks/ios/test/vision/pose_landmarker/MPPPoseLandmarkerTests.mm new file mode 100644 index 000000000..e734b41b9 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/pose_landmarker/MPPPoseLandmarkerTests.mm @@ -0,0 +1,481 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/common/sources/MPPCommon.h" +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.h" +#import "mediapipe/tasks/ios/test/vision/utils/sources/MPPImage+TestUtils.h" +#import "mediapipe/tasks/ios/test/vision/utils/sources/MPPMask+TestUtils.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.h" + +static NSString *const kPbFileExtension = @"pbtxt"; + +static MPPFileInfo *const kPoseLandmarkerBundleAssetFileInfo = + [[MPPFileInfo alloc] initWithName:@"pose_landmarker" type:@"task"]; + +static MPPFileInfo *const kPoseImageFileInfo = [[MPPFileInfo alloc] initWithName:@"pose" + type:@"jpg"]; +static MPPFileInfo *const kNoPoseImageFileInfo = [[MPPFileInfo alloc] initWithName:@"burger" + type:@"jpg"]; + +static MPPFileInfo *const kExpectedPoseLandmarksFileInfo = + [[MPPFileInfo alloc] initWithName:@"pose_landmarks" type:kPbFileExtension]; + +static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks"; +static const float kLandmarksErrorTolerance = 0.03f; +static const float kVisibilityTolerance = 0.9f; +static const float kPresenceTolerance = 0.9f; + +static NSString *const kLiveStreamTestsDictPoseLandmarkerKey = @"pose_landmarker"; +static NSString *const kLiveStreamTestsDictExpectationKey = @"expectation"; + +#define AssertEqualErrors(error, expectedError) \ + XCTAssertNotNil(error); \ + XCTAssertEqualObjects(error.domain, expectedError.domain); \ + XCTAssertEqual(error.code, expectedError.code); \ + XCTAssertEqualObjects(error.localizedDescription, expectedError.localizedDescription) + +#define AssertApproximatelyEqualLandmarks(landmark, expectedLandmark, poseIndex, landmarkIndex) \ + XCTAssertEqualWithAccuracy(landmark.x, expectedLandmark.x, kLandmarksErrorTolerance, \ + @"pose index = %d landmark index j = %d", poseIndex, landmarkIndex); \ + XCTAssertEqualWithAccuracy(landmark.y, expectedLandmark.y, kLandmarksErrorTolerance, \ + @"pose index = %d landmark index j = %d", poseIndex, landmarkIndex); + +@interface MPPPoseLandmarkerTests : XCTestCase { + NSDictionary *_liveStreamSucceedsTestDict; + NSDictionary *_outOfOrderTimestampTestDict; +} +@end + +@implementation MPPPoseLandmarkerTests + +#pragma mark General Tests + +- (void)testDetectWithModelPathSucceeds { + MPPPoseLandmarker *poseLandmarker = + [[MPPPoseLandmarker alloc] initWithModelPath:kPoseLandmarkerBundleAssetFileInfo.path + error:nil]; + XCTAssertNotNil(poseLandmarker); + + [self assertResultsOfDetectInImageWithFileInfo:kPoseImageFileInfo + usingPoseLandmarker:poseLandmarker + approximatelyEqualsPoseLandmarkerResult:[MPPPoseLandmarkerTests + expectedPoseLandmarkerResult]]; +} + +- (void)testDetectWithOptionsSucceeds { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + [self assertResultsOfDetectInImageWithFileInfo:kPoseImageFileInfo + 
usingPoseLandmarker:poseLandmarker + approximatelyEqualsPoseLandmarkerResult:[MPPPoseLandmarkerTests + expectedPoseLandmarkerResult]]; +} + +- (void)testDetectWithEmptyResultsSucceeds { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + [self + assertResultsOfDetectInImageWithFileInfo:kNoPoseImageFileInfo + usingPoseLandmarker:poseLandmarker + approximatelyEqualsPoseLandmarkerResult:[MPPPoseLandmarkerTests emptyPoseLandmarkerResult]]; +} + +- (void)testCreatePoseLandmarkerFailsWithDelegateInNonLiveStreamMode { + MPPRunningMode runningModesToTest[] = {MPPRunningModeImage, MPPRunningModeVideo}; + for (int i = 0; i < sizeof(runningModesToTest) / sizeof(runningModesToTest[0]); i++) { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + + options.runningMode = runningModesToTest[i]; + options.poseLandmarkerLiveStreamDelegate = self; + + [self + assertCreatePoseLandmarkerWithOptions:options + failsWithExpectedError: + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : + @"The vision task is in image or video mode. The " + @"delegate must not be set in the task's options." + }]]; + } +} + +#pragma mark Running Mode Tests + +- (void)testCreatePoseLandmarkerFailsWithMissingDelegateInLiveStreamMode { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + + options.runningMode = MPPRunningModeLiveStream; + + [self assertCreatePoseLandmarkerWithOptions:options + failsWithExpectedError: + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : + @"The vision task is in live stream mode. An " + @"object must be set as the delegate of the task " + @"in its options to ensure asynchronous delivery " + @"of results." + }]]; +} + +- (void)testDetectFailsWithCallingWrongApiInImageMode { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + MPPImage *image = [MPPImage imageWithFileInfo:kPoseImageFileInfo]; + + NSError *liveStreamApiCallError; + XCTAssertFalse([poseLandmarker detectAsyncImage:image + timestampInMilliseconds:0 + error:&liveStreamApiCallError]); + + NSError *expectedLiveStreamApiCallError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : @"The vision task is not initialized with live " + @"stream mode. Current Running Mode: Image" + }]; + + AssertEqualErrors(liveStreamApiCallError, expectedLiveStreamApiCallError); + + NSError *videoApiCallError; + XCTAssertFalse([poseLandmarker detectVideoFrame:image + timestampInMilliseconds:0 + error:&videoApiCallError]); + + NSError *expectedVideoApiCallError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : @"The vision task is not initialized with " + @"video mode. 
Current Running Mode: Image" + }]; + AssertEqualErrors(videoApiCallError, expectedVideoApiCallError); +} + +- (void)testDetectFailsWithCallingWrongApiInVideoMode { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + options.runningMode = MPPRunningModeVideo; + + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + MPPImage *image = [MPPImage imageWithFileInfo:kPoseImageFileInfo]; + + NSError *liveStreamApiCallError; + XCTAssertFalse([poseLandmarker detectAsyncImage:image + timestampInMilliseconds:0 + error:&liveStreamApiCallError]); + + NSError *expectedLiveStreamApiCallError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : @"The vision task is not initialized with live " + @"stream mode. Current Running Mode: Video" + }]; + + AssertEqualErrors(liveStreamApiCallError, expectedLiveStreamApiCallError); + + NSError *imageApiCallError; + XCTAssertFalse([poseLandmarker detectImage:image error:&imageApiCallError]); + + NSError *expectedImageApiCallError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : @"The vision task is not initialized with " + @"image mode. Current Running Mode: Video" + }]; + AssertEqualErrors(imageApiCallError, expectedImageApiCallError); +} + +- (void)testDetectFailsWithCallingWrongApiInLiveStreamMode { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + options.runningMode = MPPRunningModeLiveStream; + options.poseLandmarkerLiveStreamDelegate = self; + + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + MPPImage *image = [MPPImage imageWithFileInfo:kPoseImageFileInfo]; + + NSError *imageApiCallError; + XCTAssertFalse([poseLandmarker detectImage:image error:&imageApiCallError]); + + NSError *expectedImageApiCallError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : @"The vision task is not initialized with " + @"image mode. Current Running Mode: Live Stream" + }]; + AssertEqualErrors(imageApiCallError, expectedImageApiCallError); + + NSError *videoApiCallError; + XCTAssertFalse([poseLandmarker detectVideoFrame:image + timestampInMilliseconds:0 + error:&videoApiCallError]); + + NSError *expectedVideoApiCallError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : @"The vision task is not initialized with " + @"video mode. 
Current Running Mode: Live Stream" + }]; + AssertEqualErrors(videoApiCallError, expectedVideoApiCallError); +} + +- (void)testDetectWithVideoModeSucceeds { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + options.runningMode = MPPRunningModeVideo; + + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + MPPImage *image = [MPPImage imageWithFileInfo:kPoseImageFileInfo]; + + for (int i = 0; i < 3; i++) { + MPPPoseLandmarkerResult *poseLandmarkerResult = [poseLandmarker detectVideoFrame:image + timestampInMilliseconds:i + error:nil]; + [self assertPoseLandmarkerResult:poseLandmarkerResult + isApproximatelyEqualToExpectedResult:[MPPPoseLandmarkerTests expectedPoseLandmarkerResult]]; + } +} + +- (void)testDetectWithOutOfOrderTimestampsAndLiveStreamModeFails { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + options.runningMode = MPPRunningModeLiveStream; + options.poseLandmarkerLiveStreamDelegate = self; + + XCTestExpectation *expectation = [[XCTestExpectation alloc] + initWithDescription:@"detectWiththOutOfOrderTimestampsAndLiveStream"]; + + expectation.expectedFulfillmentCount = 1; + + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + _outOfOrderTimestampTestDict = @{ + kLiveStreamTestsDictPoseLandmarkerKey : poseLandmarker, + kLiveStreamTestsDictExpectationKey : expectation + }; + + MPPImage *image = [MPPImage imageWithFileInfo:kPoseImageFileInfo]; + + XCTAssertTrue([poseLandmarker detectAsyncImage:image timestampInMilliseconds:1 error:nil]); + + NSError *error; + XCTAssertFalse([poseLandmarker detectAsyncImage:image timestampInMilliseconds:0 error:&error]); + + NSError *expectedError = + [NSError errorWithDomain:kExpectedErrorDomain + code:MPPTasksErrorCodeInvalidArgumentError + userInfo:@{ + NSLocalizedDescriptionKey : + @"INVALID_ARGUMENT: Input timestamp must be monotonically increasing." + }]; + AssertEqualErrors(error, expectedError); + + NSTimeInterval timeout = 0.5f; + [self waitForExpectations:@[ expectation ] timeout:timeout]; +} + +- (void)testDetectWithLiveStreamModeSucceeds { + MPPPoseLandmarkerOptions *options = + [self poseLandmarkerOptionsWithModelFileInfo:kPoseLandmarkerBundleAssetFileInfo]; + options.runningMode = MPPRunningModeLiveStream; + options.poseLandmarkerLiveStreamDelegate = self; + + NSInteger iterationCount = 100; + + // Because of flow limiting, we cannot ensure that the callback will be invoked `iterationCount` + // times. An normal expectation will fail if expectation.fulfill() is not called + // `expectation.expectedFulfillmentCount` times. If `expectation.isInverted = true`, the test will + // only succeed if expectation is not fulfilled for the specified `expectedFulfillmentCount`. + // Since in our case we cannot predict how many times the expectation is supposed to be fulfilled + // setting, `expectation.expectedFulfillmentCount` = `iterationCount` + 1 and + // `expectation.isInverted = true` ensures that test succeeds if the expectation is fulfilled <= + // `iterationCount` times. 
+ XCTestExpectation *expectation = + [[XCTestExpectation alloc] initWithDescription:@"detectWithLiveStream"]; + + expectation.expectedFulfillmentCount = iterationCount + 1; + expectation.inverted = YES; + + MPPPoseLandmarker *poseLandmarker = [self createPoseLandmarkerWithOptionsSucceeds:options]; + + _liveStreamSucceedsTestDict = @{ + kLiveStreamTestsDictPoseLandmarkerKey : poseLandmarker, + kLiveStreamTestsDictExpectationKey : expectation + }; + + // TODO: Mimic initialization from CMSampleBuffer as live stream mode is most likely to be used + // with the iOS camera. AVCaptureVideoDataOutput sample buffer delegates provide frames of type + // `CMSampleBuffer`. + MPPImage *image = [MPPImage imageWithFileInfo:kPoseImageFileInfo]; + + for (int i = 0; i < iterationCount; i++) { + XCTAssertTrue([poseLandmarker detectAsyncImage:image timestampInMilliseconds:i error:nil]); + } + + NSTimeInterval timeout = 0.5f; + [self waitForExpectations:@[ expectation ] timeout:timeout]; +} + +- (void)poseLandmarker:(MPPPoseLandmarker *)poseLandmarker + didFinishDetectionWithResult:(MPPPoseLandmarkerResult *)poseLandmarkerResult + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError *)error { + [self assertPoseLandmarkerResult:poseLandmarkerResult + isApproximatelyEqualToExpectedResult:[MPPPoseLandmarkerTests expectedPoseLandmarkerResult]]; + + if (poseLandmarker == _outOfOrderTimestampTestDict[kLiveStreamTestsDictPoseLandmarkerKey]) { + [_outOfOrderTimestampTestDict[kLiveStreamTestsDictExpectationKey] fulfill]; + } else if (poseLandmarker == _liveStreamSucceedsTestDict[kLiveStreamTestsDictPoseLandmarkerKey]) { + [_liveStreamSucceedsTestDict[kLiveStreamTestsDictExpectationKey] fulfill]; + } +} + +#pragma mark Pose Landmarker Initializers + +- (MPPPoseLandmarkerOptions *)poseLandmarkerOptionsWithModelFileInfo:(MPPFileInfo *)modelFileInfo { + MPPPoseLandmarkerOptions *poseLandmarkerOptions = [[MPPPoseLandmarkerOptions alloc] init]; + poseLandmarkerOptions.baseOptions.modelAssetPath = modelFileInfo.path; + + return poseLandmarkerOptions; +} + +- (MPPPoseLandmarker *)createPoseLandmarkerWithOptionsSucceeds: + (MPPPoseLandmarkerOptions *)poseLandmarkerOptions { + NSError *error; + MPPPoseLandmarker *poseLandmarker = + [[MPPPoseLandmarker alloc] initWithOptions:poseLandmarkerOptions error:&error]; + XCTAssertNotNil(poseLandmarker); + XCTAssertNil(error); + + return poseLandmarker; +} + +- (void)assertCreatePoseLandmarkerWithOptions:(MPPPoseLandmarkerOptions *)poseLandmarkerOptions + failsWithExpectedError:(NSError *)expectedError { + NSError *error = nil; + MPPPoseLandmarker *poseLandmarker = + [[MPPPoseLandmarker alloc] initWithOptions:poseLandmarkerOptions error:&error]; + + XCTAssertNil(poseLandmarker); + AssertEqualErrors(error, expectedError); +} + +#pragma mark Results + ++ (MPPPoseLandmarkerResult *)emptyPoseLandmarkerResult { + return [[MPPPoseLandmarkerResult alloc] initWithLandmarks:@[] + worldLandmarks:@[] + segmentationMasks:@[] + timestampInMilliseconds:0]; +} + ++ (MPPPoseLandmarkerResult *)expectedPoseLandmarkerResult { + return [MPPPoseLandmarkerResult + poseLandmarkerResultFromProtobufFileWithName:kExpectedPoseLandmarksFileInfo.path + shouldRemoveZPosition:YES]; +} + +- (void)assertResultsOfDetectInImageWithFileInfo:(MPPFileInfo *)fileInfo + usingPoseLandmarker:(MPPPoseLandmarker *)poseLandmarker + approximatelyEqualsPoseLandmarkerResult: + (MPPPoseLandmarkerResult *)expectedPoseLandmarkerResult { + MPPPoseLandmarkerResult *poseLandmarkerResult = [self 
detectImageWithFileInfo:fileInfo + usingPoseLandmarker:poseLandmarker]; + [self assertPoseLandmarkerResult:poseLandmarkerResult + isApproximatelyEqualToExpectedResult:expectedPoseLandmarkerResult]; +} + +- (MPPPoseLandmarkerResult *)detectImageWithFileInfo:(MPPFileInfo *)imageFileInfo + usingPoseLandmarker:(MPPPoseLandmarker *)poseLandmarker { + MPPImage *image = [MPPImage imageWithFileInfo:imageFileInfo]; + + MPPPoseLandmarkerResult *poseLandmarkerResult = [poseLandmarker detectImage:image error:nil]; + XCTAssertNotNil(poseLandmarkerResult); + + return poseLandmarkerResult; +} + +- (void)assertPoseLandmarkerResult:(MPPPoseLandmarkerResult *)poseLandmarkerResult + isApproximatelyEqualToExpectedResult:(MPPPoseLandmarkerResult *)expectedPoseLandmarkerResult { + // TODO: Add additional tests for auxiliary, world landmarks and segmentation masks. + // Expects to have the same number of poses detected. + [self assertMultiPoseLandmarks:poseLandmarkerResult.landmarks + areApproximatelyEqualToExpectedMultiPoseLandmarks:expectedPoseLandmarkerResult.landmarks]; + + [self assertLandmarksAreVisibleAndPresentInPoseLandmarkerResult:poseLandmarkerResult]; +} + +- (void)assertMultiPoseLandmarks:(NSArray<NSArray<MPPNormalizedLandmark *> *> *)multiPoseLandmarks + areApproximatelyEqualToExpectedMultiPoseLandmarks: + (NSArray<NSArray<MPPNormalizedLandmark *> *> *)expectedMultiPoseLandmarks { + XCTAssertEqual(multiPoseLandmarks.count, expectedMultiPoseLandmarks.count); + + if (multiPoseLandmarks.count == 0) { + return; + } + + NSArray<MPPNormalizedLandmark *> *topPoseLandmarks = multiPoseLandmarks[0]; + NSArray<MPPNormalizedLandmark *> *expectedTopPoseLandmarks = expectedMultiPoseLandmarks[0]; + + XCTAssertEqual(topPoseLandmarks.count, expectedTopPoseLandmarks.count); + for (int i = 0; i < expectedTopPoseLandmarks.count; i++) { + MPPNormalizedLandmark *landmark = topPoseLandmarks[i]; + XCTAssertNotNil(landmark); + AssertApproximatelyEqualLandmarks(landmark, expectedTopPoseLandmarks[i], 0, i); + } +} + +- (void)assertLandmarksAreVisibleAndPresentInPoseLandmarkerResult: + (MPPPoseLandmarkerResult *)poseLandmarkerResult { + for (int i = 0; i < poseLandmarkerResult.landmarks.count; i++) { + NSArray<MPPNormalizedLandmark *> *landmarks = poseLandmarkerResult.landmarks[i]; + for (int j = 0; j < landmarks.count; j++) { + MPPNormalizedLandmark *landmark = landmarks[j]; + XCTAssertGreaterThanOrEqual( + landmark.visibility.floatValue, kVisibilityTolerance, + @"multi pose landmark index i = %d landmark index j = %d visibility %f", i, j, + landmark.visibility.floatValue); + XCTAssertGreaterThanOrEqual( + landmark.presence.floatValue, kPresenceTolerance, + @"multi pose landmark index i = %d landmark index j = %d presence %f", i, j, + landmark.presence.floatValue); + } + } +} + +@end diff --git a/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/BUILD b/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/BUILD new file mode 100644 index 000000000..297cca510 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/BUILD @@ -0,0 +1,35 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
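For reference, the per-landmark check performed by the `AssertApproximatelyEqualLandmarks` macro above amounts to a tolerance comparison on the x and y coordinates only (z is stripped from the expected protobuf results). A minimal Objective-C sketch of that check; `LandmarksApproximatelyEqual` is a hypothetical helper used only for illustration, not part of this change:

// Hypothetical helper mirroring the tolerance check used by the test macro above.
// Only x and y are compared; the tests use kLandmarksErrorTolerance = 0.03f.
static BOOL LandmarksApproximatelyEqual(MPPNormalizedLandmark *actual,
                                        MPPNormalizedLandmark *expected,
                                        float tolerance) {
  return fabsf(actual.x - expected.x) <= tolerance && fabsf(actual.y - expected.y) <= tolerance;
}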
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPPoseLandmarkerResultProtobufHelpers", + srcs = ["sources/MPPPoseLandmarkerResult+ProtobufHelpers.mm"], + hdrs = ["sources/MPPPoseLandmarkerResult+ProtobufHelpers.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + deps = [ + "//mediapipe/tasks/cc/components/containers/proto:landmarks_detection_result_cc_proto", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/test/vision/utils:parse_proto_utils", + "//mediapipe/tasks/ios/vision/pose_landmarker:MPPPoseLandmarkerResult", + "//mediapipe/tasks/ios/vision/pose_landmarker/utils:MPPPoseLandmarkerResultHelpers", + ], +) diff --git a/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.h b/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.h new file mode 100644 index 000000000..3db43c41f --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.h @@ -0,0 +1,26 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h" + +NS_ASSUME_NONNULL_BEGIN +@interface MPPPoseLandmarkerResult (ProtobufHelpers) + ++ (MPPPoseLandmarkerResult *)poseLandmarkerResultFromProtobufFileWithName:(NSString *)fileName + shouldRemoveZPosition:(BOOL)removeZPosition; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.mm b/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.mm new file mode 100644 index 000000000..eb88741cc --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.mm @@ -0,0 +1,55 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/test/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+ProtobufHelpers.h" + +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.h" + +#include "mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result.pb.h" +#include "mediapipe/tasks/ios/test/vision/utils/sources/parse_proto_utils.h" + +namespace { +using LandmarksDetectionResultProto = + ::mediapipe::tasks::containers::proto::LandmarksDetectionResult; +using ::mediapipe::tasks::ios::test::vision::utils::get_proto_from_pbtxt; +} // anonymous namespace + +@implementation MPPPoseLandmarkerResult (ProtobufHelpers) + ++ (MPPPoseLandmarkerResult *)poseLandmarkerResultFromProtobufFileWithName:(NSString *)fileName + shouldRemoveZPosition:(BOOL)removeZPosition { + LandmarksDetectionResultProto landmarkDetectionResultProto; + + if (!get_proto_from_pbtxt(fileName.cppString, landmarkDetectionResultProto).ok()) { + return nil; + } + + if (removeZPosition) { + // Remove z position of landmarks, because they are not used in correctness testing. For video + // or live stream mode, the z positions varies a lot during tracking from frame to frame. + for (int i = 0; i < landmarkDetectionResultProto.landmarks().landmark().size(); i++) { + auto &landmark = *landmarkDetectionResultProto.mutable_landmarks()->mutable_landmark(i); + landmark.clear_z(); + } + } + + return [MPPPoseLandmarkerResult + poseLandmarkerResultWithLandmarksProto:{landmarkDetectionResultProto.landmarks()} + worldLandmarksProto:{landmarkDetectionResultProto.world_landmarks()} + segmentationMasks:nullptr + timestampInMilliseconds:0]; +} + +@end diff --git a/mediapipe/tasks/ios/text/language_detector/BUILD b/mediapipe/tasks/ios/text/language_detector/BUILD new file mode 100644 index 000000000..4df278037 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/BUILD @@ -0,0 +1,58 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPLanguageDetectorOptions", + srcs = ["sources/MPPLanguageDetectorOptions.m"], + hdrs = ["sources/MPPLanguageDetectorOptions.h"], + deps = ["//mediapipe/tasks/ios/core:MPPTaskOptions"], +) + +objc_library( + name = "MPPLanguageDetectorResult", + srcs = ["sources/MPPLanguageDetectorResult.m"], + hdrs = ["sources/MPPLanguageDetectorResult.h"], + deps = [ + "//mediapipe/tasks/ios/core:MPPTaskResult", + ], +) + +objc_library( + name = "MPPLanguageDetector", + srcs = ["sources/MPPLanguageDetector.mm"], + hdrs = ["sources/MPPLanguageDetector.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + module_name = "MPPLanguageDetector", + deps = [ + ":MPPLanguageDetectorOptions", + ":MPPLanguageDetectorResult", + "//mediapipe/tasks/cc/text/text_classifier:text_classifier_graph", + "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/core:MPPTaskInfo", + "//mediapipe/tasks/ios/core:MPPTaskOptions", + "//mediapipe/tasks/ios/core:MPPTextPacketCreator", + "//mediapipe/tasks/ios/text/core:MPPTextTaskRunner", + "//mediapipe/tasks/ios/text/language_detector/utils:MPPLanguageDetectorOptionsHelpers", + "//mediapipe/tasks/ios/text/language_detector/utils:MPPLanguageDetectorResultHelpers", + ], +) diff --git a/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.h b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.h new file mode 100644 index 000000000..7213a8e5f --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.h @@ -0,0 +1,88 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.h" +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * @brief Predicts the language of an input text. + * + * This API expects a TFLite model with [TFLite Model + * Metadata](https://www.tensorflow.org/lite/convert/metadata")that contains the mandatory + * (described below) input tensor, output tensor, and the language codes in an AssociatedFile. + * + * Metadata is required for models with int32 input tensors because it contains the input + * process unit for the model's Tokenizer. No metadata is required for models with string + * input tensors. + * + * Input tensor + * - One input tensor (`kTfLiteString`) of shape `[1]` containing the input string. + * + * Output tensor + * - One output tensor (`kTfLiteFloat32`) of shape `[1 x N]` where `N` is the number of languages. 
+ */ +NS_SWIFT_NAME(LanguageDetector) +@interface MPPLanguageDetector : NSObject + +/** + * Creates a new instance of `LanguageDetector` from an absolute path to a TensorFlow Lite + * model file stored locally on the device and the default `LanguageDetectorOptions`. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. + * @param error An optional error parameter populated when there is an error in initializing the + * language detector. + * + * @return A new instance of `LanguageDetector` with the given model path. `nil` if there is an + * error in initializing the language detector. + */ +- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error; + +/** + * Creates a new instance of `LanguageDetector` from the given `LanguageDetectorOptions`. + * + * @param options The options of type `LanguageDetectorOptions` to use for configuring the + * `LanguageDetector`. + * @param error An optional error parameter populated when there is an error in initializing the + * language detector. + * + * @return A new instance of `LanguageDetector` with the given options. `nil` if there is an + * error in initializing the language detector. + */ +- (nullable instancetype)initWithOptions:(MPPLanguageDetectorOptions *)options + error:(NSError **)error NS_DESIGNATED_INITIALIZER; + +/** + * Predicts the language of the input text. + * + * @param text The `NSString` for which language is to be predicted. + * @param error An optional error parameter populated when there is an error in performing + * language prediction on the input text. + * + * @return A `LanguageDetectorResult` object that contains a list of language predictions. + */ +- (nullable MPPLanguageDetectorResult *)detectText:(NSString *)text + error:(NSError **)error NS_SWIFT_NAME(detect(text:)); + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.mm b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.mm new file mode 100644 index 000000000..4ef98e206 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.mm @@ -0,0 +1,95 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
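A rough usage sketch of the convenience initializer declared in the header above; the model path is a placeholder and error handling is elided:

// Sketch only: create a language detector from a local model file.
// "/path/to/language_detector.tflite" is a placeholder, not a file added by this change.
NSError *error = nil;
MPPLanguageDetector *languageDetector =
    [[MPPLanguageDetector alloc] initWithModelPath:@"/path/to/language_detector.tflite"
                                             error:&error];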
+ +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetector.h" + +#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h" +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h" +#import "mediapipe/tasks/ios/core/sources/MPPTextPacketCreator.h" +#import "mediapipe/tasks/ios/text/core/sources/MPPTextTaskRunner.h" +#import "mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.h" +#import "mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.h" + +namespace { +using ::mediapipe::Packet; +using ::mediapipe::tasks::core::PacketMap; +} // namespace + +static NSString *const kClassificationsStreamName = @"classifications_out"; +static NSString *const kClassificationsTag = @"CLASSIFICATIONS"; +static NSString *const kTextInStreamName = @"text_in"; +static NSString *const kTextTag = @"TEXT"; +static NSString *const kTaskGraphName = @"mediapipe.tasks.text.text_classifier.TextClassifierGraph"; + +@interface MPPLanguageDetector () { + /** iOS Text Task Runner */ + MPPTextTaskRunner *_textTaskRunner; +} +@end + +@implementation MPPLanguageDetector + +- (instancetype)initWithOptions:(MPPLanguageDetectorOptions *)options error:(NSError **)error { + self = [super init]; + if (self) { + MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc] + initWithTaskGraphName:kTaskGraphName + inputStreams:@[ [NSString stringWithFormat:@"%@:%@", kTextTag, kTextInStreamName] ] + outputStreams:@[ [NSString stringWithFormat:@"%@:%@", kClassificationsTag, + kClassificationsStreamName] ] + taskOptions:options + enableFlowLimiting:NO + error:error]; + + if (!taskInfo) { + return nil; + } + + _textTaskRunner = + [[MPPTextTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig] + error:error]; + + if (!_textTaskRunner) { + return nil; + } + } + return self; +} + +- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error { + MPPLanguageDetectorOptions *options = [[MPPLanguageDetectorOptions alloc] init]; + + options.baseOptions.modelAssetPath = modelPath; + + return [self initWithOptions:options error:error]; +} + +- (nullable MPPLanguageDetectorResult *)detectText:(NSString *)text error:(NSError **)error { + Packet packet = [MPPTextPacketCreator createWithText:text]; + + std::map packetMap = {{kTextInStreamName.cppString, packet}}; + std::optional outputPacketMap = [_textTaskRunner processPacketMap:packetMap + error:error]; + + if (!outputPacketMap.has_value()) { + return nil; + } + + return + [MPPLanguageDetectorResult languageDetectorResultWithClassificationsPacket: + outputPacketMap.value()[kClassificationsStreamName.cppString]]; +} + +@end diff --git a/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.h b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.h new file mode 100644 index 000000000..9674c482b --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.h @@ -0,0 +1,61 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * Options for setting up a `LanguageDetector`. + */ +NS_SWIFT_NAME(LanguageDetectorOptions) +@interface MPPLanguageDetectorOptions : MPPTaskOptions + +/** + * The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults + * to English. + */ +@property(nonatomic, copy) NSString *displayNamesLocale; + +/** + * The maximum number of top-scored classification results to return. If < 0, all available results + * will be returned. If 0, an invalid argument error is returned. + */ +@property(nonatomic) NSInteger maxResults; + +/** + * Score threshold to override the one provided in the model metadata (if any). Results below this + * value are rejected. + */ +@property(nonatomic) float scoreThreshold; + +/** + * The allowlist of category names. If non-empty, detection results whose category name is not in + * this set will be filtered out. Duplicate or unknown category names are ignored. Mutually + * exclusive with categoryDenylist. + */ +@property(nonatomic, copy) NSArray *categoryAllowlist; + +/** + * The denylist of category names. If non-empty, detection results whose category name is in this + * set will be filtered out. Duplicate or unknown category names are ignored. Mutually exclusive + * with categoryAllowlist. + */ +@property(nonatomic, copy) NSArray *categoryDenylist; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.m b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.m new file mode 100644 index 000000000..df36493ef --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.m @@ -0,0 +1,40 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
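The options above mirror the classifier options of the underlying text classifier graph. A hedged configuration sketch follows; the property values and model path are illustrative only:

// Sketch only: configure a LanguageDetector through its options. Values are examples.
MPPLanguageDetectorOptions *options = [[MPPLanguageDetectorOptions alloc] init];
options.baseOptions.modelAssetPath = @"/path/to/language_detector.tflite";  // placeholder path
options.maxResults = 3;         // keep only the top 3 predicted languages
options.scoreThreshold = 0.1f;  // drop predictions scoring below 0.1
NSError *error = nil;
MPPLanguageDetector *languageDetector =
    [[MPPLanguageDetector alloc] initWithOptions:options error:&error];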
+ +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.h" + +@implementation MPPLanguageDetectorOptions + +- (instancetype)init { + self = [super init]; + if (self) { + _maxResults = -1; + _scoreThreshold = 0; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + MPPLanguageDetectorOptions *languageDetectorOptions = [super copyWithZone:zone]; + + languageDetectorOptions.scoreThreshold = self.scoreThreshold; + languageDetectorOptions.maxResults = self.maxResults; + languageDetectorOptions.categoryDenylist = self.categoryDenylist; + languageDetectorOptions.categoryAllowlist = self.categoryAllowlist; + languageDetectorOptions.displayNamesLocale = self.displayNamesLocale; + + return languageDetectorOptions; +} + +@end diff --git a/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.h b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.h new file mode 100644 index 000000000..a8b9fe735 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.h @@ -0,0 +1,70 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import "mediapipe/tasks/ios/core/sources/MPPTaskResult.h" + +NS_ASSUME_NONNULL_BEGIN + +NS_SWIFT_NAME(LanguagePrediction) +@interface MPPLanguagePrediction : NSObject + +/** The i18n language / locale code for the prediction. */ +@property(nonatomic, readonly) NSString *languageCode; + +/** The probability for the prediction. */ +@property(nonatomic, readonly) float probability; + +/** + * Initializes a new `LanguagePrediction` with the given language code and probability. + * + * @param languageCode The i18n language / locale code for the prediction. + * @param probability The probability for the prediction. + * + * @return An instance of `LanguagePrediction` initialized with the given language code and + * probability. + */ +- (instancetype)initWithLanguageCode:(NSString *)languageCode probability:(float)probability; + +@end + +/** Represents the results generated by `LanguageDetector`. **/ +NS_SWIFT_NAME(LanguageDetectorResult) +@interface MPPLanguageDetectorResult : MPPTaskResult + +/** A list of language predictions. */ +@property(nonatomic, readonly) NSArray *languagePredictions; + +/** + * Initializes a new `LanguageDetectorResult` with the given array of language predictions and + * timestamp (in milliseconds). + * + * @param languagePrediction The array of language predictions in this result. + * @param timestampInMilliseconds The timestamp (in milliseconds) for this result. + * + * @return An instance of `LanguageDetectorResult` initialized with the given array of language + * predictions and timestamp (in milliseconds). 
+ */ +- (instancetype)initWithLanguagePredictions:(NSArray *)languagePredictions + timestampInMilliseconds:(NSInteger)timestampInMilliseconds; + +- (instancetype)initWithTimestampInMilliseconds:(NSInteger)timestampInMilliseconds NS_UNAVAILABLE; + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.m b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.m new file mode 100644 index 000000000..126cf6c67 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.m @@ -0,0 +1,41 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.h" + +@implementation MPPLanguagePrediction + +- (instancetype)initWithLanguageCode:(NSString *)languageCode probability:(float)probability { + self = [super init]; + if (self) { + _languageCode = languageCode; + _probability = probability; + } + return self; +} + +@end + +@implementation MPPLanguageDetectorResult + +- (instancetype)initWithLanguagePredictions:(NSArray *)languagePredictions + timestampInMilliseconds:(NSInteger)timestampInMilliseconds { + self = [super initWithTimestampInMilliseconds:timestampInMilliseconds]; + if (self) { + _languagePredictions = languagePredictions; + } + return self; +} + +@end diff --git a/mediapipe/tasks/ios/text/language_detector/utils/BUILD b/mediapipe/tasks/ios/text/language_detector/utils/BUILD new file mode 100644 index 000000000..74de385c0 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/utils/BUILD @@ -0,0 +1,44 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
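Given a `LanguageDetector` created as sketched earlier, the result type above is consumed roughly as follows; the input text and logging are illustrative only:

// Sketch only: run detection and read the predictions defined above.
NSError *error = nil;
MPPLanguageDetectorResult *result = [languageDetector detectText:@"il y a beaucoup de neige"
                                                            error:&error];
for (MPPLanguagePrediction *prediction in result.languagePredictions) {
  NSLog(@"%@: %.3f", prediction.languageCode, prediction.probability);
}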
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPLanguageDetectorOptionsHelpers", + srcs = ["sources/MPPLanguageDetectorOptions+Helpers.mm"], + hdrs = ["sources/MPPLanguageDetectorOptions+Helpers.h"], + deps = [ + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto", + "//mediapipe/tasks/cc/text/text_classifier/proto:text_classifier_graph_options_cc_proto", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/core:MPPTaskOptionsProtocol", + "//mediapipe/tasks/ios/core/utils:MPPBaseOptionsHelpers", + "//mediapipe/tasks/ios/text/language_detector:MPPLanguageDetectorOptions", + ], +) + +objc_library( + name = "MPPLanguageDetectorResultHelpers", + srcs = ["sources/MPPLanguageDetectorResult+Helpers.mm"], + hdrs = ["sources/MPPLanguageDetectorResult+Helpers.h"], + deps = [ + "//mediapipe/framework:packet", + "//mediapipe/tasks/cc/components/containers/proto:classifications_cc_proto", + "//mediapipe/tasks/ios/components/containers/utils:MPPClassificationResultHelpers", + "//mediapipe/tasks/ios/text/language_detector:MPPLanguageDetectorResult", + ], +) diff --git a/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.h b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.h new file mode 100644 index 000000000..5406e901d --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.h @@ -0,0 +1,27 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/framework/calculator_options.pb.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptionsProtocol.h" +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MPPLanguageDetectorOptions (Helpers) + +- (void)copyToProto:(::mediapipe::CalculatorOptions *)optionsProto; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.mm b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.mm new file mode 100644 index 000000000..9d75105b4 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.mm @@ -0,0 +1,56 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorOptions+Helpers.h" + +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.h" + +#include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h" +#include "mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options.pb.h" + +namespace { +using CalculatorOptionsProto = ::mediapipe::CalculatorOptions; +using TextClassifierGraphOptionsProto = + ::mediapipe::tasks::text::text_classifier::proto::TextClassifierGraphOptions; +using ClassifierOptionsProto = ::mediapipe::tasks::components::processors::proto::ClassifierOptions; +} // namespace + +@implementation MPPLanguageDetectorOptions (Helpers) + +- (void)copyToProto:(CalculatorOptionsProto *)optionsProto { + TextClassifierGraphOptionsProto *graphOptions = + optionsProto->MutableExtension(TextClassifierGraphOptionsProto::ext); + [self.baseOptions copyToProto:graphOptions->mutable_base_options()]; + + ClassifierOptionsProto *classifierOptionsProto = graphOptions->mutable_classifier_options(); + classifierOptionsProto->Clear(); + + if (self.displayNamesLocale) { + classifierOptionsProto->set_display_names_locale(self.displayNamesLocale.cppString); + } + + classifierOptionsProto->set_max_results((int)self.maxResults); + classifierOptionsProto->set_score_threshold(self.scoreThreshold); + + for (NSString *category in self.categoryAllowlist) { + classifierOptionsProto->add_category_allowlist(category.cppString); + } + + for (NSString *category in self.categoryDenylist) { + classifierOptionsProto->add_category_denylist(category.cppString); + } +} + +@end diff --git a/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.h b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.h new file mode 100644 index 000000000..87431d157 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.h @@ -0,0 +1,28 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/text/language_detector/sources/MPPLanguageDetectorResult.h" + +#include "mediapipe/framework/packet.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MPPLanguageDetectorResult (Helpers) + ++ (MPPLanguageDetectorResult *)languageDetectorResultWithClassificationsPacket: + (const mediapipe::Packet &)packet; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.mm b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.mm new file mode 100644 index 000000000..567c3eaa7 --- /dev/null +++ b/mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.mm @@ -0,0 +1,61 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/components/containers/utils/sources/MPPClassificationResult+Helpers.h" +#import "mediapipe/tasks/ios/text/language_detector/utils/sources/MPPLanguageDetectorResult+Helpers.h" + +#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h" + +static const int kMicrosecondsPerMillisecond = 1000; + +namespace { +using ClassificationResultProto = + ::mediapipe::tasks::components::containers::proto::ClassificationResult; +} // namespace + +#define int kMicrosecondsPerMillisecond = 1000; + +@implementation MPPLanguageDetectorResult (Helpers) + ++ (MPPLanguageDetectorResult *)languageDetectorResultWithClassificationsPacket: + (const mediapipe::Packet &)packet { + MPPClassificationResult *classificationResult = [MPPClassificationResult + classificationResultWithProto:packet.Get()]; + + return [MPPLanguageDetectorResult + languageDetectorResultWithClassificationResult:classificationResult + timestampInMilliseconds:(NSInteger)(packet.Timestamp().Value() / + kMicrosecondsPerMillisecond)]; +} + ++ (MPPLanguageDetectorResult *) + languageDetectorResultWithClassificationResult:(MPPClassificationResult *)classificationResult + timestampInMilliseconds:(NSInteger)timestampInMilliseconds { + NSMutableArray *languagePredictions = + [NSMutableArray arrayWithCapacity:classificationResult.classifications.count]; + + if (classificationResult.classifications.count > 0) { + for (MPPCategory *category in classificationResult.classifications[0].categories) { + MPPLanguagePrediction *languagePrediction = + [[MPPLanguagePrediction alloc] initWithLanguageCode:category.categoryName + probability:category.score]; + [languagePredictions addObject:languagePrediction]; + } + } + + return [[MPPLanguageDetectorResult alloc] initWithLanguagePredictions:languagePredictions + timestampInMilliseconds:timestampInMilliseconds]; +} + +@end diff --git a/mediapipe/tasks/ios/text/text_classifier/utils/sources/MPPTextClassifierResult+Helpers.mm b/mediapipe/tasks/ios/text/text_classifier/utils/sources/MPPTextClassifierResult+Helpers.mm index 5a924016e..e15d748d5 100644 --- 
a/mediapipe/tasks/ios/text/text_classifier/utils/sources/MPPTextClassifierResult+Helpers.mm +++ b/mediapipe/tasks/ios/text/text_classifier/utils/sources/MPPTextClassifierResult+Helpers.mm @@ -17,7 +17,7 @@ #include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h" -static const int kMicroSecondsPerMilliSecond = 1000; +static const int kMicrosecondsPerMillisecond = 1000; namespace { using ClassificationResultProto = @@ -25,7 +25,7 @@ using ClassificationResultProto = using ::mediapipe::Packet; } // namespace -#define int kMicroSecondsPerMilliSecond = 1000; +#define int kMicrosecondsPerMillisecond = 1000; @implementation MPPTextClassifierResult (Helpers) @@ -36,7 +36,7 @@ using ::mediapipe::Packet; return [[MPPTextClassifierResult alloc] initWithClassificationResult:classificationResult timestampInMilliseconds:(NSInteger)(packet.Timestamp().Value() / - kMicroSecondsPerMilliSecond)]; + kMicrosecondsPerMillisecond)]; } @end diff --git a/mediapipe/tasks/ios/text/text_embedder/utils/sources/MPPTextEmbedderResult+Helpers.mm b/mediapipe/tasks/ios/text/text_embedder/utils/sources/MPPTextEmbedderResult+Helpers.mm index 3534ea66d..411fdd1c8 100644 --- a/mediapipe/tasks/ios/text/text_embedder/utils/sources/MPPTextEmbedderResult+Helpers.mm +++ b/mediapipe/tasks/ios/text/text_embedder/utils/sources/MPPTextEmbedderResult+Helpers.mm @@ -17,14 +17,14 @@ #include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h" -static const int kMicroSecondsPerMilliSecond = 1000; +static const int kMicrosecondsPerMillisecond = 1000; namespace { using EmbeddingResultProto = ::mediapipe::tasks::components::containers::proto::EmbeddingResult; using ::mediapipe::Packet; } // namespace -#define int kMicroSecondsPerMilliSecond = 1000; +#define int kMicrosecondsPerMillisecond = 1000; @implementation MPPTextEmbedderResult (Helpers) @@ -35,7 +35,7 @@ using ::mediapipe::Packet; return [[MPPTextEmbedderResult alloc] initWithEmbeddingResult:embeddingResult timestampInMilliseconds:(NSInteger)(packet.Timestamp().Value() / - kMicroSecondsPerMilliSecond)]; + kMicrosecondsPerMillisecond)]; } @end diff --git a/mediapipe/tasks/ios/vision/core/BUILD b/mediapipe/tasks/ios/vision/core/BUILD index 711b4ff95..3b1e1a65a 100644 --- a/mediapipe/tasks/ios/vision/core/BUILD +++ b/mediapipe/tasks/ios/vision/core/BUILD @@ -54,6 +54,8 @@ objc_library( "//mediapipe/framework:timestamp", "//mediapipe/framework/formats:image", "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/tasks/ios/components/containers:MPPRegionOfInterest", + "//mediapipe/tasks/ios/components/containers/utils:MPPRegionOfInterestHelpers", "//mediapipe/tasks/ios/vision/core/utils:MPPImageUtils", ], ) @@ -71,6 +73,7 @@ objc_library( ":MPPRunningMode", ":MPPVisionPacketCreator", "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/framework:packet", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPMask.h b/mediapipe/tasks/ios/vision/core/sources/MPPMask.h index 5d3a6910d..6cb08fdb1 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPMask.h +++ b/mediapipe/tasks/ios/vision/core/sources/MPPMask.h @@ -33,15 +33,15 @@ typedef NS_ENUM(NSUInteger, MPPMaskDataType) { * Masks are stored as `UInt8 *` or `float *` objects. * Every mask has an underlying type which can be accessed using `dataType`. You can access the * mask as any other type using the appropriate properties. 
For example, if the underlying type is - * `MPPMaskDataTypeUInt8`, in addition to accessing the mask using `uint8Data`, you can access - * `float32Data` to get the 32 bit float data (with values ranging from 0.0 to 1.0). The first - * time you access the data as a type different from the underlying type, an expensive type - * conversion is performed. Subsequent accesses return a pointer to the memory location for the same - * type converted array. As type conversions can be expensive, it is recommended to limit the - * accesses to data of types different from the underlying type. + * `uInt8`, in addition to accessing the mask using `uint8Data`, you can access `float32Data` to get + * the 32 bit float data (with values ranging from 0.0 to 1.0). The first time you access the data + * as a type different from the underlying type, an expensive type conversion is performed. + * Subsequent accesses return a pointer to the memory location for the same type converted array. As + * type conversions can be expensive, it is recommended to limit the accesses to data of types + * different from the underlying type. * * Masks that are returned from a MediaPipe Tasks are owned by by the underlying C++ Task. If you - * need to extend the lifetime of these objects, you can invoke the `[MPPMask copy:]` method. + * need to extend the lifetime of these objects, you can invoke the `copy()` method. */ NS_SWIFT_NAME(Mask) @interface MPPMask : NSObject @@ -68,19 +68,18 @@ NS_SWIFT_NAME(Mask) @property(nonatomic, readonly, assign) const float *float32Data; /** - * Initializes an `MPPMask` object of type `MPPMaskDataTypeUInt8` with the given `UInt8*` data, - * width and height. + * Initializes an `Mask` object of type `uInt8` with the given `UInt8*` data, width and height. * - * If `shouldCopy` is set to `YES`, the newly created `MPPMask` stores a reference to a deep copied + * If `shouldCopy` is set to `true`, the newly created `Mask` stores a reference to a deep copied * `uint8Data`. Since deep copies are expensive, it is recommended to not set `shouldCopy` unless - * the `MPPMask` must outlive the passed in `uint8Data`. + * the `Mask` must outlive the passed in `uint8Data`. * * @param uint8Data A pointer to the memory location of the `UInt8` data array. * @param width The width of the mask. * @param height The height of the mask. * @param shouldCopy The height of the mask. * - * @return A new `MPPMask` instance with the given `UInt8*` data, width and height. + * @return A new `Mask` instance with the given `UInt8*` data, width and height. */ - (nullable instancetype)initWithUInt8Data:(const UInt8 *)uint8Data width:(NSInteger)width @@ -88,18 +87,17 @@ NS_SWIFT_NAME(Mask) shouldCopy:(BOOL)shouldCopy NS_DESIGNATED_INITIALIZER; /** - * Initializes an `MPPMask` object of type `MPPMaskDataTypeFloat32` with the given `float*` data, - * width and height. + * Initializes an `Mask` object of type `float32` with the given `float*` data, width and height. * - * If `shouldCopy` is set to `YES`, the newly created `MPPMask` stores a reference to a deep copied + * If `shouldCopy` is set to `true`, the newly created `Mask` stores a reference to a deep copied * `float32Data`. Since deep copies are expensive, it is recommended to not set `shouldCopy` unless - * the `MPPMask` must outlive the passed in `float32Data`. + * the `Mask` must outlive the passed in `float32Data`. * * @param float32Data A pointer to the memory location of the `float` data array. * @param width The width of the mask. * @param height The height of the mask. 
* - * @return A new `MPPMask` instance with the given `float*` data, width and height. + * @return A new `Mask` instance with the given `float*` data, width and height. */ - (nullable instancetype)initWithFloat32Data:(const float *)float32Data width:(NSInteger)width diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPMask.mm b/mediapipe/tasks/ios/vision/core/sources/MPPMask.mm index 0d78e11d3..262f7fc97 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPMask.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPMask.mm @@ -30,7 +30,6 @@ width:(NSInteger)width height:(NSInteger)height shouldCopy:(BOOL)shouldCopy { - self = [super init]; if (self) { _width = width; diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h index ed07c6d90..9d6ed34c3 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h @@ -14,6 +14,7 @@ #import +#import "mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h" #import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" #include "mediapipe/framework/formats/rect.pb.h" @@ -73,4 +74,18 @@ + (mediapipe::Packet)createPacketWithNormalizedRect:(mediapipe::NormalizedRect &)normalizedRect timestampInMilliseconds:(NSInteger)timestampInMilliseconds; +/** + * Creates a MediapPipe Packet wrapping a `RenderData` constructed from an `MPPRegionOfInterest`. + * + * @param regionOfInterest The `MPPRegionOfInterest` to send to the MediaPipe graph. + * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no + * error will be saved. + * + * @return The MediaPipe packet containing the `RenderData` constructed from the given + * `MPPRegionOfInterest`. + */ ++ (std::optional)createRenderDataPacketWithRegionOfInterest: + (MPPRegionOfInterest *)regionOfInterest + error:(NSError **)error; + @end diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm index af419c6d0..e32957eef 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm @@ -13,14 +13,16 @@ // limitations under the License. 
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h" +#import "mediapipe/tasks/ios/components/containers/utils/sources/MPPRegionOfInterest+Helpers.h" #import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h" #include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/timestamp.h" -static const NSUInteger kMicroSecondsPerMilliSecond = 1000; +static const NSUInteger kMicrosecondsPerMillisecond = 1000; namespace { +using ::mediapipe::RenderData; using ::mediapipe::Image; using ::mediapipe::ImageFrame; using ::mediapipe::MakePacket; @@ -51,7 +53,7 @@ using ::mediapipe::Timestamp; } return MakePacket(std::move(imageFrame)) - .At(Timestamp(int64(timestampInMilliseconds * kMicroSecondsPerMilliSecond))); + .At(Timestamp(int64(timestampInMilliseconds * kMicrosecondsPerMillisecond))); } + (Packet)createPacketWithNormalizedRect:(NormalizedRect &)normalizedRect { @@ -61,7 +63,19 @@ using ::mediapipe::Timestamp; + (Packet)createPacketWithNormalizedRect:(NormalizedRect &)normalizedRect timestampInMilliseconds:(NSInteger)timestampInMilliseconds { return MakePacket(std::move(normalizedRect)) - .At(Timestamp(int64(timestampInMilliseconds * kMicroSecondsPerMilliSecond))); + .At(Timestamp(int64(timestampInMilliseconds * kMicrosecondsPerMillisecond))); +} + ++ (std::optional)createRenderDataPacketWithRegionOfInterest: + (MPPRegionOfInterest *)regionOfInterest + error:(NSError **)error { + std::optional renderData = [regionOfInterest getRenderDataWithError:error]; + + if (!renderData.has_value()) { + return std::nullopt; + } + + return MakePacket(std::move(renderData.value())); } @end diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h index aa0307d71..5c9b9524c 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h @@ -20,6 +20,8 @@ #import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" #import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h" +#include "mediapipe/framework/packet.h" + NS_ASSUME_NONNULL_BEGIN /** @@ -190,6 +192,26 @@ NS_ASSUME_NONNULL_BEGIN timestampInMilliseconds:(NSInteger)timeStampInMilliseconds error:(NSError **)error; +/** + * This method creates an input packet map to the C++ task runner with the image and normalized rect + * calculated from the region of interest specified within the bounds of an image. Tasks which need + * to add more entries to the input packet map and build their own custom logic for processing + * images can use this method. + * + * @param image An `MPPImage` input to the task. + * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data + * of type `MPPImage`, on which inference should be performed. + * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no + * error will be saved. + * + * @return A `BOOL` indicating if the creation of the input packet map with the image and the + * normalized rect calculated from the region of interest was successful. + */ +- (std::optional>) + inputPacketMapWithMPPImage:(MPPImage *)image + regionOfInterest:(CGRect)roi + error:(NSError **)error; + /** * This method returns a unique dispatch queue name by adding the given suffix and a `UUID` to the * pre-defined queue name prefix for vision tasks. 
The vision tasks can use this method to get diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index d8156a671..8760484cb 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -30,7 +30,7 @@ namespace { using ::mediapipe::ImageFormat; using ::mediapipe::ImageFrame; -vImage_Buffer EmptyVImageBufferFromImageFrame(ImageFrame &imageFrame, bool shouldAllocate) { +vImage_Buffer CreateEmptyVImageBufferFromImageFrame(ImageFrame &imageFrame, bool shouldAllocate) { UInt8 *data = shouldAllocate ? new UInt8[imageFrame.Height() * imageFrame.WidthStep()] : NULL; return {.data = data, .height = static_cast(imageFrame.Height()), @@ -38,8 +38,8 @@ vImage_Buffer EmptyVImageBufferFromImageFrame(ImageFrame &imageFrame, bool shoul .rowBytes = static_cast(imageFrame.WidthStep())}; } -vImage_Buffer VImageBufferFromImageFrame(ImageFrame &imageFrame) { - vImage_Buffer imageBuffer = EmptyVImageBufferFromImageFrame(imageFrame, false); +vImage_Buffer CreateVImageBufferFromImageFrame(ImageFrame &imageFrame) { + vImage_Buffer imageBuffer = CreateEmptyVImageBufferFromImageFrame(imageFrame, false); imageBuffer.data = imageFrame.MutablePixelData(); return imageBuffer; } @@ -78,10 +78,9 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d + (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; -// Always copies the pixel data of the image frame to the created `CVPixelBuffer`. - // This method is used to create CVPixelBuffer from output images of tasks like `FaceStylizer` only // when the input `MPImage` source type is `pixelBuffer`. +// Always copies the pixel data of the image frame to the created `CVPixelBuffer`. // // The only possible 32 RGBA pixel format of input `CVPixelBuffer` is `kCVPixelFormatType_32BGRA`. // But Mediapipe does not support inference on images of format `BGRA`. Hence the channels of the @@ -185,7 +184,7 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d + (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame shouldCopy:(BOOL)shouldCopy error:(NSError **)error { - vImage_Buffer sourceBuffer = VImageBufferFromImageFrame(imageFrame); + vImage_Buffer sourceBuffer = CreateVImageBufferFromImageFrame(imageFrame); // Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure // that pixels are displayed correctly irrespective of their alpha values. @@ -195,7 +194,7 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d switch (imageFrame.Format()) { case ImageFormat::SRGBA: { destinationBuffer = - shouldCopy ? EmptyVImageBufferFromImageFrame(imageFrame, true) : sourceBuffer; + shouldCopy ? 
CreateEmptyVImageBufferFromImageFrame(imageFrame, true) : sourceBuffer; premultiplyError = vImagePremultiplyData_RGBA8888(&sourceBuffer, &destinationBuffer, kvImageNoFlags); break; @@ -203,15 +202,18 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured"]; + description:@"An error occured while processing the output image " + @"pixels of the vision task."]; return NULL; } } if (premultiplyError != kvImageNoError) { - [MPPCommonUtils createCustomError:error - withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + [MPPCommonUtils + createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description: + @"An error occured while processing the output image pixels of the vision task."]; return NULL; } @@ -261,7 +263,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + description:@"An error occured while creating a CVPixelBuffer from the " + @"output image of the vision task."]; return NULL; } } @@ -275,14 +278,15 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d } const uint8_t permute_map[4] = {2, 1, 0, 3}; - vImage_Buffer sourceBuffer = EmptyVImageBufferFromImageFrame(imageFrame, NO); + vImage_Buffer sourceBuffer = CreateEmptyVImageBufferFromImageFrame(imageFrame, NO); sourceBuffer.data = pixelData; if (vImagePermuteChannels_ARGB8888(&sourceBuffer, &sourceBuffer, permute_map, kvImageNoFlags) != kvImageNoError) { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + description:@"An error occured while creating a CVPixelBuffer from the " + @"output image of the vision task."]; return NULL; } @@ -300,7 +304,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d } [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + description:@"An error occured while creating a CVPixelBuffer from the " + @"output image of the vision task."]; return NULL; } @@ -379,7 +384,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d default: [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + description:@"An error occured while creating a CGImage from the " + @"output image of the vision task."]; return NULL; } @@ -410,7 +416,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d if (!cgImageRef) { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + description:@"An error occured while converting the output image of the " + @"vision task to a CGImage."]; } return cgImageRef; diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm index 1e68f7788..49cfb0c52 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm @@ -198,7 +198,7 @@ static NSString *const kTaskName = @"gestureRecognizer"; 
NSInteger timestampInMilliseconds = outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() / - kMicroSecondsPerMilliSecond; + kMicrosecondsPerMillisecond; dispatch_async(_callbackQueue, ^{ [self.gestureRecognizerLiveStreamDelegate gestureRecognizer:self didFinishRecognitionWithResult:result diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h index 278ae89b6..bc866828f 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h @@ -27,7 +27,7 @@ NS_SWIFT_NAME(GestureRecognizerResult) /** Hand landmarks of detected hands. */ @property(nonatomic, readonly) NSArray *> *landmarks; -/** Hand landmarks in world coordniates of detected hands. */ +/** Hand landmarks in world coordinates of detected hands. */ @property(nonatomic, readonly) NSArray *> *worldLandmarks; /** Handedness of detected hands. */ @@ -45,7 +45,7 @@ NS_SWIFT_NAME(GestureRecognizerResult) * handedness, gestures and timestamp (in milliseconds). * * @param landmarks The hand landmarks of detected hands. - * @param worldLandmarks The hand landmarks in world coordniates of detected hands. + * @param worldLandmarks The hand landmarks in world coordinates of detected hands. * @param handedness The handedness of detected hands. * @param handedness The recognized hand gestures of detected hands. * @param timestampInMilliseconds The timestamp for this result. diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h index 2fe4c9110..3a1b23e77 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h @@ -20,7 +20,7 @@ NS_ASSUME_NONNULL_BEGIN -static const int kMicroSecondsPerMilliSecond = 1000; +static const int kMicrosecondsPerMillisecond = 1000; @interface MPPGestureRecognizerResult (Helpers) diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm index daa6d2fb1..0f166ba61 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm @@ -117,7 +117,7 @@ static const NSInteger kDefaultGestureIndex = -1; handLandmarksPacket:(const Packet &)handLandmarksPacket worldLandmarksPacket:(const Packet &)worldLandmarksPacket { NSInteger timestampInMilliseconds = - (NSInteger)(handGesturesPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond); + (NSInteger)(handGesturesPacket.Timestamp().Value() / kMicrosecondsPerMillisecond); if (handGesturesPacket.IsEmpty()) { return [MPPGestureRecognizerResult diff --git a/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarker.mm b/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarker.mm index 950cfae91..cacdef93b 100644 --- a/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarker.mm +++ b/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarker.mm @@ -222,7 +222,7 @@ static NSString *const 
kTaskName = @"handLandmarker"; NSInteger timestampInMilliseconds = outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() / - kMicroSecondsPerMilliSecond; + kMicrosecondsPerMillisecond; dispatch_async(_callbackQueue, ^{ [self.handLandmarkerLiveStreamDelegate handLandmarker:self didFinishDetectionWithResult:result diff --git a/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarkerResult.h b/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarkerResult.h index 2c00e9ee5..07ec1d78b 100644 --- a/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarkerResult.h +++ b/mediapipe/tasks/ios/vision/hand_landmarker/sources/MPPHandLandmarkerResult.h @@ -27,7 +27,7 @@ NS_SWIFT_NAME(HandLandmarkerResult) /** Hand landmarks of detected hands. */ @property(nonatomic, readonly) NSArray *> *landmarks; -/** Hand landmarks in world coordniates of detected hands. */ +/** Hand landmarks in world coordinates of detected hands. */ @property(nonatomic, readonly) NSArray *> *worldLandmarks; /** Handedness of detected hands. */ @@ -38,7 +38,7 @@ NS_SWIFT_NAME(HandLandmarkerResult) * and timestamp (in milliseconds). * * @param landmarks The hand landmarks of detected hands. - * @param worldLandmarks The hand landmarks in world coordniates of detected hands. + * @param worldLandmarks The hand landmarks in world coordinates of detected hands. * @param handedness The handedness of detected hands. * @param timestampInMilliseconds The timestamp for this result. * diff --git a/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.h b/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.h index 72d266da0..82931084d 100644 --- a/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.h +++ b/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.h @@ -20,7 +20,7 @@ NS_ASSUME_NONNULL_BEGIN -static const int kMicroSecondsPerMilliSecond = 1000; +static const int kMicrosecondsPerMillisecond = 1000; @interface MPPHandLandmarkerResult (Helpers) diff --git a/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.mm b/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.mm index 2936853ab..93ae1a0b0 100644 --- a/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.mm +++ b/mediapipe/tasks/ios/vision/hand_landmarker/utils/sources/MPPHandLandmarkerResult+Helpers.mm @@ -95,7 +95,7 @@ using ::mediapipe::Packet; worldLandmarksPacket:(const Packet &)worldLandmarksPacket handednessPacket:(const Packet &)handednessPacket { NSInteger timestampInMilliseconds = - (NSInteger)(landmarksPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond); + (NSInteger)(landmarksPacket.Timestamp().Value() / kMicrosecondsPerMillisecond); if (landmarksPacket.IsEmpty()) { return [MPPHandLandmarkerResult diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm index 4869537e2..0c359525b 100644 --- a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm @@ -43,7 +43,7 @@ static NSString *const kTaskGraphName = @"mediapipe.tasks.vision.image_classifier.ImageClassifierGraph"; static NSString *const kTaskName = @"imageClassifier"; -static const int kMicroSecondsPerMilliSecond 
= 1000; +static const int kMicrosecondsPerMillisecond = 1000; #define InputPacketMap(imagePacket, normalizedRectPacket) \ { \ @@ -221,7 +221,7 @@ static const int kMicroSecondsPerMilliSecond = 1000; NSInteger timestampInMilliseconds = outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() / - kMicroSecondsPerMilliSecond; + kMicrosecondsPerMillisecond; dispatch_async(_callbackQueue, ^{ [self.imageClassifierLiveStreamDelegate imageClassifier:self didFinishClassificationWithResult:result diff --git a/mediapipe/tasks/ios/vision/image_segmenter/BUILD b/mediapipe/tasks/ios/vision/image_segmenter/BUILD index c3bb897ea..3272896b5 100644 --- a/mediapipe/tasks/ios/vision/image_segmenter/BUILD +++ b/mediapipe/tasks/ios/vision/image_segmenter/BUILD @@ -51,6 +51,9 @@ objc_library( ":MPPImageSegmenterOptions", ":MPPImageSegmenterResult", "//mediapipe/tasks/cc/vision/image_segmenter:image_segmenter_graph", + "//mediapipe/tasks/cc/vision/image_segmenter/calculators:tensors_to_segmentation_calculator", + "//mediapipe/tasks/cc/vision/image_segmenter/calculators:tensors_to_segmentation_calculator_cc_proto", + "//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/common/utils:NSStringHelpers", "//mediapipe/tasks/ios/core:MPPTaskInfo", @@ -58,5 +61,6 @@ objc_library( "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner", "//mediapipe/tasks/ios/vision/image_segmenter/utils:MPPImageSegmenterOptionsHelpers", "//mediapipe/tasks/ios/vision/image_segmenter/utils:MPPImageSegmenterResultHelpers", + "//mediapipe/util:label_map_cc_proto", ], ) diff --git a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h index f75e7575e..9af9586fb 100644 --- a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h +++ b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h @@ -29,27 +29,31 @@ NS_SWIFT_NAME(ImageSegmenter) @interface MPPImageSegmenter : NSObject /** - * Creates a new instance of `MPPImageSegmenter` from an absolute path to a TensorFlow Lite model - * file stored locally on the device and the default `MPPImageSegmenterOptions`. + * Get the category label list of the `ImageSegmenter` can recognize. For CATEGORY_MASK type, the + * index in the category mask corresponds to the category in the label list. For CONFIDENCE_MASK + * type, the output mask list at index corresponds to the category in the label list. If there is no + * labelmap provided in the model file, empty array is returned. + */ +@property(nonatomic, readonly) NSArray *labels; + +/** + * Creates a new instance of `ImageSegmenter` from an absolute path to a TensorFlow Lite model + * file stored locally on the device and the default `ImageSegmenterOptions`. * * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. - * @param error An optional error parameter populated when there is an error in initializing the - * image segmenter. * - * @return A new instance of `MPPImageSegmenter` with the given model path. `nil` if there is an + * @return A new instance of `ImageSegmenter` with the given model path. `nil` if there is an * error in initializing the image segmenter. */ - (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error; /** - * Creates a new instance of `MPPImageSegmenter` from the given `MPPImageSegmenterOptions`. 
+ * Creates a new instance of `ImageSegmenter` from the given `ImageSegmenterOptions`. * - * @param options The options of type `MPPImageSegmenterOptions` to use for configuring the - * `MPPImageSegmenter`. - * @param error An optional error parameter populated when there is an error in initializing the - * image segmenter. + * @param options The options of type `ImageSegmenterOptions` to use for configuring the + * `ImageSegmenter`. * - * @return A new instance of `MPPImageSegmenter` with the given options. `nil` if there is an error + * @return A new instance of `ImageSegmenter` with the given options. `nil` if there is an error * in initializing the image segmenter. */ - (nullable instancetype)initWithOptions:(MPPImageSegmenterOptions *)options @@ -57,23 +61,20 @@ NS_SWIFT_NAME(ImageSegmenter) /** * Performs segmentation on the provided MPPImage using the whole image as region of interest. - * Rotation will be applied according to the `orientation` property of the provided `MPPImage`. Only - * use this method when the `MPPImageSegmenter` is created with `MPPRunningModeImage`. + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. Only + * use this method when the `ImageSegmenter` is created with running mode, `image`. * - * This method supports RGBA images. If your `MPPImage` has a source type of - * `MPPImageSourceTypePixelBuffer` or `MPPImageSourceTypeSampleBuffer`, the underlying pixel buffer - * must have one of the following pixel format types: + * This method supports RGBA images. If your `MPImage` has a source type of `pixelBuffer` or + * `sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: * 1. kCVPixelFormatType_32BGRA * 2. kCVPixelFormatType_32RGBA * - * If your `MPPImage` has a source type of `MPPImageSourceTypeImage` ensure that the color space is - * RGB with an Alpha channel. + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. * - * @param image The `MPPImage` on which segmentation is to be performed. - * @param error An optional error parameter populated when there is an error in performing - * segmentation on the input image. + * @param image The `MPImage` on which segmentation is to be performed. * - * @return An `MPPImageSegmenterResult` that contains the segmented masks. + * @return An `ImageSegmenterResult` that contains the segmented masks. */ - (nullable MPPImageSegmenterResult *)segmentImage:(MPPImage *)image error:(NSError **)error NS_SWIFT_NAME(segment(image:)); @@ -83,22 +84,20 @@ NS_SWIFT_NAME(ImageSegmenter) * invokes the given completion handler block with the response. The method returns synchronously * once the completion handler returns. * - * Rotation will be applied according to the `orientation` property of the provided - * `MPPImage`. Only use this method when the `MPPImageSegmenter` is created with - * `MPPRunningModeImage`. + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. Only + * use this method when the `ImageSegmenter` is created with running mode, `image`. * - * This method supports RGBA images. If your `MPPImage` has a source type of - * `MPPImageSourceTypePixelBuffer` or `MPPImageSourceTypeSampleBuffer`, the underlying pixel buffer - * must have one of the following pixel format types: + * This method supports RGBA images. 
If your `MPImage` has a source type of `pixelBuffer` or + * `sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: * 1. kCVPixelFormatType_32BGRA * 2. kCVPixelFormatType_32RGBA * - * If your `MPPImage` has a source type of `MPPImageSourceTypeImage` ensure that the color space is - * RGB with an Alpha channel. + * If your `MPImage` has a source type of `image` ensure that the color space is RGB with an Alpha + * channel. * - * @param image The `MPPImage` on which segmentation is to be performed. + * @param image The `MPImage` on which segmentation is to be performed. * @param completionHandler A block to be invoked with the results of performing segmentation on the - * image. The block takes two arguments, the optional `MPPImageSegmenterResult` that contains the + * image. The block takes two arguments, the optional `ImageSegmenterResult` that contains the * segmented masks if the segmentation was successful and an optional error populated upon failure. * The lifetime of the returned masks is only guaranteed for the duration of the block. */ @@ -108,28 +107,25 @@ NS_SWIFT_NAME(ImageSegmenter) NS_SWIFT_NAME(segment(image:completion:)); /** - * Performs segmentation on the provided video frame of type `MPPImage` using the whole image as + * Performs segmentation on the provided video frame of type `MPImage` using the whole image as * region of interest. * - * Rotation will be applied according to the `orientation` property of the provided `MPPImage`. Only - * use this method when the `MPPImageSegmenter` is created with `MPPRunningModeVideo`. + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. Only + * use this method when the `ImageSegmenter` is created with `video`. * - * This method supports RGBA images. If your `MPPImage` has a source type of - * `MPPImageSourceTypePixelBuffer` or `MPPImageSourceTypeSampleBuffer`, the underlying pixel buffer - * must have one of the following pixel format types: + * This method supports RGBA images. If your `MPImage` has a source type of `pixelBuffer` or + * `sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: * 1. kCVPixelFormatType_32BGRA * 2. kCVPixelFormatType_32RGBA * - * If your `MPPImage` has a source type of `MPPImageSourceTypeImage` ensure that the color space is - * RGB with an Alpha channel. + * If your `MPImage` has a source type of `image` ensure that the color space is RGB with an Alpha + * channel. * - * @param image The `MPPImage` on which segmentation is to be performed. + * @param image The `MPImage` on which segmentation is to be performed. * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input * timestamps must be monotonically increasing. - * @param error An optional error parameter populated when there is an error in performing - * segmentation on the input image. * - * @return An `MPPImageSegmenterResult` that contains a the segmented masks. + * @return An `ImageSegmenterResult` that contains a the segmented masks. 
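A minimal, hypothetical call sequence for the video-mode method declared just below (not part of the patch). It assumes the segmenter was created with the `video` running mode and that the caller supplies one `MPImage` per decoded frame together with a monotonically increasing millisecond timestamp.

#import "mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h"

// `segmenter` must have been created with the `video` running mode for this call to succeed.
static void SegmentDecodedFrame(MPPImageSegmenter *segmenter,
                                MPPImage *frame,
                                NSInteger timestampInMilliseconds) {
  NSError *error = nil;
  MPPImageSegmenterResult *result = [segmenter segmentVideoFrame:frame
                                         timestampInMilliseconds:timestampInMilliseconds
                                                           error:&error];
  if (!result) {
    NSLog(@"Segmentation of frame at %ld ms failed: %@", (long)timestampInMilliseconds, error);
    return;
  }
  NSLog(@"Frame at %ld ms produced %lu confidence mask(s).", (long)timestampInMilliseconds,
        (unsigned long)result.confidenceMasks.count);
}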
*/ - (nullable MPPImageSegmenterResult *)segmentVideoFrame:(MPPImage *)image timestampInMilliseconds:(NSInteger)timestampInMilliseconds @@ -137,27 +133,26 @@ NS_SWIFT_NAME(ImageSegmenter) NS_SWIFT_NAME(segment(videoFrame:timestampInMilliseconds:)); /** - * Performs segmentation on the provided video frame of type `MPPImage` using the whole image as + * Performs segmentation on the provided video frame of type `MPImage` using the whole image as * region of interest invokes the given completion handler block with the response. The method * returns synchronously once the completion handler returns. * - * Rotation will be applied according to the `orientation` property of the provided `MPPImage`. Only - * use this method when the `MPPImageSegmenter` is created with `MPPRunningModeVideo`. + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. Only + * use this method when the `ImageSegmenter` is created with running mode, `video`. * - * This method supports RGBA images. If your `MPPImage` has a source type of - * `MPPImageSourceTypePixelBuffer` or `MPPImageSourceTypeSampleBuffer`, the underlying pixel buffer - * must have one of the following pixel format types: + * This method supports RGBA images. If your `MPImage` has a source type of `pixelBuffer` or + * `sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: * 1. kCVPixelFormatType_32BGRA * 2. kCVPixelFormatType_32RGBA * - * If your `MPPImage` has a source type of `MPPImageSourceTypeImage` ensure that the color space is - * RGB with an Alpha channel. + * If your `MPImage` has a source type of `image` ensure that the color space is RGB with an Alpha + * channel. * - * @param image The `MPPImage` on which segmentation is to be performed. + * @param image The `MPImage` on which segmentation is to be performed. * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input * timestamps must be monotonically increasing. * @param completionHandler A block to be invoked with the results of performing segmentation on the - * image. The block takes two arguments, the optional `MPPImageSegmenterResult` that contains the + * image. The block takes two arguments, the optional `ImageSegmenterResult` that contains the * segmented masks if the segmentation was successful and an optional error only populated upon * failure. The lifetime of the returned masks is only guaranteed for the duration of the block. */ @@ -168,38 +163,36 @@ NS_SWIFT_NAME(ImageSegmenter) NS_SWIFT_NAME(segment(videoFrame:timestampInMilliseconds:completion:)); /** - * Sends live stream image data of type `MPPImage` to perform segmentation using the whole image as - * region of interest. + * Sends live stream image data of type `MPImage` to perform segmentation using the whole image as + *region of interest. * - * Rotation will be applied according to the `orientation` property of the provided `MPPImage`. Only - * use this method when the `MPPImageSegmenter` is created with`MPPRunningModeLiveStream`. + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. Only + *use this method when the `ImageSegmenter` is created with running mode, `liveStream`. * * The object which needs to be continuously notified of the available results of image segmentation - * must confirm to `MPPImageSegmenterLiveStreamDelegate` protocol and implement the - *`imageSegmenter:didFinishSegmentationWithResult:timestampInMilliseconds:error:` delegate method. 
+ * must confirm to `ImageSegmenterLiveStreamDelegate` protocol and implement the + * `imageSegmenter(_:didFinishSegmentationWithResult:timestampInMilliseconds:error:)` delegate + * method. * * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * to the segmenter. The input timestamps must be monotonically increasing. * - * This method supports RGBA images. If your `MPPImage` has a source type of - *`MPPImageSourceTypePixelBuffer` or `MPPImageSourceTypeSampleBuffer`, the underlying pixel buffer - * must have one of the following pixel format types: + * This method supports RGBA images. If your `MPImage` has a source type of `pixelBuffer` or + *`sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: * 1. kCVPixelFormatType_32BGRA * 2. kCVPixelFormatType_32RGBA * - * If the input `MPPImage` has a source type of `MPPImageSourceTypeImage` ensure that the color - * space is RGB with an Alpha channel. + * If the input `MPImage` has a source type of `image` ensure that the color space is RGB with an + * Alpha channel. * * If this method is used for classifying live camera frames using `AVFoundation`, ensure that you * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32RGBA` using its * `videoSettings` property. * - * @param image A live stream image data of type `MPPImage` on which segmentation is to be + * @param image A live stream image data of type `MPImage` on which segmentation is to be * performed. * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input * image is sent to the segmenter. The input timestamps must be monotonically increasing. - * @param error An optional error parameter populated when there is an error when sending the input - * image to the graph. * * @return `YES` if the image was sent to the task successfully, otherwise `NO`. */ diff --git a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.mm b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.mm index 8fad36671..8f26641d0 100644 --- a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.mm +++ b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.mm @@ -14,6 +14,7 @@ #import "mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h" +#import "mediapipe/tasks/ios/common/sources/MPPCommon.h" #import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h" #import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" #import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h" @@ -21,6 +22,9 @@ #import "mediapipe/tasks/ios/vision/image_segmenter/utils/sources/MPPImageSegmenterOptions+Helpers.h" #import "mediapipe/tasks/ios/vision/image_segmenter/utils/sources/MPPImageSegmenterResult+Helpers.h" +#include "mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator.pb.h" +#include "mediapipe/util/label_map.pb.h" + static constexpr int kMicrosecondsPerMillisecond = 1000; // Constants for the underlying MP Tasks Graph. 
See @@ -48,7 +52,9 @@ static NSString *const kTaskName = @"imageSegmenter"; } namespace { +using ::mediapipe::CalculatorGraphConfig; using ::mediapipe::Timestamp; +using ::mediapipe::tasks::TensorsToSegmentationCalculatorOptions; using ::mediapipe::tasks::core::PacketMap; using ::mediapipe::tasks::core::PacketsCallback; } // anonymous namespace @@ -125,10 +131,15 @@ using ::mediapipe::tasks::core::PacketsCallback; imageInputStreamName:kImageInStreamName normRectInputStreamName:kNormRectStreamName error:error]; - if (!_visionTaskRunner) { return nil; } + + _labels = [MPPImageSegmenter populateLabelsWithGraphConfig:_visionTaskRunner.graphConfig + error:error]; + if (!_labels) { + return nil; + } } return self; @@ -197,6 +208,43 @@ using ::mediapipe::tasks::core::PacketsCallback; #pragma mark - Private ++ (NSArray *)populateLabelsWithGraphConfig:(const CalculatorGraphConfig &)graphConfig + error:(NSError **)error { + bool found_tensor_to_segmentation_calculator = false; + + NSMutableArray *labels = [NSMutableArray arrayWithCapacity:(NSUInteger)graphConfig.node_size()]; + for (const auto &node : graphConfig.node()) { + if (node.calculator() == "mediapipe.tasks.TensorsToSegmentationCalculator") { + if (!found_tensor_to_segmentation_calculator) { + found_tensor_to_segmentation_calculator = true; + } else { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeFailedPreconditionError + description:@"The graph has more than one " + @"`mediapipe.tasks.TensorsToSegmentationCalculator`."]; + return nil; + } + TensorsToSegmentationCalculatorOptions options = + node.options().GetExtension(TensorsToSegmentationCalculatorOptions::ext); + if (!options.label_items().empty()) { + for (int i = 0; i < options.label_items_size(); ++i) { + if (!options.label_items().contains(i)) { + [MPPCommonUtils + createCustomError:error + withCode:MPPTasksErrorCodeFailedPreconditionError + description:[NSString + stringWithFormat:@"The lablemap has no expected key %d.", i]]; + + return nil; + } + [labels addObject:[NSString stringWithCppString:options.label_items().at(i).name()]]; + } + } + } + } + return labels; +} + + (nullable MPPImageSegmenterResult *) imageSegmenterResultWithOptionalOutputPacketMap:(std::optional &)outputPacketMap shouldCopyMaskPacketData:(BOOL)shouldCopyMaskPacketData { diff --git a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterOptions.h b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterOptions.h index b089ac7d3..484f3324b 100644 --- a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterOptions.h +++ b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterOptions.h @@ -23,28 +23,28 @@ NS_ASSUME_NONNULL_BEGIN @class MPPImageSegmenter; /** - * This protocol defines an interface for the delegates of `MPPImageSegmenter` object to receive + * This protocol defines an interface for the delegates of `ImageSegmenter` object to receive * results of performing asynchronous segmentation on images (i.e, when `runningMode` = - * `MPPRunningModeLiveStream`). + * `liveStream`). * - * The delegate of `MPPImageSegmenter` must adopt `MPPImageSegmenterLiveStreamDelegate` protocol. + * The delegate of `ImageSegmenter` must adopt `ImageSegmenterLiveStreamDelegate` protocol. * The methods in this protocol are optional. 
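For reference, a small hypothetical class conforming to the delegate protocol described above (not part of the patch). The class name and its body are invented; the protocol and the `imageSegmenter:didFinishSegmentationWithResult:timestampInMilliseconds:error:` callback are the ones referenced in this header, while the exact parameter attributes are assumed.

#import "mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenter.h"
#import "mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterOptions.h"

// Invented example class that receives live stream segmentation results.
@interface SegmentationResultHandler : NSObject <MPPImageSegmenterLiveStreamDelegate>
@end

@implementation SegmentationResultHandler

- (void)imageSegmenter:(MPPImageSegmenter *)imageSegmenter
    didFinishSegmentationWithResult:(MPPImageSegmenterResult *)result
            timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                              error:(NSError *)error {
  // Called on a private serial queue created by the image segmenter.
  if (error) {
    NSLog(@"Segmentation at %ld ms failed: %@", (long)timestampInMilliseconds, error);
    return;
  }
  // The returned masks are only guaranteed to be valid for the duration of this callback.
  NSLog(@"Segmentation at %ld ms produced a category mask: %@", (long)timestampInMilliseconds,
        result.categoryMask ? @"yes" : @"no");
}

@end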
*/ -NS_SWIFT_NAME(ObjectDetectorLiveStreamDelegate) +NS_SWIFT_NAME(ImageSegmenterLiveStreamDelegate) @protocol MPPImageSegmenterLiveStreamDelegate @optional /** * This method notifies a delegate that the results of asynchronous segmentation of - * an image submitted to the `MPPImageSegmenter` is available. + * an image submitted to the `ImageSegmenter` is available. * - * This method is called on a private serial dispatch queue created by the `MPPImageSegmenter` + * This method is called on a private serial dispatch queue created by the `ImageSegmenter` * for performing the asynchronous delegates calls. * * @param imageSegmenter The image segmenter which performed the segmentation. This is useful to - * test equality when there are multiple instances of `MPPImageSegmenter`. - * @param result The `MPPImageSegmenterResult` object that contains a list of category or confidence + * test equality when there are multiple instances of `ImageSegmenter`. + * @param result The `ImageSegmenterResult` object that contains a list of category or confidence * masks and optional quality scores. * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input * image was sent to the image segmenter. @@ -58,26 +58,26 @@ NS_SWIFT_NAME(ObjectDetectorLiveStreamDelegate) NS_SWIFT_NAME(imageSegmenter(_:didFinishSegmentation:timestampInMilliseconds:error:)); @end -/** Options for setting up a `MPPImageSegmenter`. */ +/** Options for setting up a `ImageSegmenter`. */ NS_SWIFT_NAME(ImageSegmenterOptions) @interface MPPImageSegmenterOptions : MPPTaskOptions /** - * Running mode of the image segmenter task. Defaults to `MPPRunningModeImage`. - * `MPPImageSegmenter` can be created with one of the following running modes: - * 1. `MPPRunningModeImage`: The mode for performing segmentation on single image inputs. - * 2. `MPPRunningModeVideo`: The mode for performing segmentation on the decoded frames of a + * Running mode of the image segmenter task. Defaults to `image`. + * `ImageSegmenter` can be created with one of the following running modes: + * 1. `image`: The mode for performing segmentation on single image inputs. + * 2. `video`: The mode for performing segmentation on the decoded frames of a * video. - * 3. `MPPRunningModeLiveStream`: The mode for performing segmentation on a live stream of + * 3. `liveStream`: The mode for performing segmentation on a live stream of * input data, such as from the camera. */ @property(nonatomic) MPPRunningMode runningMode; /** - * An object that confirms to `MPPImageSegmenterLiveStreamDelegate` protocol. This object must - * implement `imageSegmenter:didFinishSegmentationWithResult:timestampInMilliseconds:error:` to + * An object that confirms to `ImageSegmenterLiveStreamDelegate` protocol. This object must + * implement `imageSegmenter(_:didFinishSegmentationWithResult:timestampInMilliseconds:error:)` to * receive the results of performing asynchronous segmentation on images (i.e, when `runningMode` = - * `MPPRunningModeLiveStream`). + * `liveStream`). 
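Continuing that hypothetical example, configuring and creating a segmenter for the live stream mode could look roughly as follows. The `baseOptions.modelAssetPath` field comes from the shared task options base class and is not shown in this diff; the model path is a placeholder.

// Hypothetical setup, e.g. inside a view controller. `_resultHandler` should be a strong
// ivar or property, because `imageSegmenterLiveStreamDelegate` is declared weak below.
_resultHandler = [[SegmentationResultHandler alloc] init];

MPPImageSegmenterOptions *options = [[MPPImageSegmenterOptions alloc] init];
options.baseOptions.modelAssetPath = @"/path/to/segmentation_model.tflite";  // placeholder
options.runningMode = MPPRunningModeLiveStream;
options.imageSegmenterLiveStreamDelegate = _resultHandler;

NSError *error = nil;
MPPImageSegmenter *segmenter = [[MPPImageSegmenter alloc] initWithOptions:options error:&error];
if (!segmenter) {
  NSLog(@"Failed to create the image segmenter: %@", error);
}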
*/ @property(nonatomic, weak, nullable) id imageSegmenterLiveStreamDelegate; diff --git a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterResult.h b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterResult.h index 20bf3fefe..7b8277d2a 100644 --- a/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterResult.h +++ b/mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterResult.h @@ -18,22 +18,22 @@ NS_ASSUME_NONNULL_BEGIN -/** Represents the segmentation results generated by `MPPImageSegmenter`. */ +/** Represents the segmentation results generated by `ImageSegmenter`. */ NS_SWIFT_NAME(ImageSegmenterResult) @interface MPPImageSegmenterResult : MPPTaskResult /** - * An optional array of `MPPMask` objects. Each `MPPMask` in the array holds a 32 bit float array of - * size `image width` * `image height` which represents the confidence mask for each category. Each + * An optional array of `Mask` objects. Each `Mask` in the array holds a 32 bit float array of size + * `image width` * `image height` which represents the confidence mask for each category. Each * element of the float array represents the confidence with which the model predicted that the * corresponding pixel belongs to the category that the mask represents, usually in the range [0,1]. */ @property(nonatomic, readonly, nullable) NSArray *confidenceMasks; /** - * An optional `MPPMask` that holds a`UInt8` array of size `image width` * `image height`. Each - * element of this array represents the class to which the pixel in the original image was predicted - * to belong to. + * An optional `Mask` that holds a`UInt8` array of size `image width` * `image height`. Each element + * of this array represents the class to which the pixel in the original image was predicted to + * belong to. */ @property(nonatomic, readonly, nullable) MPPMask *categoryMask; @@ -45,17 +45,17 @@ NS_SWIFT_NAME(ImageSegmenterResult) @property(nonatomic, readonly, nullable) NSArray *qualityScores; /** - * Initializes a new `MPPImageSegmenterResult` with the given array of confidence masks, category - * mask, quality scores and timestamp (in milliseconds). + * Initializes a new `ImageSegmenterResult` with the given array of confidence masks, category mask, + * quality scores and timestamp (in milliseconds). * - * @param confidenceMasks An optional array of `MPPMask` objects. Each `MPPMask` in the array must - * be of type `MPPMaskDataTypeFloat32`. - * @param categoryMask An optional `MPMask` object of type `MPPMaskDataTypeUInt8`. + * @param confidenceMasks An optional array of `Mask` objects. Each `Mask` in the array must + * be of type `float32`. + * @param categoryMask An optional `Mask` object of type `uInt8`. * @param qualityScores The quality scores of the result masks of type NSArray *. Each * `NSNumber` in the array holds a `float`. * @param timestampInMilliseconds The timestamp (in milliseconds) for this result. * - * @return An instance of `MPPImageSegmenterResult` initialized with the given array of confidence + * @return An instance of `ImageSegmenterResult` initialized with the given array of confidence * masks, category mask, quality scores and timestamp (in milliseconds). 
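To tie the new `labels` property on `ImageSegmenter` together with the result type documented here, a hypothetical sketch of reading a category mask follows (not part of the patch). It assumes the mask exposes `width`/`height` properties as declared elsewhere in MPPMask.h, and that the segmenter was configured to output a category mask.

static void LogCenterPixelCategory(MPPImageSegmenter *segmenter, MPPImageSegmenterResult *result) {
  MPPMask *categoryMask = result.categoryMask;
  if (!categoryMask) {
    return;
  }

  // Each byte of the category mask is an index into `segmenter.labels` when the model
  // provides a label map; otherwise `labels` is empty.
  const UInt8 *categories = categoryMask.uint8Data;
  NSArray<NSString *> *labels = segmenter.labels;

  NSInteger centerIndex =
      (categoryMask.height / 2) * categoryMask.width + categoryMask.width / 2;
  UInt8 category = categories[centerIndex];
  NSString *label = (NSUInteger)category < labels.count ? labels[category] : @"<no label>";
  NSLog(@"Center pixel belongs to category %d (%@).", (int)category, label);
}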
*/ - (instancetype)initWithConfidenceMasks:(nullable NSArray *)confidenceMasks diff --git a/mediapipe/tasks/ios/vision/image_segmenter/utils/sources/MPPImageSegmenterResult+Helpers.mm b/mediapipe/tasks/ios/vision/image_segmenter/utils/sources/MPPImageSegmenterResult+Helpers.mm index 885df734d..c4c3d398c 100644 --- a/mediapipe/tasks/ios/vision/image_segmenter/utils/sources/MPPImageSegmenterResult+Helpers.mm +++ b/mediapipe/tasks/ios/vision/image_segmenter/utils/sources/MPPImageSegmenterResult+Helpers.mm @@ -47,7 +47,7 @@ using ::mediapipe::Packet; ->PixelData() width:confidenceMask.width() height:confidenceMask.height() - shouldCopy:shouldCopyMaskPacketData ? YES : NO]]; + shouldCopy:shouldCopyMaskPacketData]]; } } @@ -57,7 +57,7 @@ using ::mediapipe::Packet; initWithUInt8Data:(UInt8 *)cppCategoryMask.GetImageFrameSharedPtr().get()->PixelData() width:cppCategoryMask.width() height:cppCategoryMask.height() - shouldCopy:shouldCopyMaskPacketData ? YES : NO]; + shouldCopy:shouldCopyMaskPacketData]; } if (qualityScoresPacket.ValidateAsType>().ok()) { diff --git a/mediapipe/tasks/ios/vision/interactive_segmenter/BUILD b/mediapipe/tasks/ios/vision/interactive_segmenter/BUILD new file mode 100644 index 000000000..3001737b9 --- /dev/null +++ b/mediapipe/tasks/ios/vision/interactive_segmenter/BUILD @@ -0,0 +1,36 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPInteractiveSegmenterOptions", + srcs = ["sources/MPPInteractiveSegmenterOptions.m"], + hdrs = ["sources/MPPInteractiveSegmenterOptions.h"], + deps = ["//mediapipe/tasks/ios/core:MPPTaskOptions"], +) + +objc_library( + name = "MPPInteractiveSegmenter", + hdrs = ["sources/MPPInteractiveSegmenter.h"], + module_name = "MPPInteractiveSegmenter", + deps = [ + ":MPPInteractiveSegmenterOptions", + "//mediapipe/tasks/ios/components/containers:MPPRegionOfInterest", + "//mediapipe/tasks/ios/vision/core:MPPImage", + "//mediapipe/tasks/ios/vision/image_segmenter:MPPImageSegmenterResult", + ], +) diff --git a/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenter.h b/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenter.h new file mode 100644 index 000000000..d7ac01466 --- /dev/null +++ b/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenter.h @@ -0,0 +1,136 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/components/containers/sources/MPPRegionOfInterest.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" +#import "mediapipe/tasks/ios/vision/image_segmenter/sources/MPPImageSegmenterResult.h" +#import "mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * @brief Class that performs interactive segmentation on images. + * + * Users can represent user interaction through `RegionOfInterest`, which gives a hint to + * `InteractiveSegmenter` to perform segmentation focusing on the given region of interest. + * + * The API expects a TFLite model with mandatory TFLite Model Metadata. + * + * Input tensor: + * (kTfLiteUInt8/kTfLiteFloat32) + * - image input of size `[batch x height x width x channels]`. + * - batch inference is not supported (`batch` is required to be 1). + * - RGB and greyscale inputs are supported (`channels` is required to be + * 1 or 3). + * - if type is kTfLiteFloat32, NormalizationOptions are required to be attached to the metadata + * for input normalization. Output tensors: (kTfLiteUInt8/kTfLiteFloat32) + * - list of segmented masks. + * - if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1. + * - if `output_type` is CONFIDENCE_MASK, float32 Image list of size `channels`. + * - batch is always 1. + * + * An example of such model can be found at: + * https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2 + */ +NS_SWIFT_NAME(InteractiveSegmenter) +@interface MPPInteractiveSegmenter : NSObject + +/** + * Get the category label list of the `InteractiveSegmenter` can recognize. For CATEGORY_MASK type, + * the index in the category mask corresponds to the category in the label list. For CONFIDENCE_MASK + * type, the output mask list at index corresponds to the category in the label list. If there is no + * labelmap provided in the model file, empty array is returned. + */ +@property(nonatomic, readonly) NSArray *labels; + +/** + * Creates a new instance of `InteractiveSegmenter` from an absolute path to a TensorFlow Lite model + * file stored locally on the device and the default `InteractiveSegmenterOptions`. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. + * + * @return A new instance of `InteractiveSegmenter` with the given model path. `nil` if there is an + * error in initializing the interactive segmenter. + */ +- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error; + +/** + * Creates a new instance of `InteractiveSegmenter` from the given `InteractiveSegmenterOptions`. + * + * @param options The options of type `InteractiveSegmenterOptions` to use for configuring the + * `InteractiveSegmenter`. + * + * @return A new instance of `InteractiveSegmenter` with the given options. `nil` if there is an + * error in initializing the interactive segmenter. + */ +- (nullable instancetype)initWithOptions:(MPPInteractiveSegmenterOptions *)options + error:(NSError **)error NS_DESIGNATED_INITIALIZER; + +/** + * Performs segmentation on the provided MPPImage using the specified user's region of interest. + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. + * + * This method supports RGBA images. 
If your `MPImage` has a source type of `pixelBuffer` or + * `sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which segmentation is to be performed. + * + * @return An `ImageSegmenterResult` that contains the segmented masks. + */ +- (nullable MPPImageSegmenterResult *)segmentImage:(MPPImage *)image + regionOfInterest:(MPPRegionOfInterest *)regionOfInterest + error:(NSError **)error + NS_SWIFT_NAME(segment(image:regionOfInterest:)); + +/** + * Performs segmentation on the provided MPPImage using the specified user's region of interest and + * invokes the given completion handler block with the response. The method returns synchronously + * once the completion handler returns. + * + * Rotation will be applied according to the `orientation` property of the provided `MPImage`. + * + * This method supports RGBA images. If your `MPImage` has a source type of `pixelBuffer` or + * `sampleBuffer`, the underlying pixel buffer must have one of the following pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If your `MPImage` has a source type of `image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which segmentation is to be performed. + * @param completionHandler A block to be invoked with the results of performing segmentation on the + * image. The block takes two arguments, the optional `ImageSegmenterResult` that contains the + * segmented masks if the segmentation was successful and an optional error populated upon failure. + * The lifetime of the returned masks is only guaranteed for the duration of the block. + */ +- (void)segmentImage:(MPPImage *)image + regionOfInterest:(MPPRegionOfInterest *)regionOfInterest + withCompletionHandler:(void (^)(MPPImageSegmenterResult *_Nullable result, + NSError *_Nullable error))completionHandler + NS_SWIFT_NAME(segment(image:regionOfInterest:completion:)); + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.h b/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.h new file mode 100644 index 000000000..9ae45b13f --- /dev/null +++ b/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.h @@ -0,0 +1,41 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +@class MPPInteractiveSegmenter; + +/** Options for setting up a `InteractiveSegmenter`. 
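A short, hypothetical end-to-end use of the interactive segmenter API declared above (not part of the patch). The `MPPRegionOfInterest` is assumed to have been built from the user's touch point using the initializers in MPPRegionOfInterest.h, which are not shown in this diff, and the model path is a placeholder.

#import "mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenter.h"

static void RunInteractiveSegmentation(MPPImage *image, MPPRegionOfInterest *regionOfInterest) {
  NSError *error = nil;

  // Placeholder model path; a real app would bundle the model asset.
  MPPInteractiveSegmenter *segmenter =
      [[MPPInteractiveSegmenter alloc] initWithModelPath:@"/path/to/interactive_segmenter.tflite"
                                                   error:&error];
  if (!segmenter) {
    NSLog(@"Failed to create the interactive segmenter: %@", error);
    return;
  }

  MPPImageSegmenterResult *result = [segmenter segmentImage:image
                                           regionOfInterest:regionOfInterest
                                                      error:&error];
  if (!result) {
    NSLog(@"Interactive segmentation failed: %@", error);
    return;
  }
  NSLog(@"Received %lu confidence mask(s).", (unsigned long)result.confidenceMasks.count);
}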
*/ +NS_SWIFT_NAME(InteractiveSegmenterOptions) +@interface MPPInteractiveSegmenterOptions : MPPTaskOptions + +/** + * The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults + * to English. + */ +@property(nonatomic, copy) NSString *displayNamesLocale; + +/** Represents whether to output confidence masks. */ +@property(nonatomic) BOOL shouldOutputConfidenceMasks; + +/** Represents whether to output category mask. */ +@property(nonatomic) BOOL shouldOutputCategoryMask; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.m b/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.m new file mode 100644 index 000000000..798ac11ce --- /dev/null +++ b/mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.m @@ -0,0 +1,38 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/vision/interactive_segmenter/sources/MPPInteractiveSegmenterOptions.h" + +@implementation MPPInteractiveSegmenterOptions + +- (instancetype)init { + self = [super init]; + if (self) { + _displayNamesLocale = @"en"; + _shouldOutputConfidenceMasks = YES; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + MPPInteractiveSegmenterOptions *interactiveSegmenterOptions = [super copyWithZone:zone]; + + interactiveSegmenterOptions.shouldOutputConfidenceMasks = self.shouldOutputConfidenceMasks; + interactiveSegmenterOptions.shouldOutputCategoryMask = self.shouldOutputCategoryMask; + interactiveSegmenterOptions.displayNamesLocale = self.displayNamesLocale; + + return interactiveSegmenterOptions; +} + +@end diff --git a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm index 7d2276b2a..0c88814a2 100644 --- a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm +++ b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm @@ -182,7 +182,7 @@ static NSString *const kTaskName = @"objectDetector"; NSInteger timestampInMilliseconds = outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() / - kMicroSecondsPerMilliSecond; + kMicrosecondsPerMillisecond; dispatch_async(_callbackQueue, ^{ [self.objectDetectorLiveStreamDelegate objectDetector:self didFinishDetectionWithResult:result diff --git a/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.h b/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.h index 377e6e323..f51b7dc61 100644 --- a/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.h +++ b/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.h @@ -18,7 +18,7 @@ NS_ASSUME_NONNULL_BEGIN -static const int kMicroSecondsPerMilliSecond = 1000; +static const int 
kMicrosecondsPerMillisecond = 1000; @interface MPPObjectDetectorResult (Helpers) diff --git a/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.mm b/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.mm index 3a8a72f71..b068af874 100644 --- a/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.mm +++ b/mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorResult+Helpers.mm @@ -27,7 +27,7 @@ using ::mediapipe::Packet; (const Packet &)packet { NSInteger timestampInMilliseconds = (NSInteger)(packet.Timestamp().Value() / - kMicroSecondsPerMilliSecond); + kMicrosecondsPerMillisecond); if (!packet.ValidateAsType>().ok()) { return [[MPPObjectDetectorResult alloc] initWithDetections:@[] timestampInMilliseconds:timestampInMilliseconds]; diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/BUILD b/mediapipe/tasks/ios/vision/pose_landmarker/BUILD new file mode 100644 index 000000000..97cb278d9 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/BUILD @@ -0,0 +1,73 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPPoseLandmarkerResult", + srcs = ["sources/MPPPoseLandmarkerResult.m"], + hdrs = ["sources/MPPPoseLandmarkerResult.h"], + deps = [ + "//mediapipe/tasks/ios/components/containers:MPPLandmark", + "//mediapipe/tasks/ios/core:MPPTaskResult", + "//mediapipe/tasks/ios/vision/core:MPPMask", + ], +) + +objc_library( + name = "MPPPoseLandmarkerOptions", + srcs = ["sources/MPPPoseLandmarkerOptions.m"], + hdrs = ["sources/MPPPoseLandmarkerOptions.h"], + deps = [ + ":MPPPoseLandmarkerResult", + "//mediapipe/tasks/ios/core:MPPTaskOptions", + "//mediapipe/tasks/ios/vision/core:MPPRunningMode", + ], +) + +objc_library( + name = "MPPPoseLandmarksConnections", + hdrs = ["sources/MPPPoseLandmarksConnections.h"], + module_name = "MPPPoseLandmarksConnections", + deps = ["//mediapipe/tasks/ios/components/containers:MPPConnection"], +) + +objc_library( + name = "MPPPoseLandmarker", + srcs = ["sources/MPPPoseLandmarker.mm"], + hdrs = ["sources/MPPPoseLandmarker.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + module_name = "MPPPoseLandmarker", + deps = [ + ":MPPPoseLandmarkerOptions", + ":MPPPoseLandmarkerResult", + ":MPPPoseLandmarksConnections", + "//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarker_graph", + "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/components/containers:MPPConnection", + "//mediapipe/tasks/ios/core:MPPTaskInfo", + "//mediapipe/tasks/ios/vision/core:MPPImage", + "//mediapipe/tasks/ios/vision/core:MPPVisionPacketCreator", + "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner", + "//mediapipe/tasks/ios/vision/pose_landmarker/utils:MPPPoseLandmarkerOptionsHelpers", + 
"//mediapipe/tasks/ios/vision/pose_landmarker/utils:MPPPoseLandmarkerResultHelpers", + ], +) diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.h b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.h new file mode 100644 index 000000000..d70d1f129 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.h @@ -0,0 +1,160 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/components/containers/sources/MPPConnection.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * @brief Performs pose landmarks detection on images. + * + * This API expects a pre-trained pose landmarks model asset bundle. + */ +NS_SWIFT_NAME(PoseLandmarker) +@interface MPPPoseLandmarker : NSObject + +/** The array of connections between all the landmarks in the detected pose. */ +@property(class, nonatomic, readonly) NSArray *poseLandmarks; + +/** + * Creates a new instance of `PoseLandmarker` from an absolute path to a model asset bundle stored + * locally on the device and the default `PoseLandmarkerOptions`. + * + * @param modelPath An absolute path to a model asset bundle stored locally on the device. + * @param error An optional error parameter populated when there is an error in initializing the + * pose landmarker. + * + * @return A new instance of `PoseLandmarker` with the given model path. `nil` if there is an error + * in initializing the pose landmarker. + */ +- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error; + +/** + * Creates a new instance of `PoseLandmarker` from the given `PoseLandmarkerOptions`. + * + * @param options The options of type `PoseLandmarkerOptions` to use for configuring the + * `PoseLandmarker`. + * @param error An optional error parameter populated when there is an error in initializing the + * pose landmarker. + * + * @return A new instance of `PoseLandmarker` with the given options. `nil` if there is an error in + * initializing the pose landmarker. + */ +- (nullable instancetype)initWithOptions:(MPPPoseLandmarkerOptions *)options + error:(NSError **)error NS_DESIGNATED_INITIALIZER; + +/** + * Performs pose landmarks detection on the provided `MPImage` using the whole image as region of + * interest. Rotation will be applied according to the `orientation` property of the provided + * `MPImage`. Only use this method when the `PoseLandmarker` is created with running mode `.image`. + * + * This method supports performing pose landmarks detection on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. 
+ * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which pose landmarks detection is to be performed. + * @param error An optional error parameter populated when there is an error in performing pose + * landmark detection on the input image. + * + * @return A `PoseLandmarkerResult` object that contains the pose landmarks detection + * results. + */ +- (nullable MPPPoseLandmarkerResult *)detectImage:(MPPImage *)image + error:(NSError **)error NS_SWIFT_NAME(detect(image:)); + +/** + * Performs pose landmarks detection on the provided video frame of type `MPImage` using the whole + * image as region of interest. Rotation will be applied according to the `orientation` property of + * the provided `MPImage`. Only use this method when the `PoseLandmarker` is created with running + * mode `.video`. + * + * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must + * be monotonically increasing. + * + * This method supports performing pose landmarks detection on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. + * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which pose landmarks detection is to be performed. + * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input + * timestamps must be monotonically increasing. + * @param error An optional error parameter populated when there is an error in performing pose + * landmark detection on the input video frame. + * + * @return A `PoseLandmarkerResult` object that contains the pose landmarks detection + * results. + */ +- (nullable MPPPoseLandmarkerResult *)detectVideoFrame:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError **)error + NS_SWIFT_NAME(detect(videoFrame:timestampInMilliseconds:)); + +/** + * Sends live stream image data of type `MPImage` to perform pose landmarks detection using the + * whole image as region of interest. Rotation will be applied according to the `orientation` + * property of the provided `MPImage`. Only use this method when the `PoseLandmarker` is created + * with running mode `.liveStream`. + * + * The object which needs to be continuously notified of the available results of pose landmark + * detection must conform to the `PoseLandmarkerLiveStreamDelegate` protocol and implement the + * `poseLandmarker(_:didFinishDetectionWithResult:timestampInMilliseconds:error:)` delegate method. + * + * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent + * to the pose landmarker. The input timestamps must be monotonically increasing. + * + * This method supports performing pose landmarks detection on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. + * + * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an + * Alpha channel.
+ * + * If this method is used for performing pose landmarks detection on live camera frames using + * `AVFoundation`, ensure that you request `AVCaptureVideoDataOutput` to output frames in + * `kCMPixelFormat_32BGRA` using its `videoSettings` property. + * + * @param image A live stream image data of type `MPImage` on which pose landmarks detection is to + * be performed. + * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input + * image is sent to the pose landmarker. The input timestamps must be monotonically increasing. + * @param error An optional error parameter populated when there is an error in performing pose + * landmark detection on the input live stream image data. + * + * @return `YES` if the image was sent to the task successfully, otherwise `NO`. + */ +- (BOOL)detectAsyncImage:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError **)error + NS_SWIFT_NAME(detectAsync(image:timestampInMilliseconds:)); + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.mm b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.mm new file mode 100644 index 000000000..159bf9e65 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.mm @@ -0,0 +1,220 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
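Aside (illustrative, not part of this patch): a minimal sketch of how a client might call the image-mode API declared in MPPPoseLandmarker.h above. The `modelPath` string and the `MPPImage` instance `image` are assumed to be created elsewhere, and `MPPRunningModeImage` is assumed to be the image case of `MPPRunningMode`.

    // Usage sketch only; error handling trimmed.
    MPPPoseLandmarkerOptions *options = [[MPPPoseLandmarkerOptions alloc] init];
    options.baseOptions.modelAssetPath = modelPath;  // path to a pose landmarker model asset bundle (assumed)
    options.runningMode = MPPRunningModeImage;
    options.numPoses = 1;

    NSError *error = nil;
    MPPPoseLandmarker *poseLandmarker = [[MPPPoseLandmarker alloc] initWithOptions:options error:&error];
    MPPPoseLandmarkerResult *result = [poseLandmarker detectImage:image error:&error];
    // `result.landmarks` holds one array of normalized landmarks per detected pose.
    NSArray *firstPoseLandmarks = result.landmarks.firstObject;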
+ +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarker.h" + +#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h" +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarksConnections.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.h" + +namespace { +using ::mediapipe::Timestamp; +using ::mediapipe::tasks::core::PacketMap; +using ::mediapipe::tasks::core::PacketsCallback; +} // namespace + +static NSString *const kImageTag = @"IMAGE"; +static NSString *const kImageInStreamName = @"image_in"; +static NSString *const kNormRectTag = @"NORM_RECT"; +static NSString *const kNormRectInStreamName = @"norm_rect_in"; +static NSString *const kImageOutStreamName = @"image_out"; +static NSString *const kPoseLandmarksTag = @"NORM_LANDMARKS"; +static NSString *const kPoseLandmarksOutStreamName = @"pose_landmarks"; +static NSString *const kWorldLandmarksTag = @"WORLD_LANDMARKS"; +static NSString *const kWorldLandmarksOutStreamName = @"world_landmarks"; +static NSString *const kSegmentationMasksTag = @"SEGMENTATION_MASK"; +static NSString *const kSegmentationMasksOutStreamName = @"segmentation_masks"; +static NSString *const kTaskGraphName = + @"mediapipe.tasks.vision.pose_landmarker.PoseLandmarkerGraph"; +static NSString *const kTaskName = @"poseLandmarker"; + +#define InputPacketMap(imagePacket, normalizedRectPacket) \ + { \ + {kImageInStreamName.cppString, imagePacket}, { \ + kNormRectInStreamName.cppString, normalizedRectPacket \ + } \ + } + +#define PoseLandmarkerResultWithOutputPacketMap(outputPacketMap) \ + ([MPPPoseLandmarkerResult \ + poseLandmarkerResultWithLandmarksPacket:outputPacketMap[kPoseLandmarksOutStreamName \ + .cppString] \ + worldLandmarksPacket:outputPacketMap[kWorldLandmarksOutStreamName \ + .cppString] \ + segmentationMasksPacket:&(outputPacketMap[kSegmentationMasksOutStreamName \ + .cppString])]) + +@interface MPPPoseLandmarker () { + /** iOS Vision Task Runner */ + MPPVisionTaskRunner *_visionTaskRunner; + dispatch_queue_t _callbackQueue; +} +@property(nonatomic, weak) id poseLandmarkerLiveStreamDelegate; +@end + +@implementation MPPPoseLandmarker + +#pragma mark - Public + +- (instancetype)initWithOptions:(MPPPoseLandmarkerOptions *)options error:(NSError **)error { + self = [super init]; + if (self) { + MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc] + initWithTaskGraphName:kTaskGraphName + inputStreams:@[ + [NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName], + [NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectInStreamName] + ] + outputStreams:@[ + [NSString + stringWithFormat:@"%@:%@", kPoseLandmarksTag, kPoseLandmarksOutStreamName], + [NSString + stringWithFormat:@"%@:%@", kWorldLandmarksTag, kWorldLandmarksOutStreamName], + [NSString stringWithFormat:@"%@:%@", kSegmentationMasksTag, + kSegmentationMasksOutStreamName], + [NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName] + ] + taskOptions:options + enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream + error:error]; + + if (!taskInfo) { + return nil; + } + + PacketsCallback 
packetsCallback = nullptr; + + if (options.poseLandmarkerLiveStreamDelegate) { + _poseLandmarkerLiveStreamDelegate = options.poseLandmarkerLiveStreamDelegate; + + // Create a private serial dispatch queue in which the deleagte method will be called + // asynchronously. This is to ensure that if the client performs a long running operation in + // the delegate method, the queue on which the C++ callbacks is invoked is not blocked and is + // freed up to continue with its operations. + _callbackQueue = dispatch_queue_create( + [MPPVisionTaskRunner uniqueDispatchQueueNameWithSuffix:kTaskName], nullptr); + + // Capturing `self` as weak in order to avoid `self` being kept in memory + // and cause a retain cycle, after self is set to `nil`. + MPPPoseLandmarker *__weak weakSelf = self; + packetsCallback = [=](absl::StatusOr liveStreamResult) { + [weakSelf processLiveStreamResult:liveStreamResult]; + }; + } + + _visionTaskRunner = [[MPPVisionTaskRunner alloc] initWithTaskInfo:taskInfo + runningMode:options.runningMode + roiAllowed:NO + packetsCallback:std::move(packetsCallback) + imageInputStreamName:kImageInStreamName + normRectInputStreamName:kNormRectInStreamName + error:error]; + + if (!_visionTaskRunner) { + return nil; + } + } + return self; +} + +- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error { + MPPPoseLandmarkerOptions *options = [[MPPPoseLandmarkerOptions alloc] init]; + + options.baseOptions.modelAssetPath = modelPath; + + return [self initWithOptions:options error:error]; +} + +- (nullable MPPPoseLandmarkerResult *)detectImage:(MPPImage *)image error:(NSError **)error { + std::optional outputPacketMap = [_visionTaskRunner processImage:image error:error]; + + return [MPPPoseLandmarker poseLandmarkerResultWithOptionalOutputPacketMap:outputPacketMap]; +} + +- (nullable MPPPoseLandmarkerResult *)detectVideoFrame:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError **)error { + std::optional outputPacketMap = + [_visionTaskRunner processVideoFrame:image + timestampInMilliseconds:timestampInMilliseconds + error:error]; + + return [MPPPoseLandmarker poseLandmarkerResultWithOptionalOutputPacketMap:outputPacketMap]; +} + +- (BOOL)detectAsyncImage:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError **)error { + return [_visionTaskRunner processLiveStreamImage:image + timestampInMilliseconds:timestampInMilliseconds + error:error]; +} + ++ (NSArray *)poseLandmarks { + return MPPPoseLandmarksConnections; +} + +#pragma mark - Private + +- (void)processLiveStreamResult:(absl::StatusOr)liveStreamResult { + if (![self.poseLandmarkerLiveStreamDelegate + respondsToSelector:@selector(poseLandmarker: + didFinishDetectionWithResult:timestampInMilliseconds:error:)]) { + return; + } + + NSError *callbackError = nil; + if (![MPPCommonUtils checkCppError:liveStreamResult.status() toError:&callbackError]) { + dispatch_async(_callbackQueue, ^{ + [self.poseLandmarkerLiveStreamDelegate poseLandmarker:self + didFinishDetectionWithResult:nil + timestampInMilliseconds:Timestamp::Unset().Value() + error:callbackError]; + }); + return; + } + + PacketMap &outputPacketMap = liveStreamResult.value(); + if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) { + return; + } + + MPPPoseLandmarkerResult *result = PoseLandmarkerResultWithOutputPacketMap(outputPacketMap); + + NSInteger timestampInMilliseconds = + outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() / + 
kMicrosecondsPerMillisecond; + dispatch_async(_callbackQueue, ^{ + [self.poseLandmarkerLiveStreamDelegate poseLandmarker:self + didFinishDetectionWithResult:result + timestampInMilliseconds:timestampInMilliseconds + error:callbackError]; + }); +} + ++ (nullable MPPPoseLandmarkerResult *)poseLandmarkerResultWithOptionalOutputPacketMap: + (std::optional &)outputPacketMap { + if (!outputPacketMap.has_value()) { + return nil; + } + + return PoseLandmarkerResultWithOutputPacketMap(outputPacketMap.value()); +} + +@end diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.h b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.h new file mode 100644 index 000000000..e0cb4d7bf --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.h @@ -0,0 +1,107 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h" + +NS_ASSUME_NONNULL_BEGIN + +@class MPPPoseLandmarker; + +/** + * This protocol defines an interface for the delegates of `PoseLandmarker` to receive + * results of performing asynchronous pose landmark detection on images (i.e., when `runningMode` = + * `.liveStream`). + * + * The delegate of `PoseLandmarker` must adopt the `PoseLandmarkerLiveStreamDelegate` protocol. + * The methods in this protocol are optional. + */ +NS_SWIFT_NAME(PoseLandmarkerLiveStreamDelegate) +@protocol MPPPoseLandmarkerLiveStreamDelegate + +/** + * This method notifies a delegate that the results of asynchronous pose landmark detection of an + * image submitted to the `PoseLandmarker` are available. + * + * This method is called on a private serial dispatch queue created by the `PoseLandmarker` + * for performing the asynchronous delegate calls. + * + * @param poseLandmarker The pose landmarker which performed the pose landmark detection. + * This is useful to test equality when there are multiple instances of `PoseLandmarker`. + * @param result The `PoseLandmarkerResult` object that contains a list of landmarks. + * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input + * image was sent to the pose landmarker. + * @param error An optional error parameter populated when there is an error in performing pose + * landmark detection on the input live stream image data. + */ +- (void)poseLandmarker:(MPPPoseLandmarker *)poseLandmarker + didFinishDetectionWithResult:(nullable MPPPoseLandmarkerResult *)result + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(nullable NSError *)error + NS_SWIFT_NAME(poseLandmarker(_:didFinishDetection:timestampInMilliseconds:error:)); +@end + +/** Options for setting up a `PoseLandmarker`.
 */ +NS_SWIFT_NAME(PoseLandmarkerOptions) +@interface MPPPoseLandmarkerOptions : MPPTaskOptions + +/** + * Running mode of the pose landmark detection task. Defaults to `.image`. `PoseLandmarker` can be + * created with one of the following running modes: + * 1. `.image`: The mode for performing pose landmark detection on single image inputs. + * 2. `.video`: The mode for performing pose landmark detection on the decoded frames of a video. + * 3. `.liveStream`: The mode for performing pose landmark detection on a live stream of input + * data, such as from the camera. + */ +@property(nonatomic) MPPRunningMode runningMode; + +/** + * An object that conforms to the `PoseLandmarkerLiveStreamDelegate` protocol. This object must + * implement `poseLandmarker(_:didFinishDetectionWithResult:timestampInMilliseconds:error:)` to + * receive the results of performing asynchronous pose landmark detection on images (i.e., when + * `runningMode` = `.liveStream`). + */ +@property(nonatomic, weak, nullable) id + poseLandmarkerLiveStreamDelegate; + +/** The maximum number of poses that can be detected by the `PoseLandmarker`. Defaults to 1. */ +@property(nonatomic) NSInteger numPoses; + +/** + * The minimum confidence score for pose detection to be considered successful. Defaults to 0.5. + */ +@property(nonatomic) float minPoseDetectionConfidence; + +/** + * The minimum confidence score of pose presence in the pose landmark detection. Defaults to + * 0.5. + */ +@property(nonatomic) float minPosePresenceConfidence; + +/** + * The minimum confidence score for pose tracking to be considered successful. Defaults to 0.5. + */ +@property(nonatomic) float minTrackingConfidence; + +/** + * Whether to output segmentation masks. Defaults to `false`. + */ +@property(nonatomic) BOOL shouldOutputSegmentationMasks; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.m b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.m new file mode 100644 index 000000000..0a838693c --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.m @@ -0,0 +1,45 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
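Aside (illustrative, not part of this patch): a sketch of the live-stream configuration that MPPPoseLandmarkerOptions.h above describes, shown as two fragments. The `modelPath`, `image`, `frameTimestampMs` and `error` variables are assumed, and `self` is assumed to adopt `MPPPoseLandmarkerLiveStreamDelegate`.

    // Fragment 1: configure the options and feed camera frames.
    MPPPoseLandmarkerOptions *options = [[MPPPoseLandmarkerOptions alloc] init];
    options.baseOptions.modelAssetPath = modelPath;
    options.runningMode = MPPRunningModeLiveStream;
    options.shouldOutputSegmentationMasks = YES;
    options.poseLandmarkerLiveStreamDelegate = self;
    MPPPoseLandmarker *poseLandmarker = [[MPPPoseLandmarker alloc] initWithOptions:options error:&error];
    // Timestamps must be monotonically increasing across frames.
    [poseLandmarker detectAsyncImage:image timestampInMilliseconds:frameTimestampMs error:&error];

    // Fragment 2: results arrive on the landmarker's private serial queue via the delegate method.
    - (void)poseLandmarker:(MPPPoseLandmarker *)poseLandmarker
        didFinishDetectionWithResult:(nullable MPPPoseLandmarkerResult *)result
             timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                               error:(nullable NSError *)error {
      // Read `result.landmarks` and, when enabled, `result.segmentationMasks` here.
    }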
+ +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.h" + +@implementation MPPPoseLandmarkerOptions + +- (instancetype)init { + self = [super init]; + if (self) { + _numPoses = 1; + _minPoseDetectionConfidence = 0.5f; + _minPosePresenceConfidence = 0.5f; + _minTrackingConfidence = 0.5f; + _shouldOutputSegmentationMasks = NO; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + MPPPoseLandmarkerOptions *poseLandmarkerOptions = [super copyWithZone:zone]; + + poseLandmarkerOptions.runningMode = self.runningMode; + poseLandmarkerOptions.numPoses = self.numPoses; + poseLandmarkerOptions.minPoseDetectionConfidence = self.minPoseDetectionConfidence; + poseLandmarkerOptions.minPosePresenceConfidence = self.minPosePresenceConfidence; + poseLandmarkerOptions.minTrackingConfidence = self.minTrackingConfidence; + poseLandmarkerOptions.shouldOutputSegmentationMasks = self.shouldOutputSegmentationMasks; + poseLandmarkerOptions.poseLandmarkerLiveStreamDelegate = self.poseLandmarkerLiveStreamDelegate; + + return poseLandmarkerOptions; +} + +@end diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h new file mode 100644 index 000000000..b3dc72e8f --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h @@ -0,0 +1,60 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import "mediapipe/tasks/ios/components/containers/sources/MPPLandmark.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskResult.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPMask.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Represents the pose landmarks deection results generated by `PoseLandmarker`. */ +NS_SWIFT_NAME(PoseLandmarkerResult) +@interface MPPPoseLandmarkerResult : MPPTaskResult + +/** Pose landmarks of detected poses. */ +@property(nonatomic, readonly) NSArray *> *landmarks; + +/** Pose landmarks in world coordinates of detected poses. */ +@property(nonatomic, readonly) NSArray *> *worldLandmarks; + +/** Pose segmentation masks. */ +@property(nonatomic, readonly) NSArray *segmentationMasks; + +/** + * Initializes a new `PoseLandmarkerResult` with the given array of landmarks, world landmarks, + * segmentation masks of the detected poses and timestamp (in milliseconds). + * + * @param landmarks An array of `NormalizedLandmark` objects. + * @param worldLandmarks An array of `Landmark` objects. + * @param segmentationMasks An array of `Mask` objects. + * @param timestampInMilliseconds The timestamp (in milliseconds) for this result. + * + * @return An instance of `PoseLandmarkerResult` initialized with the given array of landmarks, + * world landmarks, segmentation masks of the detected poses and timestamp (in milliseconds). 
+ */ +- (instancetype)initWithLandmarks:(NSArray *> *)landmarks + worldLandmarks:(NSArray *> *)worldLandmarks + segmentationMasks:(nullable NSArray *)segmentationMasks + timestampInMilliseconds:(NSInteger)timestampInMilliseconds NS_DESIGNATED_INITIALIZER; + +- (instancetype)initWithTimestampInMilliseconds:(NSInteger)timestampInMilliseconds NS_UNAVAILABLE; + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.m b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.m new file mode 100644 index 000000000..4a0324508 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.m @@ -0,0 +1,34 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h" + +@implementation MPPPoseLandmarkerResult + +- (instancetype)initWithLandmarks:(NSArray *> *)landmarks + worldLandmarks:(NSArray *> *)worldLandmarks + segmentationMasks:(NSArray *)segmentationMasks + timestampInMilliseconds:(NSInteger)timestampInMilliseconds { + self = [super initWithTimestampInMilliseconds:timestampInMilliseconds]; + if (self) { + _landmarks = [landmarks copy]; + _worldLandmarks = [worldLandmarks copy]; + _segmentationMasks = [segmentationMasks copy]; + } + return self; +} + +@end diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarksConnections.h b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarksConnections.h new file mode 100644 index 000000000..71dcad6b8 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarksConnections.h @@ -0,0 +1,40 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import +#import "mediapipe/tasks/ios/components/containers/sources/MPPConnection.h" + +NS_ASSUME_NONNULL_BEGIN + +NSArray *const MPPPoseLandmarksConnections = @[ + [[MPPConnection alloc] initWithStart:0 end:1], [[MPPConnection alloc] initWithStart:1 end:2], + [[MPPConnection alloc] initWithStart:2 end:3], [[MPPConnection alloc] initWithStart:3 end:7], + [[MPPConnection alloc] initWithStart:0 end:4], [[MPPConnection alloc] initWithStart:4 end:5], + [[MPPConnection alloc] initWithStart:5 end:6], [[MPPConnection alloc] initWithStart:6 end:8], + [[MPPConnection alloc] initWithStart:9 end:10], [[MPPConnection alloc] initWithStart:11 end:12], + [[MPPConnection alloc] initWithStart:11 end:13], [[MPPConnection alloc] initWithStart:13 end:15], + [[MPPConnection alloc] initWithStart:15 end:17], [[MPPConnection alloc] initWithStart:15 end:19], + [[MPPConnection alloc] initWithStart:15 end:21], [[MPPConnection alloc] initWithStart:17 end:19], + [[MPPConnection alloc] initWithStart:12 end:14], [[MPPConnection alloc] initWithStart:14 end:16], + [[MPPConnection alloc] initWithStart:16 end:18], [[MPPConnection alloc] initWithStart:16 end:20], + [[MPPConnection alloc] initWithStart:16 end:22], [[MPPConnection alloc] initWithStart:18 end:20], + [[MPPConnection alloc] initWithStart:11 end:23], [[MPPConnection alloc] initWithStart:12 end:24], + [[MPPConnection alloc] initWithStart:23 end:24], [[MPPConnection alloc] initWithStart:23 end:25], + [[MPPConnection alloc] initWithStart:26 end:28], [[MPPConnection alloc] initWithStart:27 end:29], + [[MPPConnection alloc] initWithStart:28 end:30], [[MPPConnection alloc] initWithStart:29 end:31], + [[MPPConnection alloc] initWithStart:30 end:32], [[MPPConnection alloc] initWithStart:27 end:31], + [[MPPConnection alloc] initWithStart:28 end:32] +]; + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/utils/BUILD b/mediapipe/tasks/ios/vision/pose_landmarker/utils/BUILD new file mode 100644 index 000000000..ee5e15bf9 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/utils/BUILD @@ -0,0 +1,56 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPPoseLandmarkerOptionsHelpers", + srcs = ["sources/MPPPoseLandmarkerOptions+Helpers.mm"], + hdrs = ["sources/MPPPoseLandmarkerOptions+Helpers.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + deps = [ + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarker_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_cc_proto", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/core:MPPTaskOptionsProtocol", + "//mediapipe/tasks/ios/core/utils:MPPBaseOptionsHelpers", + "//mediapipe/tasks/ios/vision/pose_landmarker:MPPPoseLandmarkerOptions", + ], +) + +objc_library( + name = "MPPPoseLandmarkerResultHelpers", + srcs = ["sources/MPPPoseLandmarkerResult+Helpers.mm"], + hdrs = ["sources/MPPPoseLandmarkerResult+Helpers.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + deps = [ + "//mediapipe/framework:packet", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/tasks/ios/components/containers/utils:MPPLandmarkHelpers", + "//mediapipe/tasks/ios/vision/pose_landmarker:MPPPoseLandmarkerResult", + ], +) diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.h b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.h new file mode 100644 index 000000000..9a60c195b --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.h @@ -0,0 +1,36 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __cplusplus +#error "This file requires Objective-C++." +#endif // __cplusplus + +#include "mediapipe/framework/calculator_options.pb.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptionsProtocol.h" +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MPPPoseLandmarkerOptions (Helpers) + +/** + * Populates the provided `CalculatorOptions` proto container with the current settings. + * + * @param optionsProto The `CalculatorOptions` proto object to copy the settings to. + */ +- (void)copyToProto:(::mediapipe::CalculatorOptions *)optionsProto; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.mm b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.mm new file mode 100644 index 000000000..1a112ef41 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.mm @@ -0,0 +1,52 @@ +// Copyright 2023 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerOptions+Helpers.h" + +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.h" + +#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options.pb.h" + +using CalculatorOptionsProto = ::mediapipe::CalculatorOptions; +using PoseDetectorGraphOptionsProto = + ::mediapipe::tasks::vision::pose_detector::proto::PoseDetectorGraphOptions; +using PoseLandmarksDetectorGraphOptionsProto = + ::mediapipe::tasks::vision::pose_landmarker::proto::PoseLandmarksDetectorGraphOptions; +using PoseLandmarkerGraphOptionsProto = + ::mediapipe::tasks::vision::pose_landmarker::proto::PoseLandmarkerGraphOptions; + +@implementation MPPPoseLandmarkerOptions (Helpers) + +- (void)copyToProto:(CalculatorOptionsProto *)optionsProto { + PoseLandmarkerGraphOptionsProto *poseLandmarkerGraphOptions = + optionsProto->MutableExtension(PoseLandmarkerGraphOptionsProto::ext); + poseLandmarkerGraphOptions->Clear(); + + [self.baseOptions copyToProto:poseLandmarkerGraphOptions->mutable_base_options()]; + poseLandmarkerGraphOptions->set_min_tracking_confidence(self.minTrackingConfidence); + + PoseLandmarksDetectorGraphOptionsProto *poseLandmarksDetectorGraphOptions = + poseLandmarkerGraphOptions->mutable_pose_landmarks_detector_graph_options(); + poseLandmarksDetectorGraphOptions->set_min_detection_confidence(self.minPosePresenceConfidence); + + PoseDetectorGraphOptionsProto *poseDetectorGraphOptions = + poseLandmarkerGraphOptions->mutable_pose_detector_graph_options(); + poseDetectorGraphOptions->set_num_poses(self.numPoses); + poseDetectorGraphOptions->set_min_detection_confidence(self.minPoseDetectionConfidence); +} + +@end diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.h b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.h new file mode 100644 index 000000000..24c4c69a6 --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.h @@ -0,0 +1,66 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/vision/pose_landmarker/sources/MPPPoseLandmarkerResult.h" + +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/packet.h" + +NS_ASSUME_NONNULL_BEGIN + +static const int kMicrosecondsPerMillisecond = 1000; + +@interface MPPPoseLandmarkerResult (Helpers) + +/** + * Creates an `MPPPoseLandmarkerResult` from landmarks, world landmarks and segmentation mask + * packets. + * + * @param landmarksPacket A MediaPipe packet wrapping a `std::vector`. + * @param worldLandmarksPacket A MediaPipe packet wrapping a `std::vector`. + * @param segmentationMasksPacket a MediaPipe packet wrapping a `std::vector`. + * + * @return An `MPPPoseLandmarkerResult` object that contains the hand landmark detection + * results. + */ ++ (MPPPoseLandmarkerResult *) + poseLandmarkerResultWithLandmarksPacket:(const mediapipe::Packet &)landmarksPacket + worldLandmarksPacket:(const mediapipe::Packet &)worldLandmarksPacket + segmentationMasksPacket:(const mediapipe::Packet *)segmentationMasksPacket; + +/** + * Creates an `MPPPoseLandmarkerResult` from landmarks, world landmarks and segmentation mask + * images. + * + * @param landmarksProto A vector of protos of type `std::vector`. + * @param worldLandmarksProto A vector of protos of type `std::vector`. + * @param segmentationMasks A vector of type `std::vector`. + * @param timestampInMilliSeconds The timestamp of the Packet that contained the result. + * + * @return An `MPPPoseLandmarkerResult` object that contains the pose landmark detection + * results. + */ ++ (MPPPoseLandmarkerResult *) + poseLandmarkerResultWithLandmarksProto: + (const std::vector<::mediapipe::NormalizedLandmarkList> &)landmarksProto + worldLandmarksProto: + (const std::vector<::mediapipe::LandmarkList> &)worldLandmarksProto + segmentationMasks: + (nullable const std::vector *)segmentationMasks + timestampInMilliseconds:(NSInteger)timestampInMilliseconds; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.mm b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.mm new file mode 100644 index 000000000..f4e7d7c4e --- /dev/null +++ b/mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.mm @@ -0,0 +1,127 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/vision/pose_landmarker/utils/sources/MPPPoseLandmarkerResult+Helpers.h" + +#import "mediapipe/tasks/ios/components/containers/utils/sources/MPPLandmark+Helpers.h" + +namespace { +using LandmarkListProto = ::mediapipe::LandmarkList; +using NormalizedLandmarkListProto = ::mediapipe::NormalizedLandmarkList; +using ::mediapipe::Image; +using ::mediapipe::Packet; +} // namespace + +@implementation MPPPoseLandmarkerResult (Helpers) + ++ (MPPPoseLandmarkerResult *)emptyPoseLandmarkerResultWithTimestampInMilliseconds: + (NSInteger)timestampInMilliseconds { + return [[MPPPoseLandmarkerResult alloc] initWithLandmarks:@[] + worldLandmarks:@[] + segmentationMasks:@[] + timestampInMilliseconds:timestampInMilliseconds]; +} + ++ (MPPPoseLandmarkerResult *) + poseLandmarkerResultWithLandmarksProto: + (const std::vector &)landmarksProto + worldLandmarksProto: + (const std::vector &)worldLandmarksProto + segmentationMasks:(nullable const std::vector *)segmentationMasks + timestampInMilliseconds:(NSInteger)timestampInMilliseconds { + NSMutableArray *> *multiplePoseLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)landmarksProto.size()]; + + for (const auto &landmarkListProto : landmarksProto) { + NSMutableArray *landmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)landmarkListProto.landmark().size()]; + for (const auto &normalizedLandmarkProto : landmarkListProto.landmark()) { + MPPNormalizedLandmark *normalizedLandmark = + [MPPNormalizedLandmark normalizedLandmarkWithProto:normalizedLandmarkProto]; + [landmarks addObject:normalizedLandmark]; + } + [multiplePoseLandmarks addObject:landmarks]; + } + + NSMutableArray *> *multiplePoseWorldLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarksProto.size()]; + + for (const auto &worldLandmarkListProto : worldLandmarksProto) { + NSMutableArray *worldLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarkListProto.landmark().size()]; + for (const auto &landmarkProto : worldLandmarkListProto.landmark()) { + MPPLandmark *landmark = [MPPLandmark landmarkWithProto:landmarkProto]; + [worldLandmarks addObject:landmark]; + } + [multiplePoseWorldLandmarks addObject:worldLandmarks]; + } + + if (!segmentationMasks) { + return [[MPPPoseLandmarkerResult alloc] initWithLandmarks:multiplePoseLandmarks + worldLandmarks:multiplePoseWorldLandmarks + segmentationMasks:nil + timestampInMilliseconds:timestampInMilliseconds]; + } + NSMutableArray *confidenceMasks = + [NSMutableArray arrayWithCapacity:(NSUInteger)segmentationMasks->size()]; + + for (const auto &segmentationMask : *segmentationMasks) { + [confidenceMasks addObject:[[MPPMask alloc] initWithFloat32Data:(float *)segmentationMask + .GetImageFrameSharedPtr() + .get() + ->PixelData() + width:segmentationMask.width() + height:segmentationMask.height() + /** Always deep copy */ + shouldCopy:YES]]; + } + + return [[MPPPoseLandmarkerResult alloc] initWithLandmarks:multiplePoseLandmarks + worldLandmarks:multiplePoseWorldLandmarks + segmentationMasks:confidenceMasks + timestampInMilliseconds:timestampInMilliseconds]; + ; +} + ++ (MPPPoseLandmarkerResult *) + poseLandmarkerResultWithLandmarksPacket:(const Packet &)landmarksPacket + worldLandmarksPacket:(const Packet &)worldLandmarksPacket + segmentationMasksPacket:(const Packet *)segmentationMasksPacket { + NSInteger timestampInMilliseconds = + (NSInteger)(landmarksPacket.Timestamp().Value() / kMicrosecondsPerMillisecond); + + if (landmarksPacket.IsEmpty()) { + return [MPPPoseLandmarkerResult + 
emptyPoseLandmarkerResultWithTimestampInMilliseconds:timestampInMilliseconds]; + } + + if (!landmarksPacket.ValidateAsType>().ok() || + !worldLandmarksPacket.ValidateAsType>().ok()) { + return [MPPPoseLandmarkerResult + emptyPoseLandmarkerResultWithTimestampInMilliseconds:timestampInMilliseconds]; + } + + const std::vector *segmentationMasks = + segmentationMasksPacket ? &(segmentationMasksPacket->Get>()) : nullptr; + + return [MPPPoseLandmarkerResult + poseLandmarkerResultWithLandmarksProto:landmarksPacket + .Get>() + worldLandmarksProto:worldLandmarksPacket + .Get>() + segmentationMasks:segmentationMasks + timestampInMilliseconds:timestampInMilliseconds]; +} + +@end diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/BUILD b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/BUILD index bcdc0e5e5..1149ea036 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/BUILD +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/BUILD @@ -96,6 +96,7 @@ android_library( "-Xep:AndroidJdkLibsChecker:OFF", ], deps = [ + "//mediapipe/framework/formats:landmark_java_proto_lite", "//third_party:autovalue", "@maven//:com_google_guava_guava", ], @@ -108,6 +109,7 @@ android_library( "-Xep:AndroidJdkLibsChecker:OFF", ], deps = [ + "//mediapipe/framework/formats:landmark_java_proto_lite", "//third_party:autovalue", "@maven//:com_google_guava_guava", ], diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Category.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Category.java index 65996c2af..916ad1bed 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Category.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Category.java @@ -16,6 +16,10 @@ package com.google.mediapipe.tasks.components.containers; import com.google.auto.value.AutoValue; import com.google.mediapipe.formats.proto.ClassificationProto; +import com.google.mediapipe.formats.proto.ClassificationProto.Classification; +import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; /** @@ -49,6 +53,22 @@ public abstract class Category { return create(proto.getScore(), proto.getIndex(), proto.getLabel(), proto.getDisplayName()); } + /** + * Creates a list of {@link Category} objects from a {@link + * ClassificationProto.ClassificationList}. + * + * @param classificationListProto the {@link ClassificationProto.ClassificationList} protobuf + * message to convert. + * @return A list of {@link Category} objects. + */ + public static List createListFromProto(ClassificationList classificationListProto) { + List categoryList = new ArrayList<>(); + for (Classification classification : classificationListProto.getClassificationList()) { + categoryList.add(createFromProto(classification)); + } + return categoryList; + } + /** The probability score of this label category. 
*/ public abstract float score(); diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Classifications.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Classifications.java index 9e53590d7..7c2a1fc21 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Classifications.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Classifications.java @@ -15,9 +15,7 @@ package com.google.mediapipe.tasks.components.containers; import com.google.auto.value.AutoValue; -import com.google.mediapipe.formats.proto.ClassificationProto; import com.google.mediapipe.tasks.components.containers.proto.ClassificationsProto; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Optional; @@ -49,11 +47,7 @@ public abstract class Classifications { * @param proto the {@link ClassificationsProto.Classifications} protobuf message to convert. */ public static Classifications createFromProto(ClassificationsProto.Classifications proto) { - List categories = new ArrayList<>(); - for (ClassificationProto.Classification classificationProto : - proto.getClassificationList().getClassificationList()) { - categories.add(Category.createFromProto(classificationProto)); - } + List categories = Category.createListFromProto(proto.getClassificationList()); Optional headName = proto.hasHeadName() ? Optional.of(proto.getHeadName()) : Optional.empty(); return create(categories, proto.getHeadIndex(), headName); diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Landmark.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Landmark.java index e23d9115d..b3bb2d52e 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Landmark.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/Landmark.java @@ -14,7 +14,11 @@ package com.google.mediapipe.tasks.components.containers; +import android.annotation.TargetApi; import com.google.auto.value.AutoValue; +import com.google.mediapipe.formats.proto.LandmarkProto; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; import java.util.Optional; @@ -24,18 +28,44 @@ import java.util.Optional; * is to the camera. */ @AutoValue +@TargetApi(31) public abstract class Landmark { private static final float TOLERANCE = 1e-6f; + /** Creates a landmark from x, y, z coordinates. */ public static Landmark create(float x, float y, float z) { return new AutoValue_Landmark(x, y, z, Optional.empty(), Optional.empty()); } + /** + * Creates a normalized landmark from x, y, z coordinates with optional visibility and presence. + */ public static Landmark create( float x, float y, float z, Optional visibility, Optional presence) { return new AutoValue_Landmark(x, y, z, visibility, presence); } + /** Creates a landmark from a landmark proto. */ + public static Landmark createFromProto(LandmarkProto.Landmark landmarkProto) { + return Landmark.create( + landmarkProto.getX(), + landmarkProto.getY(), + landmarkProto.getZ(), + landmarkProto.hasVisibility() + ? Optional.of(landmarkProto.getVisibility()) + : Optional.empty(), + landmarkProto.hasPresence() ? Optional.of(landmarkProto.getPresence()) : Optional.empty()); + } + + /** Creates a list of landmarks from a {@link LandmarkList}. 
*/ + public static List createListFromProto(LandmarkProto.LandmarkList landmarkListProto) { + List landmarkList = new ArrayList<>(); + for (LandmarkProto.Landmark landmarkProto : landmarkListProto.getLandmarkList()) { + landmarkList.add(createFromProto(landmarkProto)); + } + return landmarkList; + } + // The x coordinates of the landmark. public abstract float x(); diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/NormalizedLandmark.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/NormalizedLandmark.java index 50a95d565..d6fa618a3 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/NormalizedLandmark.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers/NormalizedLandmark.java @@ -14,7 +14,11 @@ package com.google.mediapipe.tasks.components.containers; +import android.annotation.TargetApi; import com.google.auto.value.AutoValue; +import com.google.mediapipe.formats.proto.LandmarkProto; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; import java.util.Optional; @@ -25,18 +29,45 @@ import java.util.Optional; * uses roughly the same scale as x. */ @AutoValue +@TargetApi(31) public abstract class NormalizedLandmark { private static final float TOLERANCE = 1e-6f; + /** Creates a normalized landmark from x, y, z coordinates. */ public static NormalizedLandmark create(float x, float y, float z) { return new AutoValue_NormalizedLandmark(x, y, z, Optional.empty(), Optional.empty()); } + /** + * Creates a normalized landmark from x, y, z coordinates with optional visibility and presence. + */ public static NormalizedLandmark create( float x, float y, float z, Optional visibility, Optional presence) { return new AutoValue_NormalizedLandmark(x, y, z, visibility, presence); } + /** Creates a normalized landmark from a normalized landmark proto. */ + public static NormalizedLandmark createFromProto(LandmarkProto.NormalizedLandmark landmarkProto) { + return NormalizedLandmark.create( + landmarkProto.getX(), + landmarkProto.getY(), + landmarkProto.getZ(), + landmarkProto.hasVisibility() + ? Optional.of(landmarkProto.getVisibility()) + : Optional.empty(), + landmarkProto.hasPresence() ? Optional.of(landmarkProto.getPresence()) : Optional.empty()); + } + + /** Creates a list of normalized landmarks from a {@link NormalizedLandmarkList}. */ + public static List createListFromProto( + LandmarkProto.NormalizedLandmarkList landmarkListProto) { + List landmarkList = new ArrayList<>(); + for (LandmarkProto.NormalizedLandmark landmarkProto : landmarkListProto.getLandmarkList()) { + landmarkList.add(createFromProto(landmarkProto)); + } + return landmarkList; + } + // The x coordinates of the normalized landmark. 
public abstract float x(); diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl index 916323372..f2e4d485f 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl @@ -47,13 +47,14 @@ _VISION_TASKS_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_embedder_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_recognizer_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_segmenter/proto:image_segmenter_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_segmenter/proto:segmenter_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/object_detector/proto:object_detector_options_java_proto_lite", "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarker_graph_options_java_proto_lite", @@ -84,6 +85,7 @@ _VISION_TASKS_IMAGE_GENERATOR_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_proto", ] _TEXT_TASKS_JAVA_PROTO_LITE_TARGETS = [ diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD index 60a9806e9..2d5ef7a9c 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD @@ -67,6 +67,7 @@ cc_binary( "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_graph", "//mediapipe/tasks/cc/vision/face_stylizer:face_stylizer_graph", "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_graph", + "//mediapipe/tasks/cc/vision/holistic_landmarker:holistic_landmarker_graph", "//mediapipe/tasks/cc/vision/image_classifier:image_classifier_graph", "//mediapipe/tasks/cc/vision/image_embedder:image_embedder_graph", "//mediapipe/tasks/cc/vision/image_segmenter:image_segmenter_graph", @@ -208,11 +209,9 @@ android_library( deps = [ ":core", 
"//mediapipe/framework:calculator_options_java_proto_lite", - "//mediapipe/framework/formats:classification_java_proto_lite", "//mediapipe/framework/formats:landmark_java_proto_lite", "//mediapipe/java/com/google/mediapipe/framework:android_framework", "//mediapipe/java/com/google/mediapipe/framework/image", - "//mediapipe/tasks/cc/components/processors/proto:classifier_options_java_proto_lite", "//mediapipe/tasks/cc/core/proto:base_options_java_proto_lite", "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarker_graph_options_java_proto_lite", @@ -222,7 +221,6 @@ android_library( "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:normalized_landmark", "//mediapipe/tasks/java/com/google/mediapipe/tasks/core", "//third_party:autovalue", - "@maven//:androidx_annotation_annotation", "@maven//:com_google_guava_guava", ], ) @@ -246,7 +244,6 @@ android_library( "//mediapipe/framework/formats:landmark_java_proto_lite", "//mediapipe/java/com/google/mediapipe/framework:android_framework", "//mediapipe/java/com/google/mediapipe/framework/image", - "//mediapipe/tasks/cc/components/processors/proto:classifier_options_java_proto_lite", "//mediapipe/tasks/cc/core/proto:base_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", @@ -430,6 +427,39 @@ filegroup( visibility = ["//mediapipe/tasks/java/com/google/mediapipe/tasks/vision:__subpackages__"], ) +android_library( + name = "holisticlandmarker", + srcs = [ + "holisticlandmarker/HolisticLandmarker.java", + "holisticlandmarker/HolisticLandmarkerResult.java", + ], + javacopts = [ + "-Xep:AndroidJdkLibsChecker:OFF", + ], + manifest = "facedetector/AndroidManifest.xml", + deps = [ + ":core", + "//mediapipe/framework/formats:classification_java_proto_lite", + "//mediapipe/framework/formats:landmark_java_proto_lite", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + "//mediapipe/java/com/google/mediapipe/framework/image", + "//mediapipe/tasks/cc/core/proto:base_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:category", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:landmark", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:normalized_landmark", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/core", + "//third_party:any_java_proto", + "//third_party:autovalue", + "@maven//:com_google_guava_guava", + ], +) + load("//mediapipe/tasks/java/com/google/mediapipe/tasks:mediapipe_tasks_aar.bzl", "mediapipe_tasks_vision_aar") mediapipe_tasks_vision_aar( diff --git 
a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/facelandmarker/FaceLandmarkerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/facelandmarker/FaceLandmarkerResult.java index 0429ecacb..78bc7efb9 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/facelandmarker/FaceLandmarkerResult.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/facelandmarker/FaceLandmarkerResult.java @@ -16,7 +16,6 @@ package com.google.mediapipe.tasks.vision.facelandmarker; import com.google.auto.value.AutoValue; import com.google.mediapipe.formats.proto.LandmarkProto; -import com.google.mediapipe.formats.proto.ClassificationProto.Classification; import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; import com.google.mediapipe.tasks.components.containers.Category; import com.google.mediapipe.tasks.components.containers.NormalizedLandmark; @@ -47,40 +46,21 @@ public abstract class FaceLandmarkerResult implements TaskResult { long timestampMs) { List> multiFaceLandmarks = new ArrayList<>(); for (LandmarkProto.NormalizedLandmarkList faceLandmarksProto : multiFaceLandmarksProto) { - List faceLandmarks = new ArrayList<>(); - multiFaceLandmarks.add(faceLandmarks); - for (LandmarkProto.NormalizedLandmark faceLandmarkProto : - faceLandmarksProto.getLandmarkList()) { - faceLandmarks.add( - NormalizedLandmark.create( - faceLandmarkProto.getX(), - faceLandmarkProto.getY(), - faceLandmarkProto.getZ(), - faceLandmarkProto.hasVisibility() - ? Optional.of(faceLandmarkProto.getVisibility()) - : Optional.empty(), - faceLandmarkProto.hasPresence() - ? Optional.of(faceLandmarkProto.getPresence()) - : Optional.empty())); - } + List faceLandmarks = + NormalizedLandmark.createListFromProto(faceLandmarksProto); + multiFaceLandmarks.add(Collections.unmodifiableList(faceLandmarks)); } + Optional>> multiFaceBlendshapes = Optional.empty(); if (multiFaceBendshapesProto.isPresent()) { List> blendshapes = new ArrayList<>(); for (ClassificationList faceBendshapeProto : multiFaceBendshapesProto.get()) { - List blendshape = new ArrayList<>(); - blendshapes.add(blendshape); - for (Classification classification : faceBendshapeProto.getClassificationList()) { - blendshape.add( - Category.create( - classification.getScore(), - classification.getIndex(), - classification.getLabel(), - classification.getDisplayName())); - } + List blendshape = Category.createListFromProto(faceBendshapeProto); + blendshapes.add(Collections.unmodifiableList(blendshape)); } multiFaceBlendshapes = Optional.of(Collections.unmodifiableList(blendshapes)); } + Optional> multiFaceTransformationMatrixes = Optional.empty(); if (multiFaceTransformationMatrixesProto.isPresent()) { List matrixes = new ArrayList<>(); @@ -99,6 +79,7 @@ public abstract class FaceLandmarkerResult implements TaskResult { } multiFaceTransformationMatrixes = Optional.of(Collections.unmodifiableList(matrixes)); } + return new AutoValue_FaceLandmarkerResult( timestampMs, Collections.unmodifiableList(multiFaceLandmarks), diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java index c8d43e2ca..09ceac215 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java @@ -75,16 +75,8 @@ public abstract class 
GestureRecognizerResult implements TaskResult { } } for (ClassificationList handednessProto : handednessesProto) { - List handedness = new ArrayList<>(); - multiHandHandednesses.add(handedness); - for (Classification classification : handednessProto.getClassificationList()) { - handedness.add( - Category.create( - classification.getScore(), - classification.getIndex(), - classification.getLabel(), - classification.getDisplayName())); - } + List handedness = Category.createListFromProto(handednessProto); + multiHandHandednesses.add(Collections.unmodifiableList(handedness)); } for (ClassificationList gestureProto : gesturesProto) { List gestures = new ArrayList<>(); diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java index 14d2fa926..5a1661a52 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java @@ -16,7 +16,6 @@ package com.google.mediapipe.tasks.vision.handlandmarker; import com.google.auto.value.AutoValue; import com.google.mediapipe.formats.proto.LandmarkProto; -import com.google.mediapipe.formats.proto.ClassificationProto.Classification; import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; import com.google.mediapipe.tasks.components.containers.Category; import com.google.mediapipe.tasks.components.containers.Landmark; @@ -25,7 +24,6 @@ import com.google.mediapipe.tasks.core.TaskResult; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Optional; /** Represents the hand landmarks deection results generated by {@link HandLandmarker}. */ @AutoValue @@ -35,71 +33,38 @@ public abstract class HandLandmarkerResult implements TaskResult { * Creates a {@link HandLandmarkerResult} instance from the lists of landmarks and handedness * protobuf messages. * - * @param landmarksProto a List of {@link NormalizedLandmarkList} - * @param worldLandmarksProto a List of {@link LandmarkList} - * @param handednessesProto a List of {@link ClassificationList} + * @param landmarksProtos a List of {@link NormalizedLandmarkList} + * @param worldLandmarksProtos a List of {@link LandmarkList} + * @param handednessesProtos a List of {@link ClassificationList} */ static HandLandmarkerResult create( - List landmarksProto, - List worldLandmarksProto, - List handednessesProto, + List landmarksProtos, + List worldLandmarksProtos, + List handednessesProtos, long timestampMs) { - List> multiHandLandmarks = new ArrayList<>(); - List> multiHandWorldLandmarks = new ArrayList<>(); - List> multiHandHandednesses = new ArrayList<>(); - for (LandmarkProto.NormalizedLandmarkList handLandmarksProto : landmarksProto) { - List handLandmarks = new ArrayList<>(); - multiHandLandmarks.add(handLandmarks); - for (LandmarkProto.NormalizedLandmark handLandmarkProto : - handLandmarksProto.getLandmarkList()) { - handLandmarks.add( - NormalizedLandmark.create( - handLandmarkProto.getX(), - handLandmarkProto.getY(), - handLandmarkProto.getZ(), - handLandmarkProto.hasVisibility() - ? Optional.of(handLandmarkProto.getVisibility()) - : Optional.empty(), - handLandmarkProto.hasPresence() - ? 
Optional.of(handLandmarkProto.getPresence()) - : Optional.empty())); - } + List> handLandmarks = new ArrayList<>(); + for (LandmarkProto.NormalizedLandmarkList handLandmarksProto : landmarksProtos) { + handLandmarks.add( + Collections.unmodifiableList(NormalizedLandmark.createListFromProto(handLandmarksProto))); } - for (LandmarkProto.LandmarkList handWorldLandmarksProto : worldLandmarksProto) { - List handWorldLandmarks = new ArrayList<>(); - multiHandWorldLandmarks.add(handWorldLandmarks); - for (LandmarkProto.Landmark handWorldLandmarkProto : - handWorldLandmarksProto.getLandmarkList()) { - handWorldLandmarks.add( - com.google.mediapipe.tasks.components.containers.Landmark.create( - handWorldLandmarkProto.getX(), - handWorldLandmarkProto.getY(), - handWorldLandmarkProto.getZ(), - handWorldLandmarkProto.hasVisibility() - ? Optional.of(handWorldLandmarkProto.getVisibility()) - : Optional.empty(), - handWorldLandmarkProto.hasPresence() - ? Optional.of(handWorldLandmarkProto.getPresence()) - : Optional.empty())); - } + + List> handWorldLandmarks = new ArrayList<>(); + for (LandmarkProto.LandmarkList handWorldLandmarksProto : worldLandmarksProtos) { + handWorldLandmarks.add( + Collections.unmodifiableList(Landmark.createListFromProto(handWorldLandmarksProto))); } - for (ClassificationList handednessProto : handednessesProto) { - List handedness = new ArrayList<>(); - multiHandHandednesses.add(handedness); - for (Classification classification : handednessProto.getClassificationList()) { - handedness.add( - Category.create( - classification.getScore(), - classification.getIndex(), - classification.getLabel(), - classification.getDisplayName())); - } + + List> handHandednesses = new ArrayList<>(); + for (ClassificationList handednessProto : handednessesProtos) { + handHandednesses.add( + Collections.unmodifiableList(Category.createListFromProto(handednessProto))); } + return new AutoValue_HandLandmarkerResult( timestampMs, - Collections.unmodifiableList(multiHandLandmarks), - Collections.unmodifiableList(multiHandWorldLandmarks), - Collections.unmodifiableList(multiHandHandednesses)); + Collections.unmodifiableList(handLandmarks), + Collections.unmodifiableList(handWorldLandmarks), + Collections.unmodifiableList(handHandednesses)); } @Override diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/AndroidManifest.xml b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/AndroidManifest.xml new file mode 100644 index 000000000..a90c388f4 --- /dev/null +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/AndroidManifest.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarker.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarker.java new file mode 100644 index 000000000..e80da4fca --- /dev/null +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarker.java @@ -0,0 +1,668 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.tasks.vision.holisticlandmarker; + +import android.content.Context; +import android.os.ParcelFileDescriptor; +import com.google.auto.value.AutoValue; +import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; +import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; +import com.google.mediapipe.framework.AndroidPacketGetter; +import com.google.mediapipe.framework.MediaPipeException; +import com.google.mediapipe.framework.Packet; +import com.google.mediapipe.framework.PacketGetter; +import com.google.mediapipe.framework.image.BitmapImageBuilder; +import com.google.mediapipe.framework.image.ByteBufferImageBuilder; +import com.google.mediapipe.framework.image.MPImage; +import com.google.mediapipe.tasks.core.BaseOptions; +import com.google.mediapipe.tasks.core.ErrorListener; +import com.google.mediapipe.tasks.core.OutputHandler; +import com.google.mediapipe.tasks.core.OutputHandler.ResultListener; +import com.google.mediapipe.tasks.core.TaskInfo; +import com.google.mediapipe.tasks.core.TaskOptions; +import com.google.mediapipe.tasks.core.TaskRunner; +import com.google.mediapipe.tasks.core.proto.BaseOptionsProto; +import com.google.mediapipe.tasks.vision.core.BaseVisionTaskApi; +import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions; +import com.google.mediapipe.tasks.vision.core.RunningMode; +import com.google.mediapipe.tasks.vision.facedetector.proto.FaceDetectorGraphOptionsProto.FaceDetectorGraphOptions; +import com.google.mediapipe.tasks.vision.facelandmarker.proto.FaceLandmarksDetectorGraphOptionsProto.FaceLandmarksDetectorGraphOptions; +import com.google.mediapipe.tasks.vision.handlandmarker.proto.HandLandmarksDetectorGraphOptionsProto.HandLandmarksDetectorGraphOptions; +import com.google.mediapipe.tasks.vision.holisticlandmarker.proto.HolisticLandmarkerGraphOptionsProto.HolisticLandmarkerGraphOptions; +import com.google.mediapipe.tasks.vision.posedetector.proto.PoseDetectorGraphOptionsProto.PoseDetectorGraphOptions; +import com.google.mediapipe.tasks.vision.poselandmarker.proto.PoseLandmarksDetectorGraphOptionsProto.PoseLandmarksDetectorGraphOptions; +import com.google.protobuf.Any; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +/** + * Performs holistic landmarks detection on images. + * + *

<p>This API expects a pre-trained holistic landmarks model asset bundle. + * + * <ul> + *   <li>Input image {@link MPImage} + *       <ul> + *         <li>The image that holistic landmarks detection runs on. + *       </ul> + *   <li>Output {@link HolisticLandmarkerResult} + *       <ul> + *         <li>A HolisticLandmarkerResult containing holistic landmarks. + *       </ul> + * </ul>
+ */ +public final class HolisticLandmarker extends BaseVisionTaskApi { + private static final String TAG = HolisticLandmarker.class.getSimpleName(); + + private static final String IMAGE_IN_STREAM_NAME = "image_in"; + private static final String POSE_LANDMARKS_STREAM = "pose_landmarks"; + private static final String POSE_WORLD_LANDMARKS_STREAM = "pose_world_landmarks"; + private static final String POSE_SEGMENTATION_MASK_STREAM = "pose_segmentation_mask"; + private static final String FACE_LANDMARKS_STREAM = "face_landmarks"; + private static final String FACE_BLENDSHAPES_STREAM = "extra_blendshapes"; + private static final String LEFT_HAND_LANDMARKS_STREAM = "left_hand_landmarks"; + private static final String LEFT_HAND_WORLD_LANDMARKS_STREAM = "left_hand_world_landmarks"; + private static final String RIGHT_HAND_LANDMARKS_STREAM = "right_hand_landmarks"; + private static final String RIGHT_HAND_WORLD_LANDMARKS_STREAM = "right_hand_world_landmarks"; + private static final String IMAGE_OUT_STREAM_NAME = "image_out"; + + private static final int FACE_LANDMARKS_OUT_STREAM_INDEX = 0; + private static final int POSE_LANDMARKS_OUT_STREAM_INDEX = 1; + private static final int POSE_WORLD_LANDMARKS_OUT_STREAM_INDEX = 2; + private static final int LEFT_HAND_LANDMARKS_OUT_STREAM_INDEX = 3; + private static final int LEFT_HAND_WORLD_LANDMARKS_OUT_STREAM_INDEX = 4; + private static final int RIGHT_HAND_LANDMARKS_OUT_STREAM_INDEX = 5; + private static final int RIGHT_HAND_WORLD_LANDMARKS_OUT_STREAM_INDEX = 6; + private static final int IMAGE_OUT_STREAM_INDEX = 7; + + private static final float DEFAULT_PRESENCE_THRESHOLD = 0.5f; + private static final float DEFAULT_SUPPRESION_THRESHOLD = 0.3f; + private static final boolean DEFAULT_OUTPUT_FACE_BLENDSHAPES = false; + private static final boolean DEFAULT_OUTPUT_SEGMENTATION_MASKS = false; + + private static final String TASK_GRAPH_NAME = + "mediapipe.tasks.vision.holistic_landmarker.HolisticLandmarkerGraph"; + + @SuppressWarnings("ConstantCaseForConstants") + private static final List INPUT_STREAMS = + Collections.unmodifiableList(Arrays.asList("IMAGE:" + IMAGE_IN_STREAM_NAME)); + + static { + System.loadLibrary("mediapipe_tasks_vision_jni"); + } + + /** + * Creates a {@link HolisticLandmarker} instance from a model asset bundle path and the default + * {@link HolisticLandmarkerOptions}. + * + * @param context an Android {@link Context}. + * @param modelAssetPath path to the holistic landmarks model with metadata in the assets. + * @throws MediaPipeException if there is an error during {@link HolisticLandmarker} creation. + */ + public static HolisticLandmarker createFromFile(Context context, String modelAssetPath) { + BaseOptions baseOptions = BaseOptions.builder().setModelAssetPath(modelAssetPath).build(); + return createFromOptions( + context, HolisticLandmarkerOptions.builder().setBaseOptions(baseOptions).build()); + } + + /** + * Creates a {@link HolisticLandmarker} instance from a model asset bundle file and the default + * {@link HolisticLandmarkerOptions}. + * + * @param context an Android {@link Context}. + * @param modelAssetFile the holistic landmarks model {@link File} instance. + * @throws IOException if an I/O error occurs when opening the tflite model file. + * @throws MediaPipeException if there is an error during {@link HolisticLandmarker} creation. 
+ */ + public static HolisticLandmarker createFromFile(Context context, File modelAssetFile) + throws IOException { + try (ParcelFileDescriptor descriptor = + ParcelFileDescriptor.open(modelAssetFile, ParcelFileDescriptor.MODE_READ_ONLY)) { + BaseOptions baseOptions = + BaseOptions.builder().setModelAssetFileDescriptor(descriptor.getFd()).build(); + return createFromOptions( + context, HolisticLandmarkerOptions.builder().setBaseOptions(baseOptions).build()); + } + } + + /** + * Creates a {@link HolisticLandmarker} instance from a model asset bundle buffer and the default + * {@link HolisticLandmarkerOptions}. + * + * @param context an Android {@link Context}. + * @param modelAssetBuffer a direct {@link ByteBuffer} or a {@link MappedByteBuffer} of the + * detection model. + * @throws MediaPipeException if there is an error during {@link HolisticLandmarker} creation. + */ + public static HolisticLandmarker createFromBuffer( + Context context, final ByteBuffer modelAssetBuffer) { + BaseOptions baseOptions = BaseOptions.builder().setModelAssetBuffer(modelAssetBuffer).build(); + return createFromOptions( + context, HolisticLandmarkerOptions.builder().setBaseOptions(baseOptions).build()); + } + + /** + * Creates a {@link HolisticLandmarker} instance from a {@link HolisticLandmarkerOptions}. + * + * @param context an Android {@link Context}. + * @param landmarkerOptions a {@link HolisticLandmarkerOptions} instance. + * @throws MediaPipeException if there is an error during {@link HolisticLandmarker} creation. + */ + public static HolisticLandmarker createFromOptions( + Context context, HolisticLandmarkerOptions landmarkerOptions) { + List outputStreams = new ArrayList<>(); + outputStreams.add("FACE_LANDMARKS:" + FACE_LANDMARKS_STREAM); + outputStreams.add("POSE_LANDMARKS:" + POSE_LANDMARKS_STREAM); + outputStreams.add("POSE_WORLD_LANDMARKS:" + POSE_WORLD_LANDMARKS_STREAM); + outputStreams.add("LEFT_HAND_LANDMARKS:" + LEFT_HAND_LANDMARKS_STREAM); + outputStreams.add("LEFT_HAND_WORLD_LANDMARKS:" + LEFT_HAND_WORLD_LANDMARKS_STREAM); + outputStreams.add("RIGHT_HAND_LANDMARKS:" + RIGHT_HAND_LANDMARKS_STREAM); + outputStreams.add("RIGHT_HAND_WORLD_LANDMARKS:" + RIGHT_HAND_WORLD_LANDMARKS_STREAM); + outputStreams.add("IMAGE:" + IMAGE_OUT_STREAM_NAME); + + int[] faceBlendshapesOutStreamIndex = new int[] {-1}; + if (landmarkerOptions.outputFaceBlendshapes()) { + outputStreams.add("FACE_BLENDSHAPES:" + FACE_BLENDSHAPES_STREAM); + faceBlendshapesOutStreamIndex[0] = outputStreams.size() - 1; + } + + int[] poseSegmentationMasksOutStreamIndex = new int[] {-1}; + if (landmarkerOptions.outputPoseSegmentationMasks()) { + outputStreams.add("POSE_SEGMENTATION_MASK:" + POSE_SEGMENTATION_MASK_STREAM); + poseSegmentationMasksOutStreamIndex[0] = outputStreams.size() - 1; + } + + OutputHandler handler = new OutputHandler<>(); + handler.setOutputPacketConverter( + new OutputHandler.OutputPacketConverter() { + @Override + public HolisticLandmarkerResult convertToTaskResult(List packets) { + // If there are no detected landmarks, just returns empty lists. 
+ if (packets.get(FACE_LANDMARKS_OUT_STREAM_INDEX).isEmpty()) { + return HolisticLandmarkerResult.createEmpty( + BaseVisionTaskApi.generateResultTimestampMs( + landmarkerOptions.runningMode(), + packets.get(FACE_LANDMARKS_OUT_STREAM_INDEX))); + } + + NormalizedLandmarkList faceLandmarkProtos = + PacketGetter.getProto( + packets.get(FACE_LANDMARKS_OUT_STREAM_INDEX), NormalizedLandmarkList.parser()); + Optional faceBlendshapeProtos = + landmarkerOptions.outputFaceBlendshapes() + ? Optional.of( + PacketGetter.getProto( + packets.get(faceBlendshapesOutStreamIndex[0]), + ClassificationList.parser())) + : Optional.empty(); + NormalizedLandmarkList poseLandmarkProtos = + PacketGetter.getProto( + packets.get(POSE_LANDMARKS_OUT_STREAM_INDEX), NormalizedLandmarkList.parser()); + LandmarkList poseWorldLandmarkProtos = + PacketGetter.getProto( + packets.get(POSE_WORLD_LANDMARKS_OUT_STREAM_INDEX), LandmarkList.parser()); + Optional segmentationMask = + landmarkerOptions.outputPoseSegmentationMasks() + ? Optional.of( + getSegmentationMask(packets, poseSegmentationMasksOutStreamIndex[0])) + : Optional.empty(); + NormalizedLandmarkList leftHandLandmarkProtos = + PacketGetter.getProto( + packets.get(LEFT_HAND_LANDMARKS_OUT_STREAM_INDEX), + NormalizedLandmarkList.parser()); + LandmarkList leftHandWorldLandmarkProtos = + PacketGetter.getProto( + packets.get(LEFT_HAND_WORLD_LANDMARKS_OUT_STREAM_INDEX), LandmarkList.parser()); + NormalizedLandmarkList rightHandLandmarkProtos = + PacketGetter.getProto( + packets.get(RIGHT_HAND_LANDMARKS_OUT_STREAM_INDEX), + NormalizedLandmarkList.parser()); + LandmarkList rightHandWorldLandmarkProtos = + PacketGetter.getProto( + packets.get(RIGHT_HAND_WORLD_LANDMARKS_OUT_STREAM_INDEX), + LandmarkList.parser()); + + return HolisticLandmarkerResult.create( + faceLandmarkProtos, + faceBlendshapeProtos, + poseLandmarkProtos, + poseWorldLandmarkProtos, + segmentationMask, + leftHandLandmarkProtos, + leftHandWorldLandmarkProtos, + rightHandLandmarkProtos, + rightHandWorldLandmarkProtos, + BaseVisionTaskApi.generateResultTimestampMs( + landmarkerOptions.runningMode(), packets.get(FACE_LANDMARKS_OUT_STREAM_INDEX))); + } + + @Override + public MPImage convertToTaskInput(List packets) { + return new BitmapImageBuilder( + AndroidPacketGetter.getBitmapFromRgb(packets.get(IMAGE_OUT_STREAM_INDEX))) + .build(); + } + }); + landmarkerOptions.resultListener().ifPresent(handler::setResultListener); + landmarkerOptions.errorListener().ifPresent(handler::setErrorListener); + TaskRunner runner = + TaskRunner.create( + context, + TaskInfo.builder() + .setTaskName(HolisticLandmarker.class.getSimpleName()) + .setTaskRunningModeName(landmarkerOptions.runningMode().name()) + .setTaskGraphName(TASK_GRAPH_NAME) + .setInputStreams(INPUT_STREAMS) + .setOutputStreams(outputStreams) + .setTaskOptions(landmarkerOptions) + .setEnableFlowLimiting(landmarkerOptions.runningMode() == RunningMode.LIVE_STREAM) + .build(), + handler); + return new HolisticLandmarker(runner, landmarkerOptions.runningMode()); + } + + /** + * Constructor to initialize an {@link HolisticLandmarker} from a {@link TaskRunner} and a {@link + * RunningMode}. + * + * @param taskRunner a {@link TaskRunner}. + * @param runningMode a mediapipe vision task {@link RunningMode}. 
+ */ + private HolisticLandmarker(TaskRunner taskRunner, RunningMode runningMode) { + super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, /* normRectStreamName= */ ""); + } + + /** + * Performs holistic landmarks detection on the provided single image with default image + * processing options, i.e. without any rotation applied. Only use this method when the {@link + * HolisticLandmarker} is created with {@link RunningMode.IMAGE}. + * + *

<p>{@link HolisticLandmarker} supports the following color space types: + * + * <ul> + *   <li>{@link Bitmap.Config.ARGB_8888} + * </ul>
+ * + * @param image a MediaPipe {@link MPImage} object for processing. + * @throws MediaPipeException if there is an internal error. + */ + public HolisticLandmarkerResult detect(MPImage image) { + return detect(image, ImageProcessingOptions.builder().build()); + } + + /** + * Performs holistic landmarks detection on the provided single image. Only use this method when + * the {@link HolisticLandmarker} is created with {@link RunningMode.IMAGE}. + * + *

<p>{@link HolisticLandmarker} supports the following color space types: + * + * <ul> + *   <li>{@link Bitmap.Config.ARGB_8888} + * </ul>
+ * + * @param image a MediaPipe {@link MPImage} object for processing. + * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the + * input image before running inference. Note that region-of-interest is not supported + * by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in + * this method throwing an IllegalArgumentException. + * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a + * region-of-interest. + * @throws MediaPipeException if there is an internal error. + */ + public HolisticLandmarkerResult detect( + MPImage image, ImageProcessingOptions imageProcessingOptions) { + validateImageProcessingOptions(imageProcessingOptions); + return (HolisticLandmarkerResult) processImageData(image, imageProcessingOptions); + } + + /** + * Performs holistic landmarks detection on the provided video frame with default image processing + * options, i.e. without any rotation applied. Only use this method when the {@link + * HolisticLandmarker} is created with {@link RunningMode.VIDEO}. + * + *

<p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps + * must be monotonically increasing. + * + * <p>{@link HolisticLandmarker} supports the following color space types: + * + * <ul> + *   <li>{@link Bitmap.Config.ARGB_8888} + * </ul>
+ * + * @param image a MediaPipe {@link MPImage} object for processing. + * @param timestampMs the input timestamp (in milliseconds). + * @throws MediaPipeException if there is an internal error. + */ + public HolisticLandmarkerResult detectForVideo(MPImage image, long timestampMs) { + return detectForVideo(image, ImageProcessingOptions.builder().build(), timestampMs); + } + + /** + * Performs holistic landmarks detection on the provided video frame. Only use this method when + * the {@link HolisticLandmarker} is created with {@link RunningMode.VIDEO}. + * + *

<p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps + * must be monotonically increasing. + * + * <p>{@link HolisticLandmarker} supports the following color space types: + * + * <ul> + *   <li>{@link Bitmap.Config.ARGB_8888} + * </ul>
+ * + * @param image a MediaPipe {@link MPImage} object for processing. + * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the + * input image before running inference. Note that region-of-interest is not supported + * by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in + * this method throwing an IllegalArgumentException. + * @param timestampMs the input timestamp (in milliseconds). + * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a + * region-of-interest. + * @throws MediaPipeException if there is an internal error. + */ + public HolisticLandmarkerResult detectForVideo( + MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) { + validateImageProcessingOptions(imageProcessingOptions); + return (HolisticLandmarkerResult) processVideoData(image, imageProcessingOptions, timestampMs); + } + + /** + * Sends live image data to perform holistic landmarks detection with default image processing + * options, i.e. without any rotation applied, and the results will be available via the {@link + * ResultListener} provided in the {@link HolisticLandmarkerOptions}. Only use this method when + * the {@link HolisticLandmarker } is created with {@link RunningMode.LIVE_STREAM}. + * + *

<p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is + * sent to the holistic landmarker. The input timestamps must be monotonically increasing. + * + * <p>{@link HolisticLandmarker} supports the following color space types: + * + * <ul> + *   <li>{@link Bitmap.Config.ARGB_8888} + * </ul>
+ * + * @param image a MediaPipe {@link MPImage} object for processing. + * @param timestampMs the input timestamp (in milliseconds). + * @throws MediaPipeException if there is an internal error. + */ + public void detectAsync(MPImage image, long timestampMs) { + detectAsync(image, ImageProcessingOptions.builder().build(), timestampMs); + } + + /** + * Sends live image data to perform holistic landmarks detection, and the results will be + * available via the {@link ResultListener} provided in the {@link HolisticLandmarkerOptions}. + * Only use this method when the {@link HolisticLandmarker} is created with {@link + * RunningMode.LIVE_STREAM}. + * + *

<p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is + * sent to the holistic landmarker. The input timestamps must be monotonically increasing. + * + * <p>{@link HolisticLandmarker} supports the following color space types: + * + * <ul> + *   <li>{@link Bitmap.Config.ARGB_8888} + * </ul>
+ * + * @param image a MediaPipe {@link MPImage} object for processing. + * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the + * input image before running inference. Note that region-of-interest is not supported + * by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in + * this method throwing an IllegalArgumentException. + * @param timestampMs the input timestamp (in milliseconds). + * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a + * region-of-interest. + * @throws MediaPipeException if there is an internal error. + */ + public void detectAsync( + MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) { + validateImageProcessingOptions(imageProcessingOptions); + sendLiveStreamData(image, imageProcessingOptions, timestampMs); + } + + /** Options for setting up an {@link HolisticLandmarker}. */ + @AutoValue + public abstract static class HolisticLandmarkerOptions extends TaskOptions { + + /** Builder for {@link HolisticLandmarkerOptions}. */ + @AutoValue.Builder + public abstract static class Builder { + /** Sets the base options for the holistic landmarker task. */ + public abstract Builder setBaseOptions(BaseOptions value); + + /** + * Sets the running mode for the holistic landmarker task. Defaults to the image mode. + * Holistic landmarker has three modes: + * + *
<ul> + *   <li>IMAGE: The mode for detecting holistic landmarks on single image inputs. + *   <li>VIDEO: The mode for detecting holistic landmarks on the decoded frames of a video. + *   <li>LIVE_STREAM: The mode for detecting holistic landmarks on a live stream of input + *       data, such as from camera. In this mode, {@code setResultListener} must be called to + *       set up a listener to receive the detection results asynchronously. + * </ul>
+ */ + public abstract Builder setRunningMode(RunningMode value); + + /** + * Sets minimum confidence score for the face detection to be considered successful. Defaults + * to 0.5. + */ + public abstract Builder setMinFaceDetectionConfidence(Float value); + + /** + * The minimum threshold for the face suppression score in the face detection. Defaults to + * 0.3. + */ + public abstract Builder setMinFaceSuppressionThreshold(Float value); + + /** + * Sets minimum confidence score for the face landmark detection to be considered successful. + * Defaults to 0.5. + */ + public abstract Builder setMinFaceLandmarksConfidence(Float value); + + /** + * The minimum confidence score for the pose detection to be considered successful. Defaults + * to 0.5. + */ + public abstract Builder setMinPoseDetectionConfidence(Float value); + + /** + * The minimum threshold for the pose suppression score in the pose detection. Defaults to + * 0.3. + */ + public abstract Builder setMinPoseSuppressionThreshold(Float value); + + /** + * The minimum confidence score for the pose landmarks detection to be considered successful. + * Defaults to 0.5. + */ + public abstract Builder setMinPoseLandmarksConfidence(Float value); + + /** + * The minimum confidence score for the hand landmark detection to be considered successful. + * Defaults to 0.5. + */ + public abstract Builder setMinHandLandmarksConfidence(Float value); + + /** Whether to output segmentation masks. Defaults to false. */ + public abstract Builder setOutputPoseSegmentationMasks(Boolean value); + + /** Whether to output face blendshapes. Defaults to false. */ + public abstract Builder setOutputFaceBlendshapes(Boolean value); + + /** + * Sets the result listener to receive the detection results asynchronously when the holistic + * landmarker is in the live stream mode. + */ + public abstract Builder setResultListener( + ResultListener value); + + /** Sets an optional error listener. */ + public abstract Builder setErrorListener(ErrorListener value); + + abstract HolisticLandmarkerOptions autoBuild(); + + /** + * Validates and builds the {@link HolisticLandmarkerOptions} instance. + * + * @throws IllegalArgumentException if the result listener and the running mode are not + * properly configured. The result listener should only be set when the holistic + * landmarker is in the live stream mode. 
+ */ + public final HolisticLandmarkerOptions build() { + HolisticLandmarkerOptions options = autoBuild(); + if (options.runningMode() == RunningMode.LIVE_STREAM) { + if (!options.resultListener().isPresent()) { + throw new IllegalArgumentException( + "The holistic landmarker is in the live stream mode, a user-defined result listener" + + " must be provided in HolisticLandmarkerOptions."); + } + } else if (options.resultListener().isPresent()) { + throw new IllegalArgumentException( + "The holistic landmarker is in the image or the video mode, a user-defined result" + + " listener shouldn't be provided in HolisticLandmarkerOptions."); + } + return options; + } + } + + abstract BaseOptions baseOptions(); + + abstract RunningMode runningMode(); + + abstract Optional minFaceDetectionConfidence(); + + abstract Optional minFaceSuppressionThreshold(); + + abstract Optional minFaceLandmarksConfidence(); + + abstract Optional minPoseDetectionConfidence(); + + abstract Optional minPoseSuppressionThreshold(); + + abstract Optional minPoseLandmarksConfidence(); + + abstract Optional minHandLandmarksConfidence(); + + abstract Boolean outputFaceBlendshapes(); + + abstract Boolean outputPoseSegmentationMasks(); + + abstract Optional> resultListener(); + + abstract Optional errorListener(); + + public static Builder builder() { + return new AutoValue_HolisticLandmarker_HolisticLandmarkerOptions.Builder() + .setRunningMode(RunningMode.IMAGE) + .setMinFaceDetectionConfidence(DEFAULT_PRESENCE_THRESHOLD) + .setMinFaceSuppressionThreshold(DEFAULT_SUPPRESION_THRESHOLD) + .setMinFaceLandmarksConfidence(DEFAULT_PRESENCE_THRESHOLD) + .setMinPoseDetectionConfidence(DEFAULT_PRESENCE_THRESHOLD) + .setMinPoseSuppressionThreshold(DEFAULT_SUPPRESION_THRESHOLD) + .setMinPoseLandmarksConfidence(DEFAULT_PRESENCE_THRESHOLD) + .setMinHandLandmarksConfidence(DEFAULT_PRESENCE_THRESHOLD) + .setOutputFaceBlendshapes(DEFAULT_OUTPUT_FACE_BLENDSHAPES) + .setOutputPoseSegmentationMasks(DEFAULT_OUTPUT_SEGMENTATION_MASKS); + } + + /** Converts a {@link HolisticLandmarkerOptions} to a {@link Any} protobuf message. */ + @Override + public Any convertToAnyProto() { + HolisticLandmarkerGraphOptions.Builder holisticLandmarkerGraphOptions = + HolisticLandmarkerGraphOptions.newBuilder() + .setBaseOptions( + BaseOptionsProto.BaseOptions.newBuilder() + .setUseStreamMode(runningMode() != RunningMode.IMAGE) + .mergeFrom(convertBaseOptionsToProto(baseOptions())) + .build()); + + HandLandmarksDetectorGraphOptions.Builder handLandmarksDetectorGraphOptions = + HandLandmarksDetectorGraphOptions.newBuilder(); + FaceDetectorGraphOptions.Builder faceDetectorGraphOptions = + FaceDetectorGraphOptions.newBuilder(); + FaceLandmarksDetectorGraphOptions.Builder faceLandmarksDetectorGraphOptions = + FaceLandmarksDetectorGraphOptions.newBuilder(); + PoseDetectorGraphOptions.Builder poseDetectorGraphOptions = + PoseDetectorGraphOptions.newBuilder(); + PoseLandmarksDetectorGraphOptions.Builder poseLandmarkerGraphOptions = + PoseLandmarksDetectorGraphOptions.newBuilder(); + + // Configure hand detector options. + minHandLandmarksConfidence() + .ifPresent(handLandmarksDetectorGraphOptions::setMinDetectionConfidence); + + // Configure pose detector options. 
+ minPoseDetectionConfidence().ifPresent(poseDetectorGraphOptions::setMinDetectionConfidence); + minPoseSuppressionThreshold().ifPresent(poseDetectorGraphOptions::setMinSuppressionThreshold); + minPoseLandmarksConfidence().ifPresent(poseLandmarkerGraphOptions::setMinDetectionConfidence); + + // Configure face detector options. + minFaceDetectionConfidence().ifPresent(faceDetectorGraphOptions::setMinDetectionConfidence); + minFaceSuppressionThreshold().ifPresent(faceDetectorGraphOptions::setMinSuppressionThreshold); + minFaceLandmarksConfidence() + .ifPresent(faceLandmarksDetectorGraphOptions::setMinDetectionConfidence); + + holisticLandmarkerGraphOptions + .setHandLandmarksDetectorGraphOptions(handLandmarksDetectorGraphOptions.build()) + .setFaceDetectorGraphOptions(faceDetectorGraphOptions.build()) + .setFaceLandmarksDetectorGraphOptions(faceLandmarksDetectorGraphOptions.build()) + .setPoseDetectorGraphOptions(poseDetectorGraphOptions.build()) + .setPoseLandmarksDetectorGraphOptions(poseLandmarkerGraphOptions.build()); + + return Any.newBuilder() + .setTypeUrl( + "type.googleapis.com/mediapipe.tasks.vision.holistic_landmarker.proto.HolisticLandmarkerGraphOptions") + .setValue(holisticLandmarkerGraphOptions.build().toByteString()) + .build(); + } + } + + /** + * Validates that the provided {@link ImageProcessingOptions} doesn"t contain a + * region-of-interest. + */ + private static void validateImageProcessingOptions( + ImageProcessingOptions imageProcessingOptions) { + if (imageProcessingOptions.regionOfInterest().isPresent()) { + throw new IllegalArgumentException("HolisticLandmarker doesn't support region-of-interest."); + } + } + + private static MPImage getSegmentationMask(List packets, int packetIndex) { + int width = PacketGetter.getImageWidth(packets.get(packetIndex)); + int height = PacketGetter.getImageHeight(packets.get(packetIndex)); + ByteBuffer buffer = ByteBuffer.allocateDirect(width * height * 4); + + if (!PacketGetter.getImageData(packets.get(packetIndex), buffer)) { + throw new MediaPipeException( + MediaPipeException.StatusCode.INTERNAL.ordinal(), + "There was an error getting the sefmentation mask."); + } + + ByteBufferImageBuilder builder = + new ByteBufferImageBuilder(buffer, width, height, MPImage.IMAGE_FORMAT_VEC32F1); + return builder.build(); + } +} diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarkerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarkerResult.java new file mode 100644 index 000000000..06a866771 --- /dev/null +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarkerResult.java @@ -0,0 +1,141 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
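For reviewers of the HolisticLandmarker API added above, a minimal image-mode usage sketch (not part of this patch) follows. The asset name "holistic_landmarker.task" and the Bitmap input are assumptions; everything else uses only the options, factory, and detect methods introduced in this diff plus existing MediaPipe Tasks classes (BaseOptions, MPImage, BitmapImageBuilder).

import android.content.Context;
import android.graphics.Bitmap;
import com.google.mediapipe.framework.image.BitmapImageBuilder;
import com.google.mediapipe.framework.image.MPImage;
import com.google.mediapipe.tasks.core.BaseOptions;
import com.google.mediapipe.tasks.vision.core.RunningMode;
import com.google.mediapipe.tasks.vision.holisticlandmarker.HolisticLandmarker;
import com.google.mediapipe.tasks.vision.holisticlandmarker.HolisticLandmarker.HolisticLandmarkerOptions;
import com.google.mediapipe.tasks.vision.holisticlandmarker.HolisticLandmarkerResult;

final class HolisticLandmarkerImageModeSketch {
  static HolisticLandmarkerResult detectOnce(Context context, Bitmap bitmap) {
    // "holistic_landmarker.task" is a placeholder asset name, not shipped with this change.
    HolisticLandmarkerOptions options =
        HolisticLandmarkerOptions.builder()
            .setBaseOptions(
                BaseOptions.builder().setModelAssetPath("holistic_landmarker.task").build())
            .setRunningMode(RunningMode.IMAGE)
            .setOutputFaceBlendshapes(true)
            .build();
    HolisticLandmarker landmarker = HolisticLandmarker.createFromOptions(context, options);
    MPImage image = new BitmapImageBuilder(bitmap).build();
    // Returns empty lists when nothing is detected (see createEmpty in the result class).
    HolisticLandmarkerResult result = landmarker.detect(image);
    landmarker.close(); // Release the underlying task runner when done.
    return result;
  }
}

In LIVE_STREAM mode the builder validation above additionally requires setResultListener to be called, and frames are fed through detectAsync instead of detect.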
+ +package com.google.mediapipe.tasks.vision.holisticlandmarker; + +import com.google.auto.value.AutoValue; +import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; +import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; +import com.google.mediapipe.framework.image.MPImage; +import com.google.mediapipe.tasks.components.containers.Category; +import com.google.mediapipe.tasks.components.containers.Landmark; +import com.google.mediapipe.tasks.components.containers.NormalizedLandmark; +import com.google.mediapipe.tasks.core.TaskResult; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +/** Represents the holistic landmarks detection results generated by {@link HolisticLandmarker}. */ +@AutoValue +@SuppressWarnings("AutoValueImmutableFields") // 3P API that does not depend on Guava +public abstract class HolisticLandmarkerResult implements TaskResult { + + /** + * Creates a {@link HolisticLandmarkerResult} instance from a list of proto and image inputs. + * + * @param faceLandmarkListProto the detected face landmarks in normalized image coordinates + * @param faceBlendshapeProtos the optional face blendshapes result + * @param poseLandmarkListProtos the detected pose landmarks in normalized image coordinates + * @param poseWorldLandmarkListProto the pose landmarks in world coordinates of detected poses + * @param segmentationMask the segmentation mask for the detected pose + * @param leftHandLandmarkListProto left hand landmarks of detected left hands + * @param leftHandWorldLandmarkListProto left hand landmarks in world coordinates of detected left + * hands + * @param rightHandLandmarkListProto right hand landmarks of detected left hands. 
+ * @param rightHandWorldLandmarkListProto right hand landmarks in world coordinates of detected + * left hands + * @param timestampMs the time in milliseconds this result was created at + * @throws IllegalArgumentException if there was an error creating {@link + * HolisticLandmarkerResult} + */ + static HolisticLandmarkerResult create( + NormalizedLandmarkList faceLandmarkListProto, + Optional faceBlendshapeProtos, + NormalizedLandmarkList poseLandmarkListProtos, + LandmarkList poseWorldLandmarkListProto, + Optional segmentationMask, + NormalizedLandmarkList leftHandLandmarkListProto, + LandmarkList leftHandWorldLandmarkListProto, + NormalizedLandmarkList rightHandLandmarkListProto, + LandmarkList rightHandWorldLandmarkListProto, + long timestampMs) { + List faceLandmarks = + NormalizedLandmark.createListFromProto(faceLandmarkListProto); + Optional> faceBlendshapes = + faceBlendshapeProtos.map(Category::createListFromProto); + List poseLandmarks = + NormalizedLandmark.createListFromProto(poseLandmarkListProtos); + List poseWorldLandmarks = Landmark.createListFromProto(poseWorldLandmarkListProto); + List leftHandLandmarks = + NormalizedLandmark.createListFromProto(leftHandLandmarkListProto); + List leftHandWorldLandmarks = + Landmark.createListFromProto(leftHandWorldLandmarkListProto); + List rightHandLandmarks = + NormalizedLandmark.createListFromProto(rightHandLandmarkListProto); + List rightHandWorldLandmarks = + Landmark.createListFromProto(rightHandWorldLandmarkListProto); + + return new AutoValue_HolisticLandmarkerResult( + timestampMs, + Collections.unmodifiableList(faceLandmarks), + faceBlendshapes, + Collections.unmodifiableList(poseLandmarks), + Collections.unmodifiableList(poseWorldLandmarks), + segmentationMask, + Collections.unmodifiableList(leftHandLandmarks), + Collections.unmodifiableList(leftHandWorldLandmarks), + Collections.unmodifiableList(rightHandLandmarks), + Collections.unmodifiableList(rightHandWorldLandmarks)); + } + + /** + * Creates an empty {@link HolisticLandmarkerResult} instance. + * + * @param timestampMs the time in milliseconds this result was created at + */ + static HolisticLandmarkerResult createEmpty(long timestampMs) { + return new AutoValue_HolisticLandmarkerResult( + timestampMs, + Collections.emptyList(), + Optional.empty(), + Collections.emptyList(), + Collections.emptyList(), + Optional.empty(), + Collections.emptyList(), + Collections.emptyList(), + Collections.emptyList(), + Collections.emptyList()); + } + + @Override + public abstract long timestampMs(); + + /** Detected face landmarks in normalized image coordinates. */ + public abstract List faceLandmarks(); + + /** Optional face blendshapes. */ + public abstract Optional> faceBlendshapes(); + + /** Detected pose landmarks in normalized image coordinates. */ + public abstract List poseLandmarks(); + + /** Pose landmarks in world coordinates of the detected pose. */ + public abstract List poseWorldLandmarks(); + + /** Segmentation mask for the detected pose. */ + public abstract Optional segmentationMask(); + + /** Hand landmarks of detected left hands. */ + public abstract List leftHandLandmarks(); + + /** Hnd landmarks in world coordinates of detected left hands. */ + public abstract List leftHandWorldLandmarks(); + + /** Hand landmarks of detected right hands. */ + public abstract List rightHandLandmarks(); + + /** Hand landmarks in world coordinates of detected right hands. 
*/ + public abstract List rightHandWorldLandmarks(); +} diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java index b673b00c9..813dba93c 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java @@ -15,7 +15,6 @@ package com.google.mediapipe.tasks.vision.imagesegmenter; import android.content.Context; -import android.util.Log; import com.google.auto.value.AutoValue; import com.google.mediapipe.proto.CalculatorOptionsProto.CalculatorOptions; import com.google.mediapipe.proto.CalculatorProto.CalculatorGraphConfig; diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/poselandmarker/PoseLandmarkerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/poselandmarker/PoseLandmarkerResult.java index 0dde56700..792d7407d 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/poselandmarker/PoseLandmarkerResult.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/poselandmarker/PoseLandmarkerResult.java @@ -50,43 +50,18 @@ public abstract class PoseLandmarkerResult implements TaskResult { } List> multiPoseLandmarks = new ArrayList<>(); + for (LandmarkProto.NormalizedLandmarkList handLandmarksProto : landmarksProto) { + List poseLandmarks = + NormalizedLandmark.createListFromProto(handLandmarksProto); + multiPoseLandmarks.add(Collections.unmodifiableList(poseLandmarks)); + } + List> multiPoseWorldLandmarks = new ArrayList<>(); - for (LandmarkProto.NormalizedLandmarkList poseLandmarksProto : landmarksProto) { - List poseLandmarks = new ArrayList<>(); - multiPoseLandmarks.add(poseLandmarks); - for (LandmarkProto.NormalizedLandmark poseLandmarkProto : - poseLandmarksProto.getLandmarkList()) { - poseLandmarks.add( - NormalizedLandmark.create( - poseLandmarkProto.getX(), - poseLandmarkProto.getY(), - poseLandmarkProto.getZ(), - poseLandmarkProto.hasVisibility() - ? Optional.of(poseLandmarkProto.getVisibility()) - : Optional.empty(), - poseLandmarkProto.hasPresence() - ? Optional.of(poseLandmarkProto.getPresence()) - : Optional.empty())); - } - } for (LandmarkProto.LandmarkList poseWorldLandmarksProto : worldLandmarksProto) { - List poseWorldLandmarks = new ArrayList<>(); - multiPoseWorldLandmarks.add(poseWorldLandmarks); - for (LandmarkProto.Landmark poseWorldLandmarkProto : - poseWorldLandmarksProto.getLandmarkList()) { - poseWorldLandmarks.add( - Landmark.create( - poseWorldLandmarkProto.getX(), - poseWorldLandmarkProto.getY(), - poseWorldLandmarkProto.getZ(), - poseWorldLandmarkProto.hasVisibility() - ? Optional.of(poseWorldLandmarkProto.getVisibility()) - : Optional.empty(), - poseWorldLandmarkProto.hasPresence() - ? Optional.of(poseWorldLandmarkProto.getPresence()) - : Optional.empty())); - } + List poseWorldLandmarks = Landmark.createListFromProto(poseWorldLandmarksProto); + multiPoseWorldLandmarks.add(Collections.unmodifiableList(poseWorldLandmarks)); } + return new AutoValue_PoseLandmarkerResult( timestampMs, Collections.unmodifiableList(multiPoseLandmarks), @@ -100,7 +75,7 @@ public abstract class PoseLandmarkerResult implements TaskResult { /** Pose landmarks of detected poses. */ public abstract List> landmarks(); - /** Pose landmarks in world coordniates of detected poses. */ + /** Pose landmarks in world coordinates of detected poses. 
*/ public abstract List> worldLandmarks(); /** Pose segmentation masks. */ diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/AndroidManifest.xml b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/AndroidManifest.xml new file mode 100644 index 000000000..4a6416933 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/AndroidManifest.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/BUILD b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/BUILD new file mode 100644 index 000000000..7363a23e0 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/BUILD @@ -0,0 +1,19 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +# TODO: Enable these tests in OSS diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/CategoryTest.java b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/CategoryTest.java new file mode 100644 index 000000000..ed501ac57 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/CategoryTest.java @@ -0,0 +1,52 @@ +// Copyright 2022 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
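The container tests below exercise the new proto conversion helpers. As a sketch of the pattern those helpers enable (coordinate values here are made up), one createListFromProto call replaces the per-landmark loops that each *Result.create() previously duplicated:

import com.google.mediapipe.formats.proto.LandmarkProto;
import com.google.mediapipe.tasks.components.containers.NormalizedLandmark;
import java.util.List;

final class LandmarkConversionSketch {
  static List<NormalizedLandmark> convert() {
    // Illustrative proto input only.
    LandmarkProto.NormalizedLandmarkList landmarkListProto =
        LandmarkProto.NormalizedLandmarkList.newBuilder()
            .addLandmark(
                LandmarkProto.NormalizedLandmark.newBuilder()
                    .setX(0.25f)
                    .setY(0.50f)
                    .setZ(0.0f)
                    .setVisibility(0.9f))
            .build();
    // Visibility/presence map to Optional fields only when set on the proto.
    return NormalizedLandmark.createListFromProto(landmarkListProto);
  }
}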
+ +package com.google.mediapipe.tasks.components.containers; + +import static com.google.common.truth.Truth.assertThat; + +import androidx.test.ext.junit.runners.AndroidJUnit4; +import com.google.mediapipe.formats.proto.ClassificationProto.Classification; +import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; +import java.util.List; +import org.junit.Test; +import org.junit.runner.RunWith; + +@RunWith(AndroidJUnit4.class) +public final class CategoryTest { + + @Test + public void create_succeedsWithClassificationProto() { + Classification input = + Classification.newBuilder() + .setScore(0.1f) + .setIndex(1) + .setLabel("label") + .setDisplayName("displayName") + .build(); + Category output = Category.createFromProto(input); + assertThat(output.score()).isEqualTo(0.1f); + assertThat(output.index()).isEqualTo(1); + assertThat(output.categoryName()).isEqualTo("label"); + assertThat(output.displayName()).isEqualTo("displayName"); + } + + @Test + public void create_succeedsWithClassificationListProto() { + Classification element = Classification.newBuilder().setScore(0.1f).build(); + ClassificationList input = ClassificationList.newBuilder().addClassification(element).build(); + List output = Category.createListFromProto(input); + assertThat(output).containsExactly(Category.create(0.1f, 0, "", "")); + } +} diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/LandmarkTest.java b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/LandmarkTest.java new file mode 100644 index 000000000..b5ff0564a --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/LandmarkTest.java @@ -0,0 +1,62 @@ +// Copyright 2022 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.mediapipe.tasks.components.containers; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import androidx.test.ext.junit.runners.AndroidJUnit4; +import com.google.mediapipe.formats.proto.LandmarkProto; +import java.util.List; +import org.junit.Test; +import org.junit.runner.RunWith; + +@RunWith(AndroidJUnit4.class) +public final class LandmarkTest { + + @Test + public void createFromProto_succeedsWithCoordinates() { + LandmarkProto.Landmark input = + LandmarkProto.Landmark.newBuilder().setX(1.0f).setY(2.0f).setZ(3.0f).build(); + Landmark output = Landmark.createFromProto(input); + assertThat(output.x()).isEqualTo(1.0f); + assertThat(output.y()).isEqualTo(2.0f); + assertThat(output.z()).isEqualTo(3.0f); + assertFalse(output.visibility().isPresent()); + assertFalse(output.presence().isPresent()); + } + + @Test + public void createFromProto_succeedsWithVisibility() { + LandmarkProto.Landmark input = + LandmarkProto.Landmark.newBuilder().setVisibility(0.4f).setPresence(0.5f).build(); + Landmark output = Landmark.createFromProto(input); + assertTrue(output.visibility().isPresent()); + assertThat(output.visibility().get()).isEqualTo(0.4f); + assertTrue(output.presence().isPresent()); + assertThat(output.presence().get()).isEqualTo(0.5f); + } + + @Test + public void createListFromProto_succeeds() { + LandmarkProto.Landmark element = + LandmarkProto.Landmark.newBuilder().setX(1.0f).setY(2.0f).setZ(3.0f).build(); + LandmarkProto.LandmarkList input = + LandmarkProto.LandmarkList.newBuilder().addLandmark(element).build(); + List output = Landmark.createListFromProto(input); + assertThat(output).hasSize(1); + } +} diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/NormalizedLandmarkTest.java b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/NormalizedLandmarkTest.java new file mode 100644 index 000000000..64b61d263 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/components/containers/NormalizedLandmarkTest.java @@ -0,0 +1,62 @@ +// Copyright 2022 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.mediapipe.tasks.components.containers; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import androidx.test.ext.junit.runners.AndroidJUnit4; +import com.google.mediapipe.formats.proto.LandmarkProto; +import java.util.List; +import org.junit.Test; +import org.junit.runner.RunWith; + +@RunWith(AndroidJUnit4.class) +public final class NormalizedLandmarkTest { + + @Test + public void createFromProto_succeedsWithCoordinates() { + LandmarkProto.NormalizedLandmark input = + LandmarkProto.NormalizedLandmark.newBuilder().setX(0.1f).setY(0.2f).setZ(0.3f).build(); + NormalizedLandmark output = NormalizedLandmark.createFromProto(input); + assertThat(output.x()).isEqualTo(0.1f); + assertThat(output.y()).isEqualTo(0.2f); + assertThat(output.z()).isEqualTo(0.3f); + assertFalse(output.visibility().isPresent()); + assertFalse(output.presence().isPresent()); + } + + @Test + public void createFromProto_succeedsWithVisibility() { + LandmarkProto.NormalizedLandmark input = + LandmarkProto.NormalizedLandmark.newBuilder().setVisibility(0.4f).setPresence(0.5f).build(); + NormalizedLandmark output = NormalizedLandmark.createFromProto(input); + assertTrue(output.visibility().isPresent()); + assertThat(output.visibility().get()).isEqualTo(0.4f); + assertTrue(output.presence().isPresent()); + assertThat(output.presence().get()).isEqualTo(0.5f); + } + + @Test + public void createListFromProto_succeeds() { + LandmarkProto.NormalizedLandmark element = + LandmarkProto.NormalizedLandmark.newBuilder().setX(0.1f).setY(0.2f).setZ(0.3f).build(); + LandmarkProto.NormalizedLandmarkList input = + LandmarkProto.NormalizedLandmarkList.newBuilder().addLandmark(element).build(); + List output = NormalizedLandmark.createListFromProto(input); + assertThat(output).hasSize(1); + } +} diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/AndroidManifest.xml b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/AndroidManifest.xml new file mode 100644 index 000000000..22b19b702 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/AndroidManifest.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/BUILD b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/BUILD new file mode 100644 index 000000000..287602c85 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/BUILD @@ -0,0 +1,19 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +# TODO: Enable this in OSS diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarkerTest.java b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarkerTest.java new file mode 100644 index 000000000..f8c87c798 --- /dev/null +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/holisticlandmarker/HolisticLandmarkerTest.java @@ -0,0 +1,512 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.tasks.vision.holisticlandmarker; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import android.content.res.AssetManager; +import android.graphics.BitmapFactory; +import android.graphics.RectF; +import androidx.test.core.app.ApplicationProvider; +import androidx.test.ext.junit.runners.AndroidJUnit4; +import com.google.common.truth.Correspondence; +import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList; +import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList; +import com.google.mediapipe.framework.MediaPipeException; +import com.google.mediapipe.framework.image.BitmapImageBuilder; +import com.google.mediapipe.framework.image.ByteBufferImageBuilder; +import com.google.mediapipe.framework.image.MPImage; +import com.google.mediapipe.tasks.components.containers.Category; +import com.google.mediapipe.tasks.components.containers.NormalizedLandmark; +import com.google.mediapipe.tasks.core.BaseOptions; +import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions; +import com.google.mediapipe.tasks.vision.core.RunningMode; +import com.google.mediapipe.tasks.vision.holisticlandmarker.HolisticLandmarker.HolisticLandmarkerOptions; +import com.google.mediapipe.tasks.vision.holisticlandmarker.HolisticResultProto.HolisticResult; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.Optional; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Suite; +import org.junit.runners.Suite.SuiteClasses; + +/** Test for {@link HolisticLandmarker}. 
*/ +@RunWith(Suite.class) +@SuiteClasses({HolisticLandmarkerTest.General.class, HolisticLandmarkerTest.RunningModeTest.class}) +public class HolisticLandmarkerTest { + private static final String HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE = "holistic_landmarker.task"; + private static final String POSE_IMAGE = "male_full_height_hands.jpg"; + private static final String CAT_IMAGE = "cat.jpg"; + private static final String HOLISTIC_RESULT = "male_full_height_hands_result_cpu.pb"; + private static final String TAG = "Holistic Landmarker Test"; + private static final float FACE_LANDMARKS_ERROR_TOLERANCE = 0.03f; + private static final float FACE_BLENDSHAPES_ERROR_TOLERANCE = 0.13f; + private static final MPImage PLACEHOLDER_MASK = + new ByteBufferImageBuilder( + ByteBuffer.allocate(0), /* widht= */ 0, /* height= */ 0, MPImage.IMAGE_FORMAT_VEC32F1) + .build(); + private static final int IMAGE_WIDTH = 638; + private static final int IMAGE_HEIGHT = 1000; + + private static final Correspondence VALIDATE_LANDMARRKS = + Correspondence.from( + (Correspondence.BinaryPredicate) + (actual, expected) -> { + return Correspondence.tolerance(FACE_LANDMARKS_ERROR_TOLERANCE) + .compare(actual.x(), expected.x()) + && Correspondence.tolerance(FACE_LANDMARKS_ERROR_TOLERANCE) + .compare(actual.y(), expected.y()); + }, + "landmarks approximately equal to"); + + private static final Correspondence VALIDATE_BLENDSHAPES = + Correspondence.from( + (Correspondence.BinaryPredicate) + (actual, expected) -> + Correspondence.tolerance(FACE_BLENDSHAPES_ERROR_TOLERANCE) + .compare(actual.score(), expected.score()) + && actual.index() == expected.index() + && actual.categoryName().equals(expected.categoryName()), + "face blendshapes approximately equal to"); + + @RunWith(AndroidJUnit4.class) + public static final class General extends HolisticLandmarkerTest { + + @Test + public void detect_successWithValidModels() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .build(); + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + HolisticLandmarkerResult actualResult = + holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE)); + HolisticLandmarkerResult expectedResult = + getExpectedHolisticLandmarkerResult( + HOLISTIC_RESULT, /* hasFaceBlendshapes= */ false, /* hasSegmentationMask= */ false); + assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult); + } + + @Test + public void detect_successWithBlendshapes() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setOutputFaceBlendshapes(true) + .build(); + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + HolisticLandmarkerResult actualResult = + holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE)); + HolisticLandmarkerResult expectedResult = + getExpectedHolisticLandmarkerResult( + HOLISTIC_RESULT, /* hasFaceBlendshapes= */ true, /* hasSegmentationMask= */ false); + assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult); + } + + @Test + public void detect_successWithSegmentationMasks() throws Exception { + HolisticLandmarkerOptions 
options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setOutputPoseSegmentationMasks(true) + .build(); + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + HolisticLandmarkerResult actualResult = + holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE)); + HolisticLandmarkerResult expectedResult = + getExpectedHolisticLandmarkerResult( + HOLISTIC_RESULT, /* hasFaceBlendshapes= */ false, /* hasSegmentationMask= */ true); + assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult); + } + + @Test + public void detect_successWithEmptyResult() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .build(); + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + HolisticLandmarkerResult actualResult = + holisticLandmarker.detect(getImageFromAsset(CAT_IMAGE)); + assertThat(actualResult.faceLandmarks()).isEmpty(); + } + + @Test + public void detect_failsWithRegionOfInterest() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .build(); + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + ImageProcessingOptions imageProcessingOptions = + ImageProcessingOptions.builder().setRegionOfInterest(new RectF(0, 0, 1, 1)).build(); + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> + holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE), imageProcessingOptions)); + assertThat(exception) + .hasMessageThat() + .contains("HolisticLandmarker doesn't support region-of-interest"); + } + } + + @RunWith(AndroidJUnit4.class) + public static final class RunningModeTest extends HolisticLandmarkerTest { + private void assertCreationFailsWithResultListenerInNonLiveStreamMode(RunningMode runningMode) + throws Exception { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(runningMode) + .setResultListener((HolisticLandmarkerResult, inputImage) -> {}) + .build()); + assertThat(exception) + .hasMessageThat() + .contains("a user-defined result listener shouldn't be provided"); + } + + @Test + public void create_failsWithIllegalResultListenerInVideoMode() throws Exception { + assertCreationFailsWithResultListenerInNonLiveStreamMode(RunningMode.VIDEO); + } + + @Test + public void create_failsWithIllegalResultListenerInImageMode() throws Exception { + assertCreationFailsWithResultListenerInNonLiveStreamMode(RunningMode.IMAGE); + } + + @Test + public void create_failsWithMissingResultListenerInLiveSteamMode() throws Exception { + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + 
.build()) + .setRunningMode(RunningMode.LIVE_STREAM) + .build()); + assertThat(exception) + .hasMessageThat() + .contains("a user-defined result listener must be provided"); + } + + @Test + public void detect_failsWithCallingWrongApiInImageMode() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.IMAGE) + .build(); + + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + MediaPipeException exception = + assertThrows( + MediaPipeException.class, + () -> + holisticLandmarker.detectForVideo( + getImageFromAsset(POSE_IMAGE), /* timestampsMs= */ 0)); + assertThat(exception).hasMessageThat().contains("not initialized with the video mode"); + exception = + assertThrows( + MediaPipeException.class, + () -> + holisticLandmarker.detectAsync( + getImageFromAsset(POSE_IMAGE), /* timestampsMs= */ 0)); + assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode"); + } + + @Test + public void detect_failsWithCallingWrongApiInVideoMode() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.VIDEO) + .build(); + + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + MediaPipeException exception = + assertThrows( + MediaPipeException.class, + () -> holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE))); + assertThat(exception).hasMessageThat().contains("not initialized with the image mode"); + exception = + assertThrows( + MediaPipeException.class, + () -> + holisticLandmarker.detectAsync( + getImageFromAsset(POSE_IMAGE), /* timestampsMs= */ 0)); + assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode"); + } + + @Test + public void detect_failsWithCallingWrongApiInLiveSteamMode() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.LIVE_STREAM) + .setResultListener((HolisticLandmarkerResult, inputImage) -> {}) + .build(); + + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + MediaPipeException exception = + assertThrows( + MediaPipeException.class, + () -> holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE))); + assertThat(exception).hasMessageThat().contains("not initialized with the image mode"); + exception = + assertThrows( + MediaPipeException.class, + () -> + holisticLandmarker.detectForVideo( + getImageFromAsset(POSE_IMAGE), /* timestampsMs= */ 0)); + assertThat(exception).hasMessageThat().contains("not initialized with the video mode"); + } + + @Test + public void detect_successWithImageMode() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.IMAGE) + .build(); + + HolisticLandmarker holisticLandmarker = + 
HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + HolisticLandmarkerResult actualResult = + holisticLandmarker.detect(getImageFromAsset(POSE_IMAGE)); + HolisticLandmarkerResult expectedResult = + getExpectedHolisticLandmarkerResult( + HOLISTIC_RESULT, /* hasFaceBlendshapes= */ false, /* hasSegmentationMask= */ false); + assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult); + } + + @Test + public void detect_successWithVideoMode() throws Exception { + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.VIDEO) + .build(); + HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options); + HolisticLandmarkerResult expectedResult = + getExpectedHolisticLandmarkerResult( + HOLISTIC_RESULT, /* hasFaceBlendshapes= */ false, /* hasSegmentationMask= */ false); + for (int i = 0; i < 3; i++) { + HolisticLandmarkerResult actualResult = + holisticLandmarker.detectForVideo(getImageFromAsset(POSE_IMAGE), /* timestampsMs= */ i); + assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult); + } + } + + @Test + public void detect_failsWithOutOfOrderInputTimestamps() throws Exception { + MPImage image = getImageFromAsset(POSE_IMAGE); + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.LIVE_STREAM) + .setResultListener((actualResult, inputImage) -> {}) + .build(); + try (HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options)) { + holisticLandmarker.detectAsync(image, /* timestampsMs= */ 1); + MediaPipeException exception = + assertThrows( + MediaPipeException.class, + () -> holisticLandmarker.detectAsync(image, /* timestampsMs= */ 0)); + assertThat(exception) + .hasMessageThat() + .contains("having a smaller timestamp than the processed timestamp"); + } + } + + @Test + public void detect_successWithLiveSteamMode() throws Exception { + MPImage image = getImageFromAsset(POSE_IMAGE); + HolisticLandmarkerResult expectedResult = + getExpectedHolisticLandmarkerResult( + HOLISTIC_RESULT, /* hasFaceBlendshapes= */ false, /* hasSegmentationMask= */ false); + HolisticLandmarkerOptions options = + HolisticLandmarkerOptions.builder() + .setBaseOptions( + BaseOptions.builder() + .setModelAssetPath(HOLISTIC_LANDMARKER_BUNDLE_ASSET_FILE) + .build()) + .setRunningMode(RunningMode.LIVE_STREAM) + .setResultListener( + (actualResult, inputImage) -> { + assertActualResultApproximatelyEqualsToExpectedResult( + actualResult, expectedResult); + assertImageSizeIsExpected(inputImage); + }) + .build(); + try (HolisticLandmarker holisticLandmarker = + HolisticLandmarker.createFromOptions( + ApplicationProvider.getApplicationContext(), options)) { + for (int i = 0; i < 3; i++) { + holisticLandmarker.detectAsync(image, /* timestampsMs= */ i); + } + } + } + } + + private static MPImage getImageFromAsset(String filePath) throws Exception { + AssetManager assetManager = ApplicationProvider.getApplicationContext().getAssets(); + InputStream istr = assetManager.open(filePath); + return new BitmapImageBuilder(BitmapFactory.decodeStream(istr)).build(); + } + + 
private static HolisticLandmarkerResult getExpectedHolisticLandmarkerResult( + String resultPath, boolean hasFaceBlendshapes, boolean hasSegmentationMask) throws Exception { + AssetManager assetManager = ApplicationProvider.getApplicationContext().getAssets(); + + HolisticResult holisticResult = HolisticResult.parseFrom(assetManager.open(resultPath)); + + Optional blendshapes = + hasFaceBlendshapes + ? Optional.of(holisticResult.getFaceBlendshapes()) + : Optional.empty(); + Optional segmentationMask = + hasSegmentationMask ? Optional.of(PLACEHOLDER_MASK) : Optional.empty(); + + return HolisticLandmarkerResult.create( + holisticResult.getFaceLandmarks(), + blendshapes, + holisticResult.getPoseLandmarks(), + LandmarkList.getDefaultInstance(), + segmentationMask, + holisticResult.getLeftHandLandmarks(), + LandmarkList.getDefaultInstance(), + holisticResult.getRightHandLandmarks(), + LandmarkList.getDefaultInstance(), + /* timestampMs= */ 0); + } + + private static void assertActualResultApproximatelyEqualsToExpectedResult( + HolisticLandmarkerResult actualResult, HolisticLandmarkerResult expectedResult) { + // Expects to have the same number of holistics detected. + assertThat(actualResult.faceLandmarks()).hasSize(expectedResult.faceLandmarks().size()); + assertThat(actualResult.faceBlendshapes().isPresent()) + .isEqualTo(expectedResult.faceBlendshapes().isPresent()); + assertThat(actualResult.poseLandmarks()).hasSize(expectedResult.poseLandmarks().size()); + assertThat(actualResult.segmentationMask().isPresent()) + .isEqualTo(expectedResult.segmentationMask().isPresent()); + assertThat(actualResult.leftHandLandmarks()).hasSize(expectedResult.leftHandLandmarks().size()); + assertThat(actualResult.rightHandLandmarks()) + .hasSize(expectedResult.rightHandLandmarks().size()); + + // Actual face landmarks match expected face landmarks. + assertThat(actualResult.faceLandmarks()) + .comparingElementsUsing(VALIDATE_LANDMARRKS) + .containsExactlyElementsIn(expectedResult.faceLandmarks()); + + // Actual face blendshapes match expected face blendshapes. + if (actualResult.faceBlendshapes().isPresent()) { + assertThat(actualResult.faceBlendshapes().get()) + .comparingElementsUsing(VALIDATE_BLENDSHAPES) + .containsExactlyElementsIn(expectedResult.faceBlendshapes().get()); + } + + // Actual pose landmarks match expected pose landmarks. + assertThat(actualResult.poseLandmarks()) + .comparingElementsUsing(VALIDATE_LANDMARRKS) + .containsExactlyElementsIn(expectedResult.poseLandmarks()); + + if (actualResult.segmentationMask().isPresent()) { + assertImageSizeIsExpected(actualResult.segmentationMask().get()); + } + + // Actual left hand landmarks match expected left hand landmarks. + assertThat(actualResult.leftHandLandmarks()) + .comparingElementsUsing(VALIDATE_LANDMARRKS) + .containsExactlyElementsIn(expectedResult.leftHandLandmarks()); + + // Actual right hand landmarks match expected right hand landmarks. 
+ assertThat(actualResult.rightHandLandmarks()) + .comparingElementsUsing(VALIDATE_LANDMARRKS) + .containsExactlyElementsIn(expectedResult.rightHandLandmarks()); + } + + private static void assertImageSizeIsExpected(MPImage inputImage) { + assertThat(inputImage).isNotNull(); + assertThat(inputImage.getWidth()).isEqualTo(IMAGE_WIDTH); + assertThat(inputImage.getHeight()).isEqualTo(IMAGE_HEIGHT); + } +} diff --git a/mediapipe/tasks/python/benchmark/BUILD b/mediapipe/tasks/python/benchmark/BUILD new file mode 100644 index 000000000..0657d1c61 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/BUILD @@ -0,0 +1,24 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_library + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +py_library( + name = "benchmark_utils", + srcs = ["benchmark_utils.py"], +) diff --git a/mediapipe/tasks/python/benchmark/__init__.py b/mediapipe/tasks/python/benchmark/__init__.py new file mode 100644 index 000000000..2eb077987 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/mediapipe/tasks/python/benchmark/benchmark_utils.py b/mediapipe/tasks/python/benchmark/benchmark_utils.py new file mode 100644 index 000000000..10f21f4e2 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/benchmark_utils.py @@ -0,0 +1,70 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Benchmark utils for MediaPipe Tasks.""" + +import os +import numpy as np + + +def nth_percentile(inference_times, percentile): + """Calculate the nth percentile of the inference times.""" + return np.percentile(inference_times, percentile) + + +def average(inference_times): + """Calculate the average of the inference times.""" + return np.mean(inference_times) + + +def get_test_data_path(test_srcdir, file_or_dirname_path: str) -> str: + """Determine the test data path. + + Args: + test_srcdir: The path to the test source directory. + file_or_dirname_path: The path to the file or directory. + + Returns: + The full test data path. + """ + for directory, subdirs, files in os.walk(test_srcdir): + for f in subdirs + files: + path = os.path.join(directory, f) + if path.endswith(file_or_dirname_path): + return path + raise ValueError( + "No %s in test directory: %s." % (file_or_dirname_path, test_srcdir) + ) + + +def get_model_path(custom_model, default_model_path): + """Determine the model path based on the existence of the custom model. + + Args: + custom_model: The path to the custom model provided by the user. + default_model_path: The path to the default model. + + Returns: + The path to the model to be used. + """ + if custom_model is not None and os.path.exists(custom_model): + print(f"Using provided model: {custom_model}") + return custom_model + else: + if custom_model is not None: + print( + f"Warning: Provided model '{custom_model}' not found. " + "Using default model instead." + ) + print(f"Using default model: {default_model_path}") + return default_model_path diff --git a/mediapipe/tasks/python/benchmark/vision/BUILD b/mediapipe/tasks/python/benchmark/vision/BUILD new file mode 100644 index 000000000..b363bf341 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/BUILD @@ -0,0 +1,33 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "benchmark", + srcs = ["benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/__init__.py b/mediapipe/tasks/python/benchmark/vision/__init__.py new file mode 100644 index 000000000..2eb077987 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/mediapipe/tasks/python/benchmark/vision/benchmark.py b/mediapipe/tasks/python/benchmark/vision/benchmark.py new file mode 100644 index 000000000..3c52ae2e4 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/benchmark.py @@ -0,0 +1,99 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe vision benchmarker.""" + +import argparse + +from mediapipe.tasks.python.benchmark import benchmark_utils as bu +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options + + +def benchmarker(benchmark_function, default_model_name): + """Executes a benchmarking process using a specified function and model. + + Args: + benchmark_function: A callable function to be executed for benchmarking. + This function should contain the logic of the task to be benchmarked and + should be capable of utilizing a model specified by its name. + default_model_name: The name or path of the default model to be used in + the benchmarking process. This is useful when the benchmarking function + requires a model and no other model is explicitly specified. 
+ """ + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + '--mode', + help='Benchmarking mode (e.g., "nth_percentile").', + required=False, + default='nth_percentile', + ) + parser.add_argument('--model', help='Path to the model.', default=None) + parser.add_argument( + '--iterations', + help='Number of iterations for benchmarking.', + type=int, + default=100, + ) + parser.add_argument( + '--percentile', + help='Percentile for benchmarking statistics.', + type=float, + default=95.0, + ) + + args = parser.parse_args() + + # Get the model path + default_model_path = bu.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, default_model_name + ) + model_path = bu.get_model_path(args.model, default_model_path) + + # Define a mapping of modes to their respective function argument lists + mode_args_mapping = { + 'nth_percentile': {'percentile': args.percentile}, + 'average': {}, + } + + # Check if the mode is supported and get the argument dictionary + if args.mode not in mode_args_mapping: + raise ValueError(f'Unsupported benchmarking mode: {args.mode}') + + mode_args = mode_args_mapping[args.mode] + + # Run the benchmark for both CPU and GPU and calculate results based on mode + results = {} + for delegate_type in [ + base_options.BaseOptions.Delegate.CPU, + base_options.BaseOptions.Delegate.GPU, + ]: + inference_times = benchmark_function( + model_path, args.iterations, delegate_type + ) + + # Calculate the benchmark result based on the mode + if args.mode == 'nth_percentile': + results[delegate_type] = bu.nth_percentile(inference_times, **mode_args) + elif args.mode == 'average': + results[delegate_type] = bu.average(inference_times) + + # Report benchmarking results + for delegate_type, result in results.items(): + print( + f'Inference time {delegate_type} {mode_args_mapping[args.mode]}: ' + f'{result:.6f} milliseconds' + ) diff --git a/mediapipe/tasks/python/benchmark/vision/core/BUILD b/mediapipe/tasks/python/benchmark/vision/core/BUILD new file mode 100644 index 000000000..683116068 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/core/BUILD @@ -0,0 +1,22 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_library + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "base_vision_benchmark_api", + srcs = ["base_vision_benchmark_api.py"], +) diff --git a/mediapipe/tasks/python/benchmark/vision/core/__init__.py b/mediapipe/tasks/python/benchmark/vision/core/__init__.py new file mode 100644 index 000000000..b87aebd51 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/core/__init__.py @@ -0,0 +1,14 @@ +"""Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" diff --git a/mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py b/mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py new file mode 100644 index 000000000..5d7693b69 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py @@ -0,0 +1,40 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe vision benchmark base api.""" +import time + +VISION_TEST_DATA_DIR = 'mediapipe/tasks/testdata/vision' + + +def benchmark_task(func, image, n_iterations): + """Collect inference times for a given task after benchmarking. + + Args: + func: The task function used for benchmarking. + image: The input MediaPipe Image. + n_iterations: Number of iterations to run the benchmark. + + Returns: + List of inference times in milliseconds. + """ + inference_times = [] + + for _ in range(n_iterations): + start_time_ns = time.time_ns() + # Run the method for the task (e.g., classify) + func(image) + end_time_ns = time.time_ns() + inference_times.append((end_time_ns - start_time_ns) / 1_000_000) + + return inference_times diff --git a/mediapipe/tasks/python/benchmark/vision/face_aligner/BUILD b/mediapipe/tasks/python/benchmark/vision/face_aligner/BUILD new file mode 100644 index 000000000..14080fded --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/face_aligner/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
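(Aside, not part of the diff: a rough sketch of how `benchmark_task` above is meant to wrap a task call. The test image path is a placeholder, and the no-op lambda stands in for a real task method such as `classifier.classify` or `detector.detect`.)

```
# Illustrative only: timing an arbitrary single-argument callable.
from mediapipe.python._framework_bindings import image
from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api

mp_image = image.Image.create_from_file("portrait.jpg")  # placeholder test image
times_ms = base_vision_benchmark_api.benchmark_task(
    lambda img: None,  # stand-in for e.g. classifier.classify
    mp_image,
    n_iterations=10,
)
print(f"average latency: {sum(times_ms) / len(times_ms):.3f} ms")
```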
+ +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "face_aligner_benchmark", + srcs = ["face_aligner_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "face_aligner_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:face_aligner", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/face_aligner/face_aligner_benchmark.py b/mediapipe/tasks/python/benchmark/vision/face_aligner/face_aligner_benchmark.py new file mode 100644 index 000000000..851673591 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/face_aligner/face_aligner_benchmark.py @@ -0,0 +1,58 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe face aligner benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import face_aligner + +_MODEL_FILE = 'face_landmarker_v2.task' +_IMAGE_FILE = 'portrait.jpg' + + +def run(model_path, n_iterations, delegate): + """Run a face aligner benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the face aligner + options = face_aligner.FaceAlignerOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ) + ) + + with face_aligner.FaceAligner.create_from_options(options) as aligner: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + aligner.align, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/face_detector/BUILD b/mediapipe/tasks/python/benchmark/vision/face_detector/BUILD new file mode 100644 index 000000000..d3295331f --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/face_detector/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "face_detector_benchmark", + srcs = ["face_detector_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "face_detector_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:face_detector", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/face_detector/face_detector_benchmark.py b/mediapipe/tasks/python/benchmark/vision/face_detector/face_detector_benchmark.py new file mode 100644 index 000000000..c70e6f225 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/face_detector/face_detector_benchmark.py @@ -0,0 +1,58 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe face detector benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import face_detector + +_MODEL_FILE = 'face_detection_short_range.tflite' +_IMAGE_FILE = 'portrait.jpg' + + +def run(model_path, n_iterations, delegate): + """Run a face detector benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. 
+ """ + # Initialize the face detector + options = face_detector.FaceDetectorOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ) + ) + + with face_detector.FaceDetector.create_from_options(options) as detector: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + detector.detect, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/face_landmarker/BUILD b/mediapipe/tasks/python/benchmark/vision/face_landmarker/BUILD new file mode 100644 index 000000000..7867a8a8e --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/face_landmarker/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "face_landmarker_benchmark", + srcs = ["face_landmarker_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "face_landmarker_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:face_landmarker", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/face_landmarker/face_landmarker_benchmark.py b/mediapipe/tasks/python/benchmark/vision/face_landmarker/face_landmarker_benchmark.py new file mode 100644 index 000000000..9a6930a56 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/face_landmarker/face_landmarker_benchmark.py @@ -0,0 +1,60 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""MediaPipe face landmarker benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import face_landmarker + +_MODEL_FILE = 'face_landmarker_v2.task' +_IMAGE_FILE = 'portrait.jpg' + + +def run(model_path, n_iterations, delegate): + """Run a face landmarker benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the face landmarker + options = face_landmarker.FaceLandmarkerOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ) + ) + + with face_landmarker.FaceLandmarker.create_from_options( + options + ) as landmarker: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + landmarker.detect, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/hand_landmarker/BUILD b/mediapipe/tasks/python/benchmark/vision/hand_landmarker/BUILD new file mode 100644 index 000000000..645cd1b45 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/hand_landmarker/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "hand_landmarker_benchmark", + srcs = ["hand_landmarker_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "hand_landmarker_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:hand_landmarker", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/hand_landmarker/hand_landmarker_benchmark.py b/mediapipe/tasks/python/benchmark/vision/hand_landmarker/hand_landmarker_benchmark.py new file mode 100644 index 000000000..52acca885 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/hand_landmarker/hand_landmarker_benchmark.py @@ -0,0 +1,60 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe hand landmarker benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import hand_landmarker + +_MODEL_FILE = 'hand_landmarker.task' +_IMAGE_FILE = 'thumb_up.jpg' + + +def run(model_path, n_iterations, delegate): + """Run a hand landmarker benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the hand landmarker + options = hand_landmarker.HandLandmarkerOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ) + ) + + with hand_landmarker.HandLandmarker.create_from_options( + options + ) as landmarker: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + landmarker.detect, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/image_classifier/BUILD b/mediapipe/tasks/python/benchmark/vision/image_classifier/BUILD new file mode 100644 index 000000000..455f5defc --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/image_classifier/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Placeholder: load py_binary
+
+package(default_visibility = ["//visibility:public"])
+
+py_binary(
+    name = "image_classifier_benchmark",
+    srcs = ["image_classifier_benchmark.py"],
+    data = [
+        "//mediapipe/tasks/testdata/vision:test_images",
+        "//mediapipe/tasks/testdata/vision:test_models",
+    ],
+    main = "image_classifier_benchmark.py",
+    deps = [
+        "//mediapipe/python:_framework_bindings",
+        "//mediapipe/tasks/python/benchmark:benchmark_utils",
+        "//mediapipe/tasks/python/benchmark/vision:benchmark",
+        "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api",
+        "//mediapipe/tasks/python/core:base_options",
+        "//mediapipe/tasks/python/vision:image_classifier",
+    ],
+)
diff --git a/mediapipe/tasks/python/benchmark/vision/image_classifier/README.md b/mediapipe/tasks/python/benchmark/vision/image_classifier/README.md
new file mode 100644
index 000000000..67ab6350d
--- /dev/null
+++ b/mediapipe/tasks/python/benchmark/vision/image_classifier/README.md
@@ -0,0 +1,34 @@
+# MediaPipe Image Classifier Benchmark
+
+## Download the repository
+
+First, clone this Git repo.
+
+Run these commands to download a TFLite model for benchmarking:
+
+```
+cd mediapipe/mediapipe/tasks/python/benchmark/vision/image_classifier
+wget -O classifier.tflite -q https://storage.googleapis.com/mediapipe-models/image_classifier/efficientnet_lite0/float32/1/efficientnet_lite0.tflite
+```
+
+## Run the benchmark
+```
+bazel run -c opt //mediapipe/tasks/python/benchmark/vision/image_classifier:image_classifier_benchmark
+```
+* You can optionally specify the `model` parameter to set the TensorFlow Lite
+  model to be used:
+  * The default value is `mobilenet_v2_1.0_224.tflite`
+  * TensorFlow Lite image classification models **with metadata**
+    * Models from [TensorFlow Hub](https://tfhub.dev/tensorflow/collections/lite/task-library/image-classifier/1)
+    * Models from [MediaPipe Models](https://developers.google.com/mediapipe/solutions/vision/image_classifier/index#models)
+    * Models trained with [MediaPipe Model Maker](https://developers.google.com/mediapipe/solutions/customization/image_classifier) are supported.
+* You can optionally specify the `iterations` parameter to limit the number of
+  iterations for benchmarking:
+  * Supported value: A positive integer.
+  * Default value: `100`
+* Example usage:
+  ```
+  bazel run -c opt :image_classifier_benchmark -- \
+    --model classifier.tflite \
+    --iterations 200
+  ```
diff --git a/mediapipe/tasks/python/benchmark/vision/image_classifier/image_classifier_benchmark.py b/mediapipe/tasks/python/benchmark/vision/image_classifier/image_classifier_benchmark.py
new file mode 100644
index 000000000..7f3f5b83c
--- /dev/null
+++ b/mediapipe/tasks/python/benchmark/vision/image_classifier/image_classifier_benchmark.py
@@ -0,0 +1,61 @@
+# Copyright 2023 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe image classifier benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import image_classifier + +_MODEL_FILE = 'mobilenet_v2_1.0_224.tflite' +_IMAGE_FILE = 'burger.jpg' + + +def run(model_path, n_iterations, delegate): + """Run an image classifier benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the image classifier + options = image_classifier.ImageClassifierOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ), + max_results=1, + ) + + with image_classifier.ImageClassifier.create_from_options( + options + ) as classifier: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + classifier.classify, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/image_embedder/BUILD b/mediapipe/tasks/python/benchmark/vision/image_embedder/BUILD new file mode 100644 index 000000000..7d463d4de --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/image_embedder/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "image_embedder_benchmark", + srcs = ["image_embedder_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "image_embedder_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:image_embedder", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/image_embedder/image_embedder_benchmark.py b/mediapipe/tasks/python/benchmark/vision/image_embedder/image_embedder_benchmark.py new file mode 100644 index 000000000..5f691f34a --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/image_embedder/image_embedder_benchmark.py @@ -0,0 +1,58 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe image embedder benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import image_embedder + +_MODEL_FILE = 'mobilenet_v3_small_100_224_embedder.tflite' +_IMAGE_FILE = 'burger.jpg' + + +def run(model_path, n_iterations, delegate): + """Run an image embedding benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the image embedder + options = image_embedder.ImageEmbedderOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ) + ) + + with image_embedder.ImageEmbedder.create_from_options(options) as embedder: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + embedder.embed, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/image_segmenter/BUILD b/mediapipe/tasks/python/benchmark/vision/image_segmenter/BUILD new file mode 100644 index 000000000..e192b217b --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/image_segmenter/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
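Each of these scripts ends by handing its `run()` function to `benchmark.benchmarker(run, _MODEL_FILE)`. That shared driver is also not shown in this diff; one plausible shape for it, using only the `--model` and `--iterations` flags documented in the image classifier README above (the argument parsing and reporting details below are assumptions, not the actual module):

```python
# Hypothetical sketch of a benchmarker-style driver; flag names follow the
# README above, everything else is assumed.
import argparse
import statistics


def benchmarker(run_fn, default_model_name):
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--model', default=default_model_name,
      help='Path to the TFLite model or .task bundle.')
  parser.add_argument(
      '--iterations', type=int, default=100,
      help='Number of timed inference iterations.')
  args = parser.parse_args()

  # Delegate selection is left to the task's default (CPU) in this sketch.
  inference_times = run_fn(args.model, args.iterations, delegate=None)
  print(f'Mean latency:   {statistics.mean(inference_times):.2f} ms')
  print(f'Median latency: {statistics.median(inference_times):.2f} ms')
```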
+ +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "image_segmenter_benchmark", + srcs = ["image_segmenter_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "image_segmenter_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:image_segmenter", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/image_segmenter/image_segmenter_benchmark.py b/mediapipe/tasks/python/benchmark/vision/image_segmenter/image_segmenter_benchmark.py new file mode 100644 index 000000000..edee60e2f --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/image_segmenter/image_segmenter_benchmark.py @@ -0,0 +1,60 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe image segmenter benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import image_segmenter + +_MODEL_FILE = 'deeplabv3.tflite' +_IMAGE_FILE = 'segmentation_input_rotation0.jpg' + + +def run(model_path, n_iterations, delegate): + """Run an image segmenter benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the image segmenter + options = image_segmenter.ImageSegmenterOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ), + output_confidence_masks=True, + output_category_mask=True, + ) + + with image_segmenter.ImageSegmenter.create_from_options(options) as segmenter: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + segmenter.segment, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/interactive_segmenter/BUILD b/mediapipe/tasks/python/benchmark/vision/interactive_segmenter/BUILD new file mode 100644 index 000000000..be42b48b9 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/interactive_segmenter/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "interactive_segmenter_benchmark", + srcs = ["interactive_segmenter_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "interactive_segmenter_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:interactive_segmenter", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/interactive_segmenter/interactive_segmenter_benchmark.py b/mediapipe/tasks/python/benchmark/vision/interactive_segmenter/interactive_segmenter_benchmark.py new file mode 100644 index 000000000..96283098e --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/interactive_segmenter/interactive_segmenter_benchmark.py @@ -0,0 +1,68 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe interactive segmenter benchmark.""" +import functools + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.components.containers import keypoint +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import interactive_segmenter + +_MODEL_FILE = 'ptm_512_hdt_ptm_woid.tflite' +_IMAGE_FILE = 'cats_and_dogs.jpg' + + +def run(model_path, n_iterations, delegate): + """Run an interactive segmenter benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. 
+ """ + # Initialize the image segmenter + options = interactive_segmenter.InteractiveSegmenterOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ), + output_category_mask=True, + output_confidence_masks=False, + ) + roi = interactive_segmenter.RegionOfInterest( + format=interactive_segmenter.RegionOfInterest.Format.KEYPOINT, + keypoint=keypoint.NormalizedKeypoint(0.44, 0.7), + ) + + with interactive_segmenter.InteractiveSegmenter.create_from_options( + options + ) as segmenter: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + functools.partial(segmenter.segment, roi=roi), mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/object_detector/BUILD b/mediapipe/tasks/python/benchmark/vision/object_detector/BUILD new file mode 100644 index 000000000..44eadbfc5 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/object_detector/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "object_detector_benchmark", + srcs = ["object_detector_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "object_detector_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:object_detector", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/object_detector/object_detector_benchmark.py b/mediapipe/tasks/python/benchmark/vision/object_detector/object_detector_benchmark.py new file mode 100644 index 000000000..cd927703c --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/object_detector/object_detector_benchmark.py @@ -0,0 +1,58 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
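The interactive segmenter benchmark above is the one task that needs an extra argument per call: the timing helper invokes its callable with only the image, so the region of interest is pre-bound with `functools.partial`. A self-contained illustration of that pattern (the `segment` function below is a stand-in, not the real API):

```python
import functools


def segment(mp_image, roi=None):
  # Stand-in for InteractiveSegmenter.segment; returns its inputs for clarity.
  return (mp_image, roi)


# benchmark_task() will call this with a single positional image argument.
segment_with_roi = functools.partial(segment, roi=(0.44, 0.7))
assert segment_with_roi('image') == ('image', (0.44, 0.7))
```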
+"""MediaPipe object detector benchmark.""" + +from mediapipe.python._framework_bindings import image +from mediapipe.tasks.python.benchmark import benchmark_utils +from mediapipe.tasks.python.benchmark.vision import benchmark +from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api +from mediapipe.tasks.python.core import base_options +from mediapipe.tasks.python.vision import object_detector + +_MODEL_FILE = 'coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite' +_IMAGE_FILE = 'cats_and_dogs.jpg' + + +def run(model_path, n_iterations, delegate): + """Run an object detector benchmark. + + Args: + model_path: Path to the TFLite model. + n_iterations: Number of iterations to run the benchmark. + delegate: CPU or GPU delegate for inference. + + Returns: + List of inference times. + """ + # Initialize the object detector + options = object_detector.ObjectDetectorOptions( + base_options=base_options.BaseOptions( + model_asset_path=model_path, delegate=delegate + ) + ) + + with object_detector.ObjectDetector.create_from_options(options) as detector: + mp_image = image.Image.create_from_file( + benchmark_utils.get_test_data_path( + base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE + ) + ) + inference_times = base_vision_benchmark_api.benchmark_task( + detector.detect, mp_image, n_iterations + ) + return inference_times + + +if __name__ == '__main__': + benchmark.benchmarker(run, _MODEL_FILE) diff --git a/mediapipe/tasks/python/benchmark/vision/pose_landmarker/BUILD b/mediapipe/tasks/python/benchmark/vision/pose_landmarker/BUILD new file mode 100644 index 000000000..a63d3b482 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/pose_landmarker/BUILD @@ -0,0 +1,35 @@ +# Copyright 2022 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Placeholder: load py_binary + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "pose_landmarker_benchmark", + srcs = ["pose_landmarker_benchmark.py"], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + main = "pose_landmarker_benchmark.py", + deps = [ + "//mediapipe/python:_framework_bindings", + "//mediapipe/tasks/python/benchmark:benchmark_utils", + "//mediapipe/tasks/python/benchmark/vision:benchmark", + "//mediapipe/tasks/python/benchmark/vision/core:base_vision_benchmark_api", + "//mediapipe/tasks/python/core:base_options", + "//mediapipe/tasks/python/vision:pose_landmarker", + ], +) diff --git a/mediapipe/tasks/python/benchmark/vision/pose_landmarker/pose_landmarker_benchmark.py b/mediapipe/tasks/python/benchmark/vision/pose_landmarker/pose_landmarker_benchmark.py new file mode 100644 index 000000000..cb2e3ce37 --- /dev/null +++ b/mediapipe/tasks/python/benchmark/vision/pose_landmarker/pose_landmarker_benchmark.py @@ -0,0 +1,60 @@ +# Copyright 2023 The MediaPipe Authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe pose landmarker benchmark."""
+
+from mediapipe.python._framework_bindings import image
+from mediapipe.tasks.python.benchmark import benchmark_utils
+from mediapipe.tasks.python.benchmark.vision import benchmark
+from mediapipe.tasks.python.benchmark.vision.core import base_vision_benchmark_api
+from mediapipe.tasks.python.core import base_options
+from mediapipe.tasks.python.vision import pose_landmarker
+
+_MODEL_FILE = 'pose_landmarker.task'
+_IMAGE_FILE = 'pose.jpg'
+
+
+def run(model_path, n_iterations, delegate):
+  """Run a pose landmarker benchmark.
+
+  Args:
+    model_path: Path to the TFLite model.
+    n_iterations: Number of iterations to run the benchmark.
+    delegate: CPU or GPU delegate for inference.
+
+  Returns:
+    List of inference times.
+  """
+  # Initialize the pose landmarker
+  options = pose_landmarker.PoseLandmarkerOptions(
+      base_options=base_options.BaseOptions(
+          model_asset_path=model_path, delegate=delegate
+      )
+  )
+
+  with pose_landmarker.PoseLandmarker.create_from_options(
+      options
+  ) as landmarker:
+    mp_image = image.Image.create_from_file(
+        benchmark_utils.get_test_data_path(
+            base_vision_benchmark_api.VISION_TEST_DATA_DIR, _IMAGE_FILE
+        )
+    )
+    inference_times = base_vision_benchmark_api.benchmark_task(
+        landmarker.detect, mp_image, n_iterations
+    )
+    return inference_times
+
+
+if __name__ == '__main__':
+  benchmark.benchmarker(run, _MODEL_FILE)
diff --git a/mediapipe/tasks/python/core/base_options.py b/mediapipe/tasks/python/core/base_options.py
index 2d4258fed..da81bcd5d 100644
--- a/mediapipe/tasks/python/core/base_options.py
+++ b/mediapipe/tasks/python/core/base_options.py
@@ -70,7 +70,7 @@ class BaseOptions:
     platform_name = platform.system()

     if self.delegate == BaseOptions.Delegate.GPU:
-      if platform_name == 'Linux':
+      if platform_name in ['Linux', 'Darwin']:
         acceleration_proto = _AccelerationProto(gpu=_DelegateProto.Gpu())
       else:
         raise NotImplementedError(
diff --git a/mediapipe/tasks/python/core/pybind/BUILD b/mediapipe/tasks/python/core/pybind/BUILD
index 88ea05f4f..391712f27 100644
--- a/mediapipe/tasks/python/core/pybind/BUILD
+++ b/mediapipe/tasks/python/core/pybind/BUILD
@@ -26,9 +26,11 @@ pybind_library(
         "//mediapipe/framework:calculator_cc_proto",
         "//mediapipe/framework/api2:builder",
         "//mediapipe/framework/port:parse_text_proto",
+        "//mediapipe/gpu:gpu_shared_data_internal",
         "//mediapipe/python/pybind:util",
         "//mediapipe/tasks/cc/core:mediapipe_builtin_op_resolver",
         "//mediapipe/tasks/cc/core:task_runner",
+        "@com_google_absl//absl/log:absl_log",
         "@org_tensorflow//tensorflow/lite/core/api:op_resolver",
         "@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
     ],
diff --git a/mediapipe/tasks/python/core/pybind/task_runner.cc b/mediapipe/tasks/python/core/pybind/task_runner.cc
index f95cddde8..0de7d24d8 100644
--- a/mediapipe/tasks/python/core/pybind/task_runner.cc
+++ b/mediapipe/tasks/python/core/pybind/task_runner.cc
@@ -14,6 +14,7 @@
 #include "mediapipe/tasks/python/core/pybind/task_runner.h"
+#include "absl/log/absl_log.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/python/pybind/util.h"
 #include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h"
@@ -21,6 +22,9 @@
 #include "pybind11/stl.h"
 #include "pybind11_protobuf/native_proto_caster.h"
 #include "tensorflow/lite/core/api/op_resolver.h"
+#if !MEDIAPIPE_DISABLE_GPU
+#include "mediapipe/gpu/gpu_shared_data_internal.h"
+#endif  // MEDIAPIPE_DISABLE_GPU

 namespace mediapipe {
 namespace tasks {
@@ -74,10 +78,27 @@ mode) or not (synchronous mode).)doc");
           return absl::OkStatus();
         };
       }
+
+#if !MEDIAPIPE_DISABLE_GPU
+        auto gpu_resources_ = mediapipe::GpuResources::Create();
+        if (!gpu_resources_.ok()) {
+          ABSL_LOG(INFO) << "GPU support is not available: "
+                         << gpu_resources_.status();
+          gpu_resources_ = nullptr;
+        }
+        auto task_runner = TaskRunner::Create(
+            std::move(graph_config),
+            absl::make_unique<core::MediaPipeBuiltinOpResolver>(),
+            std::move(callback),
+            /* default_executor= */ nullptr,
+            /* input_side_packets= */ std::nullopt, std::move(*gpu_resources_));
+#else
         auto task_runner = TaskRunner::Create(
             std::move(graph_config),
             absl::make_unique<core::MediaPipeBuiltinOpResolver>(),
             std::move(callback));
+#endif  // !MEDIAPIPE_DISABLE_GPU
+
         RaisePyErrorIfNotOk(task_runner.status());
         return std::move(*task_runner);
       },
diff --git a/mediapipe/tasks/python/test/vision/BUILD b/mediapipe/tasks/python/test/vision/BUILD
index ae3d53d61..c6fae0e6c 100644
--- a/mediapipe/tasks/python/test/vision/BUILD
+++ b/mediapipe/tasks/python/test/vision/BUILD
@@ -211,3 +211,20 @@ py_test(
         "//mediapipe/tasks/python/vision/core:image_processing_options",
     ],
 )
+
+py_test(
+    name = "face_stylizer_test",
+    srcs = ["face_stylizer_test.py"],
+    data = [
+        "//mediapipe/tasks/testdata/vision:test_images",
+        "//mediapipe/tasks/testdata/vision:test_models",
+    ],
+    deps = [
+        "//mediapipe/python:_framework_bindings",
+        "//mediapipe/tasks/python/components/containers:rect",
+        "//mediapipe/tasks/python/core:base_options",
+        "//mediapipe/tasks/python/test:test_utils",
+        "//mediapipe/tasks/python/vision:face_stylizer",
+        "//mediapipe/tasks/python/vision/core:image_processing_options",
+    ],
+)
diff --git a/mediapipe/tasks/python/test/vision/face_stylizer_test.py b/mediapipe/tasks/python/test/vision/face_stylizer_test.py
new file mode 100644
index 000000000..1f6b35db4
--- /dev/null
+++ b/mediapipe/tasks/python/test/vision/face_stylizer_test.py
@@ -0,0 +1,191 @@
+# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for face stylizer.""" + +import enum +import os + +from absl.testing import absltest +from absl.testing import parameterized + +from mediapipe.python._framework_bindings import image as image_module +from mediapipe.tasks.python.components.containers import rect +from mediapipe.tasks.python.core import base_options as base_options_module +from mediapipe.tasks.python.test import test_utils +from mediapipe.tasks.python.vision import face_stylizer +from mediapipe.tasks.python.vision.core import image_processing_options as image_processing_options_module + + +_BaseOptions = base_options_module.BaseOptions +_Rect = rect.Rect +_Image = image_module.Image +_FaceStylizer = face_stylizer.FaceStylizer +_FaceStylizerOptions = face_stylizer.FaceStylizerOptions +_ImageProcessingOptions = image_processing_options_module.ImageProcessingOptions + +_MODEL = 'face_stylizer_color_ink.task' +_LARGE_FACE_IMAGE = 'portrait.jpg' +_MODEL_IMAGE_SIZE = 256 +_TEST_DATA_DIR = 'mediapipe/tasks/testdata/vision' + + +class ModelFileType(enum.Enum): + FILE_CONTENT = 1 + FILE_NAME = 2 + + +class FaceStylizerTest(parameterized.TestCase): + + def setUp(self): + super().setUp() + self.test_image = _Image.create_from_file( + test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, _LARGE_FACE_IMAGE) + ) + ) + self.model_path = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, _MODEL) + ) + + def test_create_from_file_succeeds_with_valid_model_path(self): + # Creates with default option and valid model file successfully. + with _FaceStylizer.create_from_model_path(self.model_path) as stylizer: + self.assertIsInstance(stylizer, _FaceStylizer) + + def test_create_from_options_succeeds_with_valid_model_path(self): + # Creates with options containing model file successfully. + base_options = _BaseOptions(model_asset_path=self.model_path) + options = _FaceStylizerOptions(base_options=base_options) + with _FaceStylizer.create_from_options(options) as stylizer: + self.assertIsInstance(stylizer, _FaceStylizer) + + def test_create_from_options_fails_with_invalid_model_path(self): + with self.assertRaisesRegex( + RuntimeError, 'Unable to open file at /path/to/invalid/model.tflite' + ): + base_options = _BaseOptions( + model_asset_path='/path/to/invalid/model.tflite' + ) + options = _FaceStylizerOptions(base_options=base_options) + _FaceStylizer.create_from_options(options) + + def test_create_from_options_succeeds_with_valid_model_content(self): + # Creates with options containing model content successfully. + with open(self.model_path, 'rb') as f: + base_options = _BaseOptions(model_asset_buffer=f.read()) + options = _FaceStylizerOptions(base_options=base_options) + stylizer = _FaceStylizer.create_from_options(options) + self.assertIsInstance(stylizer, _FaceStylizer) + + @parameterized.parameters( + (ModelFileType.FILE_NAME, _LARGE_FACE_IMAGE), + (ModelFileType.FILE_CONTENT, _LARGE_FACE_IMAGE), + ) + def test_stylize(self, model_file_type, image_file_name): + # Load the test image. + self.test_image = _Image.create_from_file( + test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, image_file_name) + ) + ) + # Creates stylizer. 
+ if model_file_type is ModelFileType.FILE_NAME: + base_options = _BaseOptions(model_asset_path=self.model_path) + elif model_file_type is ModelFileType.FILE_CONTENT: + with open(self.model_path, 'rb') as f: + model_content = f.read() + base_options = _BaseOptions(model_asset_buffer=model_content) + else: + # Should never happen + raise ValueError('model_file_type is invalid.') + + options = _FaceStylizerOptions(base_options=base_options) + stylizer = _FaceStylizer.create_from_options(options) + + # Performs face stylization on the input. + stylized_image = stylizer.stylize(self.test_image) + self.assertIsInstance(stylized_image, _Image) + # Closes the stylizer explicitly when the stylizer is not used in + # a context. + stylizer.close() + + @parameterized.parameters( + (ModelFileType.FILE_NAME, _LARGE_FACE_IMAGE), + (ModelFileType.FILE_CONTENT, _LARGE_FACE_IMAGE), + ) + def test_stylize_in_context(self, model_file_type, image_file_name): + # Load the test image. + self.test_image = _Image.create_from_file( + test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, image_file_name) + ) + ) + # Creates stylizer. + if model_file_type is ModelFileType.FILE_NAME: + base_options = _BaseOptions(model_asset_path=self.model_path) + elif model_file_type is ModelFileType.FILE_CONTENT: + with open(self.model_path, 'rb') as f: + model_content = f.read() + base_options = _BaseOptions(model_asset_buffer=model_content) + else: + # Should never happen + raise ValueError('model_file_type is invalid.') + + options = _FaceStylizerOptions(base_options=base_options) + with _FaceStylizer.create_from_options(options) as stylizer: + # Performs face stylization on the input. + stylized_image = stylizer.stylize(self.test_image) + self.assertIsInstance(stylized_image, _Image) + self.assertEqual(stylized_image.width, _MODEL_IMAGE_SIZE) + self.assertEqual(stylized_image.height, _MODEL_IMAGE_SIZE) + + def test_stylize_succeeds_with_region_of_interest(self): + base_options = _BaseOptions(model_asset_path=self.model_path) + options = _FaceStylizerOptions(base_options=base_options) + with _FaceStylizer.create_from_options(options) as stylizer: + # Load the test image. + test_image = _Image.create_from_file( + test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, _LARGE_FACE_IMAGE) + ) + ) + # Region-of-interest around the face. + roi = _Rect(left=0.32, top=0.02, right=0.67, bottom=0.32) + image_processing_options = _ImageProcessingOptions(roi) + # Performs face stylization on the input. + stylized_image = stylizer.stylize(test_image, image_processing_options) + self.assertIsInstance(stylized_image, _Image) + self.assertEqual(stylized_image.width, _MODEL_IMAGE_SIZE) + self.assertEqual(stylized_image.height, _MODEL_IMAGE_SIZE) + + def test_stylize_succeeds_with_no_face_detected(self): + base_options = _BaseOptions(model_asset_path=self.model_path) + options = _FaceStylizerOptions(base_options=base_options) + with _FaceStylizer.create_from_options(options) as stylizer: + # Load the test image. + test_image = _Image.create_from_file( + test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, _LARGE_FACE_IMAGE) + ) + ) + # Region-of-interest that doesn't contain a human face. + roi = _Rect(left=0.1, top=0.1, right=0.2, bottom=0.2) + image_processing_options = _ImageProcessingOptions(roi) + # Performs face stylization on the input. 
+ stylized_image = stylizer.stylize(test_image, image_processing_options) + self.assertIsNone(stylized_image) + + +if __name__ == '__main__': + absltest.main() diff --git a/mediapipe/tasks/testdata/vision/BUILD b/mediapipe/tasks/testdata/vision/BUILD index 6e10663e0..2f5157309 100644 --- a/mediapipe/tasks/testdata/vision/BUILD +++ b/mediapipe/tasks/testdata/vision/BUILD @@ -48,6 +48,7 @@ mediapipe_files(srcs = [ "face_landmark.tflite", "face_landmarker.task", "face_landmarker_v2.task", + "face_stylizer_color_ink.task", "fist.jpg", "fist.png", "gesture_recognizer.task", @@ -89,6 +90,7 @@ mediapipe_files(srcs = [ "pose_landmark_lite.tflite", "pose_landmarker.task", "pose_segmentation_mask_golden.png", + "ptm_512_hdt_ptm_woid.tflite", "right_hands.jpg", "right_hands_rotated.jpg", "segmentation_golden_rotation0.png", @@ -183,6 +185,8 @@ filegroup( "face_detection_short_range.tflite", "face_landmarker.task", "face_landmarker_v2.task", + "face_stylizer_color_ink.task", + "gesture_recognizer.task", "hair_segmentation.tflite", "hand_landmark_full.tflite", "hand_landmark_lite.tflite", @@ -200,6 +204,7 @@ filegroup( "pose_detection.tflite", "pose_landmark_lite.tflite", "pose_landmarker.task", + "ptm_512_hdt_ptm_woid.tflite", "selfie_segm_128_128_3.tflite", "selfie_segm_144_256_3.tflite", "selfie_segmentation.tflite", @@ -222,6 +227,7 @@ filegroup( "hand_detector_result_one_hand.pbtxt", "hand_detector_result_one_hand_rotated.pbtxt", "hand_detector_result_two_hands.pbtxt", + "male_full_height_hands_result_cpu.pbtxt", "pointing_up_landmarks.pbtxt", "pointing_up_rotated_landmarks.pbtxt", "portrait_expected_detection.pbtxt", diff --git a/mediapipe/tasks/testdata/vision/male_full_height_hands_result_cpu.pbtxt b/mediapipe/tasks/testdata/vision/male_full_height_hands_result_cpu.pbtxt index 199dc6366..e50f777c5 100644 --- a/mediapipe/tasks/testdata/vision/male_full_height_hands_result_cpu.pbtxt +++ b/mediapipe/tasks/testdata/vision/male_full_height_hands_result_cpu.pbtxt @@ -2854,7 +2854,262 @@ auxiliary_landmarks { face_blendshapes { classification { index: 0 - score: 1.6770242e-05 - label: "tongueOut" + score: 8.47715e-07 + label: "_neutral" + } + classification { + index: 1 + score: 0.020850565 + label: "browDownLeft" + } + classification { + index: 2 + score: 0.007629181 + label: "browDownRight" + } + classification { + index: 3 + score: 0.26410568 + label: "browInnerUp" + } + classification { + index: 4 + score: 0.04212071 + label: "browOuterUpLeft" + } + classification { + index: 5 + score: 0.07319052 + label: "browOuterUpRight" + } + classification { + index: 6 + score: 9.39117e-06 + label: "cheekPuff" + } + classification { + index: 7 + score: 1.9243858e-07 + label: "cheekSquintLeft" + } + classification { + index: 8 + score: 4.066475e-08 + label: "cheekSquintRight" + } + classification { + index: 9 + score: 0.46092203 + label: "eyeBlinkLeft" + } + classification { + index: 10 + score: 0.40371567 + label: "eyeBlinkRight" + } + classification { + index: 11 + score: 0.65011656 + label: "eyeLookDownLeft" + } + classification { + index: 12 + score: 0.6423024 + label: "eyeLookDownRight" + } + classification { + index: 13 + score: 0.04721973 + label: "eyeLookInLeft" + } + classification { + index: 14 + score: 0.08176838 + label: "eyeLookInRight" + } + classification { + index: 15 + score: 0.09520102 + label: "eyeLookOutLeft" + } + classification { + index: 16 + score: 0.07271895 + label: "eyeLookOutRight" + } + classification { + index: 17 + score: 0.011193463 + label: "eyeLookUpLeft" + } + classification 
{ + index: 18 + score: 0.007041815 + label: "eyeLookUpRight" + } + classification { + index: 19 + score: 0.27120194 + label: "eyeSquintLeft" + } + classification { + index: 20 + score: 0.21675573 + label: "eyeSquintRight" + } + classification { + index: 21 + score: 0.0018824162 + label: "eyeWideLeft" + } + classification { + index: 22 + score: 0.0011966582 + label: "eyeWideRight" + } + classification { + index: 23 + score: 1.9298719e-05 + label: "jawForward" + } + classification { + index: 24 + score: 9.670858e-06 + label: "jawLeft" + } + classification { + index: 25 + score: 0.000115385694 + label: "jawOpen" + } + classification { + index: 26 + score: 0.00023342477 + label: "jawRight" + } + classification { + index: 27 + score: 2.8894076e-05 + label: "mouthClose" + } + classification { + index: 28 + score: 0.003933548 + label: "mouthDimpleLeft" + } + classification { + index: 29 + score: 0.0051949574 + label: "mouthDimpleRight" + } + classification { + index: 30 + score: 0.00067943585 + label: "mouthFrownLeft" + } + classification { + index: 31 + score: 0.0006520291 + label: "mouthFrownRight" + } + classification { + index: 32 + score: 0.0006695333 + label: "mouthFunnel" + } + classification { + index: 33 + score: 8.578597e-05 + label: "mouthLeft" + } + classification { + index: 34 + score: 2.6707421e-05 + label: "mouthLowerDownLeft" + } + classification { + index: 35 + score: 2.153054e-05 + label: "mouthLowerDownRight" + } + classification { + index: 36 + score: 0.0132145975 + label: "mouthPressLeft" + } + classification { + index: 37 + score: 0.009528495 + label: "mouthPressRight" + } + classification { + index: 38 + score: 0.056963783 + label: "mouthPucker" + } + classification { + index: 39 + score: 0.027331185 + label: "mouthRight" + } + classification { + index: 40 + score: 0.00072388636 + label: "mouthRollLower" + } + classification { + index: 41 + score: 0.00021191382 + label: "mouthRollUpper" + } + classification { + index: 42 + score: 0.23938002 + label: "mouthShrugLower" + } + classification { + index: 43 + score: 0.052946873 + label: "mouthShrugUpper" + } + classification { + index: 44 + score: 0.68681276 + label: "mouthSmileLeft" + } + classification { + index: 45 + score: 0.68557316 + label: "mouthSmileRight" + } + classification { + index: 46 + score: 0.0030625665 + label: "mouthStretchLeft" + } + classification { + index: 47 + score: 0.003999545 + label: "mouthStretchRight" + } + classification { + index: 48 + score: 0.013184475 + label: "mouthUpperUpLeft" + } + classification { + index: 49 + score: 0.017995607 + label: "mouthUpperUpRight" + } + classification { + index: 50 + score: 2.0452394e-06 + label: "noseSneerLeft" + } + classification { + index: 51 + score: 3.7912793e-07 + label: "noseSneerRight" } } diff --git a/mediapipe/tasks/web/vision/README.md b/mediapipe/tasks/web/vision/README.md index 816ef9e4f..c603beaea 100644 --- a/mediapipe/tasks/web/vision/README.md +++ b/mediapipe/tasks/web/vision/README.md @@ -66,7 +66,7 @@ const vision = await FilesetResolver.forVisionTasks( "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm" ); const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision, - "hhttps://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task" + "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task" ); const image = document.getElementById("image") as HTMLImageElement; const recognitions = 
gestureRecognizer.recognize(image); diff --git a/mediapipe/tasks/web/vision/core/BUILD b/mediapipe/tasks/web/vision/core/BUILD index dfbbb9f91..db9c27e0f 100644 --- a/mediapipe/tasks/web/vision/core/BUILD +++ b/mediapipe/tasks/web/vision/core/BUILD @@ -31,27 +31,58 @@ mediapipe_ts_library( mediapipe_ts_library( name = "drawing_utils", - srcs = ["drawing_utils.ts"], + srcs = [ + "drawing_utils.ts", + "drawing_utils_category_mask.ts", + "drawing_utils_confidence_mask.ts", + ], deps = [ + ":image", + ":image_shader_context", + ":mask", ":types", "//mediapipe/tasks/web/components/containers:bounding_box", "//mediapipe/tasks/web/components/containers:landmark", + "//mediapipe/web/graph_runner:graph_runner_ts", ], ) mediapipe_ts_library( - name = "image", - srcs = [ - "image.ts", - "image_shader_context.ts", + name = "drawing_utils_test_lib", + testonly = True, + srcs = ["drawing_utils.test.ts"], + deps = [ + ":drawing_utils", + ":image", + ":image_shader_context", + ":mask", ], ) +jasmine_node_test( + name = "drawing_utils_test", + deps = [":drawing_utils_test_lib"], +) + +mediapipe_ts_library( + name = "image", + srcs = ["image.ts"], + deps = ["image_shader_context"], +) + +mediapipe_ts_library( + name = "image_shader_context", + srcs = ["image_shader_context.ts"], +) + mediapipe_ts_library( name = "image_test_lib", testonly = True, srcs = ["image.test.ts"], - deps = [":image"], + deps = [ + ":image", + ":image_shader_context", + ], ) jasmine_node_test( @@ -64,6 +95,7 @@ mediapipe_ts_library( srcs = ["mask.ts"], deps = [ ":image", + ":image_shader_context", "//mediapipe/web/graph_runner:platform_utils", ], ) @@ -74,6 +106,7 @@ mediapipe_ts_library( srcs = ["mask.test.ts"], deps = [ ":image", + ":image_shader_context", ":mask", ], ) @@ -89,6 +122,7 @@ mediapipe_ts_library( deps = [ ":image", ":image_processing_options", + ":image_shader_context", ":mask", ":vision_task_options", "//mediapipe/framework/formats:rect_jspb_proto", @@ -116,11 +150,6 @@ mediapipe_ts_library( ], ) -mediapipe_ts_library( - name = "render_utils", - srcs = ["render_utils.ts"], -) - jasmine_node_test( name = "vision_task_runner_test", deps = [":vision_task_runner_test_lib"], diff --git a/mediapipe/tasks/web/vision/core/drawing_utils.test.ts b/mediapipe/tasks/web/vision/core/drawing_utils.test.ts new file mode 100644 index 000000000..c32a5fc56 --- /dev/null +++ b/mediapipe/tasks/web/vision/core/drawing_utils.test.ts @@ -0,0 +1,204 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'jasmine'; + +import {DrawingUtils} from './drawing_utils'; +import {MPImageShaderContext} from './image_shader_context'; +import {MPMask} from './mask'; + +const WIDTH = 2; +const HEIGHT = 2; + +const skip = typeof document === 'undefined'; +if (skip) { + console.log('These tests must be run in a browser.'); +} + +(skip ? 
xdescribe : describe)('DrawingUtils', () => { + let shaderContext = new MPImageShaderContext(); + let canvas2D: OffscreenCanvas; + let context2D: OffscreenCanvasRenderingContext2D; + let drawingUtils2D: DrawingUtils; + let canvasWebGL: OffscreenCanvas; + let contextWebGL: WebGL2RenderingContext; + let drawingUtilsWebGL: DrawingUtils; + + beforeEach(() => { + canvas2D = canvas2D ?? new OffscreenCanvas(WIDTH, HEIGHT); + canvasWebGL = canvasWebGL ?? new OffscreenCanvas(WIDTH, HEIGHT); + + shaderContext = new MPImageShaderContext(); + contextWebGL = canvasWebGL.getContext('webgl2')!; + drawingUtilsWebGL = new DrawingUtils(contextWebGL); + context2D = canvas2D.getContext('2d')!; + drawingUtils2D = new DrawingUtils(context2D, contextWebGL); + }); + + afterEach(() => { + shaderContext.close(); + drawingUtils2D.close(); + drawingUtilsWebGL.close(); + }); + + describe( + 'drawConfidenceMask() blends background with foreground color', () => { + const defaultColor = [255, 255, 255, 255]; + const overlayImage = new ImageData( + new Uint8ClampedArray( + [0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255]), + WIDTH, HEIGHT); + const expectedResult = new Uint8Array([ + 255, 255, 255, 255, 178, 178, 178, 255, 102, 102, 102, 255, 0, 0, 0, + 255 + ]); + + it('on 2D canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array([0.0, 0.3, 0.6, 1.0])], + /* interpolateValues= */ true, + /* ownsWebGLTexture= */ false, canvas2D, shaderContext, WIDTH, + HEIGHT); + + drawingUtils2D.drawConfidenceMask( + confidenceMask, defaultColor, overlayImage); + + const actualResult = context2D.getImageData(0, 0, WIDTH, HEIGHT).data; + expect(actualResult) + .toEqual(new Uint8ClampedArray(expectedResult.buffer)); + confidenceMask.close(); + }); + + it('on WebGL canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array( + [0.6, 1.0, 0.0, 0.3])], // Note: Vertically flipped + /* interpolateValues= */ true, + /* ownsWebGLTexture= */ false, canvasWebGL, shaderContext, WIDTH, + HEIGHT); + + drawingUtilsWebGL.drawConfidenceMask( + confidenceMask, defaultColor, overlayImage); + + const actualResult = new Uint8Array(WIDTH * HEIGHT * 4); + contextWebGL.readPixels( + 0, 0, WIDTH, HEIGHT, contextWebGL.RGBA, + contextWebGL.UNSIGNED_BYTE, actualResult); + expect(actualResult).toEqual(expectedResult); + confidenceMask.close(); + }); + }); + + + describe( + 'drawConfidenceMask() blends background with foreground image', () => { + const defaultImage = new ImageData( + new Uint8ClampedArray([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255 + ]), + WIDTH, HEIGHT); + const overlayImage = new ImageData( + new Uint8ClampedArray( + [0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255]), + WIDTH, HEIGHT); + const expectedResult = new Uint8Array([ + 255, 255, 255, 255, 178, 178, 178, 255, 102, 102, 102, 255, 0, 0, 0, + 255 + ]); + + it('on 2D canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array([0.0, 0.3, 0.6, 1.0])], + /* interpolateValues= */ true, + /* ownsWebGLTexture= */ false, canvas2D, shaderContext, WIDTH, + HEIGHT); + + drawingUtils2D.drawConfidenceMask( + confidenceMask, defaultImage, overlayImage); + + const actualResult = context2D.getImageData(0, 0, WIDTH, HEIGHT).data; + expect(actualResult) + .toEqual(new Uint8ClampedArray(expectedResult.buffer)); + confidenceMask.close(); + }); + + it('on WebGL canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array( + [0.6, 1.0, 0.0, 0.3])], // Note: Vertically flipped + /* interpolateValues= 
*/ true, + /* ownsWebGLTexture= */ false, canvasWebGL, shaderContext, WIDTH, + HEIGHT); + + drawingUtilsWebGL.drawConfidenceMask( + confidenceMask, defaultImage, overlayImage); + + const actualResult = new Uint8Array(WIDTH * HEIGHT * 4); + contextWebGL.readPixels( + 0, 0, WIDTH, HEIGHT, contextWebGL.RGBA, + contextWebGL.UNSIGNED_BYTE, actualResult); + expect(actualResult).toEqual(expectedResult); + confidenceMask.close(); + }); + }); + + describe('drawCategoryMask() ', () => { + const colors = [ + [0, 0, 0, 255], + [0, 255, 0, 255], + [0, 0, 255, 255], + [255, 255, 255, 255], + ]; + const expectedResult = new Uint8Array( + [0, 0, 0, 255, 0, 255, 0, 255, 0, 0, 255, 255, 255, 255, 255, 255], + ); + + it('on 2D canvas', () => { + const categoryMask = new MPMask( + [new Uint8Array([0, 1, 2, 3])], + /* interpolateValues= */ false, + /* ownsWebGLTexture= */ false, canvas2D, shaderContext, WIDTH, + HEIGHT); + + drawingUtils2D.drawCategoryMask(categoryMask, colors); + + const actualResult = context2D.getImageData(0, 0, WIDTH, HEIGHT).data; + expect(actualResult) + .toEqual(new Uint8ClampedArray(expectedResult.buffer)); + categoryMask.close(); + }); + + it('on WebGL canvas', () => { + const categoryMask = new MPMask( + [new Uint8Array([2, 3, 0, 1])], // Note: Vertically flipped + /* interpolateValues= */ false, + /* ownsWebGLTexture= */ false, canvasWebGL, shaderContext, WIDTH, + HEIGHT); + + drawingUtilsWebGL.drawCategoryMask(categoryMask, colors); + + const actualResult = new Uint8Array(WIDTH * WIDTH * 4); + contextWebGL.readPixels( + 0, 0, WIDTH, HEIGHT, contextWebGL.RGBA, contextWebGL.UNSIGNED_BYTE, + actualResult); + expect(actualResult).toEqual(expectedResult); + categoryMask.close(); + }); + }); + + // TODO: Add tests for drawConnectors/drawLandmarks/drawBoundingBox +}); diff --git a/mediapipe/tasks/web/vision/core/drawing_utils.ts b/mediapipe/tasks/web/vision/core/drawing_utils.ts index c1e84fa11..f3c3d5d75 100644 --- a/mediapipe/tasks/web/vision/core/drawing_utils.ts +++ b/mediapipe/tasks/web/vision/core/drawing_utils.ts @@ -16,7 +16,12 @@ import {BoundingBox} from '../../../../tasks/web/components/containers/bounding_box'; import {NormalizedLandmark} from '../../../../tasks/web/components/containers/landmark'; +import {CategoryMaskShaderContext, CategoryToColorMap, RGBAColor} from '../../../../tasks/web/vision/core/drawing_utils_category_mask'; +import {ConfidenceMaskShaderContext} from '../../../../tasks/web/vision/core/drawing_utils_confidence_mask'; +import {MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context'; +import {MPMask} from '../../../../tasks/web/vision/core/mask'; import {Connection} from '../../../../tasks/web/vision/core/types'; +import {ImageSource} from '../../../../web/graph_runner/graph_runner'; /** * A user-defined callback to take input data and map it to a custom output @@ -24,6 +29,9 @@ import {Connection} from '../../../../tasks/web/vision/core/types'; */ export type Callback = (input: I) => O; +// Used in public API +export {ImageSource}; + /** Data that a user can use to specialize drawing options. */ export declare interface LandmarkData { index?: number; @@ -31,6 +39,32 @@ export declare interface LandmarkData { to?: NormalizedLandmark; } +/** A color map with 22 classes. Used in our demos. 
*/ +export const DEFAULT_CATEGORY_TO_COLOR_MAP = [ + [0, 0, 0, 0], // class 0 is BG = transparent + [255, 0, 0, 255], // class 1 is red + [0, 255, 0, 255], // class 2 is light green + [0, 0, 255, 255], // class 3 is blue + [255, 255, 0, 255], // class 4 is yellow + [255, 0, 255, 255], // class 5 is light purple / magenta + [0, 255, 255, 255], // class 6 is light blue / aqua + [128, 128, 128, 255], // class 7 is gray + [255, 100, 0, 255], // class 8 is dark orange + [128, 0, 255, 255], // class 9 is dark purple + [0, 150, 0, 255], // class 10 is green + [255, 255, 255, 255], // class 11 is white + [255, 105, 180, 255], // class 12 is pink + [255, 150, 0, 255], // class 13 is orange + [255, 250, 224, 255], // class 14 is light yellow + [148, 0, 211, 255], // class 15 is dark violet + [0, 100, 0, 255], // class 16 is dark green + [0, 0, 128, 255], // class 17 is navy blue + [165, 42, 42, 255], // class 18 is brown + [64, 224, 208, 255], // class 19 is turquoise + [255, 218, 185, 255], // class 20 is peach + [192, 192, 192, 255], // class 21 is silver +]; + /** * Options for customizing the drawing routines */ @@ -77,14 +111,51 @@ function resolve(value: O|Callback, data: I): O { return value instanceof Function ? value(data) : value; } +export {RGBAColor, CategoryToColorMap}; + /** Helper class to visualize the result of a MediaPipe Vision task. */ export class DrawingUtils { + private categoryMaskShaderContext?: CategoryMaskShaderContext; + private confidenceMaskShaderContext?: ConfidenceMaskShaderContext; + private convertToWebGLTextureShaderContext?: MPImageShaderContext; + private readonly context2d?: CanvasRenderingContext2D| + OffscreenCanvasRenderingContext2D; + private readonly contextWebGL?: WebGL2RenderingContext; + /** * Creates a new DrawingUtils class. * - * @param ctx The canvas to render onto. + * @param gpuContext The WebGL canvas rendering context to render into. If + * your Task is using a GPU delegate, the context must be obtained from + * its canvas (provided via `setOptions({ canvas: .. })`). */ - constructor(private readonly ctx: CanvasRenderingContext2D) {} + constructor(gpuContext: WebGL2RenderingContext); + /** + * Creates a new DrawingUtils class. + * + * @param cpuContext The 2D canvas rendering context to render into. If + * you are rendering GPU data you must also provide `gpuContext` to allow + * for data conversion. + * @param gpuContext A WebGL canvas that is used for GPU rendering and for + * converting GPU to CPU data. If your Task is using a GPU delegate, the + * context must be obtained from its canvas (provided via + * `setOptions({ canvas: .. })`). + */ + constructor( + cpuContext: CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D, + gpuContext?: WebGL2RenderingContext); + constructor( + cpuOrGpuGontext: CanvasRenderingContext2D| + OffscreenCanvasRenderingContext2D|WebGL2RenderingContext, + gpuContext?: WebGL2RenderingContext) { + if (cpuOrGpuGontext instanceof CanvasRenderingContext2D || + cpuOrGpuGontext instanceof OffscreenCanvasRenderingContext2D) { + this.context2d = cpuOrGpuGontext; + this.contextWebGL = gpuContext; + } else { + this.contextWebGL = cpuOrGpuGontext; + } + } /** * Restricts a number between two endpoints (order doesn't matter). 
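For context on the constructor overloads above and the mask-drawing entry points added later in this file, here is a usage sketch; the canvas setup and the mask's origin are assumptions, and only `DrawingUtils`, its constructor arguments, and `drawCategoryMask()` come from this change:

```typescript
// Hypothetical usage sketch: render a category mask onto a user-supplied 2D
// canvas, keeping a WebGL2 context around so GPU-backed masks can be converted.
import {DrawingUtils} from '@mediapipe/tasks-vision';
import type {MPMask} from '@mediapipe/tasks-vision';

const outputCanvas = document.getElementById('output') as HTMLCanvasElement;
const ctx2d = outputCanvas.getContext('2d')!;
const gl = new OffscreenCanvas(outputCanvas.width, outputCanvas.height)
               .getContext('webgl2')!;
const drawingUtils = new DrawingUtils(ctx2d, gl);

// One RGBA entry per model category; category 0 is rendered transparent here.
const legend: Array<[number, number, number, number]> = [
  [0, 0, 0, 0],
  [255, 0, 0, 255],
  [0, 255, 0, 255],
];

function drawSegmentation(categoryMask: MPMask): void {
  drawingUtils.drawCategoryMask(categoryMask, legend);
  categoryMask.close();
}
```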
@@ -120,9 +191,43 @@ export class DrawingUtils { return DrawingUtils.clamp(out, y0, y1); } + private getCanvasRenderingContext(): CanvasRenderingContext2D + |OffscreenCanvasRenderingContext2D { + if (!this.context2d) { + throw new Error( + 'CPU rendering requested but CanvasRenderingContext2D not provided.'); + } + return this.context2d; + } + + private getWebGLRenderingContext(): WebGL2RenderingContext { + if (!this.contextWebGL) { + throw new Error( + 'GPU rendering requested but WebGL2RenderingContext not provided.'); + } + return this.contextWebGL; + } + + private getCategoryMaskShaderContext(): CategoryMaskShaderContext { + if (!this.categoryMaskShaderContext) { + this.categoryMaskShaderContext = new CategoryMaskShaderContext(); + } + return this.categoryMaskShaderContext; + } + + private getConfidenceMaskShaderContext(): ConfidenceMaskShaderContext { + if (!this.confidenceMaskShaderContext) { + this.confidenceMaskShaderContext = new ConfidenceMaskShaderContext(); + } + return this.confidenceMaskShaderContext; + } + /** * Draws circles onto the provided landmarks. * + * This method can only be used when `DrawingUtils` is initialized with a + * `CanvasRenderingContext2D`. + * * @export * @param landmarks The landmarks to draw. * @param style The style to visualize the landmarks. @@ -132,7 +237,7 @@ export class DrawingUtils { if (!landmarks) { return; } - const ctx = this.ctx; + const ctx = this.getCanvasRenderingContext(); const options = addDefaultOptions(style); ctx.save(); const canvas = ctx.canvas; @@ -159,6 +264,9 @@ export class DrawingUtils { /** * Draws lines between landmarks (given a connection graph). * + * This method can only be used when `DrawingUtils` is initialized with a + * `CanvasRenderingContext2D`. + * * @export * @param landmarks The landmarks to draw. * @param connections The connections array that contains the start and the @@ -171,7 +279,7 @@ export class DrawingUtils { if (!landmarks || !connections) { return; } - const ctx = this.ctx; + const ctx = this.getCanvasRenderingContext(); const options = addDefaultOptions(style); ctx.save(); const canvas = ctx.canvas; @@ -195,12 +303,15 @@ export class DrawingUtils { /** * Draws a bounding box. * + * This method can only be used when `DrawingUtils` is initialized with a + * `CanvasRenderingContext2D`. + * * @export * @param boundingBox The bounding box to draw. * @param style The style to visualize the bounding box. */ drawBoundingBox(boundingBox: BoundingBox, style?: DrawingOptions): void { - const ctx = this.ctx; + const ctx = this.getCanvasRenderingContext(); const options = addDefaultOptions(style); ctx.save(); ctx.beginPath(); @@ -218,6 +329,187 @@ export class DrawingUtils { ctx.fill(); ctx.restore(); } + + /** Draws a category mask on a CanvasRenderingContext2D. */ + private drawCategoryMask2D( + mask: MPMask, background: RGBAColor|ImageSource, + categoryToColorMap: Map<number, RGBAColor>|RGBAColor[]): void { + // Use the WebGL renderer to draw result on our internal canvas. + const gl = this.getWebGLRenderingContext(); + this.runWithWebGLTexture(mask, texture => { + this.drawCategoryMaskWebGL(texture, background, categoryToColorMap); + // Draw the result on the user canvas. + const ctx = this.getCanvasRenderingContext(); + ctx.drawImage(gl.canvas, 0, 0, ctx.canvas.width, ctx.canvas.height); + }); + } + + /** Draws a category mask on a WebGL2RenderingContext.
*/ + private drawCategoryMaskWebGL( + categoryTexture: WebGLTexture, background: RGBAColor|ImageSource, + categoryToColorMap: Map<number, RGBAColor>|RGBAColor[]): void { + const shaderContext = this.getCategoryMaskShaderContext(); + const gl = this.getWebGLRenderingContext(); + const backgroundImage = Array.isArray(background) ? + new ImageData(new Uint8ClampedArray(background), 1, 1) : + background; + + shaderContext.run(gl, /* flipTexturesVertically= */ true, () => { + shaderContext.bindAndUploadTextures( + categoryTexture, backgroundImage, categoryToColorMap); + gl.clearColor(0, 0, 0, 0); + gl.clear(gl.COLOR_BUFFER_BIT); + gl.drawArrays(gl.TRIANGLE_FAN, 0, 4); + shaderContext.unbindTextures(); + }); + } + + /** + * Draws a category mask using the provided category-to-color mapping. + * + * @export + * @param mask A category mask that was returned from a segmentation task. + * @param categoryToColorMap A map that maps category indices to RGBA + * values. You must specify a map entry for each category. + * @param background A color or image to use as the background. Defaults to + * black. + */ + drawCategoryMask( + mask: MPMask, categoryToColorMap: Map<number, RGBAColor>, + background?: RGBAColor|ImageSource): void; + /** + * Draws a category mask using the provided color array. + * + * @export + * @param mask A category mask that was returned from a segmentation task. + * @param categoryToColorMap An array that maps indices to RGBA values. The + * array's indices must correspond to the category indices of the model + * and an entry must be provided for each category. + * @param background A color or image to use as the background. Defaults to + * black. + */ + drawCategoryMask( + mask: MPMask, categoryToColorMap: RGBAColor[], + background?: RGBAColor|ImageSource): void; + /** @export */ + drawCategoryMask( + mask: MPMask, categoryToColorMap: CategoryToColorMap, + background: RGBAColor|ImageSource = [0, 0, 0, 255]): void { + if (this.context2d) { + this.drawCategoryMask2D(mask, background, categoryToColorMap); + } else { + this.drawCategoryMaskWebGL( + mask.getAsWebGLTexture(), background, categoryToColorMap); + } + } + + /** + * Converts the given mask to a WebGLTexture and runs the callback. Cleans + * up any new resources after the callback finishes executing. + */ + private runWithWebGLTexture( + mask: MPMask, callback: (texture: WebGLTexture) => void): void { + if (!mask.hasWebGLTexture()) { + // Re-create the MPMask but use our WebGL canvas so we can draw the + // texture directly. + const data = mask.hasFloat32Array() ? mask.getAsFloat32Array() : + mask.getAsUint8Array(); + this.convertToWebGLTextureShaderContext = + this.convertToWebGLTextureShaderContext ?? new MPImageShaderContext(); + const gl = this.getWebGLRenderingContext(); + + const convertedMask = new MPMask( + [data], + mask.interpolateValues, + /* ownsWebGLTexture= */ false, + gl.canvas, + this.convertToWebGLTextureShaderContext, + mask.width, + mask.height, + ); + callback(convertedMask.getAsWebGLTexture()); + convertedMask.close(); + } else { + callback(mask.getAsWebGLTexture()); + } + } + + /** Draws a confidence mask on a WebGL2RenderingContext. */ + private drawConfidenceMaskWebGL( + maskTexture: WebGLTexture, defaultTexture: RGBAColor|ImageSource, + overlayTexture: RGBAColor|ImageSource): void { + const gl = this.getWebGLRenderingContext(); + const shaderContext = this.getConfidenceMaskShaderContext(); + const defaultImage = Array.isArray(defaultTexture) ?
+ new ImageData(new Uint8ClampedArray(defaultTexture), 1, 1) : + defaultTexture; + const overlayImage = Array.isArray(overlayTexture) ? + new ImageData(new Uint8ClampedArray(overlayTexture), 1, 1) : + overlayTexture; + + shaderContext.run(gl, /* flipTexturesVertically= */ true, () => { + shaderContext.bindAndUploadTextures( + defaultImage, overlayImage, maskTexture); + gl.clearColor(0, 0, 0, 0); + gl.clear(gl.COLOR_BUFFER_BIT); + gl.drawArrays(gl.TRIANGLE_FAN, 0, 4); + gl.bindTexture(gl.TEXTURE_2D, null); + shaderContext.unbindTextures(); + }); + } + + /** Draws a confidence mask on a CanvasRenderingContext2D. */ + private drawConfidenceMask2D( + mask: MPMask, defaultTexture: RGBAColor|ImageSource, + overlayTexture: RGBAColor|ImageSource): void { + // Use the WebGL renderer to draw result on our internal canvas. + const gl = this.getWebGLRenderingContext(); + this.runWithWebGLTexture(mask, texture => { + this.drawConfidenceMaskWebGL(texture, defaultTexture, overlayTexture); + // Draw the result on the user canvas. + const ctx = this.getCanvasRenderingContext(); + ctx.drawImage(gl.canvas, 0, 0, ctx.canvas.width, ctx.canvas.height); + }); + } + + /** + * Blends two images using the provided confidence mask. + * + * If you are using an `ImageData` or `HTMLImageElement` as your data source + * and drawing the result onto a `WebGL2RenderingContext`, this method uploads + * the image data to the GPU. For still image input that gets re-used every + * frame, you can reduce the cost of re-uploading these images by passing a + * `HTMLCanvasElement` instead. + * + * @export + * @param mask A confidence mask that was returned from a segmentation task. + * @param defaultTexture An image or a four-channel color that will be used + * when confidence values are low. + * @param overlayTexture An image or four-channel color that will be used when + * confidence values are high. + */ + drawConfidenceMask( + mask: MPMask, defaultTexture: RGBAColor|ImageSource, + overlayTexture: RGBAColor|ImageSource): void { + if (this.context2d) { + this.drawConfidenceMask2D(mask, defaultTexture, overlayTexture); + } else { + this.drawConfidenceMaskWebGL( + mask.getAsWebGLTexture(), defaultTexture, overlayTexture); + } + } + /** + * Frees all WebGL resources held by this class. + * @export + */ + close(): void { + this.categoryMaskShaderContext?.close(); + this.categoryMaskShaderContext = undefined; + this.confidenceMaskShaderContext?.close(); + this.confidenceMaskShaderContext = undefined; + this.convertToWebGLTextureShaderContext?.close(); + this.convertToWebGLTextureShaderContext = undefined; + } } diff --git a/mediapipe/tasks/web/vision/core/drawing_utils_category_mask.ts b/mediapipe/tasks/web/vision/core/drawing_utils_category_mask.ts new file mode 100644 index 000000000..3b7cc0b47 --- /dev/null +++ b/mediapipe/tasks/web/vision/core/drawing_utils_category_mask.ts @@ -0,0 +1,190 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import {assertNotNull, MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context'; +import {ImageSource} from '../../../../web/graph_runner/graph_runner'; + +/** + * A fragment shader that maps categories to colors based on a background + * texture, a mask texture and a 256x1 "color mapping texture" that contains one + * color for each pixel. + */ +const FRAGMENT_SHADER = ` + precision mediump float; + uniform sampler2D backgroundTexture; + uniform sampler2D maskTexture; + uniform sampler2D colorMappingTexture; + varying vec2 vTex; + void main() { + vec4 backgroundColor = texture2D(backgroundTexture, vTex); + float category = texture2D(maskTexture, vTex).r; + vec4 categoryColor = texture2D(colorMappingTexture, vec2(category, 0.0)); + gl_FragColor = mix(backgroundColor, categoryColor, categoryColor.a); + } + `; + +/** + * A four channel color with values for red, green, blue and alpha + * respectively. + */ +export type RGBAColor = [number, number, number, number]|number[]; + +/** + * A category to color mapping that uses either a map or an array to assign + * category indexes to RGBA colors. + */ +export type CategoryToColorMap = Map<number, RGBAColor>|RGBAColor[]; + + +/** Checks CategoryToColorMap maps for deep equality. */ +function isEqualColorMap( + a: CategoryToColorMap, b: CategoryToColorMap): boolean { + if (a === b) { + return true; + } + + const aEntries = a.entries(); + const bEntries = b.entries(); + for (const [aKey, aValue] of aEntries) { + const bNext = bEntries.next(); + if (bNext.done) { + return false; + } + + const [bKey, bValue] = bNext.value; + if (aKey !== bKey) { + return false; + } + + if (aValue[0] !== bValue[0] || aValue[1] !== bValue[1] || + aValue[2] !== bValue[2] || aValue[3] !== bValue[3]) { + return false; + } + } + return !!bEntries.next().done; +} + + +/** A drawing util class for category masks. */ +export class CategoryMaskShaderContext extends MPImageShaderContext { + backgroundTexture?: WebGLTexture; + colorMappingTexture?: WebGLTexture; + colorMappingTextureUniform?: WebGLUniformLocation; + backgroundTextureUniform?: WebGLUniformLocation; + maskTextureUniform?: WebGLUniformLocation; + currentColorMap?: CategoryToColorMap; + + bindAndUploadTextures( + categoryMask: WebGLTexture, background: ImageSource, + colorMap: Map<number, number[]>|number[][]) { + const gl = this.gl!; + + // Bind category mask + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, categoryMask); + + // TODO: We should avoid uploading textures from CPU to GPU + // if the textures haven't changed. This can lead to drastic performance + // slowdowns (~50ms per frame). Users can reduce the penalty by passing a + // canvas object instead of ImageData/HTMLImageElement. + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, this.backgroundTexture!); + gl.texImage2D( + gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, background); + + // Bind color mapping texture if changed.
+ if (!this.currentColorMap || + !isEqualColorMap(this.currentColorMap, colorMap)) { + this.currentColorMap = colorMap; + + const pixels = new Array(256 * 4).fill(0); + colorMap.forEach((rgba, index) => { + if (rgba.length !== 4) { + throw new Error( + `Color at index ${index} is not a four-channel value.`); + } + pixels[index * 4] = rgba[0]; + pixels[index * 4 + 1] = rgba[1]; + pixels[index * 4 + 2] = rgba[2]; + pixels[index * 4 + 3] = rgba[3]; + }); + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, this.colorMappingTexture!); + gl.texImage2D( + gl.TEXTURE_2D, 0, gl.RGBA, 256, 1, 0, gl.RGBA, gl.UNSIGNED_BYTE, + new Uint8Array(pixels)); + } else { + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, this.colorMappingTexture!); + } + } + + unbindTextures() { + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, null); + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, null); + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, null); + } + + protected override getFragmentShader(): string { + return FRAGMENT_SHADER; + } + + protected override setupTextures(): void { + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE1); + this.backgroundTexture = this.createTexture(gl, gl.LINEAR); + // Use `gl.NEAREST` to prevent interpolating values in our category to + // color map. + gl.activeTexture(gl.TEXTURE2); + this.colorMappingTexture = this.createTexture(gl, gl.NEAREST); + } + + protected override setupShaders(): void { + super.setupShaders(); + const gl = this.gl!; + this.backgroundTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'backgroundTexture'), + 'Uniform location'); + this.colorMappingTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'colorMappingTexture'), + 'Uniform location'); + this.maskTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'maskTexture'), + 'Uniform location'); + } + + protected override configureUniforms(): void { + super.configureUniforms(); + const gl = this.gl!; + gl.uniform1i(this.maskTextureUniform!, 0); + gl.uniform1i(this.backgroundTextureUniform!, 1); + gl.uniform1i(this.colorMappingTextureUniform!, 2); + } + + override close(): void { + if (this.backgroundTexture) { + this.gl!.deleteTexture(this.backgroundTexture); + } + if (this.colorMappingTexture) { + this.gl!.deleteTexture(this.colorMappingTexture); + } + super.close(); + } +} diff --git a/mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts b/mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts new file mode 100644 index 000000000..953911f01 --- /dev/null +++ b/mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts @@ -0,0 +1,125 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import {assertNotNull, MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context'; +import {ImageSource} from '../../../../web/graph_runner/graph_runner'; + +/** + * A fragment shader that blends a default image and overlay texture based on an + * input texture that contains confidence values. + */ +const FRAGMENT_SHADER = ` + precision mediump float; + uniform sampler2D maskTexture; + uniform sampler2D defaultTexture; + uniform sampler2D overlayTexture; + varying vec2 vTex; + void main() { + float confidence = texture2D(maskTexture, vTex).r; + vec4 defaultColor = texture2D(defaultTexture, vTex); + vec4 overlayColor = texture2D(overlayTexture, vTex); + // Apply the alpha from the overlay and merge in the default color + overlayColor = mix(defaultColor, overlayColor, overlayColor.a); + gl_FragColor = mix(defaultColor, overlayColor, confidence); + } + `; + +/** A drawing util class for confidence masks. */ +export class ConfidenceMaskShaderContext extends MPImageShaderContext { + defaultTexture?: WebGLTexture; + overlayTexture?: WebGLTexture; + defaultTextureUniform?: WebGLUniformLocation; + overlayTextureUniform?: WebGLUniformLocation; + maskTextureUniform?: WebGLUniformLocation; + + protected override getFragmentShader(): string { + return FRAGMENT_SHADER; + } + + protected override setupTextures(): void { + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE1); + this.defaultTexture = this.createTexture(gl); + gl.activeTexture(gl.TEXTURE2); + this.overlayTexture = this.createTexture(gl); + } + + protected override setupShaders(): void { + super.setupShaders(); + const gl = this.gl!; + this.defaultTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'defaultTexture'), + 'Uniform location'); + this.overlayTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'overlayTexture'), + 'Uniform location'); + this.maskTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'maskTexture'), + 'Uniform location'); + } + + protected override configureUniforms(): void { + super.configureUniforms(); + const gl = this.gl!; + gl.uniform1i(this.maskTextureUniform!, 0); + gl.uniform1i(this.defaultTextureUniform!, 1); + gl.uniform1i(this.overlayTextureUniform!, 2); + } + + bindAndUploadTextures( + defaultImage: ImageSource, overlayImage: ImageSource, + confidenceMask: WebGLTexture) { + // TODO: We should avoid uploading textures from CPU to GPU + // if the textures haven't changed. This can lead to drastic performance + // slowdowns (~50ms per frame). Users can reduce the penalty by passing a + // canvas object instead of ImageData/HTMLImageElement. 
+ const gl = this.gl!; + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, confidenceMask); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, this.defaultTexture!); + gl.texImage2D( + gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, defaultImage); + + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, this.overlayTexture!); + gl.texImage2D( + gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, overlayImage); + } + + unbindTextures() { + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, null); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, null); + + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, null); + } + + override close(): void { + if (this.defaultTexture) { + this.gl!.deleteTexture(this.defaultTexture); + } + if (this.overlayTexture) { + this.gl!.deleteTexture(this.overlayTexture); + } + super.close(); + } +} diff --git a/mediapipe/tasks/web/vision/core/image.ts b/mediapipe/tasks/web/vision/core/image.ts index 570d32318..bb88c0ee1 100644 --- a/mediapipe/tasks/web/vision/core/image.ts +++ b/mediapipe/tasks/web/vision/core/image.ts @@ -198,10 +198,8 @@ export class MPImage { // Create a new texture and use it to back a framebuffer gl.activeTexture(gl.TEXTURE1); - destinationContainer = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + destinationContainer = shaderContext.createTexture(gl); gl.bindTexture(gl.TEXTURE_2D, destinationContainer); - this.configureTextureParams(); gl.texImage2D( gl.TEXTURE_2D, 0, gl.RGBA, this.width, this.height, 0, gl.RGBA, gl.UNSIGNED_BYTE, null); @@ -252,7 +250,7 @@ export class MPImage { } if (!this.gl) { this.gl = assertNotNull( - this.canvas.getContext('webgl2') as WebGL2RenderingContext | null, + this.canvas.getContext('webgl2'), 'You cannot use a canvas that is already bound to a different ' + 'type of rendering context.'); } @@ -317,20 +315,6 @@ export class MPImage { return webGLTexture; } - /** Sets texture params for the currently bound texture. */ - private configureTextureParams() { - const gl = this.getGL(); - // `gl.LINEAR` might break rendering for some textures, but it allows us to - // do smooth resizing. Ideally, this would be user-configurable, but for now - // we hard-code the value here to `gl.LINEAR` (versus `gl.NEAREST` for - // `MPMask` where we do not want to interpolate mask values, especially for - // category masks). - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR); - } - /** * Binds the backing texture to the canvas. If the texture does not yet * exist, creates it first. 
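Stepping back from the individual hunks, the pieces introduced above combine roughly as follows in application code. This is a hedged sketch, not part of the patch: the wasm path, model file name, and the assumption that `DEFAULT_CATEGORY_TO_COLOR_MAP` is re-exported from the `@mediapipe/tasks-vision` entry point are illustrative only.

import {DEFAULT_CATEGORY_TO_COLOR_MAP, DrawingUtils, FilesetResolver, ImageSegmenter} from '@mediapipe/tasks-vision';

// Hypothetical wiring; paths and option values are placeholders.
async function renderCategoryMask(video: HTMLVideoElement, canvasGl: HTMLCanvasElement) {
  const fileset = await FilesetResolver.forVisionTasks('/wasm');
  const segmenter = await ImageSegmenter.createFromOptions(fileset, {
    baseOptions: {modelAssetPath: 'selfie_segmenter.tflite', delegate: 'GPU'},
    canvas: canvasGl,  // DrawingUtils below must use this canvas's WebGL2 context.
    outputCategoryMask: true,
    runningMode: 'VIDEO',
  });
  const drawingUtils = new DrawingUtils(canvasGl.getContext('webgl2')!);

  segmenter.segmentForVideo(video, performance.now(), result => {
    // Category masks are created with interpolateValues=false, so category
    // indices are never blended when the mask is scaled on the GPU.
    drawingUtils.drawCategoryMask(result.categoryMask!, DEFAULT_CATEGORY_TO_COLOR_MAP, video);
  });
}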
@@ -343,16 +327,13 @@ export class MPImage { let webGLTexture = this.getContainer(MPImageType.WEBGL_TEXTURE); if (!webGLTexture) { - webGLTexture = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + const shaderContext = this.getShaderContext(); + webGLTexture = shaderContext.createTexture(gl); this.containers.push(webGLTexture); this.ownsWebGLTexture = true; - - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); - this.configureTextureParams(); - } else { - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); } + + gl.bindTexture(gl.TEXTURE_2D, webGLTexture); return webGLTexture; } diff --git a/mediapipe/tasks/web/vision/core/image_shader_context.ts b/mediapipe/tasks/web/vision/core/image_shader_context.ts index eb17d001a..3dec9da95 100644 --- a/mediapipe/tasks/web/vision/core/image_shader_context.ts +++ b/mediapipe/tasks/web/vision/core/image_shader_context.ts @@ -27,9 +27,9 @@ const FRAGMENT_SHADER = ` precision mediump float; varying vec2 vTex; uniform sampler2D inputTexture; - void main() { - gl_FragColor = texture2D(inputTexture, vTex); - } + void main() { + gl_FragColor = texture2D(inputTexture, vTex); + } `; /** Helper to assert that `value` is not null. */ @@ -73,9 +73,9 @@ class MPImageShaderBuffers { * For internal use only. */ export class MPImageShaderContext { - private gl?: WebGL2RenderingContext; + protected gl?: WebGL2RenderingContext; private framebuffer?: WebGLFramebuffer; - private program?: WebGLProgram; + protected program?: WebGLProgram; private vertexShader?: WebGLShader; private fragmentShader?: WebGLShader; private aVertex?: GLint; @@ -94,6 +94,14 @@ export class MPImageShaderContext { */ private shaderBuffersFlipVertically?: MPImageShaderBuffers; + protected getFragmentShader(): string { + return FRAGMENT_SHADER; + } + + protected getVertexShader(): string { + return VERTEX_SHADER; + } + private compileShader(source: string, type: number): WebGLShader { const gl = this.gl!; const shader = @@ -108,14 +116,15 @@ export class MPImageShaderContext { return shader; } - private setupShaders(): void { + protected setupShaders(): void { const gl = this.gl!; this.program = assertNotNull(gl.createProgram()!, 'Failed to create WebGL program'); - this.vertexShader = this.compileShader(VERTEX_SHADER, gl.VERTEX_SHADER); + this.vertexShader = + this.compileShader(this.getVertexShader(), gl.VERTEX_SHADER); this.fragmentShader = - this.compileShader(FRAGMENT_SHADER, gl.FRAGMENT_SHADER); + this.compileShader(this.getFragmentShader(), gl.FRAGMENT_SHADER); gl.linkProgram(this.program); const linked = gl.getProgramParameter(this.program, gl.LINK_STATUS); @@ -128,6 +137,10 @@ export class MPImageShaderContext { this.aTex = gl.getAttribLocation(this.program, 'aTex'); } + protected setupTextures(): void {} + + protected configureUniforms(): void {} + private createBuffers(flipVertically: boolean): MPImageShaderBuffers { const gl = this.gl!; const vertexArrayObject = @@ -193,17 +206,44 @@ export class MPImageShaderContext { if (!this.program) { this.setupShaders(); + this.setupTextures(); } const shaderBuffers = this.getShaderBuffers(flipVertically); gl.useProgram(this.program!); shaderBuffers.bind(); + this.configureUniforms(); const result = callback(); shaderBuffers.unbind(); return result; } + /** + * Creates and configures a texture. + * + * @param gl The rendering context. + * @param filter The setting to use for `gl.TEXTURE_MIN_FILTER` and + * `gl.TEXTURE_MAG_FILTER`. Defaults to `gl.LINEAR`. 
+ * @param wrapping The setting to use for `gl.TEXTURE_WRAP_S` and + * `gl.TEXTURE_WRAP_T`. Defaults to `gl.CLAMP_TO_EDGE`. + */ + createTexture(gl: WebGL2RenderingContext, filter?: GLenum, wrapping?: GLenum): + WebGLTexture { + this.maybeInitGL(gl); + const texture = + assertNotNull(gl.createTexture(), 'Failed to create texture'); + gl.bindTexture(gl.TEXTURE_2D, texture); + gl.texParameteri( + gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, wrapping ?? gl.CLAMP_TO_EDGE); + gl.texParameteri( + gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, wrapping ?? gl.CLAMP_TO_EDGE); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, filter ?? gl.LINEAR); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, filter ?? gl.LINEAR); + gl.bindTexture(gl.TEXTURE_2D, null); + return texture; + } + /** * Binds a framebuffer to the canvas. If the framebuffer does not yet exist, * creates it first. Binds the provided texture to the framebuffer. diff --git a/mediapipe/tasks/web/vision/core/mask.test.ts b/mediapipe/tasks/web/vision/core/mask.test.ts index d2f5ddb09..29ed5ea02 100644 --- a/mediapipe/tasks/web/vision/core/mask.test.ts +++ b/mediapipe/tasks/web/vision/core/mask.test.ts @@ -136,7 +136,7 @@ class MPMaskTestContext { shaderContext: MPImageShaderContext, input: MaskType, width: number, height: number): MPMask { return new MPMask( - [input], + [input], /* interpolateValues= */ false, /* ownsWebGLTexture= */ false, context.canvas, shaderContext, width, height); } @@ -182,7 +182,7 @@ class MPMaskTestContext { const shaderContext = new MPImageShaderContext(); const mask = new MPMask( - [context.webGLTexture], + [context.webGLTexture], /* interpolateValues= */ false, /* ownsWebGLTexture= */ false, context.canvas, shaderContext, WIDTH, HEIGHT); @@ -196,7 +196,7 @@ class MPMaskTestContext { const shaderContext = new MPImageShaderContext(); const mask = new MPMask( - [context.webGLTexture], + [context.webGLTexture], /* interpolateValues= */ false, /* ownsWebGLTexture= */ false, context.canvas, shaderContext, WIDTH, HEIGHT); diff --git a/mediapipe/tasks/web/vision/core/mask.ts b/mediapipe/tasks/web/vision/core/mask.ts index 6ef852508..d08145a2d 100644 --- a/mediapipe/tasks/web/vision/core/mask.ts +++ b/mediapipe/tasks/web/vision/core/mask.ts @@ -62,9 +62,25 @@ export class MPMask { /** The format used to write pixel values from textures. */ private static texImage2DFormat?: GLenum; - /** @hideconstructor */ + /** + * @param containers The data source for this mask as a `WebGLTexture`, + * `Unit8Array` or `Float32Array`. Multiple sources of the same data can + * be provided to reduce conversions. + * @param interpolateValues If enabled, uses `gl.LINEAR` instead of + * `gl.NEAREST` to interpolate between mask values. + * @param ownsWebGLTexture Whether the MPMask should take ownership of the + * `WebGLTexture` and free it when closed. + * @param canvas The canvas to use for rendering and conversion. Must be the + * same canvas for any WebGL resources. + * @param shaderContext A shader context that is shared between all masks from + * a single task. + * @param width The width of the mask. + * @param height The height of the mask. + * @hideconstructor + */ constructor( private readonly containers: MPMaskContainer[], + readonly interpolateValues: boolean, private ownsWebGLTexture: boolean, /** Returns the canvas element that the mask is bound to. 
*/ readonly canvas: HTMLCanvasElement|OffscreenCanvas|undefined, @@ -215,10 +231,9 @@ export class MPMask { // Create a new texture and use it to back a framebuffer gl.activeTexture(gl.TEXTURE1); - destinationContainer = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + destinationContainer = shaderContext.createTexture( + gl, this.interpolateValues ? gl.LINEAR : gl.NEAREST); gl.bindTexture(gl.TEXTURE_2D, destinationContainer); - this.configureTextureParams(); const format = this.getTexImage2DFormat(); gl.texImage2D( gl.TEXTURE_2D, 0, format, this.width, this.height, 0, gl.RED, @@ -244,8 +259,8 @@ export class MPMask { } return new MPMask( - destinationContainers, this.hasWebGLTexture(), this.canvas, - this.shaderContext, this.width, this.height); + destinationContainers, this.interpolateValues, this.hasWebGLTexture(), + this.canvas, this.shaderContext, this.width, this.height); } private getGL(): WebGL2RenderingContext { @@ -256,7 +271,7 @@ export class MPMask { } if (!this.gl) { this.gl = assertNotNull( - this.canvas.getContext('webgl2') as WebGL2RenderingContext | null, + this.canvas.getContext('webgl2'), 'You cannot use a canvas that is already bound to a different ' + 'type of rendering context.'); } @@ -339,19 +354,6 @@ export class MPMask { return webGLTexture; } - /** Sets texture params for the currently bound texture. */ - private configureTextureParams() { - const gl = this.getGL(); - // `gl.NEAREST` ensures that we do not get interpolated values for - // masks. In some cases, the user might want interpolation (e.g. for - // confidence masks), so we might want to make this user-configurable. - // Note that `MPImage` uses `gl.LINEAR`. - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST); - } - /** * Binds the backing texture to the canvas. If the texture does not yet * exist, creates it first. @@ -364,17 +366,14 @@ export class MPMask { let webGLTexture = this.getContainer(MPMaskType.WEBGL_TEXTURE); if (!webGLTexture) { - webGLTexture = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + const shaderContext = this.getShaderContext(); + webGLTexture = shaderContext.createTexture( + gl, this.interpolateValues ? gl.LINEAR : gl.NEAREST); this.containers.push(webGLTexture); this.ownsWebGLTexture = true; - - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); - this.configureTextureParams(); - } else { - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); } + gl.bindTexture(gl.TEXTURE_2D, webGLTexture); return webGLTexture; } diff --git a/mediapipe/tasks/web/vision/core/render_utils.ts b/mediapipe/tasks/web/vision/core/render_utils.ts deleted file mode 100644 index ebb3be16a..000000000 --- a/mediapipe/tasks/web/vision/core/render_utils.ts +++ /dev/null @@ -1,69 +0,0 @@ -/** @fileoverview Utility functions used in the vision demos. */ - -/** - * Copyright 2023 The MediaPipe Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Pre-baked color table for a maximum of 12 classes. -const CM_ALPHA = 128; -const COLOR_MAP: Array<[number, number, number, number]> = [ - [0, 0, 0, CM_ALPHA], // class 0 is BG = transparent - [255, 0, 0, CM_ALPHA], // class 1 is red - [0, 255, 0, CM_ALPHA], // class 2 is light green - [0, 0, 255, CM_ALPHA], // class 3 is blue - [255, 255, 0, CM_ALPHA], // class 4 is yellow - [255, 0, 255, CM_ALPHA], // class 5 is light purple / magenta - [0, 255, 255, CM_ALPHA], // class 6 is light blue / aqua - [128, 128, 128, CM_ALPHA], // class 7 is gray - [255, 128, 0, CM_ALPHA], // class 8 is orange - [128, 0, 255, CM_ALPHA], // class 9 is dark purple - [0, 128, 0, CM_ALPHA], // class 10 is dark green - [255, 255, 255, CM_ALPHA] // class 11 is white; could do black instead? -]; - - -/** Helper function to draw a confidence mask */ -export function drawConfidenceMask( - ctx: CanvasRenderingContext2D, image: Float32Array, width: number, - height: number): void { - const uint8Array = new Uint8ClampedArray(width * height * 4); - for (let i = 0; i < image.length; i++) { - uint8Array[4 * i] = 128; - uint8Array[4 * i + 1] = 0; - uint8Array[4 * i + 2] = 0; - uint8Array[4 * i + 3] = image[i] * 255; - } - ctx.putImageData(new ImageData(uint8Array, width, height), 0, 0); -} - -/** - * Helper function to draw a category mask. For GPU, we only have F32Arrays - * for now. - */ -export function drawCategoryMask( - ctx: CanvasRenderingContext2D, image: Uint8Array|Float32Array, - width: number, height: number): void { - const rgbaArray = new Uint8ClampedArray(width * height * 4); - const isFloatArray = image instanceof Float32Array; - for (let i = 0; i < image.length; i++) { - const colorIndex = isFloatArray ? Math.round(image[i] * 255) : image[i]; - const color = COLOR_MAP[colorIndex % COLOR_MAP.length]; - rgbaArray[4 * i] = color[0]; - rgbaArray[4 * i + 1] = color[1]; - rgbaArray[4 * i + 2] = color[2]; - rgbaArray[4 * i + 3] = color[3]; - } - ctx.putImageData(new ImageData(rgbaArray, width, height), 0, 0); -} diff --git a/mediapipe/tasks/web/vision/core/vision_task_runner.ts b/mediapipe/tasks/web/vision/core/vision_task_runner.ts index b9aa5e352..292a37eec 100644 --- a/mediapipe/tasks/web/vision/core/vision_task_runner.ts +++ b/mediapipe/tasks/web/vision/core/vision_task_runner.ts @@ -274,8 +274,9 @@ export abstract class VisionTaskRunner extends TaskRunner { } /** Converts a WasmImage to an MPMask. */ - protected convertToMPMask(wasmImage: WasmImage, shouldCopyData: boolean): - MPMask { + protected convertToMPMask( + wasmImage: WasmImage, interpolateValues: boolean, + shouldCopyData: boolean): MPMask { const {data, width, height} = wasmImage; const pixels = width * height; @@ -291,7 +292,7 @@ export abstract class VisionTaskRunner extends TaskRunner { } const mask = new MPMask( - [container], + [container], interpolateValues, /* ownsWebGLTexture= */ false, this.graphRunner.wasmModule.canvas!, this.shaderContext, width, height); return shouldCopyData ? 
mask.clone() : mask; diff --git a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts index 7ad78aa67..49a8a4941 100644 --- a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts +++ b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts @@ -26,7 +26,7 @@ export declare interface GestureRecognizerResult { /** Hand landmarks of detected hands. */ landmarks: NormalizedLandmark[][]; - /** Hand landmarks in world coordniates of detected hands. */ + /** Hand landmarks in world coordinates of detected hands. */ worldLandmarks: Landmark[][]; /** Handedness of detected hands. */ diff --git a/mediapipe/tasks/web/vision/image_segmenter/image_segmenter.ts b/mediapipe/tasks/web/vision/image_segmenter/image_segmenter.ts index d8751b9e3..cbd20450b 100644 --- a/mediapipe/tasks/web/vision/image_segmenter/image_segmenter.ts +++ b/mediapipe/tasks/web/vision/image_segmenter/image_segmenter.ts @@ -424,7 +424,10 @@ export class ImageSegmenter extends VisionTaskRunner { CONFIDENCE_MASKS_STREAM, (masks, timestamp) => { this.confidenceMasks = masks.map( wasmImage => this.convertToMPMask( - wasmImage, /* shouldCopyData= */ !this.userCallback)); + wasmImage, + /* interpolateValues= */ true, + /* shouldCopyData= */ !this.userCallback, + )); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener( @@ -442,7 +445,10 @@ export class ImageSegmenter extends VisionTaskRunner { this.graphRunner.attachImageListener( CATEGORY_MASK_STREAM, (mask, timestamp) => { this.categoryMask = this.convertToMPMask( - mask, /* shouldCopyData= */ !this.userCallback); + mask, + /* interpolateValues= */ false, + /* shouldCopyData= */ !this.userCallback, + ); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener( diff --git a/mediapipe/tasks/web/vision/interactive_segmenter/interactive_segmenter.ts b/mediapipe/tasks/web/vision/interactive_segmenter/interactive_segmenter.ts index 887f55839..5a37b9ff0 100644 --- a/mediapipe/tasks/web/vision/interactive_segmenter/interactive_segmenter.ts +++ b/mediapipe/tasks/web/vision/interactive_segmenter/interactive_segmenter.ts @@ -341,7 +341,10 @@ export class InteractiveSegmenter extends VisionTaskRunner { CONFIDENCE_MASKS_STREAM, (masks, timestamp) => { this.confidenceMasks = masks.map( wasmImage => this.convertToMPMask( - wasmImage, /* shouldCopyData= */ !this.userCallback)); + wasmImage, + /* interpolateValues= */ true, + /* shouldCopyData= */ !this.userCallback, + )); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener( @@ -359,7 +362,8 @@ export class InteractiveSegmenter extends VisionTaskRunner { this.graphRunner.attachImageListener( CATEGORY_MASK_STREAM, (mask, timestamp) => { this.categoryMask = this.convertToMPMask( - mask, /* shouldCopyData= */ !this.userCallback); + mask, /* interpolateValues= */ false, + /* shouldCopyData= */ !this.userCallback); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener( diff --git a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts index 8f6531827..262966d72 100644 --- a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts +++ b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts @@ -470,7 +470,8 @@ export class PoseLandmarker extends VisionTaskRunner { SEGMENTATION_MASK_STREAM, 
(masks, timestamp) => { this.segmentationMasks = masks.map( wasmImage => this.convertToMPMask( - wasmImage, /* shouldCopyData= */ !this.userCallback)); + wasmImage, /* interpolateValues= */ true, + /* shouldCopyData= */ !this.userCallback)); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener( diff --git a/mediapipe/util/image_test_utils.cc b/mediapipe/util/image_test_utils.cc index 9e10f40c1..325b308f1 100644 --- a/mediapipe/util/image_test_utils.cc +++ b/mediapipe/util/image_test_utils.cc @@ -17,6 +17,34 @@ namespace mediapipe { +namespace { + +template <ImageFormat::Format Format, typename DataType> +ImageFrame CreateTestImageFrame(int width, int height, DataType max_value) { + ImageFrame image_frame(Format, width, height, + /*alignment_boundary=*/1); + const int num_channels = image_frame.NumberOfChannels(); + const float num_values = width * height * num_channels; + uint8_t* const data_ptr = + reinterpret_cast<uint8_t*>(image_frame.MutablePixelData()); + for (int y = 0; y < height; ++y) { + uint8_t* const row = data_ptr + image_frame.WidthStep() * y; + for (int x = 0; x < width; ++x) { + DataType* pixel = reinterpret_cast<DataType*>(row) + x * num_channels; + for (int c = 0; c < num_channels; ++c) { + // Fill pixel channel with a value in [0:max_value] range. + pixel[c] = + static_cast<DataType>(static_cast<float>(y * width * num_channels + + x * num_channels + c) / + num_values * max_value); + } + } + } + return image_frame; +} + +} // namespace + cv::Mat GetRgb(const std::string& path) { cv::Mat bgr = cv::imread(path); cv::Mat rgb; @@ -71,4 +99,14 @@ cv::Mat RgbaToBgr(cv::Mat rgba) { return bgra; } +ImageFrame CreateTestFloat32ImageFrame(int width, int height) { + return CreateTestImageFrame<ImageFormat::VEC32F1, float>(width, height, + /*max_value=*/1.0f); +} + +ImageFrame CreateTestGrey8ImageFrame(int width, int height) { + return CreateTestImageFrame<ImageFormat::GRAY8, uint8_t>(width, height, + /*max_value=*/255); +} + } // namespace mediapipe diff --git a/mediapipe/util/image_test_utils.h b/mediapipe/util/image_test_utils.h index 15a21c5b1..49943382f 100644 --- a/mediapipe/util/image_test_utils.h +++ b/mediapipe/util/image_test_utils.h @@ -4,6 +4,7 @@ #include #include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/packet.h" #include "mediapipe/framework/port/opencv_core_inc.h" @@ -30,6 +31,12 @@ Packet MakeImagePacket(cv::Mat input, int timestamp = 0); // Converts RGBA Mat to BGR. cv::Mat RgbaToBgr(cv::Mat rgba); +// Generates single-channel float32 ImageFrame with increasing [0,1] values. +ImageFrame CreateTestFloat32ImageFrame(int width, int height); + +// Generates single-channel uint8 ImageFrame with increasing [0,255] values.
+ImageFrame CreateTestGrey8ImageFrame(int width, int height); + } // namespace mediapipe #endif // MEDIAPIPE_UTIL_IMAGE_TEST_UTILS_H_ diff --git a/mediapipe/util/tflite/BUILD b/mediapipe/util/tflite/BUILD index 350ac230c..b34d0e080 100644 --- a/mediapipe/util/tflite/BUILD +++ b/mediapipe/util/tflite/BUILD @@ -129,7 +129,9 @@ cc_library_with_tflite( name = "tflite_model_loader", srcs = ["tflite_model_loader.cc"], hdrs = ["tflite_model_loader.h"], - tflite_deps = ["@org_tensorflow//tensorflow/lite:framework_stable"], + tflite_deps = [ + "@org_tensorflow//tensorflow/lite:framework_stable", + ], visibility = ["//visibility:public"], deps = [ "//mediapipe/framework/api2:packet", diff --git a/mediapipe/util/tflite/tflite_model_loader.h b/mediapipe/util/tflite/tflite_model_loader.h index 65bd9ba72..f1a021f4d 100644 --- a/mediapipe/util/tflite/tflite_model_loader.h +++ b/mediapipe/util/tflite/tflite_model_loader.h @@ -25,6 +25,7 @@ #include "tensorflow/lite/model.h" namespace mediapipe { + // Represents a TfLite model as a FlatBuffer. using TfLiteModelPtr = std::unique_ptr