diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD
index 127280107..69d666092 100644
--- a/mediapipe/calculators/tensor/BUILD
+++ b/mediapipe/calculators/tensor/BUILD
@@ -53,14 +53,6 @@ mediapipe_proto_library(
 cc_library(
     name = "audio_to_tensor_calculator",
     srcs = ["audio_to_tensor_calculator.cc"],
-    copts = select({
-        # b/215212850
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":audio_to_tensor_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
@@ -161,14 +153,6 @@ mediapipe_proto_library(
 cc_library(
     name = "feedback_tensors_calculator",
     srcs = ["feedback_tensors_calculator.cc"],
-    copts = select({
-        # b/215212850
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":feedback_tensors_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
@@ -207,14 +191,6 @@ mediapipe_proto_library(
 cc_library(
     name = "bert_preprocessor_calculator",
     srcs = ["bert_preprocessor_calculator.cc"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":bert_preprocessor_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
@@ -267,14 +243,6 @@ mediapipe_proto_library(
 cc_library(
     name = "regex_preprocessor_calculator",
     srcs = ["regex_preprocessor_calculator.cc"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":regex_preprocessor_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
@@ -316,14 +284,6 @@ cc_test(
 cc_library(
     name = "text_to_tensor_calculator",
     srcs = ["text_to_tensor_calculator.cc"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         "//mediapipe/framework:calculator_context",
         "//mediapipe/framework:calculator_framework",
@@ -414,14 +374,6 @@ cc_library(
     name = "inference_calculator_interface",
     srcs = ["inference_calculator.cc"],
     hdrs = ["inference_calculator.h"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":inference_calculator_cc_proto",
         ":inference_calculator_options_lib",
@@ -495,6 +447,7 @@ cc_library(
     tags = ["ios"],
     deps = [
         "inference_calculator_interface",
+        "//mediapipe/framework/formats:tensor",
         "//mediapipe/gpu:MPPMetalHelper",
         "//mediapipe/gpu:MPPMetalUtil",
         "//mediapipe/gpu:gpu_buffer",
@@ -513,14 +466,6 @@ cc_library(
 cc_library(
     name = "inference_runner",
     hdrs = ["inference_runner.h"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         "//mediapipe/framework:calculator_context",
         "//mediapipe/framework/formats:tensor",
@@ -532,14 +477,6 @@ cc_library(
     name = "inference_interpreter_delegate_runner",
     srcs = ["inference_interpreter_delegate_runner.cc"],
     hdrs = ["inference_interpreter_delegate_runner.h"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":inference_runner",
         "//mediapipe/framework:mediapipe_profiling",
@@ -561,14 +498,6 @@ cc_library(
     srcs = [
         "inference_calculator_cpu.cc",
     ],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":inference_calculator_interface",
         ":inference_calculator_utils",
@@ -607,14 +536,6 @@ cc_library(
     srcs = [
         "inference_calculator_xnnpack.cc",
     ],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":inference_calculator_interface",
         ":inference_calculator_utils",
diff --git a/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc b/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc
index a8211d39b..354547042 100644
--- a/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc
+++ b/mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc
@@ -36,6 +36,10 @@
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 
+#if MEDIAPIPE_METAL_ENABLED
+#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
+#endif  // MEDIAPIPE_METAL_ENABLED
+
 namespace mediapipe {
 namespace {
@@ -376,7 +380,7 @@ class MetalProcessor : public ImageToTensorConverter {
       id<MTLCommandBuffer> command_buffer = [metal_helper_ commandBuffer];
       const auto& buffer_view =
-          output_tensor.GetMtlBufferWriteView(command_buffer);
+          MtlBufferView::GetWriteView(output_tensor, command_buffer);
       MP_RETURN_IF_ERROR(extractor_->Execute(
           texture, roi,
           /*flip_horizontaly=*/false, transform.scale, transform.offset,
diff --git a/mediapipe/calculators/tensor/inference_calculator_metal.cc b/mediapipe/calculators/tensor/inference_calculator_metal.cc
index 750f0456e..fba18a81c 100644
--- a/mediapipe/calculators/tensor/inference_calculator_metal.cc
+++ b/mediapipe/calculators/tensor/inference_calculator_metal.cc
@@ -24,6 +24,8 @@
 #include "absl/memory/memory.h"
 #include "absl/strings/str_format.h"
 #include "mediapipe/calculators/tensor/inference_calculator.h"
+#include "mediapipe/framework/formats/tensor.h"
+#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
 #import "mediapipe/gpu/MPPMetalHelper.h"
 #include "mediapipe/gpu/MPPMetalUtil.h"
 #include "mediapipe/gpu/gpu_buffer.h"
@@ -150,11 +152,12 @@ absl::Status InferenceCalculatorMetalImpl::Process(CalculatorContext* cc) {
   command_buffer.label = @"InferenceCalculator";
   // Explicit copy input with conversion float 32 bits to 16 bits.
   for (int i = 0; i < input_tensors.size(); ++i) {
-    auto input_view = input_tensors[i].GetMtlBufferReadView(command_buffer);
+    auto input_view =
+        MtlBufferView::GetReadView(input_tensors[i], command_buffer);
     // Reshape tensor.
     tflite::gpu::BHWC shape = BhwcFromTensorShape(input_tensors[i].shape());
     auto gpu_buffer_view =
-        gpu_buffers_in_[i]->GetMtlBufferWriteView(command_buffer);
+        MtlBufferView::GetWriteView(*gpu_buffers_in_[i], command_buffer);
     id<MTLComputeCommandEncoder> input_encoder =
         [command_buffer computeCommandEncoder];
     [converter_to_BPHWC4_ convertWithEncoder:input_encoder
@@ -174,9 +177,10 @@ absl::Status InferenceCalculatorMetalImpl::Process(CalculatorContext* cc) {
                                      output_shapes_[i]);
     // Reshape tensor.
     tflite::gpu::BHWC shape = BhwcFromTensorShape(output_shapes_[i]);
-    auto read_view = gpu_buffers_out_[i]->GetMtlBufferReadView(command_buffer);
+    auto read_view =
+        MtlBufferView::GetReadView(*gpu_buffers_out_[i], command_buffer);
     auto write_view =
-        output_tensors->at(i).GetMtlBufferWriteView(command_buffer);
+        MtlBufferView::GetWriteView(output_tensors->at(i), command_buffer);
     id<MTLComputeCommandEncoder> output_encoder =
         [command_buffer computeCommandEncoder];
     [converter_from_BPHWC4_ convertWithEncoder:output_encoder
@@ -258,7 +262,7 @@ absl::Status InferenceCalculatorMetalImpl::CreateConverters(
                          : Tensor::ElementType::kFloat32,
                      Tensor::Shape{dims}));
     auto buffer_view =
-        gpu_buffers_in_[i]->GetMtlBufferWriteView(gpu_helper_.mtlDevice);
+        MtlBufferView::GetWriteView(*gpu_buffers_in_[i], gpu_helper_.mtlDevice);
     RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
                      delegate_.get(), input_indices[i], buffer_view.buffer()),
                  true);
@@ -286,8 +290,8 @@ absl::Status InferenceCalculatorMetalImpl::CreateConverters(
                      Tensor::Shape{dims}));
     RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
                      delegate_.get(), output_indices[i],
-                     gpu_buffers_out_[i]
-                         ->GetMtlBufferWriteView(gpu_helper_.mtlDevice)
+                     MtlBufferView::GetWriteView(*gpu_buffers_out_[i],
+                                                 gpu_helper_.mtlDevice)
                          .buffer()),
                  true);
   }
diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.cc b/mediapipe/calculators/tensor/tensor_converter_calculator.cc
index 0b750b859..4b05488fd 100644
--- a/mediapipe/calculators/tensor/tensor_converter_calculator.cc
+++ b/mediapipe/calculators/tensor/tensor_converter_calculator.cc
@@ -31,6 +31,7 @@
 #import <CoreVideo/CoreVideo.h>
 #import <Metal/Metal.h>
 
+#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
 #import "mediapipe/gpu/MPPMetalHelper.h"
 #elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
 #include "mediapipe/gpu/gl_calculator_helper.h"
@@ -304,7 +305,7 @@ absl::Status TensorConverterCalculator::ProcessGPU(CalculatorContext* cc) {
       id<MTLTexture> src_texture = [gpu_helper_ metalTextureWithGpuBuffer:input];
       [compute_encoder setTexture:src_texture atIndex:0];
       auto output_view =
-          output_tensors->at(0).GetMtlBufferWriteView(command_buffer);
+          MtlBufferView::GetWriteView(output_tensors->at(0), command_buffer);
       [compute_encoder setBuffer:output_view.buffer() offset:0 atIndex:1];
       MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, kWorkgroupSize, 1);
       MTLSize threadgroups =
diff --git a/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc b/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
index 97ef01b4c..4bb3f0f57 100644
--- a/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
+++ b/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
@@ -41,6 +41,7 @@
 #import <CoreVideo/CoreVideo.h>
 #import <Metal/Metal.h>
 
+#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
 #import "mediapipe/gpu/MPPMetalHelper.h"
 #include "mediapipe/gpu/MPPMetalUtil.h"
 #endif  // MEDIAPIPE_METAL_ENABLED
@@ -536,10 +537,11 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
   if (input_tensors.size() == kNumInputTensorsWithAnchors) {
     RET_CHECK_EQ(input_tensors.size(), kNumInputTensorsWithAnchors);
     auto command_buffer = [gpu_helper_ commandBuffer];
-    auto src_buffer = input_tensors[tensor_mapping_.anchors_tensor_index()]
-                          .GetMtlBufferReadView(command_buffer);
+    auto src_buffer = MtlBufferView::GetReadView(
+        input_tensors[tensor_mapping_.anchors_tensor_index()],
+        command_buffer);
     auto dest_buffer =
-        raw_anchors_buffer_->GetMtlBufferWriteView(command_buffer);
+        MtlBufferView::GetWriteView(*raw_anchors_buffer_, command_buffer);
     id<MTLBlitCommandEncoder> blit_command =
         [command_buffer blitCommandEncoder];
     [blit_command copyFromBuffer:src_buffer.buffer()
@@ -571,15 +573,16 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
     [command_encoder setComputePipelineState:decode_program_];
     {
       auto scored_boxes_view =
-          scored_boxes_buffer_->GetMtlBufferWriteView(command_buffer);
+          MtlBufferView::GetWriteView(*scored_boxes_buffer_, command_buffer);
       auto decoded_boxes_view =
-          decoded_boxes_buffer_->GetMtlBufferWriteView(command_buffer);
+          MtlBufferView::GetWriteView(*decoded_boxes_buffer_, command_buffer);
       [command_encoder setBuffer:decoded_boxes_view.buffer() offset:0 atIndex:0];
-      auto input0_view = input_tensors[tensor_mapping_.detections_tensor_index()]
-                             .GetMtlBufferReadView(command_buffer);
+      auto input0_view = MtlBufferView::GetReadView(
+          input_tensors[tensor_mapping_.detections_tensor_index()],
+          command_buffer);
       [command_encoder setBuffer:input0_view.buffer() offset:0 atIndex:1];
       auto raw_anchors_view =
-          raw_anchors_buffer_->GetMtlBufferReadView(command_buffer);
+          MtlBufferView::GetReadView(*raw_anchors_buffer_, command_buffer);
       [command_encoder setBuffer:raw_anchors_view.buffer() offset:0 atIndex:2];
       MTLSize decode_threads_per_group = MTLSizeMake(1, 1, 1);
       MTLSize decode_threadgroups = MTLSizeMake(num_boxes_, 1, 1);
@@ -588,8 +591,8 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
       [command_encoder setComputePipelineState:score_program_];
       [command_encoder setBuffer:scored_boxes_view.buffer() offset:0 atIndex:0];
-      auto input1_view = input_tensors[tensor_mapping_.scores_tensor_index()]
-                             .GetMtlBufferReadView(command_buffer);
+      auto input1_view = MtlBufferView::GetReadView(
+          input_tensors[tensor_mapping_.scores_tensor_index()], command_buffer);
       [command_encoder setBuffer:input1_view.buffer() offset:0 atIndex:1];
       MTLSize score_threads_per_group = MTLSizeMake(1, num_classes_, 1);
       MTLSize score_threadgroups = MTLSizeMake(num_boxes_, 1, 1);
diff --git a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc
index 172f70880..839451ab7 100644
--- a/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc
+++ b/mediapipe/calculators/tensor/tensors_to_segmentation_calculator.cc
@@ -53,6 +53,7 @@
 #import <CoreVideo/CoreVideo.h>
 #import <Metal/Metal.h>
 
+#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
 #import "mediapipe/gpu/MPPMetalHelper.h"
 #include "mediapipe/gpu/MPPMetalUtil.h"
 #endif  // MEDIAPIPE_METAL_ENABLED
@@ -485,7 +486,8 @@ absl::Status TensorsToSegmentationCalculator::ProcessGpu(
       [command_buffer computeCommandEncoder];
   [command_encoder setComputePipelineState:mask_program_];
-  auto read_view = input_tensors[0].GetMtlBufferReadView(command_buffer);
+  auto read_view =
+      MtlBufferView::GetReadView(input_tensors[0], command_buffer);
   [command_encoder setBuffer:read_view.buffer() offset:0 atIndex:0];
 
   mediapipe::GpuBuffer small_mask_buffer = [metal_helper_
diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD
index 371f23ed1..10aa3fca0 100644
--- a/mediapipe/framework/formats/BUILD
+++ b/mediapipe/framework/formats/BUILD
@@ -431,7 +431,10 @@ cc_library(
     hdrs = [
         "tensor.h",
         "//mediapipe/framework/formats/tensor:internal.h",
-    ],
+    ] + select({
+        "//mediapipe:ios": ["tensor_mtl_buffer_view.h"],
+        "//conditions:default": [],
+    }),
     copts = select({
         "//mediapipe:apple": [
             "-x objective-c++",
diff --git a/mediapipe/framework/formats/tensor.cc b/mediapipe/framework/formats/tensor.cc
index 3f11d368a..1dbd8f8ac 100644
--- a/mediapipe/framework/formats/tensor.cc
+++ b/mediapipe/framework/formats/tensor.cc
@@ -25,8 +25,11 @@
 #endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
 
 #if MEDIAPIPE_METAL_ENABLED
+#import <Metal/Metal.h>
 #include <mach/mach_init.h>
 #include <mach/vm_map.h>
+
+#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
 #else
 #include <cstdlib>
 #endif  // MEDIAPIPE_METAL_ENABLED
@@ -61,6 +64,12 @@ int BhwcDepthFromShape(const Tensor::Shape& shape) {
 // 3) pad/"unpad" the bitmap after transfer CPU <-> GPU
 
 #if MEDIAPIPE_METAL_ENABLED
+// No ODR violation here because this file compiled just once per project.
+struct MtlResources {
+  id<MTLCommandBuffer> command_buffer = nil;
+  id<MTLDevice> device = nil;
+  id<MTLBuffer> metal_buffer = nil;
+};
 namespace {
 // MTLBuffer can use existing properly aligned and allocated CPU memory.
 size_t AlignToPageSize(size_t size) {
@@ -83,52 +92,56 @@ void DeallocateVirtualMemory(void* pointer, size_t size) {
 }
 }  // namespace
 
-Tensor::MtlBufferView Tensor::GetMtlBufferReadView(
-    id<MTLCommandBuffer> command_buffer) const {
-  LOG_IF(FATAL, valid_ == kValidNone)
+void MtlBufferView::AllocateMtlBuffer(const Tensor& tensor,
+                                      id<MTLDevice> device) {
+  tensor.mtl_resources_->device = device;
+  if (!tensor.cpu_buffer_) {
+    // It also means that the metal buffer is not allocated yet.
+    tensor.cpu_buffer_ = AllocateVirtualMemory(tensor.bytes());
+  }
+  if (!tensor.mtl_resources_->metal_buffer) {
+    tensor.mtl_resources_->metal_buffer = [tensor.mtl_resources_->device
+        newBufferWithBytesNoCopy:tensor.cpu_buffer_
+                          length:AlignToPageSize(tensor.bytes())
+                         options:MTLResourceStorageModeShared |
+                                 MTLResourceCPUCacheModeDefaultCache
+                     deallocator:^(void* pointer, NSUInteger length) {
+                       DeallocateVirtualMemory(pointer, length);
+                     }];
+  }
+}
+
+MtlBufferView MtlBufferView::GetReadView(const Tensor& tensor,
+                                         id<MTLCommandBuffer> command_buffer) {
+  LOG_IF(FATAL, tensor.valid_ == Tensor::kValidNone)
       << "Tensor must be written prior to read from.";
-  LOG_IF(FATAL, !(valid_ & (kValidCpu | kValidMetalBuffer)))
+  LOG_IF(FATAL,
+         !(tensor.valid_ & (Tensor::kValidCpu | Tensor::kValidMetalBuffer)))
       << "Tensor conversion between different GPU resources is not supported "
         "yet.";
-  auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
-  valid_ |= kValidMetalBuffer;
-  AllocateMtlBuffer([command_buffer device]);
-  return {metal_buffer_, std::move(lock)};
+  auto lock(absl::make_unique<absl::MutexLock>(&tensor.view_mutex_));
+  tensor.valid_ |= Tensor::kValidMetalBuffer;
+  AllocateMtlBuffer(tensor, [command_buffer device]);
+  return {tensor.mtl_resources_->metal_buffer, std::move(lock)};
 }
 
-Tensor::MtlBufferView Tensor::GetMtlBufferWriteView(
-    id<MTLCommandBuffer> command_buffer) const {
+MtlBufferView MtlBufferView::GetWriteView(const Tensor& tensor,
+                                          id<MTLCommandBuffer> command_buffer) {
   // Don't overwrite command buffer at which the metal buffer has been written
   // so we can wait until completed.
-  command_buffer_ = command_buffer;
-  return GetMtlBufferWriteView([command_buffer device]);
+  tensor.mtl_resources_->command_buffer = command_buffer;
+  return GetWriteView(tensor, [command_buffer device]);
 }
 
-Tensor::MtlBufferView Tensor::GetMtlBufferWriteView(
-    id<MTLDevice> device) const {
-  auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
-  valid_ = kValidMetalBuffer;
-  AllocateMtlBuffer(device);
-  return {metal_buffer_, std::move(lock)};
-}
-
-void Tensor::AllocateMtlBuffer(id<MTLDevice> device) const {
-  device_ = device;
-  if (!cpu_buffer_) {
-    // It also means that the metal buffer is not allocated yet.
-    cpu_buffer_ = AllocateVirtualMemory(bytes());
-  }
-  if (!metal_buffer_) {
-    metal_buffer_ =
-        [device_ newBufferWithBytesNoCopy:cpu_buffer_
-                                   length:AlignToPageSize(bytes())
-                                  options:MTLResourceStorageModeShared |
-                                          MTLResourceCPUCacheModeDefaultCache
-                              deallocator:^(void* pointer, NSUInteger length) {
-                                DeallocateVirtualMemory(pointer, length);
-                              }];
-  }
+MtlBufferView MtlBufferView::GetWriteView(const Tensor& tensor,
+                                          id<MTLDevice> device) {
+  auto lock(absl::make_unique<absl::MutexLock>(&tensor.view_mutex_));
+  tensor.valid_ = Tensor::kValidMetalBuffer;
+  AllocateMtlBuffer(tensor, device);
+  return {tensor.mtl_resources_->metal_buffer, std::move(lock)};
 }
+#else
+struct MtlResources {};
 #endif  // MEDIAPIPE_METAL_ENABLED
 
 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
@@ -379,6 +392,9 @@ Tensor& Tensor::operator=(Tensor&& src) {
   return *this;
 }
 
+Tensor::Tensor(Tensor&& src) { Move(&src); }
+Tensor::~Tensor() { Invalidate(); }
+
 void Tensor::Move(Tensor* src) {
   valid_ = src->valid_;
   src->valid_ = kValidNone;
@@ -388,15 +404,7 @@ void Tensor::Move(Tensor* src) {
   cpu_buffer_ = src->cpu_buffer_;
   src->cpu_buffer_ = nullptr;
   ahwb_tracking_key_ = src->ahwb_tracking_key_;
-#if MEDIAPIPE_METAL_ENABLED
-  device_ = src->device_;
-  src->device_ = nil;
-  command_buffer_ = src->command_buffer_;
-  src->command_buffer_ = nil;
-  metal_buffer_ = src->metal_buffer_;
-  src->metal_buffer_ = nil;
-#endif  // MEDIAPIPE_METAL_ENABLED
-
+  mtl_resources_ = std::move(src->mtl_resources_);
   MoveAhwbStuff(src);
 
 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
@@ -415,12 +423,15 @@ void Tensor::Move(Tensor* src) {
 }
 
 Tensor::Tensor(ElementType element_type, const Shape& shape)
-    : element_type_(element_type), shape_(shape) {}
+    : element_type_(element_type),
+      shape_(shape),
+      mtl_resources_(std::make_unique<MtlResources>()) {}
 
 Tensor::Tensor(ElementType element_type, const Shape& shape,
                const QuantizationParameters& quantization_parameters)
     : element_type_(element_type),
       shape_(shape),
-      quantization_parameters_(quantization_parameters) {}
+      quantization_parameters_(quantization_parameters),
+      mtl_resources_(std::make_unique<MtlResources>()) {}
 
 #if MEDIAPIPE_METAL_ENABLED
 void Tensor::Invalidate() {
@@ -432,13 +443,16 @@ void Tensor::Invalidate() {
   absl::MutexLock lock(&view_mutex_);
   // If memory is allocated and not owned by the metal buffer.
   // TODO: Re-design cpu buffer memory management.
-  if (cpu_buffer_ && !metal_buffer_) {
+  if (cpu_buffer_ && !mtl_resources_->metal_buffer) {
     DeallocateVirtualMemory(cpu_buffer_, AlignToPageSize(bytes()));
   }
-  metal_buffer_ = nil;
-  command_buffer_ = nil;
-  device_ = nil;
   cpu_buffer_ = nullptr;
+  // This becomes NULL if the tensor is moved.
+  if (mtl_resources_) {
+    mtl_resources_->metal_buffer = nil;
+    mtl_resources_->command_buffer = nil;
+    mtl_resources_->device = nil;
+  }
 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
   // Don't need to wait for the resource to be deleted bacause if will be
   // released on last reference deletion inside the OpenGL driver.
@@ -532,10 +546,11 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const {
   // GPU-to-CPU synchronization and read-back.
 #if MEDIAPIPE_METAL_ENABLED
   if (valid_ & kValidMetalBuffer) {
-    LOG_IF(FATAL, !command_buffer_) << "Metal -> CPU synchronization "
-                                       "requires MTLCommandBuffer to be set.";
-    if (command_buffer_) {
-      [command_buffer_ waitUntilCompleted];
+    LOG_IF(FATAL, !mtl_resources_->command_buffer)
+        << "Metal -> CPU synchronization "
+           "requires MTLCommandBuffer to be set.";
+    if (mtl_resources_->command_buffer) {
+      [mtl_resources_->command_buffer waitUntilCompleted];
     }
   }
 #endif  // MEDIAPIPE_METAL_ENABLED
diff --git a/mediapipe/framework/formats/tensor.h b/mediapipe/framework/formats/tensor.h
index fe0be31d1..1d670d805 100644
--- a/mediapipe/framework/formats/tensor.h
+++ b/mediapipe/framework/formats/tensor.h
@@ -29,9 +29,6 @@
 #include "mediapipe/framework/formats/tensor/internal.h"
 #include "mediapipe/framework/port.h"
 
-#if MEDIAPIPE_METAL_ENABLED
-#import <Metal/Metal.h>
-#endif  // MEDIAPIPE_METAL_ENABLED
 #ifndef MEDIAPIPE_NO_JNI
 #if __ANDROID_API__ >= 26 || defined(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)
 #define MEDIAPIPE_TENSOR_USE_AHWB 1
@@ -66,7 +63,6 @@
 #endif
 
 namespace mediapipe {
-
 // Tensor is a container of multi-dimensional data that supports sharing the
 // content across different backends and APIs, currently: CPU / Metal / OpenGL.
 // Texture2DView is limited to 4 dimensions.
@@ -91,6 +87,7 @@ namespace mediapipe {
 //   float* pointer = view.buffer<float>();
 //   ...reading the cpu memory...
 
+struct MtlResources;
 class Tensor {
   class View {
    public:
@@ -144,9 +141,9 @@ class Tensor {
   Tensor(const Tensor&) = delete;
   Tensor& operator=(const Tensor&) = delete;
   // Move-only.
-  Tensor(Tensor&& src) { Move(&src); }
+  Tensor(Tensor&& src);
   Tensor& operator=(Tensor&&);
-  ~Tensor() { Invalidate(); }
+  ~Tensor();
 
   template <typename T>
   class CpuView : public View {
@@ -182,33 +179,6 @@ class Tensor {
       uint64_t source_location_hash =
           tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
 
-#if MEDIAPIPE_METAL_ENABLED
-  // TODO: id<MTLBuffer> vs. MtlBufferView.
-  class MtlBufferView : public View {
-   public:
-    id<MTLBuffer> buffer() const { return buffer_; }
-    MtlBufferView(MtlBufferView&& src)
-        : View(std::move(src)), buffer_(src.buffer_) {
-      src.buffer_ = nil;
-    }
-
-   protected:
-    friend class Tensor;
-    MtlBufferView(id<MTLBuffer> buffer, std::unique_ptr<absl::MutexLock>&& lock)
-        : View(std::move(lock)), buffer_(buffer) {}
-    id<MTLBuffer> buffer_;
-  };
-  // The command buffer status is checked for completeness if GPU-to-CPU
-  // synchronization is required.
-  // TODO: Design const and non-const view acquiring.
-  MtlBufferView GetMtlBufferReadView(id<MTLCommandBuffer> command_buffer) const;
-  MtlBufferView GetMtlBufferWriteView(
-      id<MTLCommandBuffer> command_buffer) const;
-  // Allocate new buffer.
-  // TODO: GPU-to-CPU design considerations.
-  MtlBufferView GetMtlBufferWriteView(id<MTLDevice> device) const;
-#endif  // MEDIAPIPE_METAL_ENABLED
-
 #ifdef MEDIAPIPE_TENSOR_USE_AHWB
   using FinishingFunc = std::function<bool(bool)>;
   class AHardwareBufferView : public View {
@@ -372,6 +342,7 @@ class Tensor {
   }
 
  private:
+  friend class MtlBufferView;
   void Move(Tensor*);
   void Invalidate();
 
@@ -396,12 +367,9 @@ class Tensor {
   mutable void* cpu_buffer_ = nullptr;
   void AllocateCpuBuffer() const;
-#if MEDIAPIPE_METAL_ENABLED
-  mutable id<MTLCommandBuffer> command_buffer_ = nil;
-  mutable id<MTLDevice> device_ = nil;
-  mutable id<MTLBuffer> metal_buffer_ = nil;
-  void AllocateMtlBuffer(id<MTLDevice> device) const;
-#endif  // MEDIAPIPE_METAL_ENABLED
+  // Forward declaration of the MtlResources provides compile-time verification
+  // of ODR if this header includes any actual code that uses MtlResources.
+  mutable std::unique_ptr<MtlResources> mtl_resources_;
 
 #ifdef MEDIAPIPE_TENSOR_USE_AHWB
   mutable AHardwareBuffer* ahwb_ = nullptr;
diff --git a/mediapipe/framework/formats/tensor_mtl_buffer_view.h b/mediapipe/framework/formats/tensor_mtl_buffer_view.h
new file mode 100644
index 000000000..a61659d3d
--- /dev/null
+++ b/mediapipe/framework/formats/tensor_mtl_buffer_view.h
@@ -0,0 +1,61 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_TENSOR_MTL_BUFFER_VIEW_H_
+#define MEDIAPIPE_FRAMEWORK_FORMATS_TENSOR_MTL_BUFFER_VIEW_H_
+
+#import <Metal/Metal.h>
+
+#include <algorithm>
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <numeric>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/synchronization/mutex.h"
+#include "mediapipe/framework/formats/tensor.h"
+#include "mediapipe/framework/port.h"
+
+namespace mediapipe {
+class MtlBufferView : public Tensor::View {
+ public:
+  // The command buffer status is checked for completeness if GPU-to-CPU
+  // synchronization is required.
+  static MtlBufferView GetReadView(const Tensor& tensor,
+                                   id<MTLCommandBuffer> command_buffer);
+  static MtlBufferView GetWriteView(const Tensor& tensor,
+                                    id<MTLCommandBuffer> command_buffer);
+  static MtlBufferView GetWriteView(const Tensor& tensor, id<MTLDevice> device);
+
+  id<MTLBuffer> buffer() const { return buffer_; }
+  MtlBufferView(MtlBufferView&& src)
+      : Tensor::View(std::move(src)), buffer_(src.buffer_) {
+    src.buffer_ = nil;
+  }
+
+ protected:
+  friend class Tensor;
+  static void AllocateMtlBuffer(const Tensor& tensor, id<MTLDevice> device);
+  MtlBufferView(id<MTLBuffer> buffer, std::unique_ptr<absl::MutexLock>&& lock)
+      : Tensor::View(std::move(lock)), buffer_(buffer) {}
+  id<MTLBuffer> buffer_;
+};
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_FRAMEWORK_FORMATS_TENSOR_MTL_BUFFER_VIEW_H_
diff --git a/mediapipe/tasks/cc/components/calculators/BUILD b/mediapipe/tasks/cc/components/calculators/BUILD
index bf31134e4..16931811c 100644
--- a/mediapipe/tasks/cc/components/calculators/BUILD
+++ b/mediapipe/tasks/cc/components/calculators/BUILD
@@ -79,14 +79,6 @@ mediapipe_proto_library(
 cc_library(
     name = "score_calibration_calculator",
     srcs = ["score_calibration_calculator.cc"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         ":score_calibration_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
diff --git a/mediapipe/tasks/cc/components/processors/BUILD b/mediapipe/tasks/cc/components/processors/BUILD
index 517a27114..cec44a9e3 100644
--- a/mediapipe/tasks/cc/components/processors/BUILD
+++ b/mediapipe/tasks/cc/components/processors/BUILD
@@ -28,14 +28,6 @@ cc_library(
     name = "classification_postprocessing_graph",
     srcs = ["classification_postprocessing_graph.cc"],
     hdrs = ["classification_postprocessing_graph.h"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         "//mediapipe/calculators/core:split_vector_calculator",
         "//mediapipe/calculators/core:split_vector_calculator_cc_proto",
@@ -148,14 +140,6 @@ cc_library(
     name = "text_preprocessing_graph",
     srcs = ["text_preprocessing_graph.cc"],
     hdrs = ["text_preprocessing_graph.h"],
-    copts = select({
-        # TODO: fix tensor.h not to require this, if possible
-        "//mediapipe:apple": [
-            "-x objective-c++",
-            "-fobjc-arc",  # enable reference-counting
-        ],
-        "//conditions:default": [],
-    }),
     deps = [
         "//mediapipe/calculators/tensor:bert_preprocessor_calculator",
         "//mediapipe/calculators/tensor:bert_preprocessor_calculator_cc_proto",
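
Note: taken together, the patch moves the Metal-only view out of Tensor's member API (GetMtlBufferReadView / GetMtlBufferWriteView) and into the standalone MtlBufferView class declared in the new tensor_mtl_buffer_view.h, hiding the Metal handles behind a unique_ptr<MtlResources>. Since tensor.h no longer imports Metal headers, the per-target "-x objective-c++" / "-fobjc-arc" copts become unnecessary and are deleted. Below is a minimal call-site migration sketch, assuming a Metal-enabled build; the function and parameter names (EncodeIntoTensor, metal_helper) are illustrative and not part of the patch.

#import "mediapipe/gpu/MPPMetalHelper.h"

#include "absl/status/status.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"

// Hypothetical call site showing the before/after of this change.
absl::Status EncodeIntoTensor(MPPMetalHelper* metal_helper,
                              mediapipe::Tensor& output_tensor) {
  id<MTLCommandBuffer> command_buffer = [metal_helper commandBuffer];
  // Before: auto view = output_tensor.GetMtlBufferWriteView(command_buffer);
  // After: the tensor is passed explicitly to the standalone view class.
  auto view =
      mediapipe::MtlBufferView::GetWriteView(output_tensor, command_buffer);
  id<MTLBuffer> buffer = view.buffer();  // Bind to a compute encoder, etc.
  (void)buffer;
  [command_buffer commit];
  return absl::OkStatus();
}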