diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD
index 55651107a..8204e5a47 100644
--- a/mediapipe/calculators/tensor/BUILD
+++ b/mediapipe/calculators/tensor/BUILD
@@ -277,6 +277,38 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "inference_calculator_onnx_tensorrt",
+    srcs = [
+        "inference_calculator_onnx_tensorrt.cc",
+    ],
+    copts = select({
+        # TODO: fix tensor.h not to require this, if possible
+        "//mediapipe:apple": [
+            "-x objective-c++",
+            "-fobjc-arc",  # enable reference-counting
+        ],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+    deps = [
+        ":inference_calculator_interface",
+        "@com_google_absl//absl/memory",
+        "@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
+        "@org_tensorflow//tensorflow/lite:framework_stable",
+        "@org_tensorflow//tensorflow/lite/c:c_api_types",
+        "@windows_onnxruntime//:onnxruntime",
+    ] + select({
+        "//conditions:default": [
+            "//mediapipe/util:cpu_util",
+        ],
+    }) + select({
+        "//conditions:default": [],
+        "//mediapipe:android": ["@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate"],
+    }),
+    alwayslink = 1,
+)
+
 cc_library(
     name = "inference_calculator_gl_if_compute_shader_available",
     visibility = ["//visibility:public"],
@@ -295,6 +327,8 @@ cc_library(
     deps = [
         ":inference_calculator_interface",
         ":inference_calculator_cpu",
+        ":inference_calculator_onnx_cuda",
+        ":inference_calculator_onnx_tensorrt",
     ] + select({
         "//conditions:default": [":inference_calculator_gl_if_compute_shader_available"],
         "//mediapipe:ios": [":inference_calculator_metal"],
diff --git a/mediapipe/calculators/tensor/inference_calculator.cc b/mediapipe/calculators/tensor/inference_calculator.cc
index 20e65cf44..e0a176c1c 100644
--- a/mediapipe/calculators/tensor/inference_calculator.cc
+++ b/mediapipe/calculators/tensor/inference_calculator.cc
@@ -37,6 +37,14 @@ public:
             subgraph_node);
     std::vector<absl::string_view> impls;
 
+    if ((options.has_delegate() && options.delegate().has_cuda())) {
+      impls.emplace_back("OnnxCUDA");
+    }
+
+    if ((options.has_delegate() && options.delegate().has_tensorrt())) {
+      impls.emplace_back("OnnxTensorRT");
+    }
+
     const bool should_use_gpu =
         !options.has_delegate() ||  // Use GPU delegate if not specified
         (options.has_delegate() && options.delegate().has_gpu());
@@ -58,7 +66,10 @@ public:
     impls.emplace_back("Cpu");
     for (const auto& suffix : impls) {
       const auto impl = absl::StrCat("InferenceCalculator", suffix);
-      if (!mediapipe::CalculatorBaseRegistry::IsRegistered(impl)) continue;
+      if (!mediapipe::CalculatorBaseRegistry::IsRegistered(impl)) {
+        LOG(INFO) << impl;
+        continue;
+      }
       CalculatorGraphConfig::Node impl_node = subgraph_node;
       impl_node.set_calculator(impl);
       return tool::MakeSingleNodeGraph(std::move(impl_node));
diff --git a/mediapipe/calculators/tensor/inference_calculator.h b/mediapipe/calculators/tensor/inference_calculator.h
index 126567de0..fff973057 100644
--- a/mediapipe/calculators/tensor/inference_calculator.h
+++ b/mediapipe/calculators/tensor/inference_calculator.h
@@ -149,6 +149,10 @@ struct InferenceCalculatorOnnxCUDA : public InferenceCalculator {
   static constexpr char kCalculatorName[] = "InferenceCalculatorOnnxCUDA";
 };
 
+struct InferenceCalculatorOnnxTensorRT : public InferenceCalculator {
+  static constexpr char kCalculatorName[] = "InferenceCalculatorOnnxTensorRT";
+};
+
 }  // namespace api2
 }  // namespace mediapipe
 
diff --git a/mediapipe/calculators/tensor/inference_calculator.proto b/mediapipe/calculators/tensor/inference_calculator.proto
index 7bb79b32e..8d6d426b2 100644
--- a/mediapipe/calculators/tensor/inference_calculator.proto
+++ b/mediapipe/calculators/tensor/inference_calculator.proto
@@ -126,11 +126,17 @@ message InferenceCalculatorOptions {
     optional int32 num_threads = 1 [default = -1];
   }
 
+  message OnnxCUDA {}
+
+  message OnnxTensorRT {}
+
   oneof delegate {
     TfLite tflite = 1;
     Gpu gpu = 2;
     Nnapi nnapi = 3;
     Xnnpack xnnpack = 4;
+    OnnxCUDA cuda = 5;
+    OnnxTensorRT tensorrt = 6;
   }
 }
 
@@ -163,4 +169,5 @@ message InferenceCalculatorOptions {
   // NOTE: use_gpu/use_nnapi are ignored if specified. (Delegate takes
   // precedence over use_* deprecated options.)
   optional Delegate delegate = 5;
+  optional string landmark_path = 6;
 }
diff --git a/mediapipe/calculators/tensor/inference_calculator_onnx_cuda.cc b/mediapipe/calculators/tensor/inference_calculator_onnx_cuda.cc
index d9bcf384a..496b348f9 100644
--- a/mediapipe/calculators/tensor/inference_calculator_onnx_cuda.cc
+++ b/mediapipe/calculators/tensor/inference_calculator_onnx_cuda.cc
@@ -15,9 +15,6 @@
 #include "absl/memory/memory.h"
 #include "mediapipe/calculators/tensor/inference_calculator.h"
 #include "onnxruntime_cxx_api.h"
-#include "tensorflow/lite/c/c_api_types.h"
-#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
-#include "tensorflow/lite/interpreter_builder.h"
 #include <cstring>
 #include <memory>
 #include <string>
@@ -37,14 +34,12 @@ int64_t value_size_of(const std::vector<int64_t>& dims) {
 
 }  // namespace
 
-class InferenceCalculatorOnnxCUDAImpl
-    : public NodeImpl<InferenceCalculatorOnnxCUDA, InferenceCalculatorOnnxCUDAImpl> {
+class InferenceCalculatorOnnxCUDAImpl : public NodeImpl<InferenceCalculatorOnnxCUDA, InferenceCalculatorOnnxCUDAImpl> {
 public:
   static absl::Status UpdateContract(CalculatorContract* cc);
 
   absl::Status Open(CalculatorContext* cc) override;
   absl::Status Process(CalculatorContext* cc) override;
-  absl::Status Close(CalculatorContext* cc) override;
 
 private:
   absl::Status LoadModel(const std::string& path);
@@ -57,15 +52,14 @@ private:
   Ort::Env env_;
   std::unique_ptr<Ort::Session> session_;
   Ort::AllocatorWithDefaultOptions allocator;
   Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
   std::vector<const char*> m_input_names;
   std::vector<const char*> m_output_names;
 };
 
-absl::Status InferenceCalculatorOnnxCUDAImpl::UpdateContract(
-    CalculatorContract* cc) {
+absl::Status InferenceCalculatorOnnxCUDAImpl::UpdateContract(CalculatorContract* cc) {
   const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
   RET_CHECK(!options.model_path().empty() ^ kSideInModel(cc).IsConnected())
       << "Either model as side packet or model path in options is required.";
   return absl::OkStatus();
 }
 
-absl::Status InferenceCalculatorCpuImpl::LoadModel(const std::string& path) {
+absl::Status InferenceCalculatorOnnxCUDAImpl::LoadModel(const std::string& path) {
   auto model_path = std::wstring(path.begin(), path.end());
   Ort::SessionOptions session_options;
   OrtCUDAProviderOptions cuda_options;
@@ -91,9 +85,6 @@ absl::Status InferenceCalculatorOnnxCUDAImpl::Open(CalculatorContext* cc) {
   if (!options.model_path().empty()) {
     return LoadModel(options.model_path());
   }
-  if (!options.landmark_path().empty()) {
-    return LoadModel(options.landmark_path());
-  }
   return absl::Status(mediapipe::StatusCode::kNotFound,
                       "Must specify Onnx model path.");
 }
@@ -140,11 +131,5 @@ absl::Status InferenceCalculatorOnnxCUDAImpl::Process(CalculatorContext* cc) {
   return absl::OkStatus();
 }
 
-absl::Status InferenceCalculatorOnnxCUDAImpl::Close(CalculatorContext* cc) {
-  interpreter_ = nullptr;
-  delegate_ = nullptr;
-  return absl::OkStatus();
-}
-
 }  // namespace api2
-}  // namespace mediapipe
+}  // namespace mediapipe
\ No newline at end of file
diff --git a/mediapipe/calculators/tensor/inference_calculator_onnx_tensorrt.cc b/mediapipe/calculators/tensor/inference_calculator_onnx_tensorrt.cc
new file mode 100644
index 000000000..a9f9b0b12
--- /dev/null
+++ b/mediapipe/calculators/tensor/inference_calculator_onnx_tensorrt.cc
@@ -0,0 +1,142 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/memory/memory.h"
+#include "mediapipe/calculators/tensor/inference_calculator.h"
+#include "onnxruntime_cxx_api.h"
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace mediapipe {
+namespace api2 {
+
+namespace {
+
+int64_t value_size_of(const std::vector<int64_t>& dims) {
+  if (dims.empty()) return 0;
+  int64_t value_size = 1;
+  for (const auto& size : dims) value_size *= size;
+  return value_size;
+}
+
+}  // namespace
+
+class InferenceCalculatorOnnxTensorRTImpl : public NodeImpl<InferenceCalculatorOnnxTensorRT, InferenceCalculatorOnnxTensorRTImpl> {
+public:
+  static absl::Status UpdateContract(CalculatorContract* cc);
+
+  absl::Status Open(CalculatorContext* cc) override;
+  absl::Status Process(CalculatorContext* cc) override;
+
+private:
+  absl::Status LoadModel(const std::string& path);
+
+  Ort::Env env_;
+  std::unique_ptr<Ort::Session> session_;
+  Ort::AllocatorWithDefaultOptions allocator;
+  Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+  std::vector<const char*> m_input_names;
+  std::vector<const char*> m_output_names;
+};
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::UpdateContract(CalculatorContract* cc) {
+  const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
+  RET_CHECK(!options.model_path().empty() ^ kSideInModel(cc).IsConnected())
+      << "Either model as side packet or model path in options is required.";
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::LoadModel(const std::string& path) {
+  auto model_path = std::wstring(path.begin(), path.end());
+  Ort::SessionOptions session_options;
+  OrtTensorRTProviderOptions trt_options{};
+  trt_options.device_id = 0;
+  trt_options.trt_max_workspace_size = 1073741824;
+  trt_options.trt_max_partition_iterations = 1000;
+  trt_options.trt_min_subgraph_size = 1;
+  trt_options.trt_engine_cache_enable = 1;
+  trt_options.trt_engine_cache_path = "D:/code/mediapipe/mediapipe/modules/tensorrt/";
+  trt_options.trt_dump_subgraphs = 1;
+  session_options.AppendExecutionProvider_TensorRT(trt_options);
+  session_ = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_options);
+  size_t num_input_nodes = session_->GetInputCount();
+  size_t num_output_nodes = session_->GetOutputCount();
+  m_input_names.reserve(num_input_nodes);
+  m_output_names.reserve(num_output_nodes);
+  for (int i = 0; i < num_input_nodes; i++) {
+    char* input_name = session_->GetInputName(i, allocator);
+    m_input_names.push_back(input_name);
+  }
+  for (int i = 0; i < num_output_nodes; i++) {
+    char* output_name = session_->GetOutputName(i, allocator);
+    m_output_names.push_back(output_name);
+  }
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::Open(CalculatorContext* cc) {
+  const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
+  if (!options.model_path().empty()) {
+    return LoadModel(options.model_path());
+  }
+  return absl::Status(mediapipe::StatusCode::kNotFound,
+                      "Must specify Onnx model path.");
+}
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::Process(CalculatorContext* cc) {
+  if (kInTensors(cc).IsEmpty()) {
+    return absl::OkStatus();
+  }
+  const auto& input_tensors = *kInTensors(cc);
+  RET_CHECK(!input_tensors.empty());
+  auto input_tensor_type = int(input_tensors[0].element_type());
+  std::vector<Ort::Value> ort_input_tensors;
+  ort_input_tensors.reserve(input_tensors.size());
+  for (const auto& tensor : input_tensors) {
+    auto& inputDims = tensor.shape().dims;
+    std::vector<int64_t> src_dims{inputDims[0], inputDims[1], inputDims[2], inputDims[3]};
+    auto src_value_size = value_size_of(src_dims);
+    auto input_tensor_view = tensor.GetCpuReadView();
+    auto input_tensor_buffer = const_cast<float*>(input_tensor_view.buffer<float>());
+    auto tmp_tensor = Ort::Value::CreateTensor<float>(memory_info_handler, input_tensor_buffer, src_value_size, src_dims.data(), src_dims.size());
+    ort_input_tensors.emplace_back(std::move(tmp_tensor));
+  }
+  auto output_tensors = absl::make_unique<std::vector<Tensor>>();
+  std::vector<Ort::Value> onnx_output_tensors;
+  try {
+    onnx_output_tensors = session_->Run(
+        Ort::RunOptions{nullptr}, m_input_names.data(),
+        ort_input_tensors.data(), ort_input_tensors.size(), m_output_names.data(),
+        m_output_names.size());
+  } catch (Ort::Exception& e) {
+    LOG(ERROR) << "Run error msg:" << e.what();
+  }
+  for (const auto& tensor : onnx_output_tensors) {
+    auto info = tensor.GetTensorTypeAndShapeInfo();
+    auto dims = info.GetShape();
+    std::vector<int> tmp_dims;
+    for (const auto& i : dims) {
+      tmp_dims.push_back(i);
+    }
+    output_tensors->emplace_back(Tensor::ElementType::kFloat32, Tensor::Shape{tmp_dims});
+    auto cpu_view = output_tensors->back().GetCpuWriteView();
+    std::memcpy(cpu_view.buffer<float>(), tensor.GetTensorData<float>(), output_tensors->back().bytes());
+  }
+  kOutTensors(cc).Send(std::move(output_tensors));
+  return absl::OkStatus();
+}
+
+}  // namespace api2
+}  // namespace mediapipe
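Usage note (not part of the patch): a minimal, hypothetical graph-node sketch showing how the delegate options added to inference_calculator.proto would select the new ONNX execution providers. The calculator streams and the model path below are placeholders, not values taken from this change.

    node {
      calculator: "InferenceCalculator"
      input_stream: "TENSORS:input_tensors"
      output_stream: "TENSORS:output_tensors"
      options: {
        [mediapipe.InferenceCalculatorOptions.ext] {
          model_path: "path/to/model.onnx"  # placeholder ONNX model
          delegate { tensorrt {} }          # or: delegate { cuda {} }
        }
      }
    }

With delegate.tensorrt (or delegate.cuda) set, the selector in inference_calculator.cc pushes "OnnxTensorRT" (or "OnnxCUDA") onto the implementation list ahead of the GPU and CPU fallbacks, so the corresponding ONNX calculator is chosen as long as its cc_library target was linked into the binary.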