Add ONNX Runtime CUDA and TensorRT inference engines

parent 7fdc966271
commit a440427bb2
@@ -277,6 +277,38 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "inference_calculator_onnx_tensorrt",
+    srcs = [
+        "inference_calculator_onnx_tensorrt.cc",
+    ],
+    copts = select({
+        # TODO: fix tensor.h not to require this, if possible
+        "//mediapipe:apple": [
+            "-x objective-c++",
+            "-fobjc-arc",  # enable reference-counting
+        ],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+    deps = [
+        ":inference_calculator_interface",
+        "@com_google_absl//absl/memory",
+        "@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
+        "@org_tensorflow//tensorflow/lite:framework_stable",
+        "@org_tensorflow//tensorflow/lite/c:c_api_types",
+        "@windows_onnxruntime//:onnxruntime",
+    ] + select({
+        "//conditions:default": [
+            "//mediapipe/util:cpu_util",
+        ],
+    }) + select({
+        "//conditions:default": [],
+        "//mediapipe:android": ["@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate"],
+    }),
+    alwayslink = 1,
+)
+
 cc_library(
     name = "inference_calculator_gl_if_compute_shader_available",
     visibility = ["//visibility:public"],
@@ -295,6 +327,8 @@ cc_library(
     deps = [
         ":inference_calculator_interface",
         ":inference_calculator_cpu",
+        ":inference_calculator_onnx_cuda",
+        ":inference_calculator_onnx_tensorrt",
     ] + select({
         "//conditions:default": [":inference_calculator_gl_if_compute_shader_available"],
         "//mediapipe:ios": [":inference_calculator_metal"],
@@ -37,6 +37,14 @@ public:
             subgraph_node);
     std::vector<absl::string_view> impls;
 
+    if ((options.has_delegate() && options.delegate().has_cuda())) {
+      impls.emplace_back("OnnxCUDA");
+    }
+
+    if ((options.has_delegate() && options.delegate().has_tensorrt())) {
+      impls.emplace_back("OnnxTensorRT");
+    }
+
     const bool should_use_gpu =
         !options.has_delegate() ||  // Use GPU delegate if not specified
         (options.has_delegate() && options.delegate().has_gpu());
@@ -58,7 +66,10 @@ public:
     impls.emplace_back("Cpu");
     for (const auto& suffix : impls) {
       const auto impl = absl::StrCat("InferenceCalculator", suffix);
-      if (!mediapipe::CalculatorBaseRegistry::IsRegistered(impl)) continue;
+      if (!mediapipe::CalculatorBaseRegistry::IsRegistered(impl)) {
+        LOG(INFO) << impl;
+        continue;
+      }
       CalculatorGraphConfig::Node impl_node = subgraph_node;
       impl_node.set_calculator(impl);
       return tool::MakeSingleNodeGraph(std::move(impl_node));
@@ -149,6 +149,10 @@ struct InferenceCalculatorOnnxCUDA : public InferenceCalculator {
   static constexpr char kCalculatorName[] = "InferenceCalculatorOnnxCUDA";
 };
 
+struct InferenceCalculatorOnnxTensorRT : public InferenceCalculator {
+  static constexpr char kCalculatorName[] = "InferenceCalculatorOnnxTensorRT";
+};
+
 }  // namespace api2
 }  // namespace mediapipe
 
@@ -126,11 +126,17 @@ message InferenceCalculatorOptions {
       optional int32 num_threads = 1 [default = -1];
     }
 
+    message OnnxCUDA {}
+
+    message OnnxTensorRT {}
+
     oneof delegate {
       TfLite tflite = 1;
       Gpu gpu = 2;
       Nnapi nnapi = 3;
       Xnnpack xnnpack = 4;
+      OnnxCUDA cuda = 5;
+      OnnxTensorRT tensorrt = 6;
     }
   }
 
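The two new members of the `delegate` oneof are what the selector logic above keys on via `options.delegate().has_cuda()` and `options.delegate().has_tensorrt()`. Below is a minimal C++ sketch of requesting the TensorRT provider through the generated options API; the `.pb.h` include path is the conventional protoc output and the helper name `MakeTensorRtOptions` is illustrative, not part of this commit. The `.pbtxt` equivalent is `delegate { tensorrt {} }` inside the calculator options.

#include <string>

#include "mediapipe/calculators/tensor/inference_calculator.pb.h"  // assumed generated header path

// Builds InferenceCalculatorOptions that select the ONNX Runtime TensorRT provider.
// Setting one member of the `delegate` oneof clears any previously set member.
mediapipe::InferenceCalculatorOptions MakeTensorRtOptions(const std::string& model_path) {
  mediapipe::InferenceCalculatorOptions options;
  options.set_model_path(model_path);              // path to the .onnx model
  options.mutable_delegate()->mutable_tensorrt();  // same effect as `delegate { tensorrt {} }`
  return options;
}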
@@ -163,4 +169,5 @@ message InferenceCalculatorOptions {
   // NOTE: use_gpu/use_nnapi are ignored if specified. (Delegate takes
   // precedence over use_* deprecated options.)
   optional Delegate delegate = 5;
+  optional string landmark_path = 6;
 }
@@ -15,9 +15,6 @@
 #include "absl/memory/memory.h"
 #include "mediapipe/calculators/tensor/inference_calculator.h"
 #include "onnxruntime_cxx_api.h"
-#include "tensorflow/lite/c/c_api_types.h"
-#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
-#include "tensorflow/lite/interpreter_builder.h"
 #include <cstring>
 #include <memory>
 #include <string>
@@ -37,14 +34,12 @@ int64_t value_size_of(const std::vector<int64_t>& dims) {
 
 }  // namespace
 
-class InferenceCalculatorOnnxCUDAImpl
-    : public NodeImpl<InferenceCalculatorOnnxCUDA, InferenceCalculatorOnnxCUDAImpl> {
+class InferenceCalculatorOnnxCUDAImpl : public NodeImpl<InferenceCalculatorOnnxCUDA, InferenceCalculatorOnnxCUDAImpl> {
 public:
   static absl::Status UpdateContract(CalculatorContract* cc);
 
   absl::Status Open(CalculatorContext* cc) override;
   absl::Status Process(CalculatorContext* cc) override;
-  absl::Status Close(CalculatorContext* cc) override;
 
 private:
   absl::Status LoadModel(const std::string& path);
@@ -57,15 +52,14 @@ private:
   std::vector<const char*> m_output_names;
 };
 
-absl::Status InferenceCalculatorOnnxCUDAImpl::UpdateContract(
-    CalculatorContract* cc) {
+absl::Status InferenceCalculatorOnnxCUDAImpl::UpdateContract(CalculatorContract* cc) {
   const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
   RET_CHECK(!options.model_path().empty() ^ kSideInModel(cc).IsConnected())
       << "Either model as side packet or model path in options is required.";
   return absl::OkStatus();
 }
 
-absl::Status InferenceCalculatorCpuImpl::LoadModel(const std::string& path) {
+absl::Status InferenceCalculatorOnnxCUDAImpl::LoadModel(const std::string& path) {
   auto model_path = std::wstring(path.begin(), path.end());
   Ort::SessionOptions session_options;
   OrtCUDAProviderOptions cuda_options;
@@ -91,9 +85,6 @@ absl::Status InferenceCalculatorOnnxCUDAImpl::Open(CalculatorContext* cc) {
   if (!options.model_path().empty()) {
     return LoadModel(options.model_path());
   }
-  if (!options.landmark_path().empty()) {
-    return LoadModel(options.landmark_path());
-  }
   return absl::Status(mediapipe::StatusCode::kNotFound, "Must specify Onnx model path.");
 }
 
@@ -140,11 +131,5 @@ absl::Status InferenceCalculatorOnnxCUDAImpl::Process(CalculatorContext* cc) {
   return absl::OkStatus();
 }
 
-absl::Status InferenceCalculatorOnnxCUDAImpl::Close(CalculatorContext* cc) {
-  interpreter_ = nullptr;
-  delegate_ = nullptr;
-  return absl::OkStatus();
-}
-
 }  // namespace api2
 }  // namespace mediapipe
@@ -0,0 +1,142 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/memory/memory.h"
+#include "mediapipe/calculators/tensor/inference_calculator.h"
+#include "onnxruntime_cxx_api.h"
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace mediapipe {
+namespace api2 {
+
+namespace {
+
+int64_t value_size_of(const std::vector<int64_t>& dims) {
+  if (dims.empty()) return 0;
+  int64_t value_size = 1;
+  for (const auto& size : dims) value_size *= size;
+  return value_size;
+}
+
+}  // namespace
+
+class InferenceCalculatorOnnxTensorRTImpl : public NodeImpl<InferenceCalculatorOnnxTensorRT, InferenceCalculatorOnnxTensorRTImpl> {
+public:
+  static absl::Status UpdateContract(CalculatorContract* cc);
+
+  absl::Status Open(CalculatorContext* cc) override;
+  absl::Status Process(CalculatorContext* cc) override;
+
+private:
+  absl::Status LoadModel(const std::string& path);
+
+  Ort::Env env_;
+  std::unique_ptr<Ort::Session> session_;
+  Ort::AllocatorWithDefaultOptions allocator;
+  Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+  std::vector<const char*> m_input_names;
+  std::vector<const char*> m_output_names;
+};
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::UpdateContract(CalculatorContract* cc) {
+  const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
+  RET_CHECK(!options.model_path().empty() ^ kSideInModel(cc).IsConnected())
+      << "Either model as side packet or model path in options is required.";
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::LoadModel(const std::string& path) {
+  auto model_path = std::wstring(path.begin(), path.end());
+  Ort::SessionOptions session_options;
+  OrtTensorRTProviderOptions trt_options{};
+  trt_options.device_id = 0;
+  trt_options.trt_max_workspace_size = 1073741824;
+  trt_options.trt_max_partition_iterations = 1000;
+  trt_options.trt_min_subgraph_size = 1;
+  trt_options.trt_engine_cache_enable = 1;
+  trt_options.trt_engine_cache_path = "D:/code/mediapipe/mediapipe/modules/tensorrt/";
+  trt_options.trt_dump_subgraphs = 1;
+  session_options.AppendExecutionProvider_TensorRT(trt_options);
+  session_ = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_options);
+  size_t num_input_nodes = session_->GetInputCount();
+  size_t num_output_nodes = session_->GetOutputCount();
+  m_input_names.reserve(num_input_nodes);
+  m_output_names.reserve(num_output_nodes);
+  for (int i = 0; i < num_input_nodes; i++) {
+    char* input_name = session_->GetInputName(i, allocator);
+    m_input_names.push_back(input_name);
+  }
+  for (int i = 0; i < num_output_nodes; i++) {
+    char* output_name = session_->GetOutputName(i, allocator);
+    m_output_names.push_back(output_name);
+  }
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::Open(CalculatorContext* cc) {
+  const auto& options = cc->Options<mediapipe::InferenceCalculatorOptions>();
+  if (!options.model_path().empty()) {
+    return LoadModel(options.model_path());
+  }
+  return absl::Status(mediapipe::StatusCode::kNotFound, "Must specify Onnx model path.");
+}
+
+absl::Status InferenceCalculatorOnnxTensorRTImpl::Process(CalculatorContext* cc) {
+  if (kInTensors(cc).IsEmpty()) {
+    return absl::OkStatus();
+  }
+  const auto& input_tensors = *kInTensors(cc);
+  RET_CHECK(!input_tensors.empty());
+  auto input_tensor_type = int(input_tensors[0].element_type());
+  std::vector<Ort::Value> ort_input_tensors;
+  ort_input_tensors.reserve(input_tensors.size());
+  for (const auto& tensor : input_tensors) {
+    auto& inputDims = tensor.shape().dims;
+    std::vector<int64_t> src_dims{inputDims[0], inputDims[1], inputDims[2], inputDims[3]};
+    auto src_value_size = value_size_of(src_dims);
+    auto input_tensor_view = tensor.GetCpuReadView();
+    auto input_tensor_buffer = const_cast<float*>(input_tensor_view.buffer<float>());
+    auto tmp_tensor = Ort::Value::CreateTensor<float>(memory_info_handler, input_tensor_buffer, src_value_size, src_dims.data(), src_dims.size());
+    ort_input_tensors.emplace_back(std::move(tmp_tensor));
+  }
+  auto output_tensors = absl::make_unique<std::vector<Tensor>>();
+  std::vector<Ort::Value> onnx_output_tensors;
+  try {
+    onnx_output_tensors = session_->Run(
+        Ort::RunOptions{nullptr}, m_input_names.data(),
+        ort_input_tensors.data(), ort_input_tensors.size(), m_output_names.data(),
+        m_output_names.size());
+  } catch (Ort::Exception& e) {
+    LOG(ERROR) << "Run error msg:" << e.what();
+  }
+  for (const auto& tensor : onnx_output_tensors) {
+    auto info = tensor.GetTensorTypeAndShapeInfo();
+    auto dims = info.GetShape();
+    std::vector<int> tmp_dims;
+    for (const auto& i : dims) {
+      tmp_dims.push_back(i);
+    }
+    output_tensors->emplace_back(Tensor::ElementType::kFloat32, Tensor::Shape{tmp_dims});
+    auto cpu_view = output_tensors->back().GetCpuWriteView();
+    std::memcpy(cpu_view.buffer<float>(), tensor.GetTensorData<float>(), output_tensors->back().bytes());
+  }
+  kOutTensors(cc).Send(std::move(output_tensors));
+  return absl::OkStatus();
+}
+
+}  // namespace api2
+}  // namespace mediapipe
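A note on the name caching in LoadModel above: `GetInputName`/`GetOutputName` hand back allocator-owned C strings, and newer onnxruntime releases deprecate them in favor of `GetInputNameAllocated`/`GetOutputNameAllocated`. A hedged sketch of an ownership-safe variant, assuming onnxruntime >= 1.13 where those calls exist; `IoNames` and `CacheIoNames` are illustrative names, not part of this commit.

#include <vector>

#include "onnxruntime_cxx_api.h"

// Sketch: collect input/output names while retaining ownership of the
// allocator-backed strings, instead of caching raw char* from GetInputName.
struct IoNames {
  std::vector<Ort::AllocatedStringPtr> owned;  // keeps the name buffers alive
  std::vector<const char*> inputs;             // views into `owned`, usable by Session::Run
  std::vector<const char*> outputs;
};

IoNames CacheIoNames(Ort::Session& session, Ort::AllocatorWithDefaultOptions& allocator) {
  IoNames names;
  for (size_t i = 0; i < session.GetInputCount(); ++i) {
    names.owned.push_back(session.GetInputNameAllocated(i, allocator));
    names.inputs.push_back(names.owned.back().get());
  }
  for (size_t i = 0; i < session.GetOutputCount(); ++i) {
    names.owned.push_back(session.GetOutputNameAllocated(i, allocator));
    names.outputs.push_back(names.owned.back().get());
  }
  return names;
}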