// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/util/tflite/tflite_gpu_runner.h"

#include <cstdint>
#include <memory>
#include <vector>

#include "absl/strings/substitute.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
#include "tensorflow/lite/delegates/gpu/gl/api2.h"
#include "tensorflow/lite/model.h"

// Enable this include unconditionally as soon as the TensorFlow version used
// by MediaPipe ships this module on all platforms.
#ifdef __ANDROID__
#include "tensorflow/lite/delegates/gpu/cl/api.h"
#endif

namespace tflite {
namespace gpu {
namespace {

// TODO: Find a better place for these utility functions.

// Reads the dimensions of the tensors at `indices` from `interpreter` into
// `shapes`.
void UpdateShapes(const tflite::Interpreter& interpreter,
                  const std::vector<int>& indices,
                  std::vector<std::vector<int>>* shapes) {
  shapes->resize(indices.size());
  for (int i = 0; i < indices.size(); ++i) {
    const TfLiteTensor* tensor = interpreter.tensor(indices[i]);
    shapes->at(i).resize(tensor->dims->size);
    for (int j = 0; j < tensor->dims->size; ++j) {
      shapes->at(i)[j] = tensor->dims->data[j];
    }
  }
}

// Builds a temporary CPU interpreter solely to read the model's input and
// output tensor shapes.
absl::Status InitializeShapes(const tflite::FlatBufferModel& flatbuffer,
                              const tflite::OpResolver& op_resolver,
                              std::vector<std::vector<int>>* input_shapes,
                              std::vector<std::vector<int>>* output_shapes) {
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder interpreter_builder(flatbuffer, op_resolver);
  if (interpreter_builder(&interpreter) != kTfLiteOk || !interpreter) {
    return absl::InternalError("Unable to prepare TfLite interpreter.");
  }
  UpdateShapes(*interpreter, interpreter->inputs(), input_shapes);
  UpdateShapes(*interpreter, interpreter->outputs(), output_shapes);
  return absl::OkStatus();
}

ObjectDef GetSSBOObjectDef(int channels) {
  ObjectDef gpu_object_def;
  gpu_object_def.data_type = DataType::FLOAT32;
  gpu_object_def.data_layout = DataLayout::BHWC;
  if (channels == 4) {
    gpu_object_def.data_layout = DataLayout::DHWC4;
  }
  gpu_object_def.object_type = ObjectType::OPENGL_SSBO;
  gpu_object_def.user_provided = true;
  return gpu_object_def;
}
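
// Note on GetSSBOObjectDef() above: DHWC4 is the delegate's native layout,
// with channels stored in slices of 4. When a tensor has exactly 4 channels,
// a single slice covers the whole tensor and DHWC4 coincides with dense BHWC
// in memory, so the user-provided SSBO can be consumed directly with no
// conversion pass; any other channel count is declared as plain BHWC and the
// delegate converts. A sketch of the element counts this implies (AlignByN is
// the rounding helper from tensorflow/lite/delegates/gpu/common/util.h):
//
//   // Floats an SSBO must hold for a (1, h, w, c) tensor:
//   //   BHWC layout:  h * w * c
//   //   DHWC4 layout: h * w * AlignByN(c, 4)  // == h * w * c when c == 4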

}  // namespace

absl::Status TFLiteGPURunner::InitializeWithModel(
    const tflite::FlatBufferModel& flatbuffer,
    const tflite::OpResolver& op_resolver, bool allow_quant_ops) {
  // GraphFloat32 is created twice because, when the OpenCL and OpenGL backends
  // are initialized, different backend-specific graph transformations happen
  // in place. As GraphFloat32 is not copyable by design, we keep two copies of
  // the graph until inference is built. This doesn't affect the amount of
  // runtime memory used, because both graph_gl_ and graph_cl_ are deleted at
  // the end of the initialization stage.
  graph_gl_ = std::make_unique<GraphFloat32>();
  graph_cl_ = std::make_unique<GraphFloat32>();
  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
                                         graph_gl_.get(), allow_quant_ops));
  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
                                         graph_cl_.get(), allow_quant_ops));
  for (const auto& input : graph_gl_->inputs()) {
    input_shapes_.push_back(input->tensor.shape);
  }
  for (const auto& output : graph_gl_->outputs()) {
    output_shapes_.push_back(output->tensor.shape);
  }
  MP_RETURN_IF_ERROR(InitializeShapes(flatbuffer, op_resolver,
                                      &input_shape_from_model_,
                                      &output_shape_from_model_));
  return absl::OkStatus();
}

absl::StatusOr<int64_t> TFLiteGPURunner::GetInputElements(int id) {
  if (id >= input_shapes_.size()) {
    return absl::InternalError("Wrong input tensor id.");
  }
  return input_shapes_[id].DimensionsProduct();
}

absl::StatusOr<int64_t> TFLiteGPURunner::GetOutputElements(int id) {
  if (id >= output_shapes_.size()) {
    return absl::InternalError("Wrong output tensor id.");
  }
  return output_shapes_[id].DimensionsProduct();
}

absl::Status TFLiteGPURunner::Build() {
  // 1. Prepare the inference builder.
  std::unique_ptr<InferenceBuilder> builder;

  // By default, try OpenCL first and fall back to OpenGL if that fails.
  if (opencl_is_forced_) {
    MP_RETURN_IF_ERROR(InitializeOpenCL(&builder));
  } else if (opengl_is_forced_) {
    MP_RETURN_IF_ERROR(InitializeOpenGL(&builder));
  } else {
    // Try to build OpenCL first. If something goes wrong, fall back to OpenGL.
    absl::Status status = InitializeOpenCL(&builder);
    if (status.ok()) {
      LOG(INFO) << "OpenCL backend is used.";
    } else {
      LOG(ERROR) << "Falling back to OpenGL: " << status.message();
      MP_RETURN_IF_ERROR(InitializeOpenGL(&builder));
    }
  }

  // Neither graph is needed anymore; make sure they are deleted.
  graph_gl_.reset(nullptr);
  graph_cl_.reset(nullptr);

  // 2. Describe the input/output objects for the created builder.
  for (int flow_index = 0; flow_index < input_shapes_.size(); ++flow_index) {
    MP_RETURN_IF_ERROR(builder->SetInputObjectDef(
        flow_index, GetSSBOObjectDef(input_shapes_[flow_index].c)));
  }
  for (int flow_index = 0; flow_index < output_shapes_.size(); ++flow_index) {
    MP_RETURN_IF_ERROR(builder->SetOutputObjectDef(
        flow_index, GetSSBOObjectDef(output_shapes_[flow_index].c)));
  }

  // 3. Build the inference runner with the created builder.
  return builder->Build(&runner_);
}
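
// Typical end-to-end call sequence for this runner (a sketch only; status
// handling is omitted, construction arguments are as declared in the header,
// and the SSBO ids are assumed to be created by the caller on a current GL
// context, e.g. with glGenBuffers/glBufferData):
//
//   TFLiteGPURunner runner(options);
//   runner.InitializeWithModel(*flatbuffer, op_resolver,
//                              /*allow_quant_ops=*/true);
//   runner.Build();  // Tries OpenCL first, falls back to OpenGL.
//   runner.BindSSBOToInputTensor(input_ssbo_id, /*input_id=*/0);
//   runner.BindSSBOToOutputTensor(output_ssbo_id, /*output_id=*/0);
//   runner.Invoke();  // Results land in the bound output SSBO.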

absl::Status TFLiteGPURunner::BindSSBOToInputTensor(GLuint ssbo_id,
                                                    int input_id) {
  OpenGlBuffer buffer;
  buffer.id = ssbo_id;
  return runner_->SetInputObject(input_id, std::move(buffer));
}

absl::Status TFLiteGPURunner::BindSSBOToOutputTensor(GLuint ssbo_id,
                                                     int output_id) {
  OpenGlBuffer buffer;
  buffer.id = ssbo_id;
  return runner_->SetOutputObject(output_id, std::move(buffer));
}

absl::Status TFLiteGPURunner::Invoke() { return runner_->Run(); }

absl::Status TFLiteGPURunner::InitializeOpenGL(
    std::unique_ptr<InferenceBuilder>* builder) {
  gl::InferenceEnvironmentOptions env_options;
  gl::InferenceEnvironmentProperties properties;
  gl::InferenceOptions gl_options;
  gl_options.priority1 = options_.priority1;
  gl_options.priority2 = options_.priority2;
  gl_options.priority3 = options_.priority3;
  gl_options.usage = options_.usage;
  MP_RETURN_IF_ERROR(
      NewInferenceEnvironment(env_options, &gl_environment_, &properties));
  MP_RETURN_IF_ERROR(gl_environment_->NewInferenceBuilder(
      std::move(*graph_gl_), gl_options, builder));
  return absl::OkStatus();
}

absl::Status TFLiteGPURunner::InitializeOpenCL(
    std::unique_ptr<InferenceBuilder>* builder) {
#ifdef __ANDROID__
  cl::InferenceEnvironmentOptions env_options;
  if (!serialized_binary_cache_.empty()) {
    env_options.serialized_binary_cache = serialized_binary_cache_;
  }
  cl::InferenceEnvironmentProperties properties;
  cl::InferenceOptions cl_options;
  cl_options.priority1 = options_.priority1;
  cl_options.priority2 = options_.priority2;
  cl_options.priority3 = options_.priority3;
  cl_options.usage = options_.usage;
  MP_RETURN_IF_ERROR(
      cl::NewInferenceEnvironment(env_options, &cl_environment_, &properties));
  MP_RETURN_IF_ERROR(cl_environment_->NewInferenceBuilder(
      cl_options, std::move(*graph_cl_), builder));
  return absl::OkStatus();
#else
  // Returning OK with a null builder would crash the caller, so report the
  // missing backend explicitly on non-Android platforms.
  return absl::UnimplementedError(
      "The OpenCL backend is only supported on Android.");
#endif
}

}  // namespace gpu
}  // namespace tflite
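
// Note on the OpenCL binary cache consumed in InitializeOpenCL() above:
// compiled OpenCL programs can be serialized after the first successful build
// and fed back through `serialized_binary_cache_` on later runs to skip kernel
// compilation. A minimal caller-side sketch (SaveToDisk/LoadFromDisk are
// hypothetical persistence helpers, not part of this library):
//
//   // First run: after Build() succeeds, persist the cache blob produced by
//   // the CL environment (see cl::InferenceEnvironment in
//   // tensorflow/lite/delegates/gpu/cl/api.h).
//   std::vector<uint8_t> cache = cl_environment->GetSerializedBinaryCache();
//   SaveToDisk(cache_path, cache);
//
//   // Later runs: load the blob and hand it back to the runner before
//   // Build(), so env_options.serialized_binary_cache is populated.
//   std::vector<uint8_t> cache = LoadFromDisk(cache_path);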