diff --git a/mediapipe/calculators/tensor/inference_calculator_gl.cc b/mediapipe/calculators/tensor/inference_calculator_gl.cc index 1f3768ee0..bd8eb3eed 100644 --- a/mediapipe/calculators/tensor/inference_calculator_gl.cc +++ b/mediapipe/calculators/tensor/inference_calculator_gl.cc @@ -26,6 +26,8 @@ #include "mediapipe/gpu/gl_calculator_helper.h" #include "tensorflow/lite/delegates/gpu/gl_delegate.h" +#define PERFETTO_TRACK_EVENT_NAMESPACE mediapipe + namespace mediapipe { namespace api2 { @@ -191,7 +193,7 @@ absl::Status InferenceCalculatorGlImpl::GpuInferenceRunner::Process( CalculatorContext* cc, const std::vector& input_tensors, std::vector& output_tensors) { return gpu_helper_.RunInGlContext( - [this, &input_tensors, &output_tensors]() -> absl::Status { + [this, cc, &input_tensors, &output_tensors]() -> absl::Status { // Explicitly copy input. for (int i = 0; i < input_tensors.size(); ++i) { glBindBuffer(GL_COPY_READ_BUFFER, @@ -203,7 +205,10 @@ absl::Status InferenceCalculatorGlImpl::GpuInferenceRunner::Process( } // Run inference. - RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk); + { + MEDIAPIPE_PROFILING(GPU_TASK_INVOKE, cc); + RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk); + } output_tensors.reserve(output_size_); for (int i = 0; i < output_size_; ++i) { diff --git a/mediapipe/calculators/tensor/inference_calculator_gl_advanced.cc b/mediapipe/calculators/tensor/inference_calculator_gl_advanced.cc index 7e11ee072..52359f7f5 100644 --- a/mediapipe/calculators/tensor/inference_calculator_gl_advanced.cc +++ b/mediapipe/calculators/tensor/inference_calculator_gl_advanced.cc @@ -32,6 +32,8 @@ #include "mediapipe/util/android/file/base/helpers.h" #endif // MEDIAPIPE_ANDROID +#define PERFETTO_TRACK_EVENT_NAMESPACE mediapipe + namespace mediapipe { namespace api2 { @@ -83,7 +85,7 @@ class InferenceCalculatorGlAdvancedImpl const mediapipe::InferenceCalculatorOptions::Delegate& delegate); absl::StatusOr> Process( - const std::vector& input_tensors); + CalculatorContext* cc, const std::vector& input_tensors); absl::Status Close(); @@ -121,11 +123,11 @@ absl::Status InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::Init( absl::StatusOr> InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::Process( - const std::vector& input_tensors) { + CalculatorContext* cc, const std::vector& input_tensors) { std::vector output_tensors; MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( - [this, &input_tensors, &output_tensors]() -> absl::Status { + [this, cc, &input_tensors, &output_tensors]() -> absl::Status { for (int i = 0; i < input_tensors.size(); ++i) { MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToInputTensor( input_tensors[i].GetOpenGlBufferReadView().name(), i)); @@ -138,7 +140,10 @@ InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::Process( output_tensors.back().GetOpenGlBufferWriteView().name(), i)); } // Run inference. - return tflite_gpu_runner_->Invoke(); + { + MEDIAPIPE_PROFILING(GPU_TASK_INVOKE, cc); + return tflite_gpu_runner_->Invoke(); + } })); return output_tensors; @@ -354,7 +359,7 @@ absl::Status InferenceCalculatorGlAdvancedImpl::Process(CalculatorContext* cc) { auto output_tensors = absl::make_unique>(); ASSIGN_OR_RETURN(*output_tensors, - gpu_inference_runner_->Process(input_tensors)); + gpu_inference_runner_->Process(cc, input_tensors)); kOutTensors(cc).Send(std::move(output_tensors)); return absl::OkStatus(); diff --git a/mediapipe/framework/calculator_profile.proto b/mediapipe/framework/calculator_profile.proto index 06ec678a9..1512da6af 100644 --- a/mediapipe/framework/calculator_profile.proto +++ b/mediapipe/framework/calculator_profile.proto @@ -133,7 +133,12 @@ message GraphTrace { TPU_TASK = 13; GPU_CALIBRATION = 14; PACKET_QUEUED = 15; + GPU_TASK_INVOKE = 16; + TPU_TASK_INVOKE = 17; } + // //depot/mediapipe/framework/mediapipe_profiling.h:profiler_census_tags, + // //depot/mediapipe/framework/profiler/trace_buffer.h:event_type_list, + // ) // The timing for one packet set being processed at one caclulator node. message CalculatorTrace { diff --git a/mediapipe/framework/profiler/trace_buffer.h b/mediapipe/framework/profiler/trace_buffer.h index 069f09610..60352c705 100644 --- a/mediapipe/framework/profiler/trace_buffer.h +++ b/mediapipe/framework/profiler/trace_buffer.h @@ -109,6 +109,11 @@ struct TraceEvent { static constexpr EventType TPU_TASK = GraphTrace::TPU_TASK; static constexpr EventType GPU_CALIBRATION = GraphTrace::GPU_CALIBRATION; static constexpr EventType PACKET_QUEUED = GraphTrace::PACKET_QUEUED; + static constexpr EventType GPU_TASK_INVOKE = GraphTrace::GPU_TASK_INVOKE; + static constexpr EventType TPU_TASK_INVOKE = GraphTrace::TPU_TASK_INVOKE; + // //depot/mediapipe/framework/mediapipe_profiling.h:profiler_census_tags, + // //depot/mediapipe/framework/calculator_profile.proto:event_type, + // ) }; // Packet trace log buffer.