diff --git a/.bazelrc b/.bazelrc index 5a586f3ca..2834c7012 100644 --- a/.bazelrc +++ b/.bazelrc @@ -83,3 +83,13 @@ build:ios_fat --watchos_cpus=armv7k build:darwin_x86_64 --apple_platform_type=macos build:darwin_x86_64 --macos_minimum_os=10.12 build:darwin_x86_64 --cpu=darwin_x86_64 + +# This config refers to building with CUDA available. It does not necessarily +# mean that we build CUDA op kernels. +build:using_cuda --define=using_cuda=true +build:using_cuda --action_env TF_NEED_CUDA=1 +build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain + +# This config refers to building CUDA op kernels with nvcc. +build:cuda --config=using_cuda +build:cuda --define=using_cuda_nvcc=true diff --git a/mediapipe/calculators/video/tvl1_optical_flow_calculator.cc b/mediapipe/calculators/video/tvl1_optical_flow_calculator.cc index c774cfeb1..e6130f8b5 100644 --- a/mediapipe/calculators/video/tvl1_optical_flow_calculator.cc +++ b/mediapipe/calculators/video/tvl1_optical_flow_calculator.cc @@ -88,7 +88,7 @@ class Tvl1OpticalFlowCalculator : public CalculatorBase { // cv::DenseOpticalFlow is not thread-safe. Invoking multiple // DenseOpticalFlow::calc() in parallel may lead to memory corruption or // memory leak. - std::list> tvl1_computers_ + std::list> tvl1_computers_ ABSL_GUARDED_BY(mutex_); absl::Mutex mutex_; }; @@ -163,7 +163,7 @@ class Tvl1OpticalFlowCalculator : public CalculatorBase { // Tries getting an idle DenseOpticalFlow object from the cache. If not, // creates a new DenseOpticalFlow. - cv::Ptr tvl1_computer; + cv::Ptr tvl1_computer; { absl::MutexLock lock(&mutex_); if (!tvl1_computers_.empty()) { @@ -177,7 +177,18 @@ class Tvl1OpticalFlowCalculator : public CalculatorBase { flow->Allocate(first.cols, first.rows); cv::Mat cv_flow(flow->mutable_flow_data()); +#if defined(CV_CUDA) + cv::cuda::GpuMat gpu_first, gpu_second, gpu_flow; + gpu_first.upload(first); + gpu_second.upload(second); + gpu_flow.upload(cv_flow); + tvl1_computer->calc(gpu_first, gpu_second, gpu_flow); + gpu_first.download(first); + gpu_second.download(second); + gpu_flow.download(cv_flow); +#else tvl1_computer->calc(first, second, cv_flow); +#endif CHECK_EQ(flow->mutable_flow_data().data, cv_flow.data); // Inserts the idle DenseOpticalFlow object back to the cache for reuse. { diff --git a/mediapipe/framework/port/opencv_video_inc.h b/mediapipe/framework/port/opencv_video_inc.h index dc84bf59b..05fbd2782 100644 --- a/mediapipe/framework/port/opencv_video_inc.h +++ b/mediapipe/framework/port/opencv_video_inc.h @@ -84,13 +84,26 @@ inline int fourcc(char c1, char c2, char c3, char c4) { #include #include +//#define CV_CUDA #if CV_VERSION_MAJOR == 4 && !defined(MEDIAPIPE_MOBILE) +#if defined(CV_CUDA) +#include +typedef cv::cuda::DenseOpticalFlow DenseOpticalFlow; +#else #include +typedef cv::DenseOpticalFlow DenseOpticalFlow; +#endif namespace cv { +#if defined(CV_CUDA) +inline Ptr createOptFlow_DualTVL1() { + return cuda::OpticalFlowDual_TVL1::create(); +} +#else inline Ptr createOptFlow_DualTVL1() { return optflow::createOptFlow_DualTVL1(); } +#endif } // namespace cv #endif diff --git a/third_party/opencv_linux.BUILD b/third_party/opencv_linux.BUILD index 26978d532..026734ab9 100644 --- a/third_party/opencv_linux.BUILD +++ b/third_party/opencv_linux.BUILD @@ -23,6 +23,8 @@ cc_library( "lib/x86_64-linux-gnu/libopencv_imgproc.so", "lib/x86_64-linux-gnu/libopencv_video.so", "lib/x86_64-linux-gnu/libopencv_videoio.so", + "lib/x86_64-linux-gnu/libopencv_optflow.so", + "lib/x86_64-linux-gnu/libopencv_cudaoptflow.so", ], ), hdrs = glob([