Add option for cuda optflow.

This commit is contained in:
Cheoljun Lee 2020-08-06 17:31:54 +09:00
parent 6b0ab0e012
commit 600bda3df2
4 changed files with 38 additions and 2 deletions

View File

@ -83,3 +83,13 @@ build:ios_fat --watchos_cpus=armv7k
build:darwin_x86_64 --apple_platform_type=macos
build:darwin_x86_64 --macos_minimum_os=10.12
build:darwin_x86_64 --cpu=darwin_x86_64
# This config refers to building with CUDA available. It does not necessarily
# mean that we build CUDA op kernels.
build:using_cuda --define=using_cuda=true
build:using_cuda --action_env TF_NEED_CUDA=1
build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
# This config refers to building CUDA op kernels with nvcc.
build:cuda --config=using_cuda
build:cuda --define=using_cuda_nvcc=true

View File

@ -88,7 +88,7 @@ class Tvl1OpticalFlowCalculator : public CalculatorBase {
// cv::DenseOpticalFlow is not thread-safe. Invoking multiple
// DenseOpticalFlow::calc() in parallel may lead to memory corruption or
// memory leak.
std::list<cv::Ptr<cv::DenseOpticalFlow>> tvl1_computers_
std::list<cv::Ptr<DenseOpticalFlow>> tvl1_computers_
ABSL_GUARDED_BY(mutex_);
absl::Mutex mutex_;
};
@ -163,7 +163,7 @@ class Tvl1OpticalFlowCalculator : public CalculatorBase {
// Tries getting an idle DenseOpticalFlow object from the cache. If not,
// creates a new DenseOpticalFlow.
cv::Ptr<cv::DenseOpticalFlow> tvl1_computer;
cv::Ptr<DenseOpticalFlow> tvl1_computer;
{
absl::MutexLock lock(&mutex_);
if (!tvl1_computers_.empty()) {
@ -177,7 +177,18 @@ class Tvl1OpticalFlowCalculator : public CalculatorBase {
flow->Allocate(first.cols, first.rows);
cv::Mat cv_flow(flow->mutable_flow_data());
#if defined(CV_CUDA)
cv::cuda::GpuMat gpu_first, gpu_second, gpu_flow;
gpu_first.upload(first);
gpu_second.upload(second);
gpu_flow.upload(cv_flow);
tvl1_computer->calc(gpu_first, gpu_second, gpu_flow);
gpu_first.download(first);
gpu_second.download(second);
gpu_flow.download(cv_flow);
#else
tvl1_computer->calc(first, second, cv_flow);
#endif
CHECK_EQ(flow->mutable_flow_data().data, cv_flow.data);
// Inserts the idle DenseOpticalFlow object back to the cache for reuse.
{

View File

@ -84,13 +84,26 @@ inline int fourcc(char c1, char c2, char c3, char c4) {
#include <opencv2/video.hpp>
#include <opencv2/videoio.hpp>
//#define CV_CUDA
#if CV_VERSION_MAJOR == 4 && !defined(MEDIAPIPE_MOBILE)
#if defined(CV_CUDA)
#include <opencv2/cudaoptflow.hpp>
typedef cv::cuda::DenseOpticalFlow DenseOpticalFlow;
#else
#include <opencv2/optflow.hpp>
typedef cv::DenseOpticalFlow DenseOpticalFlow;
#endif
namespace cv {
#if defined(CV_CUDA)
inline Ptr<cuda::DenseOpticalFlow> createOptFlow_DualTVL1() {
return cuda::OpticalFlowDual_TVL1::create();
}
#else
inline Ptr<DenseOpticalFlow> createOptFlow_DualTVL1() {
return optflow::createOptFlow_DualTVL1();
}
#endif
} // namespace cv
#endif

View File

@ -23,6 +23,8 @@ cc_library(
"lib/x86_64-linux-gnu/libopencv_imgproc.so",
"lib/x86_64-linux-gnu/libopencv_video.so",
"lib/x86_64-linux-gnu/libopencv_videoio.so",
"lib/x86_64-linux-gnu/libopencv_optflow.so",
"lib/x86_64-linux-gnu/libopencv_cudaoptflow.so",
],
),
hdrs = glob([