diff --git a/mediapipe/pose_tracking_dll/BUILD b/mediapipe/pose_tracking_dll/BUILD new file mode 100644 index 000000000..9e2df0b7d --- /dev/null +++ b/mediapipe/pose_tracking_dll/BUILD @@ -0,0 +1,59 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("windows_dll_library.bzl", "windows_dll_library") +licenses(["notice"]) + +package(default_visibility = ["//mediapipe/examples:__subpackages__"]) + +# Define the shared library +windows_dll_library( + name = "pose_tracking_lib", + srcs = ["pose_tracking.cpp"], + hdrs = ["pose_tracking.h"], + # Define COMPILING_DLL to export symbols during the DLL compilation. 
+ copts = ["-DCOMPILING_DLL"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_video", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:parse", + + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:packet_presence_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/graphs/pose_tracking:pose_tracking_cpu_deps", + ] +) + +# **Implicitly link to pose_tracking_lib.dll** +cc_binary( + name = "pose_tracking_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/pose_tracking:pose_tracking_cpu_deps", + ":pose_tracking_lib" + ], +) diff --git a/mediapipe/pose_tracking_dll/README.md b/mediapipe/pose_tracking_dll/README.md new file mode 100644 index 000000000..249c0ae15 --- /dev/null +++ b/mediapipe/pose_tracking_dll/README.md @@ -0,0 +1,109 @@ +## Description +The pose_tracking_dll module allows for building a Mediapipe-based pose tracking DLL library that can be used with any C++ project. All the dependencies such as tensorflow are built statically into the dll. + +Currently, the following features are supported: +- Segmenting the person(s) of interest +- Segmenting the skeleton(s) +- Accessing the 3D coordinates of each node of the skeleton + +## Prerequisites + +Install Mediapipe development environment as follows. 
+ +**Note**: This guide assumes the Nimagna development environment. Otherwise, please follow the guidelines on the official Mediapipe website: https://google.github.io/mediapipe/getting_started/install.html#installing-on-windows with the important change that Bazel version 3.7.2 is required, and the helpful side note that the OpenCV version used by default in Mediapipe is 3.4.10. + +### Install MSYS2 + +- Install MSYS2 from https://www.msys2.org/ +- Edit the %PATH% environment variable: If MSYS2 is installed to `C:\msys64`, add `C:\msys64\usr\bin` to your %PATH% environment variable. + +### Install necessary packages + +- Run `pacman -S git patch unzip` and confirm installation + +### Install Python 3.9 + +- Download the Python 3.9.9 Windows installer from https://www.python.org/downloads/release/python-399/ and install it. Note that Python 3.10 does not work. +- Allow the installer to edit the %PATH% environment variable. +- The Python installation path is referred to as `PYTHONDIR` in the following steps. Usually, this is `C:\Users\...\AppData\Local\Programs\Python\Python39` when installing only for the current user. +- Run `pip install numpy` in a new command line. + +### Install Visual C++ Build Tools 2019 and WinSDK + +- Download and install VC build tools from https://visualstudio.microsoft.com/visual-cpp-build-tools/ with the following settings: + ![image](https://user-images.githubusercontent.com/83065859/148920359-fc5830c2-3eb1-47d4-ba33-8b1ba783b728.png) + +- Redistributables and WinSDK 10.0.19041.0 should be installed already. If not, install with Visual Studio or download the [WinSDK from the official Microsoft website](https://developer.microsoft.com/en-us/windows/downloads/sdk-archive/) and install. + +### Install Bazel 3.7.2 + +- Download `bazel-3.7.2-windows-x86_64.exe` from https://github.com/bazelbuild/bazel/releases/tag/3.7.2 and rename to `bazel.exe` +- Add the location of the downloaded Bazel executable to the %PATH% environment variable. 
+- Set additional Bazel environment variables as follows: + - `BAZEL_VS=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools` + - `BAZEL_VC=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC` + - `BAZEL_VC_FULL_VERSION=14.29.30133` - or the name of the folder you find in `C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC` + - `BAZEL_WINSDK_FULL_VERSION=10.0.19041.0` + +### Checkout Mediapipe + +- git clone https://github.com/NimagnaAG/mediapipe +- the repository root folder is referred to as `MEDIAPIPEDIR` in the following steps + +### Install OpenCV + +- Download OpenCV 3.4.10 from https://sourceforge.net/projects/opencvlibrary/files/3.4.10/opencv-3.4.10-vc14_vc15.exe/download +- Extract OpenCV into a folder, e.g. `C:\Users\ChristophNiederberge\source\repos\opencv_3.4.10`. This folder is referred to as `OPENCVDIR` in the following steps. +- Edit the `MEDIAPIPEDIR\WORKSPACE` file: Around line 215 is the "windows_opencv" repository. Adapt the path to point to `OPENCVDIR\\build` (using double backslashes): + ``` + new_local_repository( + name = "windows_opencv", + build_file = "@//third_party:opencv_windows.BUILD", + path = "OPENCVDIR\\build", + ) + ``` + +#### Installation issue handling + +- If you are using a **different OpenCV version**, adapt the `OPENCV_VERSION` variable in the file `mediapipe/external/opencv_<platform>.BUILD` to the one installed in the system (https://github.com/google/mediapipe/issues/1926#issuecomment-825874197). + +## How to build +Assuming you're in `MEDIAPIPEDIR`, the root of the repository, run the following commands, replacing `PYTHONDIR` with your Python installation path (use forward slashes "/" in the path): + +``` +cd mediapipe +bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 --action_env PYTHON_BIN_PATH=PYTHONDIR/python.exe pose_tracking_dll:pose_tracking_cpu +``` + +The results will be stored in the `bazel-bin\mediapipe\pose_tracking_dll` folder. 
+ +In case the build stalls, pressing Ctrl+C might not be sufficient to stop the task. In that case, if you try to (resume the) build again, +the following message will be displayed: + +``` +Another command (pid=5300) is running. Waiting for it to complete on the server (server_pid=3684) +``` + +Unfortunately this process is hidden for some reason and can't be found in taskmgr. Fortunately, you can use the `taskkill` command to kill the process: + +``` +taskkill /F /PID 3684 +``` + +After that, you should be able to run the build command again. + +### Build debug symbols +- `dbg` can be used in place of `opt` to build the library with debug symbols in Visual Studio pdb format. + +### Build issue handling + +- If bazel fails to download packages, run `bazel clean --expunge` and try again. +- If bazel fails with a `fatal error C1083: Cannot open compiler generated file: '': Invalid argument`, your [path is too long](https://stackoverflow.com/questions/34074925/vs-2015-cannot-open-compiler-generated-file-invalid-argument). Actually, it is most probably the username... + - Adapt the call to `bazel --output_base=E:\nim_output build -c opt --define MEDIAPIPE_DISABLE_GPU=1 --action_env PYTHON_BIN_PATH=PYTHONDIR/python.exe pose_tracking_dll:pose_tracking_cpu` where `E:\nim_output` can be replaced with some short path where bazel will store the packages and perform the build. + +## How to use + +- Go to bazel-bin\mediapipe\pose_tracking_dll +- Link `pose_tracking_cpu.lib` and `pose_tracking_lib.dll.if.lib` statically in your project. +- Make sure `opencv_world3410.dll` and `pose_tracking_lib.dll` are accessible in your working directory. +- Include the `mediapipe\pose_tracking_dll\pose_tracking.h` header file to access the methods of the library. 
diff --git a/mediapipe/pose_tracking_dll/pose_tracking.cpp b/mediapipe/pose_tracking_dll/pose_tracking.cpp
new file mode 100644
index 000000000..33dbdb934
--- /dev/null
+++ b/mediapipe/pose_tracking_dll/pose_tracking.cpp
@@ -0,0 +1,170 @@
+/**
+    Copyright 2022, Nimagna AG
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ */
+
+#include "pose_tracking.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/port/file_helpers.h"
+#include "mediapipe/framework/port/opencv_highgui_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/opencv_video_inc.h"
+#include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/status.h"
+
+// Implementation object behind nimagna::PoseTracking. It owns the MediaPipe
+// graph, pushes RGB frames into it and caches the most recent segmentation
+// mask, landmark positions and landmark visibilities.
+class PoseTrackingImpl {
+ public:
+  // Builds and starts the graph described by `calculatorGraphConfigFile`.
+  // The initialization status is only logged here, not propagated.
+  explicit PoseTrackingImpl(const std::string& calculatorGraphConfigFile) {
+    auto status = initialize(calculatorGraphConfigFile);
+    LOG(WARNING) << "Initialized PoseTracking with status: " << status;
+  }
+
+  // Reads the text-proto graph config from disk, initializes the graph,
+  // attaches one poller to the segmentation stream and one to the landmark
+  // stream, and starts the run.
+  // Returns the first error encountered, or the result of StartRun().
+  absl::Status initialize(const std::string& calculatorGraphConfigFile) {
+    std::string graphContents;
+    MP_RETURN_IF_ERROR(mediapipe::file::GetContents(calculatorGraphConfigFile, &graphContents));
+
+    mediapipe::CalculatorGraphConfig config =
+        mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(graphContents);
+
+    MP_RETURN_IF_ERROR(graph.Initialize(config));
+    ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
+                     graph.AddOutputStreamPoller(kOutputSegmentationStream, true));
+
+    ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarksPoller,
+                     graph.AddOutputStreamPoller(kOutpuLandmarksStream, true));
+
+    maskPollerPtr = std::make_unique<mediapipe::OutputStreamPoller>(std::move(poller));
+
+    landmarksPollerPtr =
+        std::make_unique<mediapipe::OutputStreamPoller>(std::move(landmarksPoller));
+
+    // A value-returning function must return on every path; falling off the
+    // end here would be undefined behavior, so hand back the StartRun status.
+    return graph.StartRun({});
+  }
+
+  // Sends one RGB frame through the graph and refreshes the cached mask and
+  // landmarks. Returns false if the frame could not be queued, if either
+  // poller is missing or yields nothing, if the mask packet is empty, or if
+  // the landmark packet is empty.
+  bool processFrame(const cv::Mat& inputRGB8Bit) {
+    // Wrap Mat into an ImageFrame.
+    auto inputFrame = absl::make_unique<mediapipe::ImageFrame>(
+        mediapipe::ImageFormat::SRGB, inputRGB8Bit.cols, inputRGB8Bit.rows,
+        mediapipe::ImageFrame::kDefaultAlignmentBoundary);
+    cv::Mat inputFrameMat = mediapipe::formats::MatView(inputFrame.get());
+    inputRGB8Bit.copyTo(inputFrameMat);
+
+    // Send image packet into the graph, stamped with the OpenCV tick counter
+    // converted to microseconds.
+    const size_t frameTimestampUs = static_cast<size_t>(
+        static_cast<double>(cv::getTickCount()) / static_cast<double>(cv::getTickFrequency()) *
+        1e6);
+    auto status = graph.AddPacketToInputStream(
+        kInputStream,
+        mediapipe::Adopt(inputFrame.release()).At(mediapipe::Timestamp(frameTimestampUs)));
+
+    if (!status.ok()) {
+      LOG(WARNING) << "Graph execution failed: " << status;
+      return false;
+    }
+
+    // Get the graph result packet, or stop if that fails.
+    mediapipe::Packet maskPacket;
+    if (!maskPollerPtr || !maskPollerPtr->Next(&maskPacket) || maskPacket.IsEmpty()) return false;
+    const auto& outputFrame = maskPacket.Get<mediapipe::ImageFrame>();
+
+    // Get pose landmarks.
+    if (!landmarksPollerPtr || !landmarksPollerPtr->Next(&poseLandmarksPacket)) {
+      return false;
+    }
+
+    // Convert back to opencv for display or saving. The clone detaches the
+    // cached mask from the packet, which goes out of scope on return.
+    auto mask = mediapipe::formats::MatView(&outputFrame);
+    segmentedMask = mask.clone();
+
+    absl::Status landmarksStatus = detectLandmarksWithStatus(poseLandmarks);
+
+    return landmarksStatus.ok();
+  }
+
+  // Copies the landmarks of the last polled packet into `poseLandmarks` and
+  // their visibilities into the `visibility` member.
+  // Returns CancelledError when the packet is empty, OK otherwise.
+  absl::Status detectLandmarksWithStatus(nimagna::cv_wrapper::Point3f* poseLandmarks) {
+    if (poseLandmarksPacket.IsEmpty()) {
+      return absl::CancelledError("Pose landmarks packet is empty.");
+    }
+
+    // Bind by reference: the list does not need to be copied to be read.
+    const auto& retrievedLandmarks =
+        poseLandmarksPacket.Get<::mediapipe::NormalizedLandmarkList>();
+
+    // The destination buffers hold exactly kLandmarksCount entries, so never
+    // copy more than that even if the graph delivers a longer list.
+    constexpr int kMaxLandmarks = static_cast<int>(kLandmarksCount);
+    const int landmarksCount = retrievedLandmarks.landmark_size() < kMaxLandmarks
+                                   ? retrievedLandmarks.landmark_size()
+                                   : kMaxLandmarks;
+
+    for (int j = 0; j < landmarksCount; ++j) {
+      const auto& landmark = retrievedLandmarks.landmark(j);
+      poseLandmarks[j].x = landmark.x();
+      poseLandmarks[j].y = landmark.y();
+      poseLandmarks[j].z = landmark.z();
+      visibility[j] = landmark.visibility();
+    }
+
+    return absl::OkStatus();
+  }
+
+  // Pointer to the first of kLandmarksCount cached landmark positions.
+  nimagna::cv_wrapper::Point3f* lastDetectedLandmarks() { return poseLandmarks; }
+
+  // The mask stored by the last successful processFrame() call.
+  cv::Mat lastSegmentedFrame() { return segmentedMask; }
+  // Pointer to the first of kLandmarksCount cached visibility values.
+  float* landmarksVisibility() { return visibility; }
+
+  static constexpr size_t kLandmarksCount = 33u;
+
+ private:
+  mediapipe::Packet poseLandmarksPacket;
+  cv::Mat segmentedMask;
+  nimagna::cv_wrapper::Point3f poseLandmarks[kLandmarksCount];
+  float visibility[kLandmarksCount] = {0};
+  std::unique_ptr<mediapipe::OutputStreamPoller> maskPollerPtr;
+  std::unique_ptr<mediapipe::OutputStreamPoller> landmarksPollerPtr;
+  mediapipe::CalculatorGraph graph;
+  const char* kInputStream = "input_video";
+  const char* kOutputSegmentationStream = "segmentation_mask";
+  const char* kOutpuLandmarksStream = "pose_landmarks";
+};
+
+namespace nimagna {
+// Creates the implementation for the given graph config file. A null path is
+// treated as an empty one, because building a std::string from a null
+// pointer is undefined behavior.
+PoseTracking::PoseTracking(const char* calculatorGraphConfigFile)
+    : mImplementation(new PoseTrackingImpl(
+          calculatorGraphConfigFile != nullptr ? calculatorGraphConfigFile : "")) {}
+
+// Wraps the caller's buffer as an 8-bit, 3-channel cv::Mat (no copy) and
+// forwards it to the implementation. Returns false for a null or zero-sized
+// frame without touching the graph.
+bool PoseTracking::processFrame(const cv_wrapper::Mat& inputRGB8Bit) {
+  if (inputRGB8Bit.data == nullptr || inputRGB8Bit.rows <= 0 || inputRGB8Bit.cols <= 0) {
+    return false;
+  }
+  const auto frame = cv::Mat(inputRGB8Bit.rows, inputRGB8Bit.cols, CV_8UC3, inputRGB8Bit.data);
+  return mImplementation->processFrame(frame);
+}
+
+// Returns pointers to the landmark and visibility arrays held by the
+// implementation object; they are views, not copies.
+PoseTracking::PoseLandmarks PoseTracking::lastDetectedLandmarks() {
+  return {mImplementation->lastDetectedLandmarks(), mImplementation->landmarksVisibility()};
+}
+
+// Returns the dimensions and data pointer of the cached segmentation mask.
+// NOTE(review): the pointer refers to the buffer kept by the implementation;
+// presumably it stays valid only until the next processFrame() call - confirm.
+cv_wrapper::Mat PoseTracking::lastSegmentedFrame() {
+  const cv::Mat result = mImplementation->lastSegmentedFrame();
+
+  return cv_wrapper::Mat(result.rows, result.cols, result.data);
+}
+
+PoseTracking::~PoseTracking() { delete mImplementation; }
+}  // namespace nimagna
diff --git a/mediapipe/pose_tracking_dll/pose_tracking.h b/mediapipe/pose_tracking_dll/pose_tracking.h
new file mode 100644
index 000000000..b40925185
--- /dev/null
+++ b/mediapipe/pose_tracking_dll/pose_tracking.h
@@ -0,0 +1,124 @@
+/**
+    Copyright 2022, Nimagna AG
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ */
+
+#ifndef POSE_TRACKING_LIBRARY_H
+#define POSE_TRACKING_LIBRARY_H
+
+#include <cstddef>  // size_t
+
+// Export the symbols while building the DLL, import them everywhere else.
+#ifdef COMPILING_DLL
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT __declspec(dllimport)
+#endif
+
+class PoseTrackingImpl;
+
+namespace nimagna {
+// Plain value types used in the public interface, so that this header does
+// not need to include any OpenCV header.
+namespace cv_wrapper {
+struct Point2f {
+  float x = 0;
+  float y = 0;
+
+  Point2f() = default;
+  Point2f(float x, float y) : x(x), y(y) {}
+};
+struct Point3f {
+  float x = 0;
+  float y = 0;
+  float z = 0;
+
+  Point3f() = default;
+  Point3f(float x, float y, float z) : x(x), y(y), z(z) {}
+};
+
+struct Rect {
+  int x = 0;
+  int y = 0;
+  int width = 0;
+  int height = 0;
+
+  Rect() = default;
+  Rect(int x, int y, int width, int height) : x(x), y(y), width(width), height(height) {}
+};
+
+// Non-owning description of an image buffer: dimensions plus a data pointer.
+struct Mat {
+  int rows = 0;
+  int cols = 0;
+  unsigned char* data = nullptr;
+
+  Mat(int rows, int cols, unsigned char* data) : rows(rows), cols(cols), data(data) {}
+};
+}  // namespace cv_wrapper
+
+// Public entry point of the DLL. All work is delegated to a PoseTrackingImpl
+// instance that is created in the constructor and deleted in the destructor.
+class DLLEXPORT PoseTracking {
+ public:
+  // Read-only view of the landmark positions and visibilities; both pointers
+  // refer to arrays of kLandmarksCount elements.
+  struct PoseLandmarks {
+    PoseLandmarks(cv_wrapper::Point3f* points, float* visibility)
+        : points(points), visibility(visibility) {}
+    static constexpr size_t kLandmarksCount = 33u;
+    const cv_wrapper::Point3f* points;
+    const float* visibility;
+  };
+
+  // Index of each landmark inside PoseLandmarks::points / ::visibility.
+  enum LandmarkNames {
+    NOSE = 0,
+    LEFT_EYE_INNER,
+    LEFT_EYE,
+    LEFT_EYE_OUTER,
+    RIGHT_EYE_INNER,
+    RIGHT_EYE,
+    RIGHT_EYE_OUTER,
+    LEFT_EAR,
+    RIGHT_EAR,
+    MOUTH_LEFT,
+    MOUTH_RIGHT,
+    LEFT_SHOULDER,
+    RIGHT_SHOULDER,
+    LEFT_ELBOW,
+    RIGHT_ELBOW,
+    LEFT_WRIST,
+    RIGHT_WRIST,
+    LEFT_PINKY,
+    RIGHT_PINKY,
+    LEFT_INDEX,
+    RIGHT_INDEX,
+    LEFT_THUMB,
+    RIGHT_THUMB,
+    LEFT_HIP,
+    RIGHT_HIP,
+    LEFT_KNEE,
+    RIGHT_KNEE,
+    LEFT_ANKLE,
+    RIGHT_ANKLE,
+    LEFT_HEEL,
+    RIGHT_HEEL,
+    LEFT_FOOT_INDEX,
+    RIGHT_FOOT_INDEX,
+    COUNT = PoseLandmarks::kLandmarksCount
+  };
+
+  // `calculatorGraphConfigFile` is the path of the graph config to load.
+  PoseTracking(const char* calculatorGraphConfigFile);
+  ~PoseTracking();
+
+  // The implementation is held through a raw pointer that the destructor
+  // deletes; a compiler-generated copy would delete it twice.
+  PoseTracking(const PoseTracking&) = delete;
+  PoseTracking& operator=(const PoseTracking&) = delete;
+
+  // Processes one frame; `inputRGB8Bit.data` is interpreted as 8-bit,
+  // 3-channel pixels of size rows x cols.
+  bool processFrame(const cv_wrapper::Mat& inputRGB8Bit);
+  cv_wrapper::Mat lastSegmentedFrame();
+  PoseTracking::PoseLandmarks lastDetectedLandmarks();
+
+ private:
+  PoseTrackingImpl* mImplementation;
+};
+}  // namespace nimagna
+
+#endif
diff --git a/mediapipe/pose_tracking_dll/windows_dll_library.bzl b/mediapipe/pose_tracking_dll/windows_dll_library.bzl
new file mode 100644
index 000000000..ef7371af8
--- /dev/null
+++ b/mediapipe/pose_tracking_dll/windows_dll_library.bzl
@@ -0,0 +1,62 @@
+"""
+This is a simple windows_dll_library rule for building a DLL on Windows
+that can be depended on by other cc rules.
+Example usage:
+  windows_dll_library(
+      name = "hellolib",
+      srcs = [
+          "hello-library.cpp",
+      ],
+      hdrs = ["hello-library.h"],
+      # Define COMPILING_DLL to export symbols during compiling the DLL.
+      copts = ["/DCOMPILING_DLL"],
+  )
+"""
+
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library")
+
+def windows_dll_library(
+        name,
+        srcs = [],
+        deps = [],
+        hdrs = [],
+        visibility = None,
+        **kwargs):
+    """A simple windows_dll_library rule for building a DLL on Windows.
+
+    Creates four targets: `<name>.dll` (the shared library), `<name>_import_lib`
+    (its interface library), `<name>_dll_import` (a cc_import wrapping both)
+    and `<name>` (a cc_library exposing the headers plus the import target).
+
+    Args:
+      name: name of the resulting cc_library; also the stem of the DLL name.
+      srcs: source files compiled into the DLL.
+      deps: dependencies of the DLL, also forwarded to the cc_library.
+      hdrs: public headers; compiled with the DLL and exported by the cc_library.
+      visibility: visibility of the cc_library target only.
+      **kwargs: extra attributes forwarded to the cc_binary that builds the DLL.
+    """
+    dll_name = name + ".dll"
+    import_lib_name = name + "_import_lib"
+    import_target_name = name + "_dll_import"
+
+    # Build the shared library
+    cc_binary(
+        name = dll_name,
+        srcs = srcs + hdrs,
+        deps = deps,
+        linkshared = 1,
+        **kwargs
+    )
+
+    # Get the import library for the dll
+    native.filegroup(
+        name = import_lib_name,
+        srcs = [":" + dll_name],
+        output_group = "interface_library",
+    )
+
+    # Because we cannot directly depend on cc_binary from other cc rules in deps attribute,
+    # we use cc_import as a bridge to depend on the dll.
+    cc_import(
+        name = import_target_name,
+        interface_library = ":" + import_lib_name,
+        shared_library = ":" + dll_name,
+    )
+
+    # Create a new cc_library to also include the headers needed for the shared library
+    cc_library(
+        name = name,
+        hdrs = hdrs,
+        visibility = visibility,
+        deps = deps + [
+            ":" + import_target_name,
+        ],
+    )