Add zmq calculator.

sam 2020-04-19 11:53:39 +09:00
parent 7bad8fce62
commit c5adb57f30
7 changed files with 23201 additions and 1 deletion


@@ -0,0 +1,26 @@
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:private"])

cc_library(
    name = "zmq_calculator",
    srcs = [
        "json.hpp",
        "zmq_calculator.cc",
    ],
    linkopts = ["-lzmq"],  # requires a system libzmq with development headers
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:packet",
        "//mediapipe/framework:timestamp",
        "//mediapipe/framework/formats:detection_cc_proto",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/stream_handler:immediate_input_stream_handler",
        "//mediapipe/util:header_util",
    ],
    alwayslink = 1,  # keep the calculator's registration symbol linked in
)

File diff suppressed because it is too large (the ~23,000-line vendored json.hpp single-header library).


@@ -0,0 +1,113 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include <zmq.hpp>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/header_util.h"

#include "json.hpp"

using json = nlohmann::json;

namespace mediapipe
{

// Publishes hand-tracking results over a ZeroMQ PUB socket bound to
// tcp://*:5555. Each non-empty vector of normalized rects is serialized to
// JSON and sent as a two-frame message: a "Detection" topic frame followed by
// the JSON payload.
class ZmqCalculator : public CalculatorBase
{
public:
    static ::mediapipe::Status GetContract(CalculatorContract *cc)
    {
        cc->Inputs().Get("LANDMARKS", 0).SetAny();
        cc->Inputs().Get("NORM_RECTS", 0).SetAny();
        // Run Process() as soon as any input arrives instead of waiting for
        // all input streams to be synchronized on the same timestamp.
        cc->SetInputStreamHandler("ImmediateInputStreamHandler");
        return ::mediapipe::OkStatus();
    }

    ::mediapipe::Status Open(CalculatorContext *cc) final
    {
        std::cout << "opened" << std::endl;
        socket.bind("tcp://*:5555");
        return ::mediapipe::OkStatus();
    }

    ::mediapipe::Status Process(CalculatorContext *cc) final
    {
        if (!cc->Inputs().Tag("LANDMARKS").IsEmpty())
        {
            // Landmarks are received but not published yet.
            const auto &landmarks =
                cc->Inputs().Tag("LANDMARKS").Get<std::vector<NormalizedLandmarkList>>();
            (void)landmarks;
        }

        if (!cc->Inputs().Tag("NORM_RECTS").IsEmpty())
        {
            const auto &norm_rects =
                cc->Inputs().Tag("NORM_RECTS").Get<std::vector<NormalizedRect>>();
            if (!norm_rects.empty())
            {
                json data = json({});
                data["hands"] = json::array();
                for (const auto &norm_rect : norm_rects)
                {
                    // Skip default-constructed (all-zero) rects.
                    if (norm_rect.width() == 0.0 && norm_rect.height() == 0.0 &&
                        norm_rect.x_center() == 0.0 && norm_rect.y_center() == 0.0 &&
                        norm_rect.rect_id() == 0)
                    {
                        continue;
                    }
                    json hand = json::object();
                    hand["width"] = norm_rect.width();
                    hand["height"] = norm_rect.height();
                    hand["x_center"] = norm_rect.x_center();
                    hand["y_center"] = norm_rect.y_center();
                    hand["rect_id"] = norm_rect.rect_id();
                    data["hands"].push_back(hand);
                }

                // Two-frame multipart message: topic frame, then JSON payload.
                std::string s = data.dump();
                std::string topic = "Detection";
                zmq::message_t message(topic.size());
                memcpy(message.data(), topic.c_str(), topic.size());
                socket.send(message, ZMQ_SNDMORE);
                zmq::message_t message2(s.size());
                memcpy(message2.data(), s.c_str(), s.size());
                socket.send(message2);
                std::cout << "Publishing " << s << std::endl;
            }
        }
        return ::mediapipe::OkStatus();
    }

private:
    zmq::context_t context{1};
    zmq::socket_t socket{context, ZMQ_PUB};
};

REGISTER_CALCULATOR(ZmqCalculator);

} // namespace mediapipe
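
For reference, a matching consumer on the other side of the socket might look like the sketch below. It is hypothetical (not part of this commit) and assumes the same cppzmq-era API used above; it connects to the calculator's PUB endpoint and prints each JSON payload, which has the shape {"hands": [{"width": ..., "height": ..., "x_center": ..., "y_center": ..., "rect_id": ...}]}.

// Hypothetical subscriber sketch, not part of this commit: connects to the
// ZmqCalculator's PUB socket and prints each "Detection" JSON payload.
#include <iostream>
#include <string>

#include <zmq.hpp>

int main()
{
    zmq::context_t context{1};
    zmq::socket_t socket{context, ZMQ_SUB};
    socket.connect("tcp://localhost:5555");
    // Receive only messages whose first frame starts with "Detection".
    socket.setsockopt(ZMQ_SUBSCRIBE, "Detection", 9);

    while (true)
    {
        zmq::message_t topic;
        zmq::message_t payload;
        socket.recv(&topic);   // frame 1: the topic string
        socket.recv(&payload); // frame 2: the JSON body
        std::cout << std::string(static_cast<char *>(payload.data()),
                                 payload.size())
                  << std::endl;
    }
}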


@@ -70,7 +70,7 @@ DEFINE_string(output_video_path, "",
   if (load_video) {
     capture.open(FLAGS_input_video_path);
   } else {
-    capture.open(0);
+    capture.open(4);  // use camera device index 4 instead of the default
   }
   RET_CHECK(capture.isOpened());


@@ -37,6 +37,18 @@ cc_binary(
    name = "multi_hand_tracking_gpu",
    deps = [
        "//mediapipe/calculators/ipc:zmq_calculator",
        "//mediapipe/examples/desktop:demo_run_graph_main_gpu",
        "//mediapipe/graphs/face_mesh:desktop_live_gpu_calculators",
        "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators",
    ],
)

cc_binary(
    name = "face_hand_tracking_gpu",
    deps = [
        "//mediapipe/calculators/ipc:zmq_calculator",
        "//mediapipe/examples/desktop:demo_run_graph_main_gpu",
        "//mediapipe/graphs/face_mesh:desktop_live_gpu_calculators",
        "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators",
    ],
)
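
For orientation, the new face_hand_tracking_gpu target would typically be built and run along these lines. This is a hedged sketch: the copt follows MediaPipe's desktop-GPU docs of this period, and the graph file path is a placeholder, since the new .pbtxt's filename does not appear in this diff.

# Build the desktop GPU binary (Linux).
bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \
    mediapipe/examples/desktop/multi_hand_tracking:face_hand_tracking_gpu

# Run it against the combined face/hand graph added in this commit
# (replace the placeholder with the graph's actual path).
GLOG_logtostderr=1 \
    bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/face_hand_tracking_gpu \
    --calculator_graph_config_file=<path/to/face_hand_graph.pbtxt>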


@@ -0,0 +1,168 @@
# MediaPipe graph that performs multi-hand tracking and face-mesh landmark
# detection with TensorFlow Lite on GPU. Based on the graph used in
# mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu.

# Images coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:multi_hand_rects"
  # input_stream: "FINISHED:output_video_1"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided min_size.
node {
  calculator: "NormalizedRectVectorHasMinSizeCalculator"
  input_stream: "ITERABLE:prev_multi_hand_rects_from_landmarks"
  output_stream: "prev_has_enough_hands"
  node_options: {
    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
      # This value can be changed to support tracking an arbitrary number of
      # hands. Please also remember to modify max_vec_size in
      # ClipVectorSizeCalculatorOptions in
      # mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
      min_size: 2
    }
  }
}

# Drops the incoming image if the previous frame had at least N hands.
# Otherwise, passes the incoming image through to trigger a new round of hand
# detection in MultiHandDetectionSubgraph.
node {
  calculator: "GateCalculator"
  input_stream: "throttled_input_video"
  input_stream: "DISALLOW:prev_has_enough_hands"
  output_stream: "multi_hand_detection_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
      empty_packets_as_allow: true
    }
  }
}

# Subgraph that detects hands (see multi_hand_detection_gpu.pbtxt).
node {
  calculator: "MultiHandDetectionSubgraph"
  input_stream: "multi_hand_detection_input_video"
  output_stream: "DETECTIONS:multi_palm_detections"
  output_stream: "NORM_RECTS:multi_palm_rects"
}

# Subgraph that localizes hand landmarks for multiple hands (see
# multi_hand_landmark.pbtxt).
node {
  calculator: "MultiHandLandmarkSubgraph"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "NORM_RECTS:multi_hand_rects"
  output_stream: "LANDMARKS:multi_hand_landmarks"
  output_stream: "NORM_RECTS:multi_hand_rects_from_landmarks"
}

# Caches a hand rectangle fed back from MultiHandLandmarkSubgraph, and upon the
# arrival of the next input image sends out the cached rectangle with the
# timestamp replaced by that of the input image, essentially generating a packet
# that carries the previous hand rectangle. Note that upon the arrival of the
# very first input image, an empty packet is sent out to jump-start the
# feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:throttled_input_video"
  input_stream: "LOOP:multi_hand_rects_from_landmarks"
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_multi_hand_rects_from_landmarks"
}

# Performs association between NormalizedRect vector elements from the previous
# frame and those from the current frame if MultiHandDetectionSubgraph runs.
# This calculator ensures that the output multi_hand_rects vector doesn't
# contain overlapping regions based on the specified min_similarity_threshold.
node {
  calculator: "AssociationNormRectCalculator"
  input_stream: "prev_multi_hand_rects_from_landmarks"
  input_stream: "multi_palm_rects"
  output_stream: "multi_hand_rects"
  node_options: {
    [type.googleapis.com/mediapipe.AssociationCalculatorOptions] {
      min_similarity_threshold: 0.5
    }
  }
}

# Defines side packets for further use in the graph.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:num_faces"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { int_value: 1 }
    }
  }
}

# Subgraph that detects faces and corresponding landmarks.
node {
  calculator: "FaceLandmarkFrontGpu"
  input_stream: "IMAGE:throttled_input_video"
  input_side_packet: "NUM_FACES:num_faces"
  output_stream: "LANDMARKS:multi_face_landmarks"
  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
  output_stream: "DETECTIONS:face_detections"
  output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}

# node {
#   calculator: "ZmqCalculator"
#   input_stream: "LANDMARKS:multi_face_landmarks"
# }

# Subgraph that renders face-landmark annotation onto the input image.
node {
  calculator: "FaceRendererGpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:multi_face_landmarks"
  input_stream: "NORM_RECTS:face_rects_from_landmarks"
  input_stream: "DETECTIONS:face_detections"
  output_stream: "IMAGE:output_video_1"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see multi_hand_renderer_gpu.pbtxt).
node {
  calculator: "MultiHandRendererSubgraph"
  input_stream: "IMAGE:output_video_1"
  input_stream: "DETECTIONS:multi_palm_detections"
  input_stream: "LANDMARKS:multi_hand_landmarks"
  input_stream: "NORM_RECTS:0:multi_palm_rects"
  input_stream: "NORM_RECTS:1:multi_hand_rects"
  output_stream: "IMAGE:output_video"
}


@@ -110,6 +110,12 @@ node {
  }
}

# Publishes hand landmarks and rects over ZeroMQ (see zmq_calculator.cc).
node {
  calculator: "ZmqCalculator"
  input_stream: "LANDMARKS:multi_hand_landmarks"
  input_stream: "NORM_RECTS:multi_hand_rects"
}

# Subgraph that renders annotations and overlays them on top of the input
# images (see multi_hand_renderer_gpu.pbtxt).
node {