Merge branch 'opts' into 'master'

Various optimizations See merge request Percipiote/RapidPoseTriangulation!7
2025-03-04 10:13:30 +00:00
parent 24d706d030 de21be50a2
commit d8fee53284
21 changed files with 7032 additions and 7499 deletions
--- a/README.md
+++ b/README.md
@ -16,6 +16,7 @@ Fast triangulation of multiple persons from multiple camera views.
 ## Build

 - Clone this project with submodules:
+
    ```bash
    git clone --recurse-submodules https://gitlab.com/Percipiote/RapidPoseTriangulation.git
    cd RapidPoseTriangulation/
@ -40,17 +41,19 @@ Fast triangulation of multiple persons from multiple camera views.
  - Restart docker: `sudo systemctl restart docker`

 - Build docker container:
+
  ```bash
  docker build --progress=plain -t rapidposetriangulation .
  ./run_container.sh
  ```

 - Build triangulator:
+
  ```bash
  cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..

  cd /RapidPoseTriangulation/scripts/ && \
-  g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd \
+  g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto \
    -I /RapidPoseTriangulation/rpt/ \
    -isystem /usr/include/opencv4/ \
    -isystem /onnxruntime/include/ \
@ -72,11 +75,13 @@ Fast triangulation of multiple persons from multiple camera views.
  ```

 - Test with samples:
+
  ```bash
  python3 /RapidPoseTriangulation/scripts/test_triangulate.py
  ```

 - Test with _skelda_ dataset:
+
  ```bash
  export CUDA_VISIBLE_DEVICES=0
  python3 /RapidPoseTriangulation/scripts/test_skelda_dataset.py
--- a/extras/easypose/README.md
+++ b/extras/easypose/README.md
@ -1,6 +1,6 @@
 # Test ONNX with EasyPose

-Code files originally from: https://github.com/Dominic23331/EasyPose.git
+Code files originally from: <https://github.com/Dominic23331/EasyPose.git>

 <br>

--- a/extras/jetson/README.md
+++ b/extras/jetson/README.md
@ -8,7 +8,7 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.
 ## Base installation

 - Install newest software image: \
-  (https://developer.nvidia.com/sdk-manager)
+  (<https://developer.nvidia.com/sdk-manager>)

  - Use manual recovery mode setup for first installation

@ -19,7 +19,7 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.
    ```

 - Initialize system: \
-  (https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit)
+  (<https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit>)

  - Connect via _ssh_, because using _screen_ did not work, skip _oem-config_ step

@ -53,7 +53,7 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.
  ```

 - Enable _docker_ without _sudo_: \
-  (https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user)
+  (<https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>)

 - Enable GPU-access for docker building:

@ -97,7 +97,7 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.
  cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..

  cd /RapidPoseTriangulation/scripts/ && \
-  g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd \
+  g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto \
    -I /RapidPoseTriangulation/rpt/ \
    -isystem /usr/include/opencv4/ \
    -isystem /usr/local/include/onnxruntime/ \
@ -130,12 +130,12 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.

  ```bash
  docker build --progress=plain -f extras/ros/dockerfile -t rapidposetriangulation_ros .
-  ./run_container.sh
  ```

 - Run and test:
+
  ```bash
-  docker compose -f extras/jetson/docker-compose.yml up
+  export CAMID="camera01" && docker compose -f extras/jetson/docker-compose-2d.yml up

  docker exec -it jetson-test_node-1 bash
  export ROS_DOMAIN_ID=18
--- a/extras/jetson/RESULTS.md
+++ b/extras/jetson/RESULTS.md
--- a/extras/jetson/docker-compose-2d.yml
+++ b/extras/jetson/docker-compose-2d.yml
@ -32,6 +32,7 @@ services:
            - /tmp/.X11-unix:/tmp/.X11-unix
            - /dev/shm:/dev/shm
        environment:
+            - CAMID
            - DISPLAY
            - QT_X11_NO_MITSHM=1
            - "PYTHONUNBUFFERED=1"
--- a/extras/ros/README.md
+++ b/extras/ros/README.md
@ -5,6 +5,7 @@ Run pose estimator with ros topics as inputs and publish detected poses.
 <br>

 - Build container:
+
  ```bash
  docker build --progress=plain -t rapidposetriangulation_ros -f extras/ros/dockerfile .
  ```
@ -12,8 +13,10 @@ Run pose estimator with ros topics as inputs and publish detected poses.
 - Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment

 - Run and test:
+
  ```bash
-  xhost +; docker compose -f extras/ros/docker-compose.yml up
+  xhost + && export CAMID="camera01" && docker compose -f extras/ros/docker-compose-2d.yml up
+  xhost + && docker compose -f extras/ros/docker-compose-3d.yml up

  docker exec -it ros-test_node-1 bash
  export ROS_DOMAIN_ID=18
--- a/extras/ros/docker-compose-2d.yml
+++ b/extras/ros/docker-compose-2d.yml
@ -32,6 +32,7 @@ services:
            - /tmp/.X11-unix:/tmp/.X11-unix
            - /dev/shm:/dev/shm
        environment:
+            - CAMID
            - DISPLAY
            - QT_X11_NO_MITSHM=1
            - "PYTHONUNBUFFERED=1"
@ -49,10 +50,11 @@ services:
            - /tmp/.X11-unix:/tmp/.X11-unix
            - /dev/shm:/dev/shm
        environment:
+            - CAMID
            - DISPLAY
            - QT_X11_NO_MITSHM=1
            - "PYTHONUNBUFFERED=1"
-        command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer'
+        command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer $CAMID'

    pose_viewer:
        image: rapidposetriangulation_ros
@ -64,7 +66,8 @@ services:
            - /tmp/.X11-unix:/tmp/.X11-unix
            - /dev/shm:/dev/shm
        environment:
+            - CAMID
            - DISPLAY
            - QT_X11_NO_MITSHM=1
            - "PYTHONUNBUFFERED=1"
-        command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run image_view image_view --ros-args --remap image:=/camera01/img_with_pose -p autosize:=True -p window_name:=MyPoseImage'
+        command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run image_view image_view --ros-args --remap image:=/$CAMID/img_with_pose -p autosize:=True -p window_name:=MyPoseImage'
--- a/extras/ros/docker-compose-3d.yml
+++ b/extras/ros/docker-compose-3d.yml
@ -0,0 +1,38 @@
+version: "2.3"
+# runtime: nvidia needs version 2 else change standard runtime at host pc
+
+services:
+
+    test_node:
+        image: rapidposetriangulation_ros
+        network_mode: "host"
+        ipc: "host"
+        runtime: nvidia
+        privileged: true
+        volumes:
+            - ../../:/RapidPoseTriangulation/
+            - ../../skelda/:/skelda/
+            - /tmp/.X11-unix:/tmp/.X11-unix
+            - /dev/shm:/dev/shm
+        environment:
+            - DISPLAY
+            - QT_X11_NO_MITSHM=1
+            - "PYTHONUNBUFFERED=1"
+        command: /bin/bash -i -c 'sleep infinity'
+
+    triangulator:
+        image: rapidposetriangulation_ros
+        network_mode: "host"
+        ipc: "host"
+        runtime: nvidia
+        privileged: true
+        volumes:
+            - ../../:/RapidPoseTriangulation/
+            - ../../skelda/:/skelda/
+            - /tmp/.X11-unix:/tmp/.X11-unix
+            - /dev/shm:/dev/shm
+        environment:
+            - DISPLAY
+            - QT_X11_NO_MITSHM=1
+            - "PYTHONUNBUFFERED=1"
+        command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt3d_wrapper_cpp rpt3d_wrapper'
--- a/extras/ros/dockerfile
+++ b/extras/ros/dockerfile
@ -45,14 +45,17 @@ RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
 # Copy modules
 COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
 COPY ./scripts/ /RapidPoseTriangulation/scripts/
+COPY ./rpt/ /RapidPoseTriangulation/rpt/
 COPY ./extras/ros/rpt_msgs/ /RapidPoseTriangulation/extras/ros/rpt_msgs/
 COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
 COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
+COPY ./extras/ros/rpt3d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/

 # Link and build as ros package
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt_msgs/ /project/dev_ws/src/
 RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/
+RUN ln -s /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/ /project/dev_ws/src/
 RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'

 # Update ros packages -> autocompletion and check
--- a/extras/ros/pose2d_visualizer/pose2d_visualizer/pose2d_visualizer.py
+++ b/extras/ros/pose2d_visualizer/pose2d_visualizer/pose2d_visualizer.py
@ -19,10 +19,9 @@ bridge = CvBridge()
 node = None
 publisher_img = None

-cam_id = "camera01"
-img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
-pose_input_topic = "/poses/" + cam_id
-img_output_topic = "/" + cam_id + "/img_with_pose"
+img_input_topic = "/{}/pylon_ros2_camera_node/image_raw"
+pose_input_topic = "/poses/{}"
+img_output_topic = "/{}/img_with_pose"

 last_input_image = None
 lock = threading.Lock()
@ -113,6 +112,7 @@ def main():

    # Start node
    rclpy.init(args=sys.argv)
+    cam_id = sys.argv[1]
    node = rclpy.create_node("pose2d_visualizer")

    # Quality of service settings
@ -125,19 +125,23 @@ def main():
    # Create subscribers
    _ = node.create_subscription(
        Image,
-        img_input_topic,
+        img_input_topic.format(cam_id),
        callback_images,
        qos_profile,
    )
    _ = node.create_subscription(
        Poses,
-        pose_input_topic,
+        pose_input_topic.format(cam_id),
        callback_poses,
        qos_profile,
    )

    # Create publishers
-    publisher_img = node.create_publisher(Image, img_output_topic, qos_profile)
+    publisher_img = node.create_publisher(
+        Image,
+        img_output_topic.format(cam_id),
+        qos_profile,
+    )

    node.get_logger().info("Finished initialization of pose visualizer")

--- a/extras/ros/rpt2d_wrapper_cpp/src/rpt2d_wrapper.cpp
+++ b/extras/ros/rpt2d_wrapper_cpp/src/rpt2d_wrapper.cpp
@ -1,8 +1,6 @@
 #include <atomic>
 #include <chrono>
-#include <cmath>
 #include <iostream>
-#include <memory>
 #include <string>
 #include <vector>

@ -24,9 +22,8 @@ using json = nlohmann::json;

 // =================================================================================================

-static const std::string cam_id = "camera01";
-static const std::string img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw";
-static const std::string pose_out_topic = "/poses/" + cam_id;
+static const std::string img_input_topic = "/{}/pylon_ros2_camera_node/image_raw";
+static const std::string pose_out_topic = "/poses/{}";

 static const float min_bbox_score = 0.4;
 static const float min_bbox_area = 0.1 * 0.1;
@ -44,10 +41,14 @@ static const std::map<std::string, bool> whole_body = {
 class Rpt2DWrapperNode : public rclcpp::Node
 {
 public:
-    Rpt2DWrapperNode(const std::string &node_name)
-        : Node(node_name)
+    Rpt2DWrapperNode(const std::string &cam_id)
+        : Node("rpt2d_wrapper_" + cam_id)
    {
        this->is_busy = false;
+        std::string img_topic = std::string(img_input_topic)
+                                    .replace(img_input_topic.find("{}"), 2, cam_id);
+        std::string pose_topic = std::string(pose_out_topic)
+                                     .replace(pose_out_topic.find("{}"), 2, cam_id);

        // QoS
        rclcpp::QoS qos_profile(1);
@ -56,11 +57,11 @@ public:

        // Setup subscriber
        image_sub_ = this->create_subscription<sensor_msgs::msg::Image>(
-            img_input_topic, qos_profile,
+            img_topic, qos_profile,
            std::bind(&Rpt2DWrapperNode::callback_images, this, std::placeholders::_1));

        // Setup publisher
-        pose_pub_ = this->create_publisher<rpt_msgs::msg::Poses>(pose_out_topic, qos_profile);
+        pose_pub_ = this->create_publisher<rpt_msgs::msg::Poses>(pose_topic, qos_profile);

        // Load model
        bool use_wb = utils_pipeline::use_whole_body(whole_body);
@ -142,10 +143,10 @@ void Rpt2DWrapperNode::callback_images(const sensor_msgs::msg::Image::SharedPtr
    jdata["timestamps"] = {
        {"trigger", time_stamp},
        {"image", ts_image_sec},
-        {"pose", ts_pose_sec},
+        {"pose2d", ts_pose_sec},
        {"z-trigger-image", z_trigger_image},
-        {"z-image-pose", z_image_pose},
-        {"z-trigger-pose", z_trigger_pose}};
+        {"z-image-pose2d", z_image_pose},
+        {"z-trigger-pose2d", z_trigger_pose}};

    // Publish message
    auto pose_msg = rpt_msgs::msg::Poses();
@ -153,11 +154,11 @@ void Rpt2DWrapperNode::callback_images(const sensor_msgs::msg::Image::SharedPtr
    std::vector<int32_t> bshape = {(int)valid_poses.size(), (int)joint_names_2d.size(), 3};
    pose_msg.bodies_shape = bshape;
    pose_msg.bodies_flat.reserve(bshape[0] * bshape[1] * bshape[2]);
-    for (size_t i = 0; i < bshape[0]; i++)
+    for (int32_t i = 0; i < bshape[0]; i++)
    {
-        for (size_t j = 0; j < bshape[1]; j++)
+        for (int32_t j = 0; j < bshape[1]; j++)
        {
-            for (size_t k = 0; k < bshape[2]; k++)
+            for (int32_t k = 0; k < bshape[2]; k++)
            {
                pose_msg.bodies_flat.push_back(valid_poses[i][j][k]);
            }
@ -226,8 +227,9 @@ std::vector<std::vector<std::array<float, 3>>> Rpt2DWrapperNode::call_model(cons
 int main(int argc, char **argv)
 {
    rclcpp::init(argc, argv);
+    const std::string cam_id = std::getenv("CAMID");

-    auto node = std::make_shared<Rpt2DWrapperNode>("rpt2d_wrapper");
+    auto node = std::make_shared<Rpt2DWrapperNode>(cam_id);
    rclcpp::spin(node);

    rclcpp::shutdown();
--- a/extras/ros/rpt3d_wrapper_cpp/CMakeLists.txt
+++ b/extras/ros/rpt3d_wrapper_cpp/CMakeLists.txt
@ -0,0 +1,64 @@
+cmake_minimum_required(VERSION 3.5)
+project(rpt3d_wrapper_cpp)
+
+# Default to C99
+if(NOT CMAKE_C_STANDARD)
+  set(CMAKE_C_STANDARD 99)
+endif()
+
+# Default to C++20
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 20)
+endif()
+
+if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  add_compile_options(-Wall -Wextra -Wpedantic)
+endif()
+
+# Find dependencies
+find_package(ament_cmake REQUIRED)
+find_package(rclcpp REQUIRED)
+find_package(rpt_msgs REQUIRED)
+find_package(std_msgs REQUIRED)
+
+# Add RapidPoseTriangulation implementation
+set(RAPID_POSE_TRIANGULATION_DIR "/RapidPoseTriangulation")
+add_library(rapid_pose_triangulation
+${RAPID_POSE_TRIANGULATION_DIR}/rpt/camera.cpp
+  ${RAPID_POSE_TRIANGULATION_DIR}/rpt/interface.cpp
+  ${RAPID_POSE_TRIANGULATION_DIR}/rpt/triangulator.cpp
+)
+target_include_directories(rapid_pose_triangulation PUBLIC
+  ${RAPID_POSE_TRIANGULATION_DIR}/extras/include
+  ${RAPID_POSE_TRIANGULATION_DIR}/rpt
+)
+target_compile_options(rapid_pose_triangulation PUBLIC
+  -fPIC -O3 -march=native -Wall -Werror
+)
+
+# Build the executable
+add_executable(rpt3d_wrapper src/rpt3d_wrapper.cpp)
+ament_target_dependencies(rpt3d_wrapper rclcpp std_msgs rpt_msgs)
+target_include_directories(rpt3d_wrapper PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:include>)
+
+target_link_libraries(rpt3d_wrapper
+  rapid_pose_triangulation
+)
+
+install(TARGETS rpt3d_wrapper
+  DESTINATION lib/${PROJECT_NAME})
+
+if(BUILD_TESTING)
+  find_package(ament_lint_auto REQUIRED)
+  # the following line skips the linter which checks for copyrights
+  # uncomment the line when a copyright and license is not present in all source files
+  #set(ament_cmake_copyright_FOUND TRUE)
+  # the following line skips cpplint (only works in a git repo)
+  # uncomment the line when this package is not in a git repo
+  #set(ament_cmake_cpplint_FOUND TRUE)
+  ament_lint_auto_find_test_dependencies()
+endif()
+
+ament_package()
--- a/extras/ros/rpt3d_wrapper_cpp/package.xml
+++ b/extras/ros/rpt3d_wrapper_cpp/package.xml
@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>rpt3d_wrapper_cpp</name>
+  <version>0.0.0</version>
+  <description>TODO: Package description</description>
+  <maintainer email="root@todo.todo">root</maintainer>
+  <license>TODO: License declaration</license>
+
+  <buildtool_depend>ament_cmake</buildtool_depend>
+
+  <depend>rclcpp</depend>
+  <depend>rpt_msgs</depend>
+  <depend>std_msgs</depend>
+
+  <depend>OpenCV</depend>
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>ament_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
--- a/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp
+++ b/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp
@ -0,0 +1,293 @@
+#include <atomic>
+#include <chrono>
+#include <iostream>
+#include <mutex>
+#include <string>
+#include <vector>
+
+// ROS2
+#include <rclcpp/rclcpp.hpp>
+#include <std_msgs/msg/string.hpp>
+
+// JSON library
+#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
+using json = nlohmann::json;
+
+#include "rpt_msgs/msg/poses.hpp"
+#include "/RapidPoseTriangulation/rpt/camera.hpp"
+#include "/RapidPoseTriangulation/rpt/interface.hpp"
+
+// =================================================================================================
+
+static const std::vector<std::string> cam_ids = {
+    "camera01",
+    "camera02",
+    "camera03",
+    "camera04",
+    "camera05",
+    "camera06",
+    "camera07",
+    "camera08",
+    "camera09",
+    "camera10",
+};
+
+static const std::string pose_in_topic = "/poses/{}";
+static const std::string cam_info_topic = "/{}/calibration";
+static const std::string pose_out_topic = "/poses/humans3d";
+
+static const float min_match_score = 0.95;
+static const size_t min_group_size = 1;
+
+static const std::array<std::array<float, 3>, 2> roomparams = {{
+    {4.0, 5.0, 2.2},
+    {1.0, 0.0, 1.1},
+}};
+
+// =================================================================================================
+// =================================================================================================
+
+class Rpt3DWrapperNode : public rclcpp::Node
+{
+public:
+    Rpt3DWrapperNode()
+        : Node("rpt3d_wrapper")
+    {
+        this->all_cameras.resize(cam_ids.size());
+        this->all_poses.resize(cam_ids.size());
+        this->all_timestamps.resize(cam_ids.size());
+        this->joint_names = {};
+        this->all_poses_set.resize(cam_ids.size(), false);
+
+        // Load 3D model
+        tri_model = std::make_unique<Triangulator>(
+            min_match_score, min_group_size);
+
+        // QoS
+        rclcpp::QoS qos_profile(1);
+        qos_profile.reliable();
+        qos_profile.keep_last(1);
+
+        // Parallel executable callbacks
+        auto my_callback_group = create_callback_group(rclcpp::CallbackGroupType::Reentrant);
+        rclcpp::SubscriptionOptions options;
+        options.callback_group = my_callback_group;
+
+        // Setup subscribers
+        for (size_t i = 0; i < cam_ids.size(); i++)
+        {
+            std::string cam_id = cam_ids[i];
+            std::string topic_pose = pose_in_topic;
+            std::string topic_cam = cam_info_topic;
+            topic_pose.replace(topic_pose.find("{}"), 2, cam_id);
+            topic_cam.replace(topic_cam.find("{}"), 2, cam_id);
+
+            auto sub_pose = this->create_subscription<rpt_msgs::msg::Poses>(
+                topic_pose, qos_profile,
+                [this, i](const rpt_msgs::msg::Poses::SharedPtr msg)
+                {
+                    this->callback_poses(msg, i);
+                },
+                options);
+            sub_pose_list_.push_back(sub_pose);
+
+            auto sub_cam = this->create_subscription<std_msgs::msg::String>(
+                topic_cam, qos_profile,
+                [this, i](const std_msgs::msg::String::SharedPtr msg)
+                {
+                    this->callback_cam_info(msg, i);
+                },
+                options);
+            sub_cam_list_.push_back(sub_cam);
+        }
+
+        // Setup publisher
+        pose_pub_ = this->create_publisher<rpt_msgs::msg::Poses>(pose_out_topic, qos_profile);
+
+        RCLCPP_INFO(this->get_logger(), "Finished initialization of pose triangulator.");
+    }
+
+private:
+    std::vector<rclcpp::Subscription<rpt_msgs::msg::Poses>::SharedPtr> sub_pose_list_;
+    std::vector<rclcpp::Subscription<std_msgs::msg::String>::SharedPtr> sub_cam_list_;
+    rclcpp::Publisher<rpt_msgs::msg::Poses>::SharedPtr pose_pub_;
+
+    std::unique_ptr<Triangulator> tri_model;
+    std::vector<Camera> all_cameras;
+    std::mutex cams_mutex, pose_mutex, model_mutex;
+
+    std::vector<std::vector<std::vector<std::array<float, 3>>>> all_poses;
+    std::vector<double> all_timestamps;
+    std::vector<std::string> joint_names;
+    std::vector<bool> all_poses_set;
+
+    void callback_poses(const rpt_msgs::msg::Poses::SharedPtr msg, size_t cam_idx);
+    void callback_cam_info(const std_msgs::msg::String::SharedPtr msg, size_t cam_idx);
+    void call_model();
+};
+
+// =================================================================================================
+
+void Rpt3DWrapperNode::callback_cam_info(const std_msgs::msg::String::SharedPtr msg, size_t cam_idx)
+{
+    json cam = json::parse(msg->data);
+
+    Camera camera;
+    camera.name = cam["name"].get<std::string>();
+    camera.K = cam["K"].get<std::array<std::array<float, 3>, 3>>();
+    camera.DC = cam["DC"].get<std::vector<float>>();
+    camera.R = cam["R"].get<std::array<std::array<float, 3>, 3>>();
+    camera.T = cam["T"].get<std::array<std::array<float, 1>, 3>>();
+    camera.width = cam["width"].get<int>();
+    camera.height = cam["height"].get<int>();
+    camera.type = cam["type"].get<std::string>();
+
+    cams_mutex.lock();
+    all_cameras[cam_idx] = camera;
+    cams_mutex.unlock();
+}
+
+// =================================================================================================
+
+void Rpt3DWrapperNode::callback_poses(const rpt_msgs::msg::Poses::SharedPtr msg, size_t cam_idx)
+{
+    std::vector<std::vector<std::array<float, 3>>> poses;
+    if (this->joint_names.empty())
+    {
+        this->joint_names = msg->joint_names;
+    }
+
+    // Unflatten poses
+    size_t idx = 0;
+    std::vector<int32_t> &bshape = msg->bodies_shape;
+    for (int32_t i = 0; i < bshape[0]; ++i)
+    {
+        std::vector<std::array<float, 3>> body;
+        for (int32_t j = 0; j < bshape[1]; ++j)
+        {
+            std::array<float, 3> joint;
+            for (int32_t k = 0; k < bshape[2]; ++k)
+            {
+                joint[k] = msg->bodies_flat[idx];
+                ++idx;
+            }
+            body.push_back(std::move(joint));
+        }
+        poses.push_back(std::move(body));
+    }
+
+    // If no pose was detected, create an empty placeholder
+    if (poses.size() == 0)
+    {
+        std::vector<std::array<float, 3>> body(joint_names.size(), {0, 0, 0});
+        poses.push_back(std::move(body));
+    }
+
+    pose_mutex.lock();
+    this->all_poses[cam_idx] = std::move(poses);
+    this->all_poses_set[cam_idx] = true;
+    this->all_timestamps[cam_idx] = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;
+    pose_mutex.unlock();
+
+    // Trigger model callback
+    model_mutex.lock();
+    call_model();
+    model_mutex.unlock();
+}
+
+// =================================================================================================
+
+void Rpt3DWrapperNode::call_model()
+{
+    auto ts_msg = std::chrono::high_resolution_clock::now();
+
+    // Check if all cameras are set
+    cams_mutex.lock();
+    for (size_t i = 0; i < cam_ids.size(); i++)
+    {
+        if (all_cameras[i].name.empty())
+        {
+            RCLCPP_WARN(this->get_logger(), "Skipping frame, still waiting for cameras...");
+            cams_mutex.unlock();
+            return;
+        }
+    }
+    cams_mutex.unlock();
+
+    // If not all poses are set, return and wait for the others
+    pose_mutex.lock();
+    for (size_t i = 0; i < cam_ids.size(); i++)
+    {
+        if (!this->all_poses_set[i])
+        {
+            pose_mutex.unlock();
+            return;
+        }
+    }
+    pose_mutex.unlock();
+
+    // Call model, and meanwhile lock updates of the inputs
+    // Since the prediction is very fast, parallel callback threads only need to wait a short time
+    cams_mutex.lock();
+    pose_mutex.lock();
+    const auto valid_poses = tri_model->triangulate_poses(
+        all_poses, all_cameras, roomparams, joint_names);
+
+    double min_ts = *std::min_element(all_timestamps.begin(), all_timestamps.end());
+    this->all_poses_set = std::vector<bool>(cam_ids.size(), false);
+    pose_mutex.unlock();
+    cams_mutex.unlock();
+
+    // Calculate timings
+    auto ts_pose = std::chrono::high_resolution_clock::now();
+    double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
+    double z_trigger_pose3d = ts_pose_sec - min_ts;
+    json jdata;
+    jdata["timestamps"] = {
+        {"trigger", min_ts},
+        {"pose3d", ts_pose_sec},
+        {"z-trigger-pose3d", z_trigger_pose3d}};
+
+    // Publish message
+    auto pose_msg = rpt_msgs::msg::Poses();
+    pose_msg.header.stamp.sec = static_cast<int>(min_ts);
+    pose_msg.header.stamp.nanosec = (min_ts - pose_msg.header.stamp.sec) * 1.0e9;
+    pose_msg.header.frame_id = "world";
+    std::vector<int32_t> pshape = {(int)valid_poses.size(), (int)joint_names.size(), 4};
+    pose_msg.bodies_shape = pshape;
+    pose_msg.bodies_flat.reserve(pshape[0] * pshape[1] * pshape[2]);
+    for (int32_t i = 0; i < pshape[0]; i++)
+    {
+        for (int32_t j = 0; j < pshape[1]; j++)
+        {
+            for (int32_t k = 0; k < pshape[2]; k++)
+            {
+                pose_msg.bodies_flat.push_back(valid_poses[i][j][k]);
+            }
+        }
+    }
+    pose_msg.joint_names = joint_names;
+    pose_msg.extra_data = jdata.dump();
+    pose_pub_->publish(pose_msg);
+
+    // Print info
+    double elapsed_time = std::chrono::duration<double>(
+                              std::chrono::high_resolution_clock::now() - ts_msg)
+                              .count();
+    std::cout << "Detected persons: " << valid_poses.size()
+              << " - Prediction time: " << elapsed_time << "s" << std::endl;
+}
+
+// =================================================================================================
+// =================================================================================================
+
+int main(int argc, char **argv)
+{
+    rclcpp::init(argc, argv);
+    auto node = std::make_shared<Rpt3DWrapperNode>();
+    rclcpp::executors::MultiThreadedExecutor exec;
+    exec.add_node(node);
+    exec.spin();
+    rclcpp::shutdown();
+    return 0;
+}
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
--- a/rpt/camera.cpp
+++ b/rpt/camera.cpp
@ -56,7 +56,6 @@ std::string Camera::to_string() const

    out << "'R': " << print_matrix(R) << ", ";
    out << "'T': " << print_matrix(T) << ", ";
-    out << "'P': " << print_matrix(P) << ", ";

    out << "'width': " << width << ", ";
    out << "'height': " << height << ", ";
--- a/rpt/camera.hpp
+++ b/rpt/camera.hpp
@ -14,7 +14,6 @@ struct Camera
    std::vector<float> DC;
    std::array<std::array<float, 3>, 3> R;
    std::array<std::array<float, 1>, 3> T;
-    std::array<std::array<float, 3>, 4> P;
    int width;
    int height;
    std::string type;
--- a/rpt/triangulator.cpp
+++ b/rpt/triangulator.cpp
--- a/rpt/triangulator.hpp
+++ b/rpt/triangulator.hpp
@ -1,12 +1,9 @@
 #pragma once

 #include <array>
-#include <iostream>
 #include <string>
 #include <vector>

-#include <opencv2/opencv.hpp>
-
 #include "camera.hpp"

 // =================================================================================================
@ -17,13 +14,25 @@ public:
    CameraInternal(const Camera &cam);

    Camera cam;
-    cv::Mat K;
-    cv::Mat DC;
-    cv::Mat R;
-    cv::Mat T;
-    cv::Mat P;

-    void update_projection_matrix();
+    std::array<std::array<float, 3>, 3> invR;
+    std::array<float, 3> center;
+    std::array<std::array<float, 3>, 3> newK;
+    std::array<std::array<float, 3>, 3> invK;
+
+    static std::array<std::array<float, 3>, 3> transpose3x3(
+        const std::array<std::array<float, 3>, 3> &M);
+
+    static std::array<std::array<float, 3>, 3> invert3x3(
+        const std::array<std::array<float, 3>, 3> &M);
+
+    static void undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k);
+    static void undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k);
+
+    std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
+        float balance, std::pair<int, int> new_size);
+    std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
+        float alpha, std::pair<int, int> new_size);
 };

 // =================================================================================================
@ -70,64 +79,76 @@ private:
        {"shoulder_left", "elbow_left"},
        {"shoulder_right", "elbow_right"},
    };
-    std::vector<cv::Mat> last_poses_3d;
+    std::vector<std::vector<std::array<float, 4>>> last_poses_3d;

-    void undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam);
+    void undistort_poses(
+        std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam);

-    std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>> project_poses(
-        const std::vector<cv::Mat> &bodies3D, const CameraInternal &icam, bool calc_dists);
+    std::tuple<std::vector<std::vector<std::array<float, 3>>>, std::vector<std::vector<float>>>
+    project_poses(
+        const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
+        const CameraInternal &icam,
+        bool calc_dists);

    float calc_pose_score(
-        const cv::Mat &pose1,
-        const cv::Mat &pose2,
-        const cv::Mat &dist1,
+        const std::vector<std::array<float, 3>> &pose1,
+        const std::vector<std::array<float, 3>> &pose2,
+        const std::vector<float> &dist1,
        const CameraInternal &icam);

-    cv::Mat score_projection(
-        const cv::Mat &pose1,
-        const cv::Mat &repro1,
-        const cv::Mat &dists1,
-        const cv::Mat &mask,
+    std::vector<float> score_projection(
+        const std::vector<std::array<float, 3>> &pose,
+        const std::vector<std::array<float, 3>> &repro,
+        const std::vector<float> &dists,
+        const std::vector<bool> &mask,
        float iscale);

-    std::pair<cv::Mat, float> triangulate_and_score(
-        const cv::Mat &pose1,
-        const cv::Mat &pose2,
+    std::pair<std::vector<std::array<float, 4>>, float> triangulate_and_score(
+        const std::vector<std::array<float, 3>> &pose1,
+        const std::vector<std::array<float, 3>> &pose2,
        const CameraInternal &cam1,
        const CameraInternal &cam2,
        const std::array<std::array<float, 3>, 2> &roomparams,
        const std::vector<std::array<size_t, 2>> &core_limbs_idx);

-    std::vector<std::tuple<cv::Point3f, cv::Mat, std::vector<int>>> calc_grouping(
-        const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
-        const std::vector<std::pair<cv::Mat, float>> &all_scored_poses,
+    std::vector<std::tuple<
+        std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
+    calc_grouping(
+        const std::vector<std::pair<
+            std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
+        const std::vector<std::pair<std::vector<std::array<float, 4>>, float>> &all_scored_poses,
        float min_score);

-    cv::Mat merge_group(const std::vector<cv::Mat> &poses_3d, float min_score);
+    std::vector<std::array<float, 4>> merge_group(
+        const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
+        float min_score);

-    void add_extra_joints(std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
+    void add_extra_joints(
+        std::vector<std::vector<std::array<float, 4>>> &poses,
+        const std::vector<std::string> &joint_names);

    void filter_poses(
-        std::vector<cv::Mat> &poses,
+        std::vector<std::vector<std::array<float, 4>>> &poses,
        const std::array<std::array<float, 3>, 2> &roomparams,
        const std::vector<size_t> &core_joint_idx,
        const std::vector<std::array<size_t, 2>> &core_limbs_idx);

    void add_missing_joints(
-        std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
+        std::vector<std::vector<std::array<float, 4>>> &poses,
+        const std::vector<std::string> &joint_names);

    // Statistics
-    float num_calls = 0;
-    float total_time = 0;
-    float init_time = 0;
-    float undistort_time = 0;
-    float project_time = 0;
-    float match_time = 0;
-    float pairs_time = 0;
-    float pair_scoring_time = 0;
-    float grouping_time = 0;
-    float full_time = 0;
-    float merge_time = 0;
-    float post_time = 0;
-    float convert_time = 0;
+    double num_calls = 0;
+    double total_time = 0;
+    double init_time = 0;
+    double undistort_time = 0;
+    double project_time = 0;
+    double match_time = 0;
+    double pairs_time = 0;
+    double pair_scoring_time = 0;
+    double grouping_time = 0;
+    double full_time = 0;
+    double merge_time = 0;
+    double post_time = 0;
+    double convert_time = 0;
 };
--- a/scripts/utils_2d_pose.hpp
+++ b/scripts/utils_2d_pose.hpp
@ -486,16 +486,16 @@ namespace utils_2d_pose
        float new_end_yf = center_y + 0.5f * adjusted_h;

        // Round the box coordinates
-        int start_x_i = static_cast<int>(std::floor(new_start_xf));
-        int start_y_i = static_cast<int>(std::floor(new_start_yf));
-        int end_x_i = static_cast<int>(std::ceil(new_end_xf));
-        int end_y_i = static_cast<int>(std::ceil(new_end_yf));
+        int start_xi = static_cast<int>(std::floor(new_start_xf));
+        int start_yi = static_cast<int>(std::floor(new_start_yf));
+        int end_xi = static_cast<int>(std::ceil(new_end_xf));
+        int end_yi = static_cast<int>(std::ceil(new_end_yf));

        // Define the new box coordinates
-        int new_start_x = std::max(0, start_x_i);
-        int new_start_y = std::max(0, start_y_i);
-        int new_end_x = std::min(img_w - 1, end_x_i);
-        int new_end_y = std::min(img_h - 1, end_y_i);
+        int new_start_x = std::max(0, start_xi);
+        int new_start_y = std::max(0, start_yi);
+        int new_end_x = std::min(img_w - 1, end_xi);
+        int new_end_y = std::min(img_h - 1, end_yi);
        std::array<int, 4> new_box{new_start_x, new_start_y, new_end_x, new_end_y};

        // Calculate resized crop size
@ -516,12 +516,12 @@ namespace utils_2d_pose
        int pad_bottom = 0;
        if (pad_w > 0)
        {
-            if (start_x_i < 0)
+            if (start_xi < 0)
            {
                pad_left = pad_w;
                pad_right = 0;
            }
-            else if (end_x_i > img_w)
+            else if (end_xi > img_w)
            {
                pad_left = 0;
                pad_right = pad_w;
@ -535,12 +535,12 @@ namespace utils_2d_pose
        }
        if (pad_h > 0)
        {
-            if (start_y_i < 0)
+            if (start_yi < 0)
            {
                pad_top = pad_h;
                pad_bottom = 0;
            }
-            else if (end_y_i > img_h)
+            else if (end_yi > img_h)
            {
                pad_top = 0;
                pad_bottom = pad_h;
@ -867,8 +867,6 @@ namespace utils_2d_pose
        int pad_top = paddings[2];
        int box_left = box[0];
        int box_top = box[1];
-        int img_w = image.cols;
-        int img_h = image.rows;

        for (auto &kp : kpts)
        {
@ -884,10 +882,6 @@ namespace utils_2d_pose

            x += box_left;
            y += box_top;
-
-            // Clamp to iamge region
-            x = std::max(0.0f, std::min(x, img_w - 1.0f));
-            y = std::max(0.0f, std::min(y, img_h - 1.0f));
        }
    }

@ -949,6 +943,37 @@ namespace utils_2d_pose
        // Sometimes the detection model predicts multiple boxes with different shapes for the same
        // person. They then result in strongly overlapping poses, which are merged here.
        merge_close_poses(poses, {(size_t)image.cols, (size_t)image.rows});
+
+        // Clip keypoints far outside the image
+        float mask_offset = (image.cols + image.rows) / 10.0;
+        for (size_t i = 0; i < poses.size(); ++i)
+        {
+            for (size_t j = 0; j < poses[i].size(); ++j)
+            {
+                auto &kp = poses[i][j];
+                if (kp[0] < -mask_offset)
+                {
+                    kp[0] = -mask_offset;
+                    kp[2] = 0.001;
+                }
+                if (kp[1] < -mask_offset)
+                {
+                    kp[1] = -mask_offset;
+                    kp[2] = 0.001;
+                }
+                if (kp[0] >= image.cols + mask_offset)
+                {
+                    kp[0] = image.cols + mask_offset;
+                    kp[2] = 0.001;
+                }
+                if (kp[1] >= image.rows + mask_offset)
+                {
+                    kp[1] = image.rows + mask_offset;
+                    kp[2] = 0.001;
+                }
+            }
+        }
+
        return poses;
    }

--- a/swig/Makefile
+++ b/swig/Makefile
@ -1,5 +1,5 @@
 # Standard compile options for the C++ executable
-FLAGS = -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd
+FLAGS = -fPIC -O3 -march=native -Wall -Werror -flto=auto

 # The Python interface through SWIG
 PYTHON_VERSION = $(shell python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}");')
@ -8,7 +8,7 @@ PYTHONL = -Xlinker -export-dynamic

 # Default super-target
 all:
-	cd ../rpt/ && g++ $(FLAGS) -std=c++2a -isystem /usr/include/opencv4/ -c *.cpp ; cd ../swig/
+	cd ../rpt/ && g++ $(FLAGS) -std=c++2a -c *.cpp ; cd ../swig/
 	swig -c++ -python -keyword -o rpt_wrap.cxx rpt.i
 	g++ $(FLAGS) $(PYTHONI) -c rpt_wrap.cxx -o rpt_wrap.o
-	g++ $(FLAGS) $(PYTHONL) -shared ../rpt/*.o rpt_wrap.o -lopencv_core -lopencv_imgproc -lopencv_calib3d -o _rpt.so
+	g++ $(FLAGS) $(PYTHONL) -shared ../rpt/*.o rpt_wrap.o -o _rpt.so