Merge branch 'ros' into 'master'

Ros wrapper

See merge request Percipiote/RapidPoseTriangulation!5
This commit is contained in:
DANBER
2025-01-27 13:12:42 +00:00
29 changed files with 32232 additions and 4735 deletions


@ -30,6 +30,27 @@ Fast triangulation of multiple persons from multiple camera views.
- Build triangulator:
```bash
cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..
+cd /RapidPoseTriangulation/scripts/ && \
+    g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd \
+    -I /RapidPoseTriangulation/rpt/ \
+    -isystem /usr/include/opencv4/ \
+    -isystem /onnxruntime/include/ \
+    -isystem /onnxruntime/include/onnxruntime/core/session/ \
+    -isystem /onnxruntime/include/onnxruntime/core/providers/tensorrt/ \
+    -L /onnxruntime/build/Linux/Release/ \
+    test_skelda_dataset.cpp \
+    /RapidPoseTriangulation/rpt/*.cpp \
+    -o test_skelda_dataset.bin \
+    -Wl,--start-group \
+    -lonnxruntime_providers_tensorrt \
+    -lonnxruntime_providers_shared \
+    -lonnxruntime_providers_cuda \
+    -lonnxruntime \
+    -Wl,--end-group \
+    $(pkg-config --libs opencv4) \
+    -Wl,-rpath,/onnxruntime/build/Linux/Release/ \
+    && cd ..
```
- Test with samples:
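The compiled binary reads its inputs (the exported dataset `all.json` and `config.json`) from `/tmp/rpt/`, which the evaluation script further down in this diff writes out; a minimal smoke test, assuming those exports already exist:

```bash
cd /RapidPoseTriangulation/scripts/ && ./test_skelda_dataset.bin
```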

File diff suppressed because it is too large


@ -54,6 +54,7 @@ def add_steps_to_onnx(model_path):
        inputs=[input_name],
        outputs=[casted_output],
        to=cast_type,
+        name="Cast_Input",
    )
    # Node to transpose
@ -118,6 +119,90 @@ def add_steps_to_onnx(model_path):
    # Set input image type to int8
    model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
+    # Cast all outputs to fp32 to avoid half precision issues in cpp code
+    for output in graph.output:
+        orig_output_name = output.name
+        internal_output_name = orig_output_name + "_internal"
+        # Rename the output tensor
+        for node in model.graph.node:
+            for idx, name in enumerate(node.output):
+                if name == orig_output_name:
+                    node.output[idx] = internal_output_name
+        # Insert a Cast node that casts the internal output to fp32
+        cast_fp32_name = orig_output_name
+        cast_node_output = helper.make_node(
+            "Cast",
+            inputs=[internal_output_name],
+            outputs=[cast_fp32_name],
+            to=1,
+            name="Cast_Output_" + orig_output_name,
+        )
+        # Append the cast node to the graph
+        graph.node.append(cast_node_output)
+        # Update the output's data type info
+        output.type.tensor_type.elem_type = TensorProto.FLOAT
+    # Merge the two outputs
+    if "det" in model_path:
+        r1_output = "dets"
+        r2_output = "labels"
+        out_name = "bboxes"
+        out_dim = 6
+    if "pose" in model_path:
+        r1_output = "kpts"
+        r2_output = "scores"
+        out_name = "keypoints"
+        out_dim = 3
+    if "det" in model_path or "pose" in model_path:
+        # Node to expand
+        r2_expanded = r2_output + "_expanded"
+        unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=[r2_output],
+            outputs=[r2_expanded],
+            axes=[2],
+            name="Unsqueeze",
+        )
+        # Node to concatenate
+        r12_merged = out_name
+        concat_node = helper.make_node(
+            "Concat",
+            inputs=[r1_output, r2_expanded],
+            outputs=[r12_merged],
+            axis=2,
+            name="Merged",
+        )
+        # Define the new concatenated output
+        merged_output = helper.make_tensor_value_info(
+            r12_merged,
+            TensorProto.FLOAT,
+            [
+                (
+                    graph.input[0].type.tensor_type.shape.dim[0].dim_value
+                    if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
+                    else None
+                ),
+                (
+                    graph.output[0].type.tensor_type.shape.dim[1].dim_value
+                    if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
+                    else None
+                ),
+                out_dim,
+            ],
+        )
+        # Update the graph
+        graph.node.append(unsqueeze_node)
+        graph.node.append(concat_node)
+        graph.output.pop()
+        graph.output.pop()
+        graph.output.append(merged_output)
    path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
    path = path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)
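A quick sanity check of the exported model; a minimal sketch, assuming a file named following the `_extra-steps` scheme above (the file name is hypothetical, `onnx.checker` and the graph inspection are standard `onnx` APIs):

```python
import onnx
from onnx import TensorProto

# Hypothetical file name following the naming scheme above
model = onnx.load("det_extra-steps.onnx")
onnx.checker.check_model(model)

# The input should now be uint8 and all remaining outputs fp32
assert model.graph.input[0].type.tensor_type.elem_type == TensorProto.UINT8
for out in model.graph.output:
    assert out.type.tensor_type.elem_type == TensorProto.FLOAT
print([out.name for out in model.graph.output])
```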

extras/ros/README.md (new file, 20 lines)

@ -0,0 +1,20 @@
# ROS-Wrapper
Run the pose estimator with ROS topics as inputs and publish the detected poses.
<br>
- Build container:
```bash
docker build --progress=plain -t rapidposetriangulation_ros -f extras/ros/dockerfile .
```
- Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment
- Run and test:
```bash
xhost +; docker compose -f extras/ros/docker-compose.yml up
docker exec -it ros-test_node-1 bash
export ROS_DOMAIN_ID=18
```
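- Verify the topics (the names follow the wrapper and visualizer nodes in this merge request):
```bash
ros2 topic list
ros2 topic echo /poses/camera01 --once
ros2 topic hz /camera01/img_with_pose
```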


@ -0,0 +1,70 @@
version: "2.3"
# runtime: nvidia needs version 2 else change standard runtime at host pc
services:
test_node:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep infinity'
estimator:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
pose_visualizer:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer'
pose_viewer:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run image_view image_view --ros-args --remap image:=/camera01/img_with_pose -p autosize:=True -p window_name:=MyImage'

extras/ros/dockerfile (new file, 82 lines)

@ -0,0 +1,82 @@
FROM rapidposetriangulation
WORKDIR /
# Install ONNX runtime
# See: https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.tensorrt
RUN pip3 uninstall -y onnxruntime-gpu
RUN git clone --recursive --depth=1 --branch=v1.20.1 https://github.com/Microsoft/onnxruntime.git
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/cmake-3.30.1-linux-x86_64/bin:${PATH}
ARG CMAKE_CUDA_ARCHITECTURES=75;80;90
ENV TRT_VERSION=10.5.0.18
RUN /bin/sh onnxruntime/dockerfiles/scripts/install_common_deps.sh
RUN /bin/sh onnxruntime/dockerfiles/scripts/checkout_submodules.sh ${TRT_VERSION}
RUN ls
RUN cd onnxruntime && \
/bin/sh build.sh --allow_running_as_root --parallel --build_shared_lib \
--cuda_home /usr/local/cuda --cudnn_home /usr/lib/x86_64-linux-gnu/ --use_tensorrt \
--tensorrt_home /usr/lib/x86_64-linux-gnu/ --config Release --build_wheel --skip_tests \
--skip_submodule_sync --cmake_extra_defines '"CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'"'
RUN cd onnxruntime && pip install build/Linux/Release/dist/*.whl
# Install ROS2
# https://docs.ros.org/en/humble/Installation/Ubuntu-Install-Debians.html
RUN apt-get update && apt-get install -y --no-install-recommends locales
RUN locale-gen en_US en_US.UTF-8 && update-locale LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common
RUN add-apt-repository universe
RUN apt-get update && apt-get install -y --no-install-recommends curl
RUN curl -sSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.key -o /usr/share/keyrings/ros-archive-keyring.gpg
RUN echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu $(. /etc/os-release && echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/ros2.list
RUN apt-get update && apt-get install -y --no-install-recommends ros-humble-ros-base python3-argcomplete
RUN apt-get update && apt-get install -y --no-install-recommends ros-dev-tools
RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
# Create ROS2 workspace for basic packages
RUN mkdir -p /project/base/src/
RUN cd /project/base/; colcon build
RUN echo "source /project/base/install/setup.bash" >> ~/.bashrc
# Install opencv and cv_bridge
RUN apt-get update && apt-get install -y --no-install-recommends libboost-dev
RUN apt-get update && apt-get install -y --no-install-recommends libboost-python-dev
RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev
RUN cd /project/base/src/; git clone --branch humble --depth=1 https://github.com/ros-perception/vision_opencv.git
RUN /bin/bash -i -c 'cd /project/base/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Install ROS2 image viewer
RUN cd /project/base/src/; git clone --branch=humble --depth=1 https://github.com/ros-perception/image_pipeline.git
RUN cd /project/base/src/; git clone --branch=humble --depth=1 https://github.com/ros-perception/image_common.git
RUN /bin/bash -i -c 'cd /project/base/; colcon build --symlink-install --packages-select camera_calibration_parsers image_transport image_view --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Fix module not found error when displaying images
RUN apt-get update && apt-get install -y --no-install-recommends libcanberra-gtk-module libcanberra-gtk3-module
# Create ROS2 workspace for project packages
RUN mkdir -p /project/dev_ws/src/
RUN cd /project/dev_ws/; colcon build
RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
# Fix ros package building error
RUN pip3 install --no-cache-dir "setuptools<=58.2.0"
# Copy modules
COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
COPY ./scripts/ /RapidPoseTriangulation/scripts/
COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
# Link and build as ros package
RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/pose2d_visualizer
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/rpt2d_wrapper_cpp
RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Update ros packages -> autocompletion and check
RUN /bin/bash -i -c 'ros2 pkg list'
# Clear cache to save space, only has an effect if image is squashed
RUN apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /RapidPoseTriangulation/
CMD ["/bin/bash"]


@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>pose2d_visualizer</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<test_depend>ament_copyright</test_depend>
<test_depend>ament_flake8</test_depend>
<test_depend>ament_pep257</test_depend>
<test_depend>python3-pytest</test_depend>
<export>
<build_type>ament_python</build_type>
</export>
</package>


@ -0,0 +1,152 @@
import json
import os
import sys
import threading
import time

import cv2
from matplotlib import pyplot as plt
import numpy as np
import rclpy
from cv_bridge import CvBridge
from rclpy.qos import QoSHistoryPolicy, QoSProfile, QoSReliabilityPolicy
from sensor_msgs.msg import Image
from std_msgs.msg import String

filepath = os.path.dirname(os.path.realpath(__file__)) + "/"
sys.path.append(filepath + "../../../../scripts/")
import utils_pipeline

from skelda import utils_view

# ==================================================================================================

bridge = CvBridge()
node = None
publisher_img = None

cam_id = "camera01"
img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
pose_input_topic = "/poses/" + cam_id
img_output_topic = "/" + cam_id + "/img_with_pose"

last_input_image = None
lock = threading.Lock()

# ==================================================================================================

def callback_images(image_data):
    global last_input_image, lock

    # Convert into cv images from image string
    if image_data.encoding == "bayer_rggb8":
        bayer_image = bridge.imgmsg_to_cv2(image_data, "bayer_rggb8")
        color_image = utils_pipeline.bayer2rgb(bayer_image)
    elif image_data.encoding == "mono8":
        gray_image = bridge.imgmsg_to_cv2(image_data, "mono8")
        color_image = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2RGB)
    elif image_data.encoding == "rgb8":
        color_image = bridge.imgmsg_to_cv2(image_data, "rgb8")
    else:
        raise ValueError("Unknown image encoding:", image_data.encoding)

    time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9
    with lock:
        last_input_image = (color_image, time_stamp)

# ==================================================================================================

def callback_poses(pose_data):
    global last_input_image, lock

    ptime = time.time()
    if last_input_image is None:
        return

    # Convert pose data from json string
    poses = json.loads(pose_data.data)

    # Collect inputs
    images_2d = []
    timestamps = []
    with lock:
        img = np.copy(last_input_image[0])
        ts = last_input_image[1]
    images_2d.append(img)
    timestamps.append(ts)

    # Visualize 2D poses
    bodies2D = poses["bodies2D"]
    colors = plt.cm.hsv(np.linspace(0, 1, len(bodies2D), endpoint=False)).tolist()
    colors = [[int(c[0] * 255), int(c[1] * 255), int(c[2] * 255)] for c in colors]
    for i, body in enumerate(bodies2D):
        color = list(reversed(colors[i]))
        img = utils_view.draw_body_in_image(img, body, poses["joints"], color)

    # Publish image with poses
    publish(img)
    msg = "Visualization time: {:.3f}s"
    print(msg.format(time.time() - ptime))

# ==================================================================================================

def publish(img):
    # Publish image data
    msg = bridge.cv2_to_imgmsg(img, "rgb8")
    publisher_img.publish(msg)

# ==================================================================================================

def main():
    global node, publisher_img

    # Start node
    rclpy.init(args=sys.argv)
    node = rclpy.create_node("pose2d_visualizer")

    # Quality of service settings
    qos_profile = QoSProfile(
        reliability=QoSReliabilityPolicy.RELIABLE,
        history=QoSHistoryPolicy.KEEP_LAST,
        depth=1,
    )

    # Create subscribers
    _ = node.create_subscription(
        Image,
        img_input_topic,
        callback_images,
        qos_profile,
    )
    _ = node.create_subscription(
        String,
        pose_input_topic,
        callback_poses,
        qos_profile,
    )

    # Create publishers
    publisher_img = node.create_publisher(Image, img_output_topic, qos_profile)

    node.get_logger().info("Finished initialization of pose visualizer")

    # Run ros update thread
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()

# ==================================================================================================

if __name__ == "__main__":
    main()
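To exercise the visualizer without the estimator, a pose message can be published by hand; a hedged sketch (the JSON keys match `callback_poses` above, and at least one image must have arrived on the camera topic first):

```bash
ros2 topic pub --once /poses/camera01 std_msgs/msg/String \
    "data: '{\"bodies2D\": [], \"joints\": []}'"
```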


@ -0,0 +1,4 @@
[develop]
script_dir=$base/lib/pose2d_visualizer
[install]
install_scripts=$base/lib/pose2d_visualizer


@ -0,0 +1,23 @@
from setuptools import setup

package_name = "pose2d_visualizer"

setup(
    name=package_name,
    version="0.0.0",
    packages=[package_name],
    data_files=[
        ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
        ("share/" + package_name, ["package.xml"]),
    ],
    install_requires=["setuptools"],
    zip_safe=True,
    maintainer="root",
    maintainer_email="root@todo.todo",
    description="TODO: Package description",
    license="TODO: License declaration",
    tests_require=["pytest"],
    entry_points={
        "console_scripts": [
            "pose2d_visualizer = pose2d_visualizer.pose2d_visualizer:main"
        ],
    },
)


@ -0,0 +1,27 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_copyright.main import main

# Remove the `skip` decorator once the source file(s) have a copyright header
@pytest.mark.skip(
    reason="No copyright header has been placed in the generated source file."
)
@pytest.mark.copyright
@pytest.mark.linter
def test_copyright():
    rc = main(argv=[".", "test"])
    assert rc == 0, "Found errors"


@ -0,0 +1,25 @@
# Copyright 2017 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_flake8.main import main_with_errors

@pytest.mark.flake8
@pytest.mark.linter
def test_flake8():
    rc, errors = main_with_errors(argv=[])
    assert rc == 0, "Found %d code style errors / warnings:\n" % len(
        errors
    ) + "\n".join(errors)


@ -0,0 +1,23 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_pep257.main import main

@pytest.mark.linter
@pytest.mark.pep257
def test_pep257():
    rc = main(argv=[".", "test"])
    assert rc == 0, "Found code style errors / warnings"


@ -0,0 +1,65 @@
cmake_minimum_required(VERSION 3.5)
project(rpt2d_wrapper_cpp)
# Default to C99
if(NOT CMAKE_C_STANDARD)
set(CMAKE_C_STANDARD 99)
endif()
# Default to C++17
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()
# find dependencies
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED)
find_package(std_msgs REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(cv_bridge REQUIRED)
find_package(OpenCV REQUIRED)
### 3) ONNX Runtime
include_directories(/onnxruntime/include/
/onnxruntime/include/onnxruntime/core/session/
/onnxruntime/include/onnxruntime/core/providers/tensorrt/)
link_directories(/onnxruntime/build/Linux/Release/)
add_executable(rpt2d_wrapper src/rpt2d_wrapper.cpp)
ament_target_dependencies(rpt2d_wrapper rclcpp std_msgs sensor_msgs cv_bridge)
target_include_directories(rpt2d_wrapper PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
target_link_libraries(rpt2d_wrapper
${OpenCV_LIBS}
onnxruntime_providers_tensorrt
onnxruntime_providers_shared
onnxruntime_providers_cuda
onnxruntime
)
set_target_properties(rpt2d_wrapper PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH "/onnxruntime/build/Linux/Release"
)
install(TARGETS rpt2d_wrapper
DESTINATION lib/${PROJECT_NAME})
if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
# the following line skips the linter which checks for copyrights
# uncomment the line when a copyright and license is not present in all source files
#set(ament_cmake_copyright_FOUND TRUE)
# the following line skips cpplint (only works in a git repo)
# uncomment the line when this package is not in a git repo
#set(ament_cmake_cpplint_FOUND TRUE)
ament_lint_auto_find_test_dependencies()
endif()
ament_package()
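Since the RPATH is pinned to the ONNX Runtime build tree, a quick way to confirm the installed binary resolves it (the install path follows the usual colcon layout used in the dockerfile; adjust if your workspace differs):

```bash
readelf -d /project/dev_ws/install/rpt2d_wrapper_cpp/lib/rpt2d_wrapper_cpp/rpt2d_wrapper \
    | grep -iE 'rpath|runpath'
```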


@ -0,0 +1,25 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>rpt2d_wrapper_cpp</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<depend>rclcpp</depend>
<depend>std_msgs</depend>
<depend>sensor_msgs</depend>
<depend>cv_bridge</depend>
<depend>OpenCV</depend>
<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>
<export>
<build_type>ament_cmake</build_type>
</export>
</package>


@ -0,0 +1,230 @@
#include <atomic>
#include <chrono>
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
// ROS2
#include <rclcpp/rclcpp.hpp>
#include <sensor_msgs/msg/image.hpp>
#include <std_msgs/msg/string.hpp>
// OpenCV / cv_bridge
#include <cv_bridge/cv_bridge.h>
#include <opencv2/opencv.hpp>
// JSON library
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
using json = nlohmann::json;
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
// =================================================================================================
static const std::string cam_id = "camera01";
static const std::string img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw";
static const std::string pose_out_topic = "/poses/" + cam_id;
static const float min_bbox_score = 0.4;
static const float min_bbox_area = 0.1 * 0.1;
static const bool batch_poses = true;
static const std::map<std::string, bool> whole_body = {
{"foots", true},
{"face", true},
{"hands", true},
};
// =================================================================================================
// =================================================================================================
class Rpt2DWrapperNode : public rclcpp::Node
{
public:
Rpt2DWrapperNode(const std::string &node_name)
: Node(node_name)
{
this->is_busy = false;
// QoS
rclcpp::QoS qos_profile(1);
qos_profile.reliable();
qos_profile.keep_last(1);
// Setup subscriber
image_sub_ = this->create_subscription<sensor_msgs::msg::Image>(
img_input_topic, qos_profile,
std::bind(&Rpt2DWrapperNode::callback_images, this, std::placeholders::_1));
// Setup publisher
pose_pub_ = this->create_publisher<std_msgs::msg::String>(pose_out_topic, qos_profile);
// Load model
bool use_wb = utils_pipeline::use_whole_body(whole_body);
this->kpt_model = std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses);
RCLCPP_INFO(this->get_logger(), "Finished initialization of pose estimator.");
}
private:
rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr image_sub_;
rclcpp::Publisher<std_msgs::msg::String>::SharedPtr pose_pub_;
std::atomic<bool> is_busy;
// Pose model pointer
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model;
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
void callback_images(const sensor_msgs::msg::Image::SharedPtr msg);
std::vector<std::vector<std::array<float, 3>>> call_model(const cv::Mat &image);
void publish(const json &data)
{
std_msgs::msg::String msg;
msg.data = data.dump();
pose_pub_->publish(msg);
}
};
// =================================================================================================
void Rpt2DWrapperNode::callback_images(const sensor_msgs::msg::Image::SharedPtr msg)
{
if (this->is_busy)
{
RCLCPP_WARN(this->get_logger(), "Skipping frame, still processing...");
return;
}
this->is_busy = true;
auto ts_image = std::chrono::high_resolution_clock::now();
// Load or convert image to Bayer format
cv::Mat bayer_image;
try
{
if (msg->encoding == "mono8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "bayer_rggb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "rgb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
cv::Mat color_image = cv_ptr->image;
bayer_image = utils_pipeline::rgb2bayer(color_image);
}
else
{
throw std::runtime_error("Unknown image encoding: " + msg->encoding);
}
}
catch (const std::exception &e)
{
RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what());
// Clear the busy flag before bailing out, otherwise the node would never process another frame
this->is_busy = false;
return;
}
// Call model
const auto &valid_poses = this->call_model(bayer_image);
// Calculate timings
double time_stamp = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;
auto ts_image_sec = std::chrono::duration<double>(ts_image.time_since_epoch()).count();
auto ts_pose = std::chrono::high_resolution_clock::now();
double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
double z_trigger_image = ts_image_sec - time_stamp;
double z_trigger_pose = ts_pose_sec - time_stamp;
double z_image_pose = ts_pose_sec - ts_image_sec;
// Build JSON
json poses_msg;
poses_msg["bodies2D"] = valid_poses; // shape: persons x keypoints x 3
poses_msg["joints"] = joint_names_2d;
poses_msg["num_persons"] = valid_poses.size();
poses_msg["timestamps"] = {
{"trigger", time_stamp},
{"image", ts_image_sec},
{"pose", ts_pose_sec},
{"z-trigger-image", z_trigger_image},
{"z-image-pose", z_image_pose},
{"z-trigger-pose", z_trigger_pose}};
// Publish
publish(poses_msg);
// Print info
double elapsed_time = std::chrono::duration<double>(
std::chrono::high_resolution_clock::now() - ts_image)
.count();
std::cout << "Detected persons: " << valid_poses.size()
<< " - Prediction time: " << elapsed_time << "s" << std::endl;
this->is_busy = false;
}
// =================================================================================================
std::vector<std::vector<std::array<float, 3>>> Rpt2DWrapperNode::call_model(const cv::Mat &image)
{
// Create image vector
cv::Mat rgb_image = utils_pipeline::bayer2rgb(image);
std::vector<cv::Mat> images_2d = {rgb_image};
// Predict 2D poses
auto poses_2d_all = kpt_model->predict(images_2d);
auto poses_2d_upd = utils_pipeline::update_keypoints(
poses_2d_all, joint_names_2d, whole_body);
auto &poses_2d = poses_2d_upd[0];
// Drop persons with no joints
std::vector<std::vector<std::array<float, 3>>> valid_poses;
for (auto &person : poses_2d)
{
float sum_conf = 0.0;
for (auto &kp : person)
{
sum_conf += kp[2];
}
if (sum_conf > 0.0)
{
valid_poses.push_back(person);
}
}
// Round poses to 3 decimal places
for (auto &person : valid_poses)
{
for (auto &kp : person)
{
kp[0] = std::round(kp[0] * 1000.0) / 1000.0;
kp[1] = std::round(kp[1] * 1000.0) / 1000.0;
kp[2] = std::round(kp[2] * 1000.0) / 1000.0;
}
}
return valid_poses;
}
// =================================================================================================
// =================================================================================================
int main(int argc, char **argv)
{
rclcpp::init(argc, argv);
auto node = std::make_shared<Rpt2DWrapperNode>("rpt2d_wrapper");
rclcpp::spin(node);
rclcpp::shutdown();
return 0;
}
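For completeness, a minimal Python-side consumer of the published JSON; the topic name and message keys follow the wrapper above, while the node name and queue depth are illustrative:

```python
import json

import rclpy
from std_msgs.msg import String


def on_poses(msg):
    # The wrapper publishes bodies2D, joints, num_persons and timestamps
    data = json.loads(msg.data)
    print("persons:", data["num_persons"],
          "trigger-to-pose latency:", data["timestamps"]["z-trigger-pose"])


def main():
    rclpy.init()
    node = rclpy.create_node("pose_listener")
    node.create_subscription(String, "/poses/camera01", on_poses, 1)
    rclpy.spin(node)


if __name__ == "__main__":
    main()
```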

File diff suppressed because it is too large


@ -581,20 +581,21 @@ void TriangulatorInternal::reset()
void TriangulatorInternal::print_stats()
{
-    std::cout << "Triangulator statistics:" << std::endl;
-    std::cout << " Number of calls: " << num_calls << std::endl;
-    std::cout << " Init time: " << init_time / num_calls << std::endl;
-    std::cout << " Undistort time: " << undistort_time / num_calls << std::endl;
-    std::cout << " Project time: " << project_time / num_calls << std::endl;
-    std::cout << " Match time: " << match_time / num_calls << std::endl;
-    std::cout << " Pairs time: " << pairs_time / num_calls << std::endl;
-    std::cout << " Pair scoring time: " << pair_scoring_time / num_calls << std::endl;
-    std::cout << " Grouping time: " << grouping_time / num_calls << std::endl;
-    std::cout << " Full time: " << full_time / num_calls << std::endl;
-    std::cout << " Merge time: " << merge_time / num_calls << std::endl;
-    std::cout << " Post time: " << post_time / num_calls << std::endl;
-    std::cout << " Convert time: " << convert_time / num_calls << std::endl;
-    std::cout << " Total time: " << total_time / num_calls << std::endl;
+    std::cout << "{" << std::endl;
+    std::cout << " \"triangulator_calls\": " << num_calls << "," << std::endl;
+    std::cout << " \"init_time\": " << init_time / num_calls << "," << std::endl;
+    std::cout << " \"undistort_time\": " << undistort_time / num_calls << "," << std::endl;
+    std::cout << " \"project_time\": " << project_time / num_calls << "," << std::endl;
+    std::cout << " \"match_time\": " << match_time / num_calls << "," << std::endl;
+    std::cout << " \"pairs_time\": " << pairs_time / num_calls << "," << std::endl;
+    std::cout << " \"pair_scoring_time\": " << pair_scoring_time / num_calls << "," << std::endl;
+    std::cout << " \"grouping_time\": " << grouping_time / num_calls << "," << std::endl;
+    std::cout << " \"full_time\": " << full_time / num_calls << "," << std::endl;
+    std::cout << " \"merge_time\": " << merge_time / num_calls << "," << std::endl;
+    std::cout << " \"post_time\": " << post_time / num_calls << "," << std::endl;
+    std::cout << " \"convert_time\": " << convert_time / num_calls << "," << std::endl;
+    std::cout << " \"total_time\": " << total_time / num_calls << std::endl;
+    std::cout << "}" << std::endl;
}
// =================================================================================================
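With the JSON-style output, the stats block can be consumed programmatically; a minimal sketch, assuming `captured` holds the stdout of a run (the sample values are illustrative):

```python
import json
import re

captured = 'some log lines\n{\n "triangulator_calls": 100,\n "total_time": 0.004\n}\n'
block = re.search(r'\{[^{}]*"triangulator_calls"[^{}]*\}', captured).group(0)
stats = json.loads(block)
print(stats["total_time"])
```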


@ -0,0 +1,268 @@
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
// OpenCV
#include <opencv2/opencv.hpp>
// JSON library
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
using json = nlohmann::json;
#include "/RapidPoseTriangulation/rpt/camera.hpp"
#include "/RapidPoseTriangulation/rpt/interface.hpp"
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
// =================================================================================================
static const std::string path_data = "/tmp/rpt/all.json";
static const std::string path_cfg = "/tmp/rpt/config.json";
// =================================================================================================
std::vector<cv::Mat> load_images(json &item)
{
// Load images
std::vector<cv::Mat> images;
for (size_t j = 0; j < item["imgpaths"].size(); j++)
{
auto ipath = item["imgpaths"][j].get<std::string>();
cv::Mat image = cv::imread(ipath, cv::IMREAD_COLOR);
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
images.push_back(image);
}
if (item["dataset_name"] == "human36m")
{
// Since the images don't have the same shape, rescale some of them
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Size ishape = img.size();
if (ishape != cv::Size(1000, 1000))
{
auto cam = item["cameras"][i];
cam["K"][1][1] = cam["K"][1][1].get<float>() * (1000.0 / ishape.height);
cam["K"][1][2] = cam["K"][1][2].get<float>() * (1000.0 / ishape.height);
cam["K"][0][0] = cam["K"][0][0].get<float>() * (1000.0 / ishape.width);
cam["K"][0][2] = cam["K"][0][2].get<float>() * (1000.0 / ishape.width);
cv::resize(img, img, cv::Size(1000, 1000));
images[i] = img;
}
}
}
// Convert image format to Bayer encoding to simulate real camera input
// This also resulted in notably better MPJPE results in most cases, presumably since the
// demosaicing algorithm from OpenCV is better than the default one from the cameras
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Mat bayer_image = utils_pipeline::rgb2bayer(img);
images[i] = std::move(bayer_image);
}
return images;
}
// =================================================================================================
std::string read_file(const std::string &path)
{
std::ifstream file_stream(path);
if (!file_stream.is_open())
{
throw std::runtime_error("Unable to open file: " + path);
}
std::stringstream buffer;
buffer << file_stream.rdbuf();
return buffer.str();
}
void write_file(const std::string &path, const std::string &content)
{
std::ofstream file_stream(path, std::ios::out | std::ios::binary);
if (!file_stream.is_open())
{
throw std::runtime_error("Unable to open file for writing: " + path);
}
file_stream << content;
if (!file_stream)
{
throw std::runtime_error("Error occurred while writing to file: " + path);
}
file_stream.close();
}
// =================================================================================================
int main(int argc, char **argv)
{
// Load the files
auto dataset = json::parse(read_file(path_data));
auto config = json::parse(read_file(path_cfg));
// Load the configuration
const std::map<std::string, bool> whole_body = config["whole_body"];
const float min_bbox_score = config["min_bbox_score"];
const float min_bbox_area = config["min_bbox_area"];
const bool batch_poses = config["batch_poses"];
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
const float min_match_score = config["min_match_score"];
const size_t min_group_size = config["min_group_size"];
const int take_interval = config["take_interval"];
// Load 2D model
bool use_wb = utils_pipeline::use_whole_body(whole_body);
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model =
std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses);
// Load 3D model
std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>(
min_match_score, min_group_size);
// Timers
size_t time_count = dataset.size();
double time_image = 0.0;
double time_pose2d = 0.0;
double time_pose3d = 0.0;
size_t print_steps = (size_t)std::floor((float)time_count / 100.0f);
print_steps = std::max((size_t)1, print_steps);
std::cout << "Running predictions: |";
size_t bar_width = (size_t)std::ceil((float)time_count / (float)print_steps) - 2;
for (size_t i = 0; i < bar_width; i++)
{
std::cout << "-";
}
std::cout << "|" << std::endl;
// Calculate 2D poses [items, views, persons, joints, 3]
std::vector<std::vector<std::vector<std::vector<std::array<float, 3>>>>> all_poses_2d;
std::cout << "Calculating 2D poses: ";
for (size_t i = 0; i < dataset.size(); i++)
{
if (i % print_steps == 0)
{
std::cout << "#" << std::flush;
}
std::chrono::duration<float> elapsed;
auto &item = dataset[i];
// Load images
auto stime = std::chrono::high_resolution_clock::now();
std::vector<cv::Mat> images = load_images(item);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_image += elapsed.count();
// Predict 2D poses
stime = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Mat rgb = utils_pipeline::bayer2rgb(img);
images[i] = std::move(rgb);
}
auto poses_2d_all = kpt_model->predict(images);
auto poses_2d_upd = utils_pipeline::update_keypoints(
poses_2d_all, joint_names_2d, whole_body);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_pose2d += elapsed.count();
all_poses_2d.push_back(std::move(poses_2d_upd));
}
std::cout << std::endl;
// Calculate 3D poses [items, persons, joints, 4]
std::vector<std::vector<std::vector<std::array<float, 4>>>> all_poses_3d;
std::vector<std::string> all_ids;
std::string old_scene = "";
int old_id = -1;
std::cout << "Calculating 3D poses: ";
for (size_t i = 0; i < dataset.size(); i++)
{
if (i % print_steps == 0)
{
std::cout << "#" << std::flush;
}
std::chrono::duration<float> elapsed;
auto &item = dataset[i];
auto &poses_2d = all_poses_2d[i];
if (old_scene != item["scene"] || old_id + take_interval < item["index"])
{
// Reset last poses if scene changes
tri_model->reset();
old_scene = item["scene"];
}
auto stime = std::chrono::high_resolution_clock::now();
std::vector<Camera> cameras;
for (size_t j = 0; j < item["cameras"].size(); j++)
{
auto &cam = item["cameras"][j];
Camera camera;
camera.name = cam["name"].get<std::string>();
camera.K = cam["K"].get<std::array<std::array<float, 3>, 3>>();
camera.DC = cam["DC"].get<std::vector<float>>();
camera.R = cam["R"].get<std::array<std::array<float, 3>, 3>>();
camera.T = cam["T"].get<std::array<std::array<float, 1>, 3>>();
camera.width = cam["width"].get<int>();
camera.height = cam["height"].get<int>();
camera.type = cam["type"].get<std::string>();
cameras.push_back(camera);
}
std::array<std::array<float, 3>, 2> roomparams = {
item["room_size"].get<std::array<float, 3>>(),
item["room_center"].get<std::array<float, 3>>()};
auto poses_3d = tri_model->triangulate_poses(poses_2d, cameras, roomparams, joint_names_2d);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_pose3d += elapsed.count();
all_poses_3d.push_back(std::move(poses_3d));
all_ids.push_back(item["id"].get<std::string>());
old_id = item["index"];
}
std::cout << std::endl;
// Print timing stats
std::cout << "\nMetrics:" << std::endl;
size_t warmup = 10;
double avg_time_image = time_image / (time_count - warmup);
double avg_time_pose2d = time_pose2d / (time_count - warmup);
double avg_time_pose3d = time_pose3d / (time_count - warmup);
double fps = 1.0 / (avg_time_pose2d + avg_time_pose3d);
std::cout << "{\n"
<< " \"img_loading\": " << avg_time_image << ",\n"
<< " \"avg_time_2d\": " << avg_time_pose2d << ",\n"
<< " \"avg_time_3d\": " << avg_time_pose3d << ",\n"
<< " \"fps\": " << fps << "\n"
<< "}" << std::endl;
tri_model->print_stats();
// Store the results as json
json all_results;
all_results["all_ids"] = all_ids;
all_results["all_poses_2d"] = all_poses_2d;
all_results["all_poses_3d"] = all_poses_3d;
all_results["joint_names_2d"] = joint_names_2d;
all_results["joint_names_3d"] = joint_names_2d;
// Save the results
std::string path_results = "/tmp/rpt/results.json";
write_file(path_results, all_results.dump(0));
return 0;
}
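For reference, the exchange files under `/tmp/rpt/` follow the keys read above: `all.json` holds the exported skelda labels, and `results.json` is written back with `all_ids`, `all_poses_2d`, `all_poses_3d`, and the joint name lists. A hedged sketch of `config.json` (the keys match the code, the values are illustrative defaults):

```json
{
  "min_match_score": 0.95,
  "min_group_size": 2,
  "min_bbox_score": 0.5,
  "min_bbox_area": 0.01,
  "batch_poses": true,
  "whole_body": {"foots": false, "face": false, "hands": false},
  "take_interval": 1
}
```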


@ -1,22 +1,18 @@
import json
import os
-import sys
-import time
-import cv2
-import matplotlib
-import numpy as np
-import tqdm
-import test_triangulate
-import utils_2d_pose
+import utils_pipeline
from skelda import evals
+from skelda.writers import json_writer
-sys.path.append("/RapidPoseTriangulation/swig/")
-import rpt
# ==================================================================================================
+whole_body = {
+    "foots": False,
+    "face": False,
+    "hands": False,
+}
dataset_use = "human36m"
# dataset_use = "panoptic"
# dataset_use = "mvor"
@ -33,8 +29,6 @@ dataset_use = "human36m"
# dataset_use = "egohumans_volleyball"
# dataset_use = "egohumans_badminton"
# dataset_use = "egohumans_tennis"
-# dataset_use = "ntu"
-# dataset_use = "koarob"
# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered
@ -89,13 +83,13 @@ datasets = {
    "campus": {
        "path": "/datasets/campus/skelda/test.json",
        "take_interval": 1,
-        "min_match_score": 0.90,
+        "min_match_score": 0.92,
        "min_bbox_score": 0.5,
    },
    "shelf": {
        "path": "/datasets/shelf/skelda/test.json",
        "take_interval": 1,
-        "min_match_score": 0.96,
+        "min_match_score": 0.95,
        "min_group_size": 2,
    },
    "ikeaasm": {
@ -175,7 +169,7 @@ datasets = {
    },
}
-joint_names_2d = test_triangulate.joint_names_2d
+joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d)
eval_joints = [
    "head",
@ -197,7 +191,7 @@ if dataset_use == "human36m":
if dataset_use == "panoptic":
    eval_joints[eval_joints.index("head")] = "nose"
if dataset_use == "human36m_wb":
-    if any((test_triangulate.whole_body.values())):
+    if utils_pipeline.use_whole_body(whole_body):
        eval_joints = list(joint_names_2d)
    else:
        eval_joints[eval_joints.index("head")] = "nose"
@ -214,6 +208,11 @@ def load_json(path: str):
    return data
+def save_json(data: dict, path: str):
+    with open(path, "w+", encoding="utf-8") as file:
+        json.dump(data, file, indent=0)
# ==================================================================================================
@ -304,6 +303,14 @@ def load_labels(dataset: dict):
    if take_interval > 1:
        labels = [l for i, l in enumerate(labels) if i % take_interval == 0]
+    # Add default values
+    for label in labels:
+        if "scene" not in label:
+            label["scene"] = "default"
+        for cam in label["cameras"]:
+            if "type" not in cam:
+                cam["type"] = "pinhole"
    return labels
@ -313,28 +320,6 @@
def main():
    global joint_names_3d, eval_joints
-    # Load dataset specific parameters
-    min_match_score = datasets[dataset_use].get(
-        "min_match_score", default_min_match_score
-    )
-    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
-    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
-    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
-    batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
-    # Load 2D pose model
-    whole_body = test_triangulate.whole_body
-    if any((whole_body[k] for k in whole_body)):
-        kpt_model = utils_2d_pose.load_wb_model()
-    else:
-        kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)
-    # Manually set matplotlib backend
-    try:
-        matplotlib.use("TkAgg")
-    except ImportError:
-        print("WARNING: Using headless mode, no visualizations will be shown.")
    print("Loading dataset ...")
    labels = load_labels(
        {
@ -346,120 +331,47 @@ def main():
    # Print a dataset sample for debugging
    print(labels[0])
-    print("\nPrefetching images ...")
-    for label in tqdm.tqdm(labels):
-        # If the images are stored on a HDD, it sometimes takes a while to load them
-        # Prefetching them results in more stable timings of the following steps
-        # To prevent memory overflow, the code only loads the images, but does not store them
-        try:
-            for i in range(len(label["imgpaths"])):
-                imgpath = label["imgpaths"][i]
-                img = test_triangulate.load_image(imgpath)
-        except cv2.error:
-            print("One of the paths not found:", label["imgpaths"])
-            continue
-    time.sleep(3)
-    print("\nCalculating 2D predictions ...")
-    all_poses_2d = []
-    times = []
-    for label in tqdm.tqdm(labels):
-        images_2d = []
-        start = time.time()
-        try:
-            for i in range(len(label["imgpaths"])):
-                imgpath = label["imgpaths"][i]
-                img = test_triangulate.load_image(imgpath)
-                images_2d.append(img)
-        except cv2.error:
-            print("One of the paths not found:", label["imgpaths"])
-            continue
-        if dataset_use == "human36m":
-            for i in range(len(images_2d)):
-                # Since the images don't have the same shape, rescale some of them
-                img = images_2d[i]
-                ishape = img.shape
-                if ishape != (1000, 1000, 3):
-                    cam = label["cameras"][i]
-                    cam["K"][1][1] = cam["K"][1][1] * (1000 / ishape[0])
-                    cam["K"][1][2] = cam["K"][1][2] * (1000 / ishape[0])
-                    cam["K"][0][0] = cam["K"][0][0] * (1000 / ishape[1])
-                    cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
-                    images_2d[i] = cv2.resize(img, (1000, 1000))
-        # Convert image format to Bayer encoding to simulate real camera input
-        # This also resulted in notably better MPJPE results in most cases, presumably since the
-        # demosaicing algorithm from OpenCV is better than the default one from the cameras
-        for i in range(len(images_2d)):
-            images_2d[i] = test_triangulate.rgb2bayer(images_2d[i])
-        time_imgs = time.time() - start
-        start = time.time()
-        for i in range(len(images_2d)):
-            images_2d[i] = test_triangulate.bayer2rgb(images_2d[i])
-        poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
-        poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
-        time_2d = time.time() - start
-        all_poses_2d.append(poses_2d)
-        times.append([time_imgs, time_2d, 0])
-    print("\nCalculating 3D predictions ...")
-    all_poses_3d = []
-    all_ids = []
-    triangulator = rpt.Triangulator(
-        min_match_score=min_match_score, min_group_size=min_group_size
-    )
-    old_scene = ""
-    old_index = -1
-    for i in tqdm.tqdm(range(len(labels))):
-        label = labels[i]
-        poses_2d = all_poses_2d[i]
-        if old_scene != label.get("scene", "") or (
-            old_index + datasets[dataset_use]["take_interval"] < label["index"]
-        ):
-            # Reset last poses if scene changes
-            old_scene = label.get("scene", "")
-            triangulator.reset()
-        start = time.time()
-        if sum(np.sum(p) for p in poses_2d) == 0:
-            poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist()
-        else:
-            rpt_cameras = rpt.convert_cameras(label["cameras"])
-            roomparams = [label["room_size"], label["room_center"]]
-            poses3D = triangulator.triangulate_poses(
-                poses_2d, rpt_cameras, roomparams, joint_names_2d
-            )
-        time_3d = time.time() - start
-        old_index = label["index"]
-        all_poses_3d.append(np.array(poses3D).tolist())
-        all_ids.append(label["id"])
-        times[i][2] = time_3d
-    # Print per-step triangulation timings
-    print("")
-    triangulator.print_stats()
-    warmup_iters = 10
-    if len(times) > warmup_iters:
-        times = times[warmup_iters:]
-    avg_time_im = np.mean([t[0] for t in times])
-    avg_time_2d = np.mean([t[1] for t in times])
-    avg_time_3d = np.mean([t[2] for t in times])
-    tstats = {
-        "img_loading": avg_time_im,
-        "avg_time_2d": avg_time_2d,
-        "avg_time_3d": avg_time_3d,
-        "avg_fps": 1.0 / (avg_time_2d + avg_time_3d),
-    }
-    print("\nMetrics:")
-    print(json.dumps(tstats, indent=2))
+    # Save dataset
+    tmp_export_dir = "/tmp/rpt/"
+    for label in labels:
+        if "splits" in label:
+            label.pop("splits")
+    json_writer.save_dataset(labels, tmp_export_dir)
+    # Load dataset specific parameters
+    min_match_score = datasets[dataset_use].get(
+        "min_match_score", default_min_match_score
+    )
+    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
+    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
+    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
+    batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
+    # Save config
+    config_path = tmp_export_dir + "config.json"
+    config = {
+        "min_match_score": min_match_score,
+        "min_group_size": min_group_size,
+        "min_bbox_score": min_bbox_score,
+        "min_bbox_area": min_bbox_area,
+        "batch_poses": batch_poses,
+        "whole_body": whole_body,
+        "take_interval": datasets[dataset_use]["take_interval"],
+    }
+    save_json(config, config_path)
+    # Call the CPP binary
+    os.system("/RapidPoseTriangulation/scripts/test_skelda_dataset.bin")
+    # Load the results
+    print("Loading exports ...")
+    res_path = tmp_export_dir + "results.json"
+    results = load_json(res_path)
+    all_poses_3d = results["all_poses_3d"]
+    all_ids = results["all_ids"]
+    joint_names_3d = results["joint_names_3d"]
+    # Run evaluation
    _ = evals.mpjpe.run_eval(
        labels,
        all_poses_3d,
@ -477,7 +389,6 @@ def main():
        replace_head_with_nose=True,
    )
# ==================================================================================================
if __name__ == "__main__":


@ -3,13 +3,12 @@ import json
import os
import sys
import time
-from typing import List
-import cv2
import matplotlib
import numpy as np
import utils_2d_pose
+import utils_pipeline
from skelda import utils_pose, utils_view
sys.path.append("/RapidPoseTriangulation/swig/")
@ -25,175 +24,9 @@ whole_body = {
    "hands": False,
}
-joint_names_2d = [
-    "nose",
-    "eye_left",
-    "eye_right",
-    "ear_left",
-    "ear_right",
-    "shoulder_left",
-    "shoulder_right",
-    "elbow_left",
-    "elbow_right",
-    "wrist_left",
-    "wrist_right",
-    "hip_left",
-    "hip_right",
-    "knee_left",
-    "knee_right",
-    "ankle_left",
-    "ankle_right",
-]
-if whole_body["foots"]:
-    joint_names_2d.extend(
-        [
-            "foot_toe_big_left",
-            "foot_toe_small_left",
-            "foot_heel_left",
-            "foot_toe_big_right",
-            "foot_toe_small_right",
-            "foot_heel_right",
-        ]
-    )
-if whole_body["face"]:
-    joint_names_2d.extend(
-        [
-            "face_jaw_right_1",
-            "face_jaw_right_2",
-            "face_jaw_right_3",
-            "face_jaw_right_4",
-            "face_jaw_right_5",
-            "face_jaw_right_6",
-            "face_jaw_right_7",
-            "face_jaw_right_8",
-            "face_jaw_middle",
-            "face_jaw_left_1",
-            "face_jaw_left_2",
-            "face_jaw_left_3",
-            "face_jaw_left_4",
-            "face_jaw_left_5",
-            "face_jaw_left_6",
-            "face_jaw_left_7",
-            "face_jaw_left_8",
-            "face_eyebrow_right_1",
-            "face_eyebrow_right_2",
-            "face_eyebrow_right_3",
-            "face_eyebrow_right_4",
-            "face_eyebrow_right_5",
-            "face_eyebrow_left_1",
-            "face_eyebrow_left_2",
-            "face_eyebrow_left_3",
-            "face_eyebrow_left_4",
-            "face_eyebrow_left_5",
-            "face_nose_1",
-            "face_nose_2",
-            "face_nose_3",
-            "face_nose_4",
-            "face_nose_5",
-            "face_nose_6",
-            "face_nose_7",
-            "face_nose_8",
-            "face_nose_9",
-            "face_eye_right_1",
-            "face_eye_right_2",
-            "face_eye_right_3",
-            "face_eye_right_4",
-            "face_eye_right_5",
-            "face_eye_right_6",
-            "face_eye_left_1",
-            "face_eye_left_2",
-            "face_eye_left_3",
-            "face_eye_left_4",
-            "face_eye_left_5",
-            "face_eye_left_6",
-            "face_mouth_1",
-            "face_mouth_2",
-            "face_mouth_3",
-            "face_mouth_4",
-            "face_mouth_5",
-            "face_mouth_6",
-            "face_mouth_7",
-            "face_mouth_8",
-            "face_mouth_9",
-            "face_mouth_10",
-            "face_mouth_11",
-            "face_mouth_12",
-            "face_mouth_13",
-            "face_mouth_14",
-            "face_mouth_15",
-            "face_mouth_16",
-            "face_mouth_17",
-            "face_mouth_18",
-            "face_mouth_19",
-            "face_mouth_20",
-        ]
-    )
-if whole_body["hands"]:
-    joint_names_2d.extend(
-        [
-            "hand_wrist_left",
-            "hand_finger_thumb_left_1",
-            "hand_finger_thumb_left_2",
-            "hand_finger_thumb_left_3",
-            "hand_finger_thumb_left_4",
-            "hand_finger_index_left_1",
-            "hand_finger_index_left_2",
-            "hand_finger_index_left_3",
-            "hand_finger_index_left_4",
-            "hand_finger_middle_left_1",
-            "hand_finger_middle_left_2",
-            "hand_finger_middle_left_3",
-            "hand_finger_middle_left_4",
-            "hand_finger_ring_left_1",
-            "hand_finger_ring_left_2",
-            "hand_finger_ring_left_3",
-            "hand_finger_ring_left_4",
-            "hand_finger_pinky_left_1",
-            "hand_finger_pinky_left_2",
-            "hand_finger_pinky_left_3",
-            "hand_finger_pinky_left_4",
-            "hand_wrist_right",
-            "hand_finger_thumb_right_1",
-            "hand_finger_thumb_right_2",
-            "hand_finger_thumb_right_3",
-            "hand_finger_thumb_right_4",
-            "hand_finger_index_right_1",
-            "hand_finger_index_right_2",
-            "hand_finger_index_right_3",
-            "hand_finger_index_right_4",
-            "hand_finger_middle_right_1",
-            "hand_finger_middle_right_2",
-            "hand_finger_middle_right_3",
-            "hand_finger_middle_right_4",
-            "hand_finger_ring_right_1",
-            "hand_finger_ring_right_2",
-            "hand_finger_ring_right_3",
-            "hand_finger_ring_right_4",
-            "hand_finger_pinky_right_1",
-            "hand_finger_pinky_right_2",
-            "hand_finger_pinky_right_3",
-            "hand_finger_pinky_right_4",
-        ]
-    )
-joint_names_2d.extend(
-    [
-        "hip_middle",
-        "shoulder_middle",
-        "head",
-    ]
-)
+joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d)
-main_limbs = [
-    ("shoulder_left", "elbow_left"),
-    ("elbow_left", "wrist_left"),
-    ("shoulder_right", "elbow_right"),
-    ("elbow_right", "wrist_right"),
-    ("hip_left", "knee_left"),
-    ("knee_left", "ankle_left"),
-    ("hip_right", "knee_right"),
-    ("knee_right", "ankle_right"),
-]
# ==================================================================================================
@ -217,85 +50,6 @@ def update_sample(sample, new_dir=""):
# ==================================================================================================
-def load_image(path: str):
-    image = cv2.imread(path, 3)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    image = np.asarray(image, dtype=np.uint8)
-    return image
-# ==================================================================================================
-def rgb2bayer(img):
-    bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
-    bayer[0::2, 0::2] = img[0::2, 0::2, 0]
-    bayer[0::2, 1::2] = img[0::2, 1::2, 1]
-    bayer[1::2, 0::2] = img[1::2, 0::2, 1]
-    bayer[1::2, 1::2] = img[1::2, 1::2, 2]
-    return bayer
-def bayer2rgb(bayer):
-    img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
-    return img
-# ==================================================================================================
-def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
-    new_views = []
-    for view in poses_2d:
-        new_bodies = []
-        for body in view:
-            body = body.tolist()
-            new_body = body[:17]
-            if whole_body["foots"]:
-                new_body.extend(body[17:22])
-            if whole_body["face"]:
-                new_body.extend(body[22:90])
-            if whole_body["hands"]:
-                new_body.extend(body[90:])
-            body = new_body
-            hlid = joint_names.index("hip_left")
-            hrid = joint_names.index("hip_right")
-            mid_hip = [
-                float(((body[hlid][0] + body[hrid][0]) / 2.0)),
-                float(((body[hlid][1] + body[hrid][1]) / 2.0)),
-                min(body[hlid][2], body[hrid][2]),
-            ]
-            body.append(mid_hip)
-            slid = joint_names.index("shoulder_left")
-            srid = joint_names.index("shoulder_right")
-            mid_shoulder = [
-                float(((body[slid][0] + body[srid][0]) / 2.0)),
-                float(((body[slid][1] + body[srid][1]) / 2.0)),
-                min(body[slid][2], body[srid][2]),
-            ]
-            body.append(mid_shoulder)
-            elid = joint_names.index("ear_left")
-            erid = joint_names.index("ear_right")
-            head = [
-                float(((body[elid][0] + body[erid][0]) / 2.0)),
-                float(((body[elid][1] + body[erid][1]) / 2.0)),
-                min(body[elid][2], body[erid][2]),
-            ]
-            body.append(head)
-            new_bodies.append(body)
-        new_views.append(new_bodies)
-    return new_views
-# ==================================================================================================
def main():
    if any((whole_body[k] for k in whole_body)):
        kpt_model = utils_2d_pose.load_wb_model()
@ -330,15 +84,15 @@ def main():
        images_2d = []
        for i in range(len(sample["cameras_color"])):
            imgpath = sample["imgpaths_color"][i]
-            img = load_image(imgpath)
-            img = rgb2bayer(img)
-            img = bayer2rgb(img)
+            img = utils_pipeline.load_image(imgpath)
+            img = utils_pipeline.rgb2bayer(img)
+            img = utils_pipeline.bayer2rgb(img)
            images_2d.append(img)
        # Get 2D poses
        stime = time.time()
        poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
-        poses_2d = update_keypoints(poses_2d, joint_names_2d)
+        poses_2d = utils_pipeline.update_keypoints(poses_2d, joint_names_2d, whole_body)
        print("2D time:", time.time() - stime)
        # print([np.array(p).round(6).tolist() for p in poses_2d])

scripts/utils_2d_pose.hpp (new file, 1034 lines)

File diff suppressed because it is too large


@ -20,6 +20,7 @@ class BaseModel(ABC):
raise FileNotFoundError("File not found:", model_path) raise FileNotFoundError("File not found:", model_path)
if model_path.endswith(".onnx"): if model_path.endswith(".onnx"):
print("Loading model:", model_path)
self.init_onnxruntime(model_path) self.init_onnxruntime(model_path)
self.runtime = "ort" self.runtime = "ort"
else: else:
@@ -189,9 +190,15 @@ class BoxCrop:
        self.fill_value = fill_value

    def calc_params(self, ishape, bbox):
-       start_x, start_y, end_x, end_y = bbox[0], bbox[1], bbox[2], bbox[3]
+       img_h, img_w = ishape[:2]
        target_h, target_w = self.target_size

+       # Round the bounding box coordinates
+       start_x = math.floor(bbox[0])
+       start_y = math.floor(bbox[1])
+       end_x = math.ceil(bbox[2])
+       end_y = math.ceil(bbox[3])

        # Calculate original bounding box center
        center_x = (start_x + end_x) / 2.0
        center_y = (start_y + end_y) / 2.0
@@ -231,8 +238,8 @@ class BoxCrop:
        # Define the new box coordinates
        new_start_x = max(0, start_x)
        new_start_y = max(0, start_y)
-       new_end_x = min(ishape[1] - 1, end_x)
-       new_end_y = min(ishape[0] - 1, end_y)
+       new_end_x = min(img_w - 1, end_x)
+       new_end_y = min(img_h - 1, end_y)
        new_box = [new_start_x, new_start_y, new_end_x, new_end_y]

        # Calculate resized crop size
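
The rewritten calc_params rounds fractional detector boxes outward with floor/ceil before deriving the crop, then clamps them to the image bounds. A small sketch (not part of the commit) of just that step, with made-up numbers:

```python
# Hedged sketch of the rounding and clamping added above.
import math

ishape = (720, 1280, 3)              # (img_h, img_w, channels)
bbox = [100.4, 50.7, 300.2, 400.9]   # fractional detector output

img_h, img_w = ishape[:2]
start_x, start_y = math.floor(bbox[0]), math.floor(bbox[1])  # round outward
end_x, end_y = math.ceil(bbox[2]), math.ceil(bbox[3])

new_box = [max(0, start_x), max(0, start_y),
           min(img_w - 1, end_x), min(img_h - 1, end_y)]
print(new_box)  # [100, 50, 301, 401]
```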
@@ -332,9 +339,8 @@ class RTMDet(BaseModel):
    def postprocess(self, result: List[np.ndarray], image: np.ndarray):
        boxes = np.squeeze(result[0], axis=0)
-       classes = np.squeeze(result[1], axis=0)
-       human_class = classes[:] == 0
+       human_class = boxes[:, 5] == 0
        boxes = boxes[human_class]
        keep = boxes[:, 4] > self.conf_threshold
@@ -344,7 +350,6 @@ class RTMDet(BaseModel):
            return np.array([])

        # Drop boxes with too small area
-       boxes = boxes.astype(np.float32)
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        keep = areas >= self.min_area
        boxes = boxes[keep]
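
With the detector output now arriving as a single (N, 6) array of [x1, y1, x2, y2, score, class_id], postprocess filters class, confidence, and area on one array. A hedged sketch with made-up numbers (the thresholds are stand-in values, not the model's defaults):

```python
# Illustrative filtering of a merged (N, 6) detector output.
import numpy as np

boxes = np.array([
    [10.0, 20.0, 110.0, 220.0, 0.90, 0.0],  # person, confident
    [30.0, 40.0,  35.0,  45.0, 0.80, 0.0],  # person, but tiny area
    [50.0, 60.0, 150.0, 260.0, 0.95, 2.0],  # not a person (class 2)
], dtype=np.float32)

conf_threshold, min_area = 0.3, 30.0                 # stand-in values
boxes = boxes[boxes[:, 5] == 0]                      # keep class 0 (person)
boxes = boxes[boxes[:, 4] > conf_threshold]          # confidence filter
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
boxes = boxes[areas >= min_area]                     # drop tiny boxes
print(boxes[:, :5])                                  # one surviving box
```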
@@ -386,10 +391,7 @@ class RTMPose(BaseModel):
    def preprocess(self, image: np.ndarray, bboxes: np.ndarray):
        cutouts = []
        for i in range(len(bboxes)):
-           bbox = np.asarray(bboxes[i])[0:4]
-           bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
-           bbox = bbox.round().astype(np.int32)
-           region = self.boxcrop.crop_resize_box(image, bbox)
+           region = self.boxcrop.crop_resize_box(image, bboxes[i])
            tensor = np.asarray(region).astype(self.input_types[0], copy=False)
            cutouts.append(tensor)
@@ -406,10 +408,7 @@ class RTMPose(BaseModel):
    ):
        kpts = []
        for i in range(len(bboxes)):
-           scores = np.clip(result[1][i], 0, 1)
-           kp = np.concatenate(
-               [result[0][i], np.expand_dims(scores, axis=-1)], axis=-1
-           )
+           kp = result[0][i]
            paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i])
            kp[:, 0] -= paddings[0]
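
The score concatenation moved into the exported model as well: `result[0]` now already carries [x, y, score] per keypoint, so only the crop-to-image back-mapping remains here. A hedged sketch of one plausible back-mapping with made-up values; the actual lines after `kp[:, 0] -= paddings[0]` fall outside this hunk, so this is illustrative only:

```python
# Illustrative back-mapping of a keypoint from crop to image coordinates.
import numpy as np

kp = np.array([[144.0, 96.0, 0.9]])  # (K, 3) keypoint in crop coordinates
paddings = (8.0, 4.0)                # assumed (pad_x, pad_y) from BoxCrop
scale = 0.5                          # assumed crop/original resize factor
bbox_origin = (100.0, 50.0)          # assumed crop origin in the full image

kp[:, 0] = (kp[:, 0] - paddings[0]) / scale + bbox_origin[0]
kp[:, 1] = (kp[:, 1] - paddings[1]) / scale + bbox_origin[1]
print(kp)  # [[372. 234.   0.9]]
```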

scripts/utils_pipeline.hpp (new file, 316 lines):

@@ -0,0 +1,316 @@
#pragma once
#include <algorithm>
#include <array>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
// =================================================================================================
namespace utils_pipeline
{
inline bool use_whole_body(const std::map<std::string, bool> &whole_body)
{
for (const auto &pair : whole_body)
{
if (pair.second)
{
return true;
}
}
return false;
}
// =============================================================================================
inline std::vector<std::string> get_joint_names(const std::map<std::string, bool> &whole_body)
{
std::vector<std::string> joint_names_2d = {
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
};
if (whole_body.at("foots"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
});
}
if (whole_body.at("face"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
});
}
if (whole_body.at("hands"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
});
}
joint_names_2d.insert(
joint_names_2d.end(),
{
"hip_middle",
"shoulder_middle",
"head",
});
return joint_names_2d;
}
// =============================================================================================
inline cv::Mat bayer2rgb(const cv::Mat &bayer)
{
cv::Mat rgb;
cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB);
return rgb;
}
inline cv::Mat rgb2bayer(const cv::Mat &img)
{
CV_Assert(img.type() == CV_8UC3);
cv::Mat bayer(img.rows, img.cols, CV_8UC1);
for (int r = 0; r < img.rows; ++r)
{
const uchar *imgData = img.ptr<uchar>(r);
uchar *bayerRowPtr = bayer.ptr<uchar>(r);
for (int c = 0; c < img.cols; ++c)
{
int pixelIndex = 3 * c;
// Use faster bit operation instead of modulo+if
// Even row, even col => R = 0
// Even row, odd col => G = 1
// Odd row, even col => G = 1
// Odd row, odd col => B = 2
int row_mod = r & 1;
int col_mod = c & 1;
int component = row_mod + col_mod;
bayerRowPtr[c] = imgData[pixelIndex + component];
}
}
return bayer;
}
// =============================================================================================
inline int find_index(const std::vector<std::string> &vec, const std::string &key)
{
auto it = std::find(vec.begin(), vec.end(), key);
if (it == vec.end())
{
throw std::runtime_error("Cannot find \"" + key + "\" in joint_names.");
}
return static_cast<int>(std::distance(vec.begin(), it));
}
inline std::vector<std::vector<std::vector<std::array<float, 3>>>> update_keypoints(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<std::string> &joint_names,
const std::map<std::string, bool> &whole_body)
{
std::vector<std::vector<std::vector<std::array<float, 3>>>> new_views;
new_views.reserve(poses_2d.size());
for (const auto &view : poses_2d)
{
// "view" is a list of bodies => each body is Nx3
std::vector<std::vector<std::array<float, 3>>> new_bodies;
new_bodies.reserve(view.size());
for (const auto &body : view)
{
// 1) Copy first 17 keypoints
std::vector<std::array<float, 3>> new_body;
new_body.insert(new_body.end(), body.begin(), body.begin() + 17);
// 2) Optionally append extra keypoints
if (whole_body.at("foots"))
{
new_body.insert(new_body.end(), body.begin() + 17, body.begin() + 23);
}
if (whole_body.at("face"))
{
new_body.insert(new_body.end(), body.begin() + 23, body.begin() + 91);
}
if (whole_body.at("hands"))
{
new_body.insert(new_body.end(), body.begin() + 91, body.end());
}
// 3) Compute mid_hip
int hlid = find_index(joint_names, "hip_left");
int hrid = find_index(joint_names, "hip_right");
float mid_hip_x = 0.5 * (new_body[hlid][0] + new_body[hrid][0]);
float mid_hip_y = 0.5 * (new_body[hlid][1] + new_body[hrid][1]);
float mid_hip_c = std::min(new_body[hlid][2], new_body[hrid][2]);
new_body.push_back({mid_hip_x, mid_hip_y, mid_hip_c});
// 4) Compute mid_shoulder
int slid = find_index(joint_names, "shoulder_left");
int srid = find_index(joint_names, "shoulder_right");
float mid_shoulder_x = 0.5 * (new_body[slid][0] + new_body[srid][0]);
float mid_shoulder_y = 0.5 * (new_body[slid][1] + new_body[srid][1]);
float mid_shoulder_c = std::min(new_body[slid][2], new_body[srid][2]);
new_body.push_back({mid_shoulder_x, mid_shoulder_y, mid_shoulder_c});
// 5) Compute head
int elid = find_index(joint_names, "ear_left");
int erid = find_index(joint_names, "ear_right");
float head_x = 0.5 * (new_body[elid][0] + new_body[erid][0]);
float head_y = 0.5 * (new_body[elid][1] + new_body[erid][1]);
float head_c = std::min(new_body[elid][2], new_body[erid][2]);
new_body.push_back({head_x, head_y, head_c});
// Add this updated body into new_bodies
new_bodies.push_back(new_body);
}
// Add all updated bodies for this view
new_views.push_back(new_bodies);
}
return new_views;
}
}

scripts/utils_pipeline.py (new file, 255 lines):

@@ -0,0 +1,255 @@
from typing import List
import cv2
import numpy as np
# ==================================================================================================
def use_whole_body(whole_body: dict) -> bool:
return any((whole_body[k] for k in whole_body))
# ==================================================================================================
def get_joint_names(whole_body: dict):
joint_names_2d = [
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
]
if whole_body["foots"]:
joint_names_2d.extend(
[
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
]
)
if whole_body["face"]:
joint_names_2d.extend(
[
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
]
)
if whole_body["hands"]:
joint_names_2d.extend(
[
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
]
)
joint_names_2d.extend(
[
"hip_middle",
"shoulder_middle",
"head",
]
)
return joint_names_2d
# ==================================================================================================
def load_image(path: str):
image = cv2.imread(path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = np.asarray(image, dtype=np.uint8)
return image
# ==================================================================================================
def rgb2bayer(img):
bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
bayer[0::2, 0::2] = img[0::2, 0::2, 0]
bayer[0::2, 1::2] = img[0::2, 1::2, 1]
bayer[1::2, 0::2] = img[1::2, 0::2, 1]
bayer[1::2, 1::2] = img[1::2, 1::2, 2]
return bayer
def bayer2rgb(bayer):
img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
return img
# ==================================================================================================
def update_keypoints(poses_2d: list, joint_names: List[str], whole_body: dict) -> list:
new_views = []
for view in poses_2d:
new_bodies = []
for body in view:
body = body.tolist()
new_body = body[:17]
if whole_body["foots"]:
new_body.extend(body[17:23])
if whole_body["face"]:
new_body.extend(body[23:91])
if whole_body["hands"]:
new_body.extend(body[91:])
body = new_body
hlid = joint_names.index("hip_left")
hrid = joint_names.index("hip_right")
mid_hip = [
float(((body[hlid][0] + body[hrid][0]) / 2.0)),
float(((body[hlid][1] + body[hrid][1]) / 2.0)),
min(body[hlid][2], body[hrid][2]),
]
body.append(mid_hip)
slid = joint_names.index("shoulder_left")
srid = joint_names.index("shoulder_right")
mid_shoulder = [
float(((body[slid][0] + body[srid][0]) / 2.0)),
float(((body[slid][1] + body[srid][1]) / 2.0)),
min(body[slid][2], body[srid][2]),
]
body.append(mid_shoulder)
elid = joint_names.index("ear_left")
erid = joint_names.index("ear_right")
head = [
float(((body[elid][0] + body[erid][0]) / 2.0)),
float(((body[elid][1] + body[erid][1]) / 2.0)),
min(body[elid][2], body[erid][2]),
]
body.append(head)
new_bodies.append(body)
new_views.append(new_bodies)
return new_views
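
A hedged usage sketch for the new helper (not part of the commit): with all whole-body flags off, three derived joints are appended and line up with the tail of get_joint_names:

```python
# Illustrative use of the new module; assumes scripts/ is on the path.
import numpy as np
from utils_pipeline import get_joint_names, update_keypoints

whole_body = {"foots": False, "face": False, "hands": False}
names = get_joint_names(whole_body)         # 17 detected + 3 derived = 20
body = np.zeros((17, 3), dtype=np.float32)  # one dummy body, one view
body[:, 2] = 1.0                            # confidence 1 everywhere
new_views = update_keypoints([[body]], names, whole_body)

assert len(names) == 20
assert len(new_views[0][0]) == 20           # hip/shoulder middle + head added
assert names[-3:] == ["hip_middle", "shoulder_middle", "head"]
```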

tests/README.md (new file, 34 lines):

@@ -0,0 +1,34 @@
# Tests
Various module tests
### Triangulator
```bash
cd /RapidPoseTriangulation/tests/ && python3 test_interface.py && cd ..
```
### Onnx C++ Interface
```bash
cd /RapidPoseTriangulation/tests/
g++ -std=c++17 -O3 -march=native -Wall \
$(pkg-config --cflags opencv4) \
-I /onnxruntime/include \
-I /onnxruntime/include/onnxruntime/core/session \
-I /onnxruntime/include/onnxruntime/core/providers/tensorrt \
-L /onnxruntime/build/Linux/Release \
test_utils2d.cpp \
-o my_app.bin \
-Wl,--start-group \
-lonnxruntime_providers_tensorrt \
-lonnxruntime_providers_shared \
-lonnxruntime_providers_cuda \
-lonnxruntime \
-Wl,--end-group \
$(pkg-config --libs opencv4) \
-Wl,-rpath,/onnxruntime/build/Linux/Release
./my_app.bin
```

tests/test_utils2d.cpp (new file, 96 lines):

@@ -0,0 +1,96 @@
#include <filesystem>
#include <vector>
#include <string>
#include <memory>
#include <opencv2/opencv.hpp>
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
// =================================================================================================
// =================================================================================================
int main(int argc, char **argv)
{
using namespace utils_2d_pose;
std::string base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/";
std::string model_path1 = base_path + "rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx";
std::string model_path2 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
std::vector<std::string> img_paths = {
"/RapidPoseTriangulation/data/h1/54138969-img_003201.jpg",
"/RapidPoseTriangulation/data/h1/55011271-img_003201.jpg",
"/RapidPoseTriangulation/data/h1/58860488-img_003201.jpg",
"/RapidPoseTriangulation/data/h1/60457274-img_003201.jpg",
};
// {
// std::cout << "\nTesting RTMDet and RTMPose" << std::endl;
// RTMDet model1(model_path1, 0.3, 0.1 * 0.1, 30);
// RTMPose model2(model_path2, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs1 = model1.call(img);
// std::cout << "Model1 outputs: " << outputs1[0][0] << " " << outputs1[0][1] << " "
// << outputs1[0][2] << " " << outputs1[0][3] << " " << outputs1[0][4] << " "
// << std::endl;
// for (size_t j = 0; j < outputs1.size(); j++)
// {
// std::vector<std::array<float, 5>> bboxes = {outputs1[j]};
// auto outputs2 = model2.call(img, bboxes);
// std::cout << "Model2 outputs: " << outputs2[0][0][0] << " "
// << outputs2[0][0][1] << " " << outputs2[0][0][2] << " " << std::endl;
// }
// }
// }
// {
// std::cout << "\nTesting TopDown" << std::endl;
// TopDown model3(model_path1, model_path2, 0.3, 0.1 * 0.1, false, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs3 = model3.predict(img);
// std::cout << "Model3 outputs: " << outputs3[0][0][0] << " "
// << outputs3[0][0][1] << " " << outputs3[0][0][2] << " " << std::endl;
// }
// }
{
std::cout << "\nTesting PosePredictor 1" << std::endl;
PosePredictor model4(false, 0.3, 0.1 * 0.1, false);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
}
auto outputs4 = model4.predict(images);
std::cout << "Model4 outputs: " << outputs4[0][0][0][0] << " "
<< outputs4[0][0][0][1] << " " << outputs4[0][0][0][2] << " " << std::endl;
}
{
std::cout << "\nTesting PosePredictor 2" << std::endl;
PosePredictor model5(false, 0.3, 0.1 * 0.1, true);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
}
auto outputs5 = model5.predict(images);
std::cout << "Model5 outputs: " << outputs5[0][0][0][0] << " "
<< outputs5[0][0][0][1] << " " << outputs5[0][0][0][2] << " " << std::endl;
}
return 0;
}