Merge branch 'ros' into 'master'

Ros wrapper

See merge request Percipiote/RapidPoseTriangulation!5
This commit is contained in:
DANBER
2025-01-27 13:12:42 +00:00
29 changed files with 32232 additions and 4735 deletions


@ -30,6 +30,27 @@ Fast triangulation of multiple persons from multiple camera views.
- Build triangulator:
```bash
cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..
+cd /RapidPoseTriangulation/scripts/ && \
+    g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd \
+    -I /RapidPoseTriangulation/rpt/ \
+    -isystem /usr/include/opencv4/ \
+    -isystem /onnxruntime/include/ \
+    -isystem /onnxruntime/include/onnxruntime/core/session/ \
+    -isystem /onnxruntime/include/onnxruntime/core/providers/tensorrt/ \
+    -L /onnxruntime/build/Linux/Release/ \
+    test_skelda_dataset.cpp \
+    /RapidPoseTriangulation/rpt/*.cpp \
+    -o test_skelda_dataset.bin \
+    -Wl,--start-group \
+    -lonnxruntime_providers_tensorrt \
+    -lonnxruntime_providers_shared \
+    -lonnxruntime_providers_cuda \
+    -lonnxruntime \
+    -Wl,--end-group \
+    $(pkg-config --libs opencv4) \
+    -Wl,-rpath,/onnxruntime/build/Linux/Release/ \
+    && cd ..
```
- Test with samples:
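The compiled binary reads its inputs (the exported dataset `all.json` and `config.json`) from `/tmp/rpt/`, which the evaluation script further down in this diff writes out; a minimal smoke test, assuming those exports already exist:

```bash
cd /RapidPoseTriangulation/scripts/ && ./test_skelda_dataset.bin
```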

File diff suppressed because it is too large


@ -54,6 +54,7 @@ def add_steps_to_onnx(model_path):
        inputs=[input_name],
        outputs=[casted_output],
        to=cast_type,
+        name="Cast_Input",
    )
    # Node to transpose
@ -118,6 +119,90 @@ def add_steps_to_onnx(model_path):
    # Set input image type to int8
    model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
+    # Cast all outputs to fp32 to avoid half precision issues in cpp code
+    for output in graph.output:
+        orig_output_name = output.name
+        internal_output_name = orig_output_name + "_internal"
+        # Rename the output tensor
+        for node in model.graph.node:
+            for idx, name in enumerate(node.output):
+                if name == orig_output_name:
+                    node.output[idx] = internal_output_name
+        # Insert a Cast node that casts the internal output to fp32
+        cast_fp32_name = orig_output_name
+        cast_node_output = helper.make_node(
+            "Cast",
+            inputs=[internal_output_name],
+            outputs=[cast_fp32_name],
+            to=1,
+            name="Cast_Output_" + orig_output_name,
+        )
+        # Append the cast node to the graph
+        graph.node.append(cast_node_output)
+        # Update the output's data type info
+        output.type.tensor_type.elem_type = TensorProto.FLOAT
+    # Merge the two outputs
+    if "det" in model_path:
+        r1_output = "dets"
+        r2_output = "labels"
+        out_name = "bboxes"
+        out_dim = 6
+    if "pose" in model_path:
+        r1_output = "kpts"
+        r2_output = "scores"
+        out_name = "keypoints"
+        out_dim = 3
+    if "det" in model_path or "pose" in model_path:
+        # Node to expand
+        r2_expanded = r2_output + "_expanded"
+        unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=[r2_output],
+            outputs=[r2_expanded],
+            axes=[2],
+            name="Unsqueeze",
+        )
+        # Node to concatenate
+        r12_merged = out_name
+        concat_node = helper.make_node(
+            "Concat",
+            inputs=[r1_output, r2_expanded],
+            outputs=[r12_merged],
+            axis=2,
+            name="Merged",
+        )
+        # Define the new concatenated output
+        merged_output = helper.make_tensor_value_info(
+            r12_merged,
+            TensorProto.FLOAT,
+            [
+                (
+                    graph.input[0].type.tensor_type.shape.dim[0].dim_value
+                    if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
+                    else None
+                ),
+                (
+                    graph.output[0].type.tensor_type.shape.dim[1].dim_value
+                    if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
+                    else None
+                ),
+                out_dim,
+            ],
+        )
+        # Update the graph
+        graph.node.append(unsqueeze_node)
+        graph.node.append(concat_node)
+        graph.output.pop()
+        graph.output.pop()
+        graph.output.append(merged_output)
    path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
    path = path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)
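A quick sanity check of the exported model; a minimal sketch, assuming a file named following the `_extra-steps` scheme above (the file name is hypothetical, `onnx.checker` and the graph inspection are standard `onnx` APIs):

```python
import onnx
from onnx import TensorProto

# Hypothetical file name following the naming scheme above
model = onnx.load("det_extra-steps.onnx")
onnx.checker.check_model(model)

# The input should now be uint8 and all remaining outputs fp32
assert model.graph.input[0].type.tensor_type.elem_type == TensorProto.UINT8
for out in model.graph.output:
    assert out.type.tensor_type.elem_type == TensorProto.FLOAT
print([out.name for out in model.graph.output])
```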

extras/ros/README.md (new file, 20 lines)

@ -0,0 +1,20 @@
# ROS-Wrapper
Run the pose estimator with ROS topics as inputs and publish the detected poses.
<br>
- Build container:
```bash
docker build --progress=plain -t rapidposetriangulation_ros -f extras/ros/dockerfile .
```
- Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment
- Run and test:
```bash
xhost +; docker compose -f extras/ros/docker-compose.yml up
docker exec -it ros-test_node-1 bash
export ROS_DOMAIN_ID=18
```
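- Verify the topics (the names follow the wrapper and visualizer nodes in this merge request):
```bash
ros2 topic list
ros2 topic echo /poses/camera01 --once
ros2 topic hz /camera01/img_with_pose
```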


@ -0,0 +1,70 @@
version: "2.3"
# runtime: nvidia needs version 2 else change standard runtime at host pc
services:
test_node:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep infinity'
estimator:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
pose_visualizer:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer'
pose_viewer:
image: rapidposetriangulation_ros
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run image_view image_view --ros-args --remap image:=/camera01/img_with_pose -p autosize:=True -p window_name:=MyImage'

extras/ros/dockerfile (new file, 82 lines)

@ -0,0 +1,82 @@
FROM rapidposetriangulation
WORKDIR /
# Install ONNX runtime
# See: https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.tensorrt
RUN pip3 uninstall -y onnxruntime-gpu
RUN git clone --recursive --depth=1 --branch=v1.20.1 https://github.com/Microsoft/onnxruntime.git
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/cmake-3.30.1-linux-x86_64/bin:${PATH}
ARG CMAKE_CUDA_ARCHITECTURES=75;80;90
ENV TRT_VERSION=10.5.0.18
RUN /bin/sh onnxruntime/dockerfiles/scripts/install_common_deps.sh
RUN /bin/sh onnxruntime/dockerfiles/scripts/checkout_submodules.sh ${TRT_VERSION}
RUN ls
RUN cd onnxruntime && \
/bin/sh build.sh --allow_running_as_root --parallel --build_shared_lib \
--cuda_home /usr/local/cuda --cudnn_home /usr/lib/x86_64-linux-gnu/ --use_tensorrt \
--tensorrt_home /usr/lib/x86_64-linux-gnu/ --config Release --build_wheel --skip_tests \
--skip_submodule_sync --cmake_extra_defines '"CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'"'
RUN cd onnxruntime && pip install build/Linux/Release/dist/*.whl
# Install ROS2
# https://docs.ros.org/en/humble/Installation/Ubuntu-Install-Debians.html
RUN apt-get update && apt-get install -y --no-install-recommends locales
RUN locale-gen en_US en_US.UTF-8 && update-locale LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common
RUN add-apt-repository universe
RUN apt-get update && apt-get install -y --no-install-recommends curl
RUN curl -sSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.key -o /usr/share/keyrings/ros-archive-keyring.gpg
RUN echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu $(. /etc/os-release && echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/ros2.list
RUN apt-get update && apt-get install -y --no-install-recommends ros-humble-ros-base python3-argcomplete
RUN apt-get update && apt-get install -y --no-install-recommends ros-dev-tools
RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
# Create ROS2 workspace for basic packages
RUN mkdir -p /project/base/src/
RUN cd /project/base/; colcon build
RUN echo "source /project/base/install/setup.bash" >> ~/.bashrc
# Install opencv and cv_bridge
RUN apt-get update && apt-get install -y --no-install-recommends libboost-dev
RUN apt-get update && apt-get install -y --no-install-recommends libboost-python-dev
RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev
RUN cd /project/base/src/; git clone --branch humble --depth=1 https://github.com/ros-perception/vision_opencv.git
RUN /bin/bash -i -c 'cd /project/base/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Install ROS2 image viewer
RUN cd /project/base/src/; git clone --branch=humble --depth=1 https://github.com/ros-perception/image_pipeline.git
RUN cd /project/base/src/; git clone --branch=humble --depth=1 https://github.com/ros-perception/image_common.git
RUN /bin/bash -i -c 'cd /project/base/; colcon build --symlink-install --packages-select camera_calibration_parsers image_transport image_view --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Fix module not found error when displaying images
RUN apt-get update && apt-get install -y --no-install-recommends libcanberra-gtk-module libcanberra-gtk3-module
# Create ROS2 workspace for project packages
RUN mkdir -p /project/dev_ws/src/
RUN cd /project/dev_ws/; colcon build
RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
# Fix ros package building error
RUN pip3 install --no-cache-dir "setuptools<=58.2.0"
# Copy modules
COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
COPY ./scripts/ /RapidPoseTriangulation/scripts/
COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
# Link and build as ros package
RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/pose2d_visualizer
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/rpt2d_wrapper_cpp
RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Update ros packages -> autocompletion and check
RUN /bin/bash -i -c 'ros2 pkg list'
# Clear cache to save space, only has an effect if image is squashed
RUN apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /RapidPoseTriangulation/
CMD ["/bin/bash"]


@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>pose2d_visualizer</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<test_depend>ament_copyright</test_depend>
<test_depend>ament_flake8</test_depend>
<test_depend>ament_pep257</test_depend>
<test_depend>python3-pytest</test_depend>
<export>
<build_type>ament_python</build_type>
</export>
</package>


@ -0,0 +1,152 @@
import json
import os
import sys
import threading
import time

import cv2
from matplotlib import pyplot as plt
import numpy as np
import rclpy
from cv_bridge import CvBridge
from rclpy.qos import QoSHistoryPolicy, QoSProfile, QoSReliabilityPolicy
from sensor_msgs.msg import Image
from std_msgs.msg import String

filepath = os.path.dirname(os.path.realpath(__file__)) + "/"
sys.path.append(filepath + "../../../../scripts/")
import utils_pipeline

from skelda import utils_view

# ==================================================================================================

bridge = CvBridge()
node = None
publisher_img = None

cam_id = "camera01"
img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
pose_input_topic = "/poses/" + cam_id
img_output_topic = "/" + cam_id + "/img_with_pose"

last_input_image = None
lock = threading.Lock()

# ==================================================================================================

def callback_images(image_data):
    global last_input_image, lock

    # Convert into cv images from image string
    if image_data.encoding == "bayer_rggb8":
        bayer_image = bridge.imgmsg_to_cv2(image_data, "bayer_rggb8")
        color_image = utils_pipeline.bayer2rgb(bayer_image)
    elif image_data.encoding == "mono8":
        gray_image = bridge.imgmsg_to_cv2(image_data, "mono8")
        color_image = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2RGB)
    elif image_data.encoding == "rgb8":
        color_image = bridge.imgmsg_to_cv2(image_data, "rgb8")
    else:
        raise ValueError("Unknown image encoding:", image_data.encoding)

    time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9
    with lock:
        last_input_image = (color_image, time_stamp)

# ==================================================================================================

def callback_poses(pose_data):
    global last_input_image, lock

    ptime = time.time()
    if last_input_image is None:
        return

    # Convert pose data from json string
    poses = json.loads(pose_data.data)

    # Collect inputs
    images_2d = []
    timestamps = []
    with lock:
        img = np.copy(last_input_image[0])
        ts = last_input_image[1]
    images_2d.append(img)
    timestamps.append(ts)

    # Visualize 2D poses
    bodies2D = poses["bodies2D"]
    colors = plt.cm.hsv(np.linspace(0, 1, len(bodies2D), endpoint=False)).tolist()
    colors = [[int(c[0] * 255), int(c[1] * 255), int(c[2] * 255)] for c in colors]
    for i, body in enumerate(bodies2D):
        color = list(reversed(colors[i]))
        img = utils_view.draw_body_in_image(img, body, poses["joints"], color)

    # Publish image with poses
    publish(img)
    msg = "Visualization time: {:.3f}s"
    print(msg.format(time.time() - ptime))

# ==================================================================================================

def publish(img):
    # Publish image data
    msg = bridge.cv2_to_imgmsg(img, "rgb8")
    publisher_img.publish(msg)

# ==================================================================================================

def main():
    global node, publisher_img

    # Start node
    rclpy.init(args=sys.argv)
    node = rclpy.create_node("pose2d_visualizer")

    # Quality of service settings
    qos_profile = QoSProfile(
        reliability=QoSReliabilityPolicy.RELIABLE,
        history=QoSHistoryPolicy.KEEP_LAST,
        depth=1,
    )

    # Create subscribers
    _ = node.create_subscription(
        Image,
        img_input_topic,
        callback_images,
        qos_profile,
    )
    _ = node.create_subscription(
        String,
        pose_input_topic,
        callback_poses,
        qos_profile,
    )

    # Create publishers
    publisher_img = node.create_publisher(Image, img_output_topic, qos_profile)

    node.get_logger().info("Finished initialization of pose visualizer")

    # Run ros update thread
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()

# ==================================================================================================

if __name__ == "__main__":
    main()
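To exercise the visualizer without the estimator, a pose message can be published by hand; a hedged sketch (the JSON keys match `callback_poses` above, and at least one image must have arrived on the camera topic first):

```bash
ros2 topic pub --once /poses/camera01 std_msgs/msg/String \
    "data: '{\"bodies2D\": [], \"joints\": []}'"
```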


@ -0,0 +1,4 @@
[develop]
script_dir=$base/lib/pose2d_visualizer
[install]
install_scripts=$base/lib/pose2d_visualizer


@ -0,0 +1,23 @@
from setuptools import setup

package_name = "pose2d_visualizer"

setup(
    name=package_name,
    version="0.0.0",
    packages=[package_name],
    data_files=[
        ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
        ("share/" + package_name, ["package.xml"]),
    ],
    install_requires=["setuptools"],
    zip_safe=True,
    maintainer="root",
    maintainer_email="root@todo.todo",
    description="TODO: Package description",
    license="TODO: License declaration",
    tests_require=["pytest"],
    entry_points={
        "console_scripts": [
            "pose2d_visualizer = pose2d_visualizer.pose2d_visualizer:main"
        ],
    },
)


@ -0,0 +1,27 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_copyright.main import main

# Remove the `skip` decorator once the source file(s) have a copyright header
@pytest.mark.skip(
    reason="No copyright header has been placed in the generated source file."
)
@pytest.mark.copyright
@pytest.mark.linter
def test_copyright():
    rc = main(argv=[".", "test"])
    assert rc == 0, "Found errors"


@ -0,0 +1,25 @@
# Copyright 2017 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_flake8.main import main_with_errors

@pytest.mark.flake8
@pytest.mark.linter
def test_flake8():
    rc, errors = main_with_errors(argv=[])
    assert rc == 0, "Found %d code style errors / warnings:\n" % len(
        errors
    ) + "\n".join(errors)


@ -0,0 +1,23 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_pep257.main import main

@pytest.mark.linter
@pytest.mark.pep257
def test_pep257():
    rc = main(argv=[".", "test"])
    assert rc == 0, "Found code style errors / warnings"


@ -0,0 +1,65 @@
cmake_minimum_required(VERSION 3.5)
project(rpt2d_wrapper_cpp)
# Default to C99
if(NOT CMAKE_C_STANDARD)
set(CMAKE_C_STANDARD 99)
endif()
# Default to C++17
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()
# find dependencies
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED)
find_package(std_msgs REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(cv_bridge REQUIRED)
find_package(OpenCV REQUIRED)
### 3) ONNX Runtime
include_directories(/onnxruntime/include/
/onnxruntime/include/onnxruntime/core/session/
/onnxruntime/include/onnxruntime/core/providers/tensorrt/)
link_directories(/onnxruntime/build/Linux/Release/)
add_executable(rpt2d_wrapper src/rpt2d_wrapper.cpp)
ament_target_dependencies(rpt2d_wrapper rclcpp std_msgs sensor_msgs cv_bridge)
target_include_directories(rpt2d_wrapper PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
target_link_libraries(rpt2d_wrapper
${OpenCV_LIBS}
onnxruntime_providers_tensorrt
onnxruntime_providers_shared
onnxruntime_providers_cuda
onnxruntime
)
set_target_properties(rpt2d_wrapper PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH "/onnxruntime/build/Linux/Release"
)
install(TARGETS rpt2d_wrapper
DESTINATION lib/${PROJECT_NAME})
if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
# the following line skips the linter which checks for copyrights
# uncomment the line when a copyright and license is not present in all source files
#set(ament_cmake_copyright_FOUND TRUE)
# the following line skips cpplint (only works in a git repo)
# uncomment the line when this package is not in a git repo
#set(ament_cmake_cpplint_FOUND TRUE)
ament_lint_auto_find_test_dependencies()
endif()
ament_package()
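Since the RPATH is pinned to the ONNX Runtime build tree, a quick way to confirm the installed binary resolves it (the install path follows the usual colcon layout used in the dockerfile; adjust if your workspace differs):

```bash
readelf -d /project/dev_ws/install/rpt2d_wrapper_cpp/lib/rpt2d_wrapper_cpp/rpt2d_wrapper \
    | grep -iE 'rpath|runpath'
```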


@ -0,0 +1,25 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>rpt2d_wrapper_cpp</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<depend>rclcpp</depend>
<depend>std_msgs</depend>
<depend>sensor_msgs</depend>
<depend>cv_bridge</depend>
<depend>OpenCV</depend>
<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>
<export>
<build_type>ament_cmake</build_type>
</export>
</package>


@ -0,0 +1,230 @@
#include <atomic>
#include <chrono>
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
// ROS2
#include <rclcpp/rclcpp.hpp>
#include <sensor_msgs/msg/image.hpp>
#include <std_msgs/msg/string.hpp>
// OpenCV / cv_bridge
#include <cv_bridge/cv_bridge.h>
#include <opencv2/opencv.hpp>
// JSON library
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
using json = nlohmann::json;
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
// =================================================================================================
static const std::string cam_id = "camera01";
static const std::string img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw";
static const std::string pose_out_topic = "/poses/" + cam_id;
static const float min_bbox_score = 0.4;
static const float min_bbox_area = 0.1 * 0.1;
static const bool batch_poses = true;
static const std::map<std::string, bool> whole_body = {
{"foots", true},
{"face", true},
{"hands", true},
};
// =================================================================================================
// =================================================================================================
class Rpt2DWrapperNode : public rclcpp::Node
{
public:
Rpt2DWrapperNode(const std::string &node_name)
: Node(node_name)
{
this->is_busy = false;
// QoS
rclcpp::QoS qos_profile(1);
qos_profile.reliable();
qos_profile.keep_last(1);
// Setup subscriber
image_sub_ = this->create_subscription<sensor_msgs::msg::Image>(
img_input_topic, qos_profile,
std::bind(&Rpt2DWrapperNode::callback_images, this, std::placeholders::_1));
// Setup publisher
pose_pub_ = this->create_publisher<std_msgs::msg::String>(pose_out_topic, qos_profile);
// Load model
bool use_wb = utils_pipeline::use_whole_body(whole_body);
this->kpt_model = std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses);
RCLCPP_INFO(this->get_logger(), "Finished initialization of pose estimator.");
}
private:
rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr image_sub_;
rclcpp::Publisher<std_msgs::msg::String>::SharedPtr pose_pub_;
std::atomic<bool> is_busy;
// Pose model pointer
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model;
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
void callback_images(const sensor_msgs::msg::Image::SharedPtr msg);
std::vector<std::vector<std::array<float, 3>>> call_model(const cv::Mat &image);
void publish(const json &data)
{
std_msgs::msg::String msg;
msg.data = data.dump();
pose_pub_->publish(msg);
}
};
// =================================================================================================
void Rpt2DWrapperNode::callback_images(const sensor_msgs::msg::Image::SharedPtr msg)
{
if (this->is_busy)
{
RCLCPP_WARN(this->get_logger(), "Skipping frame, still processing...");
return;
}
this->is_busy = true;
auto ts_image = std::chrono::high_resolution_clock::now();
// Load or convert image to Bayer format
cv::Mat bayer_image;
try
{
if (msg->encoding == "mono8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "bayer_rggb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "rgb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
cv::Mat color_image = cv_ptr->image;
bayer_image = utils_pipeline::rgb2bayer(color_image);
}
else
{
throw std::runtime_error("Unknown image encoding: " + msg->encoding);
}
}
catch (const std::exception &e)
{
RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what());
// Clear the busy flag before bailing out, otherwise the node would never process another frame
this->is_busy = false;
return;
}
// Call model
const auto &valid_poses = this->call_model(bayer_image);
// Calculate timings
double time_stamp = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;
auto ts_image_sec = std::chrono::duration<double>(ts_image.time_since_epoch()).count();
auto ts_pose = std::chrono::high_resolution_clock::now();
double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
double z_trigger_image = ts_image_sec - time_stamp;
double z_trigger_pose = ts_pose_sec - time_stamp;
double z_image_pose = ts_pose_sec - ts_image_sec;
// Build JSON
json poses_msg;
poses_msg["bodies2D"] = valid_poses; // shape: persons x keypoints x 3
poses_msg["joints"] = joint_names_2d;
poses_msg["num_persons"] = valid_poses.size();
poses_msg["timestamps"] = {
{"trigger", time_stamp},
{"image", ts_image_sec},
{"pose", ts_pose_sec},
{"z-trigger-image", z_trigger_image},
{"z-image-pose", z_image_pose},
{"z-trigger-pose", z_trigger_pose}};
// Publish
publish(poses_msg);
// Print info
double elapsed_time = std::chrono::duration<double>(
std::chrono::high_resolution_clock::now() - ts_image)
.count();
std::cout << "Detected persons: " << valid_poses.size()
<< " - Prediction time: " << elapsed_time << "s" << std::endl;
this->is_busy = false;
}
// =================================================================================================
std::vector<std::vector<std::array<float, 3>>> Rpt2DWrapperNode::call_model(const cv::Mat &image)
{
// Create image vector
cv::Mat rgb_image = utils_pipeline::bayer2rgb(image);
std::vector<cv::Mat> images_2d = {rgb_image};
// Predict 2D poses
auto poses_2d_all = kpt_model->predict(images_2d);
auto poses_2d_upd = utils_pipeline::update_keypoints(
poses_2d_all, joint_names_2d, whole_body);
auto &poses_2d = poses_2d_upd[0];
// Drop persons with no joints
std::vector<std::vector<std::array<float, 3>>> valid_poses;
for (auto &person : poses_2d)
{
float sum_conf = 0.0;
for (auto &kp : person)
{
sum_conf += kp[2];
}
if (sum_conf > 0.0)
{
valid_poses.push_back(person);
}
}
// Round poses to 3 decimal places
for (auto &person : valid_poses)
{
for (auto &kp : person)
{
kp[0] = std::round(kp[0] * 1000.0) / 1000.0;
kp[1] = std::round(kp[1] * 1000.0) / 1000.0;
kp[2] = std::round(kp[2] * 1000.0) / 1000.0;
}
}
return valid_poses;
}
// =================================================================================================
// =================================================================================================
int main(int argc, char **argv)
{
rclcpp::init(argc, argv);
auto node = std::make_shared<Rpt2DWrapperNode>("rpt2d_wrapper");
rclcpp::spin(node);
rclcpp::shutdown();
return 0;
}
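For completeness, a minimal Python-side consumer of the published JSON; the topic name and message keys follow the wrapper above, while the node name and queue depth are illustrative:

```python
import json

import rclpy
from std_msgs.msg import String


def on_poses(msg):
    # The wrapper publishes bodies2D, joints, num_persons and timestamps
    data = json.loads(msg.data)
    print("persons:", data["num_persons"],
          "trigger-to-pose latency:", data["timestamps"]["z-trigger-pose"])


def main():
    rclpy.init()
    node = rclpy.create_node("pose_listener")
    node.create_subscription(String, "/poses/camera01", on_poses, 1)
    rclpy.spin(node)


if __name__ == "__main__":
    main()
```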

File diff suppressed because it is too large


@ -581,20 +581,21 @@ void TriangulatorInternal::reset()
void TriangulatorInternal::print_stats()
{
-    std::cout << "Triangulator statistics:" << std::endl;
-    std::cout << " Number of calls: " << num_calls << std::endl;
-    std::cout << " Init time: " << init_time / num_calls << std::endl;
-    std::cout << " Undistort time: " << undistort_time / num_calls << std::endl;
-    std::cout << " Project time: " << project_time / num_calls << std::endl;
-    std::cout << " Match time: " << match_time / num_calls << std::endl;
-    std::cout << " Pairs time: " << pairs_time / num_calls << std::endl;
-    std::cout << " Pair scoring time: " << pair_scoring_time / num_calls << std::endl;
-    std::cout << " Grouping time: " << grouping_time / num_calls << std::endl;
-    std::cout << " Full time: " << full_time / num_calls << std::endl;
-    std::cout << " Merge time: " << merge_time / num_calls << std::endl;
-    std::cout << " Post time: " << post_time / num_calls << std::endl;
-    std::cout << " Convert time: " << convert_time / num_calls << std::endl;
-    std::cout << " Total time: " << total_time / num_calls << std::endl;
+    std::cout << "{" << std::endl;
+    std::cout << " \"triangulator_calls\": " << num_calls << "," << std::endl;
+    std::cout << " \"init_time\": " << init_time / num_calls << "," << std::endl;
+    std::cout << " \"undistort_time\": " << undistort_time / num_calls << "," << std::endl;
+    std::cout << " \"project_time\": " << project_time / num_calls << "," << std::endl;
+    std::cout << " \"match_time\": " << match_time / num_calls << "," << std::endl;
+    std::cout << " \"pairs_time\": " << pairs_time / num_calls << "," << std::endl;
+    std::cout << " \"pair_scoring_time\": " << pair_scoring_time / num_calls << "," << std::endl;
+    std::cout << " \"grouping_time\": " << grouping_time / num_calls << "," << std::endl;
+    std::cout << " \"full_time\": " << full_time / num_calls << "," << std::endl;
+    std::cout << " \"merge_time\": " << merge_time / num_calls << "," << std::endl;
+    std::cout << " \"post_time\": " << post_time / num_calls << "," << std::endl;
+    std::cout << " \"convert_time\": " << convert_time / num_calls << "," << std::endl;
+    std::cout << " \"total_time\": " << total_time / num_calls << std::endl;
+    std::cout << "}" << std::endl;
}
// =================================================================================================
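With the JSON-style output, the stats block can be consumed programmatically; a minimal sketch, assuming `captured` holds the stdout of a run (the sample values are illustrative):

```python
import json
import re

captured = 'some log lines\n{\n "triangulator_calls": 100,\n "total_time": 0.004\n}\n'
block = re.search(r'\{[^{}]*"triangulator_calls"[^{}]*\}', captured).group(0)
stats = json.loads(block)
print(stats["total_time"])
```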


@ -0,0 +1,268 @@
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
// OpenCV
#include <opencv2/opencv.hpp>
// JSON library
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
using json = nlohmann::json;
#include "/RapidPoseTriangulation/rpt/camera.hpp"
#include "/RapidPoseTriangulation/rpt/interface.hpp"
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
// =================================================================================================
static const std::string path_data = "/tmp/rpt/all.json";
static const std::string path_cfg = "/tmp/rpt/config.json";
// =================================================================================================
std::vector<cv::Mat> load_images(json &item)
{
// Load images
std::vector<cv::Mat> images;
for (size_t j = 0; j < item["imgpaths"].size(); j++)
{
auto ipath = item["imgpaths"][j].get<std::string>();
cv::Mat image = cv::imread(ipath, cv::IMREAD_COLOR);
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
images.push_back(image);
}
if (item["dataset_name"] == "human36m")
{
// Since the images don't have the same shape, rescale some of them
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Size ishape = img.size();
if (ishape != cv::Size(1000, 1000))
{
auto cam = item["cameras"][i];
cam["K"][1][1] = cam["K"][1][1].get<float>() * (1000.0 / ishape.height);
cam["K"][1][2] = cam["K"][1][2].get<float>() * (1000.0 / ishape.height);
cam["K"][0][0] = cam["K"][0][0].get<float>() * (1000.0 / ishape.width);
cam["K"][0][2] = cam["K"][0][2].get<float>() * (1000.0 / ishape.width);
cv::resize(img, img, cv::Size(1000, 1000));
images[i] = img;
}
}
}
// Convert image format to Bayer encoding to simulate real camera input
// This also resulted in notably better MPJPE results in most cases, presumably since the
// demosaicing algorithm from OpenCV is better than the default one from the cameras
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Mat bayer_image = utils_pipeline::rgb2bayer(img);
images[i] = std::move(bayer_image);
}
return images;
}
// =================================================================================================
std::string read_file(const std::string &path)
{
std::ifstream file_stream(path);
if (!file_stream.is_open())
{
throw std::runtime_error("Unable to open file: " + path);
}
std::stringstream buffer;
buffer << file_stream.rdbuf();
return buffer.str();
}
void write_file(const std::string &path, const std::string &content)
{
std::ofstream file_stream(path, std::ios::out | std::ios::binary);
if (!file_stream.is_open())
{
throw std::runtime_error("Unable to open file for writing: " + path);
}
file_stream << content;
if (!file_stream)
{
throw std::runtime_error("Error occurred while writing to file: " + path);
}
file_stream.close();
}
// =================================================================================================
int main(int argc, char **argv)
{
// Load the files
auto dataset = json::parse(read_file(path_data));
auto config = json::parse(read_file(path_cfg));
// Load the configuration
const std::map<std::string, bool> whole_body = config["whole_body"];
const float min_bbox_score = config["min_bbox_score"];
const float min_bbox_area = config["min_bbox_area"];
const bool batch_poses = config["batch_poses"];
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
const float min_match_score = config["min_match_score"];
const size_t min_group_size = config["min_group_size"];
const int take_interval = config["take_interval"];
// Load 2D model
bool use_wb = utils_pipeline::use_whole_body(whole_body);
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model =
std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses);
// Load 3D model
std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>(
min_match_score, min_group_size);
// Timers
size_t time_count = dataset.size();
double time_image = 0.0;
double time_pose2d = 0.0;
double time_pose3d = 0.0;
size_t print_steps = (size_t)std::floor((float)time_count / 100.0f);
print_steps = std::max((size_t)1, print_steps);
std::cout << "Running predictions: |";
size_t bar_width = (size_t)std::ceil((float)time_count / (float)print_steps) - 2;
for (size_t i = 0; i < bar_width; i++)
{
std::cout << "-";
}
std::cout << "|" << std::endl;
// Calculate 2D poses [items, views, persons, joints, 3]
std::vector<std::vector<std::vector<std::vector<std::array<float, 3>>>>> all_poses_2d;
std::cout << "Calculating 2D poses: ";
for (size_t i = 0; i < dataset.size(); i++)
{
if (i % print_steps == 0)
{
std::cout << "#" << std::flush;
}
std::chrono::duration<float> elapsed;
auto &item = dataset[i];
// Load images
auto stime = std::chrono::high_resolution_clock::now();
std::vector<cv::Mat> images = load_images(item);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_image += elapsed.count();
// Predict 2D poses
stime = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Mat rgb = utils_pipeline::bayer2rgb(img);
images[i] = std::move(rgb);
}
auto poses_2d_all = kpt_model->predict(images);
auto poses_2d_upd = utils_pipeline::update_keypoints(
poses_2d_all, joint_names_2d, whole_body);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_pose2d += elapsed.count();
all_poses_2d.push_back(std::move(poses_2d_upd));
}
std::cout << std::endl;
// Calculate 3D poses [items, persons, joints, 4]
std::vector<std::vector<std::vector<std::array<float, 4>>>> all_poses_3d;
std::vector<std::string> all_ids;
std::string old_scene = "";
int old_id = -1;
std::cout << "Calculating 3D poses: ";
for (size_t i = 0; i < dataset.size(); i++)
{
if (i % print_steps == 0)
{
std::cout << "#" << std::flush;
}
std::chrono::duration<float> elapsed;
auto &item = dataset[i];
auto &poses_2d = all_poses_2d[i];
if (old_scene != item["scene"] || old_id + take_interval < item["index"])
{
// Reset last poses if scene changes
tri_model->reset();
old_scene = item["scene"];
}
auto stime = std::chrono::high_resolution_clock::now();
std::vector<Camera> cameras;
for (size_t j = 0; j < item["cameras"].size(); j++)
{
auto &cam = item["cameras"][j];
Camera camera;
camera.name = cam["name"].get<std::string>();
camera.K = cam["K"].get<std::array<std::array<float, 3>, 3>>();
camera.DC = cam["DC"].get<std::vector<float>>();
camera.R = cam["R"].get<std::array<std::array<float, 3>, 3>>();
camera.T = cam["T"].get<std::array<std::array<float, 1>, 3>>();
camera.width = cam["width"].get<int>();
camera.height = cam["height"].get<int>();
camera.type = cam["type"].get<std::string>();
cameras.push_back(camera);
}
std::array<std::array<float, 3>, 2> roomparams = {
item["room_size"].get<std::array<float, 3>>(),
item["room_center"].get<std::array<float, 3>>()};
auto poses_3d = tri_model->triangulate_poses(poses_2d, cameras, roomparams, joint_names_2d);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_pose3d += elapsed.count();
all_poses_3d.push_back(std::move(poses_3d));
all_ids.push_back(item["id"].get<std::string>());
old_id = item["index"];
}
std::cout << std::endl;
// Print timing stats
std::cout << "\nMetrics:" << std::endl;
size_t warmup = 10;
double avg_time_image = time_image / (time_count - warmup);
double avg_time_pose2d = time_pose2d / (time_count - warmup);
double avg_time_pose3d = time_pose3d / (time_count - warmup);
double fps = 1.0 / (avg_time_pose2d + avg_time_pose3d);
std::cout << "{\n"
<< " \"img_loading\": " << avg_time_image << ",\n"
<< " \"avg_time_2d\": " << avg_time_pose2d << ",\n"
<< " \"avg_time_3d\": " << avg_time_pose3d << ",\n"
<< " \"fps\": " << fps << "\n"
<< "}" << std::endl;
tri_model->print_stats();
// Store the results as json
json all_results;
all_results["all_ids"] = all_ids;
all_results["all_poses_2d"] = all_poses_2d;
all_results["all_poses_3d"] = all_poses_3d;
all_results["joint_names_2d"] = joint_names_2d;
all_results["joint_names_3d"] = joint_names_2d;
// Save the results
std::string path_results = "/tmp/rpt/results.json";
write_file(path_results, all_results.dump(0));
return 0;
}
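For reference, the exchange files under `/tmp/rpt/` follow the keys read above: `all.json` holds the exported skelda labels, and `results.json` is written back with `all_ids`, `all_poses_2d`, `all_poses_3d`, and the joint name lists. A hedged sketch of `config.json` (the keys match the code, the values are illustrative defaults):

```json
{
  "min_match_score": 0.95,
  "min_group_size": 2,
  "min_bbox_score": 0.5,
  "min_bbox_area": 0.01,
  "batch_poses": true,
  "whole_body": {"foots": false, "face": false, "hands": false},
  "take_interval": 1
}
```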


@ -1,22 +1,18 @@
import json
import os
-import sys
-import time
-import cv2
-import matplotlib
-import numpy as np
-import tqdm
-import test_triangulate
-import utils_2d_pose
+import utils_pipeline
from skelda import evals
+from skelda.writers import json_writer
-sys.path.append("/RapidPoseTriangulation/swig/")
-import rpt
# ==================================================================================================
+whole_body = {
+    "foots": False,
+    "face": False,
+    "hands": False,
+}
dataset_use = "human36m"
# dataset_use = "panoptic"
# dataset_use = "mvor"
@ -33,8 +29,6 @@ dataset_use = "human36m"
# dataset_use = "egohumans_volleyball"
# dataset_use = "egohumans_badminton"
# dataset_use = "egohumans_tennis"
-# dataset_use = "ntu"
-# dataset_use = "koarob"
# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered
@ -89,13 +83,13 @@ datasets = {
    "campus": {
        "path": "/datasets/campus/skelda/test.json",
        "take_interval": 1,
-        "min_match_score": 0.90,
+        "min_match_score": 0.92,
        "min_bbox_score": 0.5,
    },
    "shelf": {
        "path": "/datasets/shelf/skelda/test.json",
        "take_interval": 1,
-        "min_match_score": 0.96,
+        "min_match_score": 0.95,
        "min_group_size": 2,
    },
    "ikeaasm": {
@ -175,7 +169,7 @@ datasets = {
    },
}
-joint_names_2d = test_triangulate.joint_names_2d
+joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d)
eval_joints = [
    "head",
@ -197,7 +191,7 @@ if dataset_use == "human36m":
if dataset_use == "panoptic":
    eval_joints[eval_joints.index("head")] = "nose"
if dataset_use == "human36m_wb":
-    if any((test_triangulate.whole_body.values())):
+    if utils_pipeline.use_whole_body(whole_body):
        eval_joints = list(joint_names_2d)
    else:
        eval_joints[eval_joints.index("head")] = "nose"
@ -214,6 +208,11 @@ def load_json(path: str):
    return data
+def save_json(data: dict, path: str):
+    with open(path, "w+", encoding="utf-8") as file:
+        json.dump(data, file, indent=0)
# ==================================================================================================
@ -304,6 +303,14 @@ def load_labels(dataset: dict):
    if take_interval > 1:
        labels = [l for i, l in enumerate(labels) if i % take_interval == 0]
+    # Add default values
+    for label in labels:
+        if "scene" not in label:
+            label["scene"] = "default"
+        for cam in label["cameras"]:
+            if "type" not in cam:
+                cam["type"] = "pinhole"
    return labels
@ -313,28 +320,6 @@
def main():
    global joint_names_3d, eval_joints
-    # Load dataset specific parameters
-    min_match_score = datasets[dataset_use].get(
-        "min_match_score", default_min_match_score
-    )
-    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
-    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
-    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
-    batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
-    # Load 2D pose model
-    whole_body = test_triangulate.whole_body
-    if any((whole_body[k] for k in whole_body)):
-        kpt_model = utils_2d_pose.load_wb_model()
-    else:
-        kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)
-    # Manually set matplotlib backend
-    try:
-        matplotlib.use("TkAgg")
-    except ImportError:
-        print("WARNING: Using headless mode, no visualizations will be shown.")
    print("Loading dataset ...")
    labels = load_labels(
        {
@ -346,120 +331,47 @@ def main():
    # Print a dataset sample for debugging
    print(labels[0])
-    print("\nPrefetching images ...")
-    for label in tqdm.tqdm(labels):
-        # If the images are stored on a HDD, it sometimes takes a while to load them
-        # Prefetching them results in more stable timings of the following steps
-        # To prevent memory overflow, the code only loads the images, but does not store them
-        try:
-            for i in range(len(label["imgpaths"])):
-                imgpath = label["imgpaths"][i]
-                img = test_triangulate.load_image(imgpath)
-        except cv2.error:
-            print("One of the paths not found:", label["imgpaths"])
-            continue
-    time.sleep(3)
-    print("\nCalculating 2D predictions ...")
-    all_poses_2d = []
-    times = []
-    for label in tqdm.tqdm(labels):
-        images_2d = []
-        start = time.time()
-        try:
-            for i in range(len(label["imgpaths"])):
-                imgpath = label["imgpaths"][i]
-                img = test_triangulate.load_image(imgpath)
-                images_2d.append(img)
-        except cv2.error:
-            print("One of the paths not found:", label["imgpaths"])
-            continue
-        if dataset_use == "human36m":
-            for i in range(len(images_2d)):
-                # Since the images don't have the same shape, rescale some of them
-                img = images_2d[i]
-                ishape = img.shape
-                if ishape != (1000, 1000, 3):
-                    cam = label["cameras"][i]
-                    cam["K"][1][1] = cam["K"][1][1] * (1000 / ishape[0])
-                    cam["K"][1][2] = cam["K"][1][2] * (1000 / ishape[0])
-                    cam["K"][0][0] = cam["K"][0][0] * (1000 / ishape[1])
-                    cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
-                    images_2d[i] = cv2.resize(img, (1000, 1000))
-        # Convert image format to Bayer encoding to simulate real camera input
-        # This also resulted in notably better MPJPE results in most cases, presumably since the
-        # demosaicing algorithm from OpenCV is better than the default one from the cameras
-        for i in range(len(images_2d)):
-            images_2d[i] = test_triangulate.rgb2bayer(images_2d[i])
-        time_imgs = time.time() - start
-        start = time.time()
-        for i in range(len(images_2d)):
-            images_2d[i] = test_triangulate.bayer2rgb(images_2d[i])
-        poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
-        poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
-        time_2d = time.time() - start
-        all_poses_2d.append(poses_2d)
-        times.append([time_imgs, time_2d, 0])
-    print("\nCalculating 3D predictions ...")
-    all_poses_3d = []
-    all_ids = []
-    triangulator = rpt.Triangulator(
-        min_match_score=min_match_score, min_group_size=min_group_size
-    )
-    old_scene = ""
-    old_index = -1
-    for i in tqdm.tqdm(range(len(labels))):
-        label = labels[i]
-        poses_2d = all_poses_2d[i]
-        if old_scene != label.get("scene", "") or (
-            old_index + datasets[dataset_use]["take_interval"] < label["index"]
-        ):
-            # Reset last poses if scene changes
-            old_scene = label.get("scene", "")
-            triangulator.reset()
-        start = time.time()
-        if sum(np.sum(p) for p in poses_2d) == 0:
-            poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist()
-        else:
-            rpt_cameras = rpt.convert_cameras(label["cameras"])
-            roomparams = [label["room_size"], label["room_center"]]
-            poses3D = triangulator.triangulate_poses(
-                poses_2d, rpt_cameras, roomparams, joint_names_2d
-            )
-        time_3d = time.time() - start
-        old_index = label["index"]
-        all_poses_3d.append(np.array(poses3D).tolist())
-        all_ids.append(label["id"])
-        times[i][2] = time_3d
-    # Print per-step triangulation timings
-    print("")
-    triangulator.print_stats()
-    warmup_iters = 10
-    if len(times) > warmup_iters:
-        times = times[warmup_iters:]
-    avg_time_im = np.mean([t[0] for t in times])
-    avg_time_2d = np.mean([t[1] for t in times])
-    avg_time_3d = np.mean([t[2] for t in times])
-    tstats = {
-        "img_loading": avg_time_im,
-        "avg_time_2d": avg_time_2d,
-        "avg_time_3d": avg_time_3d,
-        "avg_fps": 1.0 / (avg_time_2d + avg_time_3d),
-    }
-    print("\nMetrics:")
-    print(json.dumps(tstats, indent=2))
+    # Save dataset
+    tmp_export_dir = "/tmp/rpt/"
+    for label in labels:
+        if "splits" in label:
+            label.pop("splits")
+    json_writer.save_dataset(labels, tmp_export_dir)
+    # Load dataset specific parameters
+    min_match_score = datasets[dataset_use].get(
+        "min_match_score", default_min_match_score
+    )
+    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
+    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
+    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
+    batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
+    # Save config
+    config_path = tmp_export_dir + "config.json"
+    config = {
+        "min_match_score": min_match_score,
+        "min_group_size": min_group_size,
+        "min_bbox_score": min_bbox_score,
+        "min_bbox_area": min_bbox_area,
+        "batch_poses": batch_poses,
+        "whole_body": whole_body,
+        "take_interval": datasets[dataset_use]["take_interval"],
+    }
+    save_json(config, config_path)
+    # Call the CPP binary
+    os.system("/RapidPoseTriangulation/scripts/test_skelda_dataset.bin")
+    # Load the results
+    print("Loading exports ...")
+    res_path = tmp_export_dir + "results.json"
+    results = load_json(res_path)
+    all_poses_3d = results["all_poses_3d"]
+    all_ids = results["all_ids"]
+    joint_names_3d = results["joint_names_3d"]
+    # Run evaluation
    _ = evals.mpjpe.run_eval(
        labels,
        all_poses_3d,
@ -477,7 +389,6 @@ def main():
        replace_head_with_nose=True,
    )
# ==================================================================================================
if __name__ == "__main__":


@ -3,13 +3,12 @@ import json
import os
import sys
import time
-from typing import List
-import cv2
import matplotlib
import numpy as np
import utils_2d_pose
+import utils_pipeline
from skelda import utils_pose, utils_view
sys.path.append("/RapidPoseTriangulation/swig/")
@ -25,175 +24,9 @@ whole_body = {
    "hands": False,
}
-joint_names_2d = [
-    "nose",
-    "eye_left",
-    "eye_right",
-    "ear_left",
-    "ear_right",
-    "shoulder_left",
-    "shoulder_right",
-    "elbow_left",
-    "elbow_right",
-    "wrist_left",
-    "wrist_right",
-    "hip_left",
-    "hip_right",
-    "knee_left",
-    "knee_right",
-    "ankle_left",
-    "ankle_right",
-]
-if whole_body["foots"]:
-    joint_names_2d.extend(
-        [
-            "foot_toe_big_left",
-            "foot_toe_small_left",
-            "foot_heel_left",
-            "foot_toe_big_right",
-            "foot_toe_small_right",
-            "foot_heel_right",
-        ]
-    )
-if whole_body["face"]:
-    joint_names_2d.extend(
-        [
-            "face_jaw_right_1",
-            "face_jaw_right_2",
-            "face_jaw_right_3",
-            "face_jaw_right_4",
-            "face_jaw_right_5",
-            "face_jaw_right_6",
-            "face_jaw_right_7",
-            "face_jaw_right_8",
-            "face_jaw_middle",
-            "face_jaw_left_1",
-            "face_jaw_left_2",
-            "face_jaw_left_3",
-            "face_jaw_left_4",
-            "face_jaw_left_5",
-            "face_jaw_left_6",
-            "face_jaw_left_7",
-            "face_jaw_left_8",
-            "face_eyebrow_right_1",
-            "face_eyebrow_right_2",
-            "face_eyebrow_right_3",
-            "face_eyebrow_right_4",
-            "face_eyebrow_right_5",
-            "face_eyebrow_left_1",
-            "face_eyebrow_left_2",
-            "face_eyebrow_left_3",
-            "face_eyebrow_left_4",
-            "face_eyebrow_left_5",
-            "face_nose_1",
-            "face_nose_2",
-            "face_nose_3",
-            "face_nose_4",
-            "face_nose_5",
-            "face_nose_6",
-            "face_nose_7",
-            "face_nose_8",
-            "face_nose_9",
-            "face_eye_right_1",
-            "face_eye_right_2",
-            "face_eye_right_3",
-            "face_eye_right_4",
-            "face_eye_right_5",
-            "face_eye_right_6",
-            "face_eye_left_1",
-            "face_eye_left_2",
-            "face_eye_left_3",
-            "face_eye_left_4",
-            "face_eye_left_5",
-            "face_eye_left_6",
-            "face_mouth_1",
-            "face_mouth_2",
-            "face_mouth_3",
-            "face_mouth_4",
-            "face_mouth_5",
-            "face_mouth_6",
-            "face_mouth_7",
-            "face_mouth_8",
-            "face_mouth_9",
-            "face_mouth_10",
-            "face_mouth_11",
-            "face_mouth_12",
-            "face_mouth_13",
-            "face_mouth_14",
-            "face_mouth_15",
-            "face_mouth_16",
-            "face_mouth_17",
-            "face_mouth_18",
-            "face_mouth_19",
-            "face_mouth_20",
-        ]
-    )
-if whole_body["hands"]:
-    joint_names_2d.extend(
-        [
-            "hand_wrist_left",
-            "hand_finger_thumb_left_1",
-            "hand_finger_thumb_left_2",
-            "hand_finger_thumb_left_3",
-            "hand_finger_thumb_left_4",
-            "hand_finger_index_left_1",
-            "hand_finger_index_left_2",
-            "hand_finger_index_left_3",
-            "hand_finger_index_left_4",
-            "hand_finger_middle_left_1",
-            "hand_finger_middle_left_2",
-            "hand_finger_middle_left_3",
-            "hand_finger_middle_left_4",
-            "hand_finger_ring_left_1",
-            "hand_finger_ring_left_2",
-            "hand_finger_ring_left_3",
-            "hand_finger_ring_left_4",
-            "hand_finger_pinky_left_1",
-            "hand_finger_pinky_left_2",
-            "hand_finger_pinky_left_3",
-            "hand_finger_pinky_left_4",
-            "hand_wrist_right",
-            "hand_finger_thumb_right_1",
-            "hand_finger_thumb_right_2",
-            "hand_finger_thumb_right_3",
-            "hand_finger_thumb_right_4",
-            "hand_finger_index_right_1",
-            "hand_finger_index_right_2",
-            "hand_finger_index_right_3",
-            "hand_finger_index_right_4",
-            "hand_finger_middle_right_1",
-            "hand_finger_middle_right_2",
-            "hand_finger_middle_right_3",
-            "hand_finger_middle_right_4",
-            "hand_finger_ring_right_1",
-            "hand_finger_ring_right_2",
-            "hand_finger_ring_right_3",
-            "hand_finger_ring_right_4",
-            "hand_finger_pinky_right_1",
-            "hand_finger_pinky_right_2",
-            "hand_finger_pinky_right_3",
-            "hand_finger_pinky_right_4",
-        ]
-    )
-joint_names_2d.extend(
-    [
-        "hip_middle",
-        "shoulder_middle",
-        "head",
-    ]
-)
+joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d)
-main_limbs = [
-    ("shoulder_left", "elbow_left"),
-    ("elbow_left", "wrist_left"),
-    ("shoulder_right", "elbow_right"),
-    ("elbow_right", "wrist_right"),
-    ("hip_left", "knee_left"),
-    ("knee_left", "ankle_left"),
-    ("hip_right", "knee_right"),
-    ("knee_right", "ankle_right"),
-]
# ==================================================================================================
@ -217,85 +50,6 @@ def update_sample(sample, new_dir=""):
# ==================================================================================================
-def load_image(path: str):
-    image = cv2.imread(path, 3)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    image = np.asarray(image, dtype=np.uint8)
-    return image
-# ==================================================================================================
-def rgb2bayer(img):
-    bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
-    bayer[0::2, 0::2] = img[0::2, 0::2, 0]
-    bayer[0::2, 1::2] = img[0::2, 1::2, 1]
-    bayer[1::2, 0::2] = img[1::2, 0::2, 1]
-    bayer[1::2, 1::2] = img[1::2, 1::2, 2]
-    return bayer
-def bayer2rgb(bayer):
-    img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
-    return img
-# ==================================================================================================
-def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
-    new_views = []
-    for view in poses_2d:
-        new_bodies = []
-        for body in view:
-            body = body.tolist()
-            new_body = body[:17]
-            if whole_body["foots"]:
-                new_body.extend(body[17:22])
-            if whole_body["face"]:
-                new_body.extend(body[22:90])
-            if whole_body["hands"]:
-                new_body.extend(body[90:])
-            body = new_body
-            hlid = joint_names.index("hip_left")
-            hrid = joint_names.index("hip_right")
-            mid_hip = [
-                float(((body[hlid][0] + body[hrid][0]) / 2.0)),
-                float(((body[hlid][1] + body[hrid][1]) / 2.0)),
-                min(body[hlid][2], body[hrid][2]),
-            ]
-            body.append(mid_hip)
-            slid = joint_names.index("shoulder_left")
-            srid = joint_names.index("shoulder_right")
-            mid_shoulder = [
-                float(((body[slid][0] + body[srid][0]) / 2.0)),
-                float(((body[slid][1] + body[srid][1]) / 2.0)),
-                min(body[slid][2], body[srid][2]),
-            ]
-            body.append(mid_shoulder)
-            elid = joint_names.index("ear_left")
-            erid = joint_names.index("ear_right")
-            head = [
-                float(((body[elid][0] + body[erid][0]) / 2.0)),
-                float(((body[elid][1] + body[erid][1]) / 2.0)),
-                min(body[elid][2], body[erid][2]),
-            ]
-            body.append(head)
-            new_bodies.append(body)
-        new_views.append(new_bodies)
-    return new_views
-# ==================================================================================================
def main():
    if any((whole_body[k] for k in whole_body)):
        kpt_model = utils_2d_pose.load_wb_model()
@ -330,15 +84,15 @@ def main():
        images_2d = []
        for i in range(len(sample["cameras_color"])):
            imgpath = sample["imgpaths_color"][i]
-            img = load_image(imgpath)
-            img = rgb2bayer(img)
-            img = bayer2rgb(img)
+            img = utils_pipeline.load_image(imgpath)
+            img = utils_pipeline.rgb2bayer(img)
+            img = utils_pipeline.bayer2rgb(img)
            images_2d.append(img)
        # Get 2D poses
        stime = time.time()
        poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
-        poses_2d = update_keypoints(poses_2d, joint_names_2d)
+        poses_2d = utils_pipeline.update_keypoints(poses_2d, joint_names_2d, whole_body)
        print("2D time:", time.time() - stime)
        # print([np.array(p).round(6).tolist() for p in poses_2d])

scripts/utils_2d_pose.hpp (new file, 1034 lines)

File diff suppressed because it is too large


@ -20,6 +20,7 @@ class BaseModel(ABC):
raise FileNotFoundError("File not found:", model_path) raise FileNotFoundError("File not found:", model_path)
if model_path.endswith(".onnx"): if model_path.endswith(".onnx"):
print("Loading model:", model_path)
self.init_onnxruntime(model_path) self.init_onnxruntime(model_path)
self.runtime = "ort" self.runtime = "ort"
else: else:
@@ -189,9 +190,15 @@ class BoxCrop:
        self.fill_value = fill_value

    def calc_params(self, ishape, bbox):
-       start_x, start_y, end_x, end_y = bbox[0], bbox[1], bbox[2], bbox[3]
+       img_h, img_w = ishape[:2]
        target_h, target_w = self.target_size

+       # Round the bounding box coordinates
+       start_x = math.floor(bbox[0])
+       start_y = math.floor(bbox[1])
+       end_x = math.ceil(bbox[2])
+       end_y = math.ceil(bbox[3])

        # Calculate original bounding box center
        center_x = (start_x + end_x) / 2.0
        center_y = (start_y + end_y) / 2.0
@@ -231,8 +238,8 @@ class BoxCrop:
        # Define the new box coordinates
        new_start_x = max(0, start_x)
        new_start_y = max(0, start_y)
-       new_end_x = min(ishape[1] - 1, end_x)
-       new_end_y = min(ishape[0] - 1, end_y)
+       new_end_x = min(img_w - 1, end_x)
+       new_end_y = min(img_h - 1, end_y)
        new_box = [new_start_x, new_start_y, new_end_x, new_end_y]

        # Calculate resized crop size
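
The rewritten calc_params rounds fractional detector boxes outward with floor/ceil before deriving the crop, then clamps them to the image bounds. A small sketch (not part of the commit) of just that step, with made-up numbers:

```python
# Hedged sketch of the rounding and clamping added above.
import math

ishape = (720, 1280, 3)              # (img_h, img_w, channels)
bbox = [100.4, 50.7, 300.2, 400.9]   # fractional detector output

img_h, img_w = ishape[:2]
start_x, start_y = math.floor(bbox[0]), math.floor(bbox[1])  # round outward
end_x, end_y = math.ceil(bbox[2]), math.ceil(bbox[3])

new_box = [max(0, start_x), max(0, start_y),
           min(img_w - 1, end_x), min(img_h - 1, end_y)]
print(new_box)  # [100, 50, 301, 401]
```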
@@ -332,9 +339,8 @@ class RTMDet(BaseModel):
    def postprocess(self, result: List[np.ndarray], image: np.ndarray):
        boxes = np.squeeze(result[0], axis=0)
-       classes = np.squeeze(result[1], axis=0)
-       human_class = classes[:] == 0
+       human_class = boxes[:, 5] == 0
        boxes = boxes[human_class]
        keep = boxes[:, 4] > self.conf_threshold
@@ -344,7 +350,6 @@ class RTMDet(BaseModel):
            return np.array([])

        # Drop boxes with too small area
-       boxes = boxes.astype(np.float32)
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        keep = areas >= self.min_area
        boxes = boxes[keep]
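
With the detector output now arriving as a single (N, 6) array of [x1, y1, x2, y2, score, class_id], postprocess filters class, confidence, and area on one array. A hedged sketch with made-up numbers (the thresholds are stand-in values, not the model's defaults):

```python
# Illustrative filtering of a merged (N, 6) detector output.
import numpy as np

boxes = np.array([
    [10.0, 20.0, 110.0, 220.0, 0.90, 0.0],  # person, confident
    [30.0, 40.0,  35.0,  45.0, 0.80, 0.0],  # person, but tiny area
    [50.0, 60.0, 150.0, 260.0, 0.95, 2.0],  # not a person (class 2)
], dtype=np.float32)

conf_threshold, min_area = 0.3, 30.0                 # stand-in values
boxes = boxes[boxes[:, 5] == 0]                      # keep class 0 (person)
boxes = boxes[boxes[:, 4] > conf_threshold]          # confidence filter
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
boxes = boxes[areas >= min_area]                     # drop tiny boxes
print(boxes[:, :5])                                  # one surviving box
```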
@@ -386,10 +391,7 @@ class RTMPose(BaseModel):
    def preprocess(self, image: np.ndarray, bboxes: np.ndarray):
        cutouts = []
        for i in range(len(bboxes)):
-           bbox = np.asarray(bboxes[i])[0:4]
-           bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
-           bbox = bbox.round().astype(np.int32)
-           region = self.boxcrop.crop_resize_box(image, bbox)
+           region = self.boxcrop.crop_resize_box(image, bboxes[i])
            tensor = np.asarray(region).astype(self.input_types[0], copy=False)
            cutouts.append(tensor)
@@ -406,10 +408,7 @@ class RTMPose(BaseModel):
    ):
        kpts = []
        for i in range(len(bboxes)):
-           scores = np.clip(result[1][i], 0, 1)
-           kp = np.concatenate(
-               [result[0][i], np.expand_dims(scores, axis=-1)], axis=-1
-           )
+           kp = result[0][i]
            paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i])
            kp[:, 0] -= paddings[0]
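
The score concatenation moved into the exported model as well: `result[0]` now already carries [x, y, score] per keypoint, so only the crop-to-image back-mapping remains here. A hedged sketch of one plausible back-mapping with made-up values; the actual lines after `kp[:, 0] -= paddings[0]` fall outside this hunk, so this is illustrative only:

```python
# Illustrative back-mapping of a keypoint from crop to image coordinates.
import numpy as np

kp = np.array([[144.0, 96.0, 0.9]])  # (K, 3) keypoint in crop coordinates
paddings = (8.0, 4.0)                # assumed (pad_x, pad_y) from BoxCrop
scale = 0.5                          # assumed crop/original resize factor
bbox_origin = (100.0, 50.0)          # assumed crop origin in the full image

kp[:, 0] = (kp[:, 0] - paddings[0]) / scale + bbox_origin[0]
kp[:, 1] = (kp[:, 1] - paddings[1]) / scale + bbox_origin[1]
print(kp)  # [[372. 234.   0.9]]
```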

scripts/utils_pipeline.hpp (new file, 316 lines):

@@ -0,0 +1,316 @@
#pragma once
#include <algorithm>
#include <array>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
// =================================================================================================
namespace utils_pipeline
{
inline bool use_whole_body(const std::map<std::string, bool> &whole_body)
{
for (const auto &pair : whole_body)
{
if (pair.second)
{
return true;
}
}
return false;
}
// =============================================================================================
inline std::vector<std::string> get_joint_names(const std::map<std::string, bool> &whole_body)
{
std::vector<std::string> joint_names_2d = {
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
};
if (whole_body.at("foots"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
});
}
if (whole_body.at("face"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
});
}
if (whole_body.at("hands"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
});
}
joint_names_2d.insert(
joint_names_2d.end(),
{
"hip_middle",
"shoulder_middle",
"head",
});
return joint_names_2d;
}
// =============================================================================================
inline cv::Mat bayer2rgb(const cv::Mat &bayer)
{
cv::Mat rgb;
cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB);
return rgb;
}
inline cv::Mat rgb2bayer(const cv::Mat &img)
{
CV_Assert(img.type() == CV_8UC3);
cv::Mat bayer(img.rows, img.cols, CV_8UC1);
for (int r = 0; r < img.rows; ++r)
{
const uchar *imgData = img.ptr<uchar>(r);
uchar *bayerRowPtr = bayer.ptr<uchar>(r);
for (int c = 0; c < img.cols; ++c)
{
int pixelIndex = 3 * c;
// Use faster bit operation instead of modulo+if
// Even row, even col => R = 0
// Even row, odd col => G = 1
// Odd row, even col => G = 1
// Odd row, odd col => B = 2
int row_mod = r & 1;
int col_mod = c & 1;
int component = row_mod + col_mod;
bayerRowPtr[c] = imgData[pixelIndex + component];
}
}
return bayer;
}
// =============================================================================================
inline int find_index(const std::vector<std::string> &vec, const std::string &key)
{
auto it = std::find(vec.begin(), vec.end(), key);
if (it == vec.end())
{
throw std::runtime_error("Cannot find \"" + key + "\" in joint_names.");
}
return static_cast<int>(std::distance(vec.begin(), it));
}
inline std::vector<std::vector<std::vector<std::array<float, 3>>>> update_keypoints(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<std::string> &joint_names,
const std::map<std::string, bool> &whole_body)
{
std::vector<std::vector<std::vector<std::array<float, 3>>>> new_views;
new_views.reserve(poses_2d.size());
for (const auto &view : poses_2d)
{
// "view" is a list of bodies => each body is Nx3
std::vector<std::vector<std::array<float, 3>>> new_bodies;
new_bodies.reserve(view.size());
for (const auto &body : view)
{
// 1) Copy first 17 keypoints
std::vector<std::array<float, 3>> new_body;
new_body.insert(new_body.end(), body.begin(), body.begin() + 17);
// 2) Optionally append extra keypoints
if (whole_body.at("foots"))
{
new_body.insert(new_body.end(), body.begin() + 17, body.begin() + 23);
}
if (whole_body.at("face"))
{
new_body.insert(new_body.end(), body.begin() + 23, body.begin() + 91);
}
if (whole_body.at("hands"))
{
new_body.insert(new_body.end(), body.begin() + 91, body.end());
}
// 3) Compute mid_hip
int hlid = find_index(joint_names, "hip_left");
int hrid = find_index(joint_names, "hip_right");
float mid_hip_x = 0.5 * (new_body[hlid][0] + new_body[hrid][0]);
float mid_hip_y = 0.5 * (new_body[hlid][1] + new_body[hrid][1]);
float mid_hip_c = std::min(new_body[hlid][2], new_body[hrid][2]);
new_body.push_back({mid_hip_x, mid_hip_y, mid_hip_c});
// 4) Compute mid_shoulder
int slid = find_index(joint_names, "shoulder_left");
int srid = find_index(joint_names, "shoulder_right");
float mid_shoulder_x = 0.5 * (new_body[slid][0] + new_body[srid][0]);
float mid_shoulder_y = 0.5 * (new_body[slid][1] + new_body[srid][1]);
float mid_shoulder_c = std::min(new_body[slid][2], new_body[srid][2]);
new_body.push_back({mid_shoulder_x, mid_shoulder_y, mid_shoulder_c});
// 5) Compute head
int elid = find_index(joint_names, "ear_left");
int erid = find_index(joint_names, "ear_right");
float head_x = 0.5 * (new_body[elid][0] + new_body[erid][0]);
float head_y = 0.5 * (new_body[elid][1] + new_body[erid][1]);
float head_c = std::min(new_body[elid][2], new_body[erid][2]);
new_body.push_back({head_x, head_y, head_c});
// Add this updated body into new_bodies
new_bodies.push_back(new_body);
}
// Add all updated bodies for this view
new_views.push_back(new_bodies);
}
return new_views;
}
}

scripts/utils_pipeline.py (new file, 255 lines):

@@ -0,0 +1,255 @@
from typing import List
import cv2
import numpy as np
# ==================================================================================================
def use_whole_body(whole_body: dict) -> bool:
return any((whole_body[k] for k in whole_body))
# ==================================================================================================
def get_joint_names(whole_body: dict):
joint_names_2d = [
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
]
if whole_body["foots"]:
joint_names_2d.extend(
[
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
]
)
if whole_body["face"]:
joint_names_2d.extend(
[
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
]
)
if whole_body["hands"]:
joint_names_2d.extend(
[
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
]
)
joint_names_2d.extend(
[
"hip_middle",
"shoulder_middle",
"head",
]
)
return joint_names_2d
# ==================================================================================================
def load_image(path: str):
image = cv2.imread(path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = np.asarray(image, dtype=np.uint8)
return image
# ==================================================================================================
def rgb2bayer(img):
bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
bayer[0::2, 0::2] = img[0::2, 0::2, 0]
bayer[0::2, 1::2] = img[0::2, 1::2, 1]
bayer[1::2, 0::2] = img[1::2, 0::2, 1]
bayer[1::2, 1::2] = img[1::2, 1::2, 2]
return bayer
def bayer2rgb(bayer):
img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
return img
# ==================================================================================================
def update_keypoints(poses_2d: list, joint_names: List[str], whole_body: dict) -> list:
new_views = []
for view in poses_2d:
new_bodies = []
for body in view:
body = body.tolist()
new_body = body[:17]
if whole_body["foots"]:
new_body.extend(body[17:23])
if whole_body["face"]:
new_body.extend(body[23:91])
if whole_body["hands"]:
new_body.extend(body[91:])
body = new_body
hlid = joint_names.index("hip_left")
hrid = joint_names.index("hip_right")
mid_hip = [
float(((body[hlid][0] + body[hrid][0]) / 2.0)),
float(((body[hlid][1] + body[hrid][1]) / 2.0)),
min(body[hlid][2], body[hrid][2]),
]
body.append(mid_hip)
slid = joint_names.index("shoulder_left")
srid = joint_names.index("shoulder_right")
mid_shoulder = [
float(((body[slid][0] + body[srid][0]) / 2.0)),
float(((body[slid][1] + body[srid][1]) / 2.0)),
min(body[slid][2], body[srid][2]),
]
body.append(mid_shoulder)
elid = joint_names.index("ear_left")
erid = joint_names.index("ear_right")
head = [
float(((body[elid][0] + body[erid][0]) / 2.0)),
float(((body[elid][1] + body[erid][1]) / 2.0)),
min(body[elid][2], body[erid][2]),
]
body.append(head)
new_bodies.append(body)
new_views.append(new_bodies)
return new_views
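
A hedged usage sketch for the new helper (not part of the commit): with all whole-body flags off, three derived joints are appended and line up with the tail of get_joint_names:

```python
# Illustrative use of the new module; assumes scripts/ is on the path.
import numpy as np
from utils_pipeline import get_joint_names, update_keypoints

whole_body = {"foots": False, "face": False, "hands": False}
names = get_joint_names(whole_body)         # 17 detected + 3 derived = 20
body = np.zeros((17, 3), dtype=np.float32)  # one dummy body, one view
body[:, 2] = 1.0                            # confidence 1 everywhere
new_views = update_keypoints([[body]], names, whole_body)

assert len(names) == 20
assert len(new_views[0][0]) == 20           # hip/shoulder middle + head added
assert names[-3:] == ["hip_middle", "shoulder_middle", "head"]
```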

tests/README.md (new file, 34 lines):

@@ -0,0 +1,34 @@
# Tests
Various module tests
### Triangulator
```bash
cd /RapidPoseTriangulation/tests/ && python3 test_interface.py && cd ..
```
### Onnx C++ Interface
```bash
cd /RapidPoseTriangulation/tests/
g++ -std=c++17 -O3 -march=native -Wall \
$(pkg-config --cflags opencv4) \
-I /onnxruntime/include \
-I /onnxruntime/include/onnxruntime/core/session \
-I /onnxruntime/include/onnxruntime/core/providers/tensorrt \
-L /onnxruntime/build/Linux/Release \
test_utils2d.cpp \
-o my_app.bin \
-Wl,--start-group \
-lonnxruntime_providers_tensorrt \
-lonnxruntime_providers_shared \
-lonnxruntime_providers_cuda \
-lonnxruntime \
-Wl,--end-group \
$(pkg-config --libs opencv4) \
-Wl,-rpath,/onnxruntime/build/Linux/Release
./my_app.bin
```

tests/test_utils2d.cpp (new file, 96 lines):

@@ -0,0 +1,96 @@
#include <filesystem>
#include <vector>
#include <string>
#include <memory>
#include <opencv2/opencv.hpp>
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
// =================================================================================================
// =================================================================================================
int main(int argc, char **argv)
{
using namespace utils_2d_pose;
std::string base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/";
std::string model_path1 = base_path + "rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx";
std::string model_path2 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
std::vector<std::string> img_paths = {
"/RapidPoseTriangulation/data/h1/54138969-img_003201.jpg",
"/RapidPoseTriangulation/data/h1/55011271-img_003201.jpg",
"/RapidPoseTriangulation/data/h1/58860488-img_003201.jpg",
"/RapidPoseTriangulation/data/h1/60457274-img_003201.jpg",
};
// {
// std::cout << "\nTesting RTMDet and RTMPose" << std::endl;
// RTMDet model1(model_path1, 0.3, 0.1 * 0.1, 30);
// RTMPose model2(model_path2, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs1 = model1.call(img);
// std::cout << "Model1 outputs: " << outputs1[0][0] << " " << outputs1[0][1] << " "
// << outputs1[0][2] << " " << outputs1[0][3] << " " << outputs1[0][4] << " "
// << std::endl;
// for (size_t j = 0; j < outputs1.size(); j++)
// {
// std::vector<std::array<float, 5>> bboxes = {outputs1[j]};
// auto outputs2 = model2.call(img, bboxes);
// std::cout << "Model2 outputs: " << outputs2[0][0][0] << " "
// << outputs2[0][0][1] << " " << outputs2[0][0][2] << " " << std::endl;
// }
// }
// }
// {
// std::cout << "\nTesting TopDown" << std::endl;
// TopDown model3(model_path1, model_path2, 0.3, 0.1 * 0.1, false, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs3 = model3.predict(img);
// std::cout << "Model3 outputs: " << outputs3[0][0][0] << " "
// << outputs3[0][0][1] << " " << outputs3[0][0][2] << " " << std::endl;
// }
// }
{
std::cout << "\nTesting PosePredictor 1" << std::endl;
PosePredictor model4(false, 0.3, 0.1 * 0.1, false);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
}
auto outputs4 = model4.predict(images);
std::cout << "Model4 outputs: " << outputs4[0][0][0][0] << " "
<< outputs4[0][0][0][1] << " " << outputs4[0][0][0][2] << " " << std::endl;
}
{
std::cout << "\nTesting PosePredictor 2" << std::endl;
PosePredictor model5(false, 0.3, 0.1 * 0.1, true);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
}
auto outputs5 = model5.predict(images);
std::cout << "Model5 outputs: " << outputs5[0][0][0][0] << " "
<< outputs5[0][0][0][1] << " " << outputs5[0][0][0][2] << " " << std::endl;
}
return 0;
}