Merge remote-tracking branch 'origin/ros' into jetson

This commit is contained in:
Isse
2025-01-21 18:31:46 +01:00
40 changed files with 1109 additions and 1253 deletions

View File

@ -30,6 +30,27 @@ Fast triangulation of multiple persons from multiple camera views.
 - Build triangulator:
 ```bash
 cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..
+cd /RapidPoseTriangulation/scripts/ && \
+g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd \
+-I /RapidPoseTriangulation/rpt/ \
+-isystem /usr/include/opencv4/ \
+-isystem /onnxruntime/include/ \
+-isystem /onnxruntime/include/onnxruntime/core/session/ \
+-isystem /onnxruntime/include/onnxruntime/core/providers/tensorrt/ \
+-L /onnxruntime/build/Linux/Release/ \
+test_skelda_dataset.cpp \
+/RapidPoseTriangulation/rpt/*.cpp \
+-o test_skelda_dataset.bin \
+-Wl,--start-group \
+-lonnxruntime_providers_tensorrt \
+-lonnxruntime_providers_shared \
+-lonnxruntime_providers_cuda \
+-lonnxruntime \
+-Wl,--end-group \
+$(pkg-config --libs opencv4) \
+-Wl,-rpath,/onnxruntime/build/Linux/Release/ \
+&& cd ..
 ```
 - Test with samples:
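The binary produced by the new build command is the C++ evaluation harness added in this commit. As a usage sketch (paths taken from `test_skelda_dataset.cpp` and `test_skelda_dataset.py` in this diff), it expects the exported dataset and config under `/tmp/rpt/` and writes its results next to them:

```bash
# Assumes /tmp/rpt/all.json and /tmp/rpt/config.json already exist,
# e.g. exported by scripts/test_skelda_dataset.py from this commit
/RapidPoseTriangulation/scripts/test_skelda_dataset.bin
# Triangulation results are written to /tmp/rpt/results.json
```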

20
extras/ros/README.md Normal file
View File

@ -0,0 +1,20 @@
# ROS-Wrapper
Runs the pose estimator with ROS topics as input and publishes the detected poses.
<br>
- Build container:
```bash
docker build --progress=plain -t rapidposetriangulation_ros -f extras/ros/dockerfile .
```
- Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment
- Run and test:
```bash
xhost +; docker compose -f extras/ros/docker-compose.yml up
docker exec -it ros-test_node-1 bash
export ROS_DOMAIN_ID=18
```
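Once the containers are running, the wrapper can be sanity-checked from inside the test container. A minimal sketch, assuming the default topic names from the wrapper sources (`/camera01/pylon_ros2_camera_node/image_raw` in, `/poses/camera01` out):

```bash
# List the visible topics, then watch the published poses
ros2 topic list
ros2 topic echo /poses/camera01
# Each message is a std_msgs/String carrying JSON with the keys
# "bodies2D", "joints", "num_persons" and "timestamps"
```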

View File

@ -10,8 +10,8 @@ services:
 runtime: nvidia
 privileged: true
 volumes:
-- ../:/RapidPoseTriangulation/
-- ../skelda/:/skelda/
+- ../../:/RapidPoseTriangulation/
+- ../../skelda/:/skelda/
 - /tmp/.X11-unix:/tmp/.X11-unix
 - /dev/shm:/dev/shm
 environment:
@ -27,16 +27,15 @@ services:
 runtime: nvidia
 privileged: true
 volumes:
-- ../:/RapidPoseTriangulation/
-- ../skelda/:/skelda/
+- ../../:/RapidPoseTriangulation/
+- ../../skelda/:/skelda/
 - /tmp/.X11-unix:/tmp/.X11-unix
 - /dev/shm:/dev/shm
 environment:
 - DISPLAY
 - QT_X11_NO_MITSHM=1
 - "PYTHONUNBUFFERED=1"
-# command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2D_wrapper_py rpt2D_wrapper'
-command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2D_wrapper_cpp rpt2D_wrapper'
+command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
 pose_visualizer:
 image: rapidposetriangulation_ros
@ -45,15 +44,15 @@ services:
 runtime: nvidia
 privileged: true
 volumes:
-- ../:/RapidPoseTriangulation/
-- ../skelda/:/skelda/
+- ../../:/RapidPoseTriangulation/
+- ../../skelda/:/skelda/
 - /tmp/.X11-unix:/tmp/.X11-unix
 - /dev/shm:/dev/shm
 environment:
 - DISPLAY
 - QT_X11_NO_MITSHM=1
 - "PYTHONUNBUFFERED=1"
-command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2D_visualizer pose2D_visualizer'
+command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer'
 pose_viewer:
 image: rapidposetriangulation_ros

View File

@ -60,14 +60,14 @@ RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
 RUN pip3 install --no-cache-dir "setuptools<=58.2.0"
 # Copy modules
-COPY ./ros/pose2D_visualizer /RapidPoseTriangulation/ros/pose2D_visualizer/
-COPY ./ros/rpt2D_wrapper_py /RapidPoseTriangulation/ros/rpt2D_wrapper_py/
-COPY ./ros/rpt2D_wrapper_cpp /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/
+COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
+COPY ./scripts/ /RapidPoseTriangulation/scripts/
+COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
+COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
 # Link and build as ros package
-RUN ln -s /RapidPoseTriangulation/ros/pose2D_visualizer/ /project/dev_ws/src/pose2D_visualizer
-RUN ln -s /RapidPoseTriangulation/ros/rpt2D_wrapper_py/ /project/dev_ws/src/rpt2D_wrapper_py
-RUN ln -s /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/ /project/dev_ws/src/rpt2D_wrapper_cpp
+RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/pose2d_visualizer
+RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/rpt2d_wrapper_cpp
 RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
 # Update ros packages -> autocompletion and check
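When iterating on a package inside an already running container, the workspace can be rebuilt and the node restarted by hand. A sketch using only commands that appear in this dockerfile and the compose file:

```bash
# Inside the container: rebuild the colcon workspace, then rerun the wrapper
cd /project/dev_ws/ && colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release
source /project/dev_ws/install/setup.bash
export ROS_DOMAIN_ID=18  # match the ID from docker-compose.yml
ros2 run rpt2d_wrapper_cpp rpt2d_wrapper
```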

View File

@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
-<name>rpt2D_wrapper_py</name>
+<name>pose2d_visualizer</name>
 <version>0.0.0</version>
 <description>TODO: Package description</description>
 <maintainer email="root@todo.todo">root</maintainer>

View File

@ -14,8 +14,8 @@ from sensor_msgs.msg import Image
 from std_msgs.msg import String
 filepath = os.path.dirname(os.path.realpath(__file__)) + "/"
-sys.path.append(filepath + "../../../scripts/")
-import test_triangulate
+sys.path.append(filepath + "../../../../scripts/")
+import utils_pipeline
 from skelda import utils_view
@ -42,7 +42,7 @@ def callback_images(image_data):
 # Convert into cv images from image string
 if image_data.encoding == "bayer_rggb8":
 bayer_image = bridge.imgmsg_to_cv2(image_data, "bayer_rggb8")
-color_image = test_triangulate.bayer2rgb(bayer_image)
+color_image = utils_pipeline.bayer2rgb(bayer_image)
 elif image_data.encoding == "mono8":
 gray_image = bridge.imgmsg_to_cv2(image_data, "mono8")
 color_image = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2RGB)
@ -111,7 +111,7 @@ def main():
 # Start node
 rclpy.init(args=sys.argv)
-node = rclpy.create_node("pose2D_visualizer")
+node = rclpy.create_node("pose2d_visualizer")
 # Quality of service settings
 qos_profile = QoSProfile(

View File

@ -0,0 +1,4 @@
[develop]
script_dir=$base/lib/pose2d_visualizer
[install]
install_scripts=$base/lib/pose2d_visualizer

View File

@ -1,6 +1,6 @@
 from setuptools import setup
-package_name = "pose2D_visualizer"
+package_name = "pose2d_visualizer"
 setup(
 name=package_name,
@ -18,6 +18,6 @@ setup(
 license="TODO: License declaration",
 tests_require=["pytest"],
 entry_points={
-"console_scripts": ["pose2D_visualizer = pose2D_visualizer.pose2D_visualizer:main"],
+"console_scripts": ["pose2d_visualizer = pose2d_visualizer.pose2d_visualizer:main"],
 },
 )

View File

@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.5)
-project(rpt2D_wrapper_cpp)
+project(rpt2d_wrapper_cpp)
 # Default to C99
 if(NOT CMAKE_C_STANDARD)
@ -25,21 +25,21 @@ find_package(OpenCV REQUIRED)
 ### 3) ONNX Runtime
 # for desktop
-include_directories(/onnxruntime/include
-/onnxruntime/include/onnxruntime/core/session
-/onnxruntime/include/onnxruntime/core/providers/tensorrt)
-link_directories(/onnxruntime/build/Linux/Release)
+include_directories(/onnxruntime/include/
+/onnxruntime/include/onnxruntime/core/session/
+/onnxruntime/include/onnxruntime/core/providers/tensorrt/)
+link_directories(/onnxruntime/build/Linux/Release/)
 # for jetson
 include_directories(/usr/local/include/onnxruntime/)
 link_directories(/usr/local/lib/)
-add_executable(rpt2D_wrapper src/rpt2D_wrapper.cpp)
-ament_target_dependencies(rpt2D_wrapper rclcpp std_msgs sensor_msgs cv_bridge)
-target_include_directories(rpt2D_wrapper PUBLIC
+add_executable(rpt2d_wrapper src/rpt2d_wrapper.cpp)
+ament_target_dependencies(rpt2d_wrapper rclcpp std_msgs sensor_msgs cv_bridge)
+target_include_directories(rpt2d_wrapper PUBLIC
 $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
 $<INSTALL_INTERFACE:include>)
-target_link_libraries(rpt2D_wrapper
+target_link_libraries(rpt2d_wrapper
 ${OpenCV_LIBS}
 onnxruntime_providers_tensorrt
 onnxruntime_providers_shared
@ -47,12 +47,12 @@ target_link_libraries(rpt2D_wrapper
 onnxruntime
 )
-set_target_properties(rpt2D_wrapper PROPERTIES
+set_target_properties(rpt2d_wrapper PROPERTIES
 BUILD_WITH_INSTALL_RPATH TRUE
 INSTALL_RPATH "/onnxruntime/build/Linux/Release"
 )
-install(TARGETS rpt2D_wrapper
+install(TARGETS rpt2d_wrapper
 DESTINATION lib/${PROJECT_NAME})
 if(BUILD_TESTING)

View File

@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
-<name>rpt2D_wrapper_cpp</name>
+<name>rpt2d_wrapper_cpp</name>
 <version>0.0.0</version>
 <description>TODO: Package description</description>
 <maintainer email="root@todo.todo">root</maintainer>

View File

@ -1,12 +1,10 @@
-#include <chrono>
-#include <memory>
-#include <string>
-#include <thread>
-#include <vector>
-#include <mutex>
 #include <atomic>
+#include <chrono>
 #include <cmath>
 #include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
 // ROS2
 #include <rclcpp/rclcpp.hpp>
@ -14,15 +12,15 @@
 #include <std_msgs/msg/string.hpp>
 // OpenCV / cv_bridge
-#include <opencv2/opencv.hpp>
 #include <cv_bridge/cv_bridge.h>
+#include <opencv2/opencv.hpp>
 // JSON library
-#include "nlohmann/json.hpp"
+#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
 using json = nlohmann::json;
-#include "test_triangulate.hpp"
-#include "utils_2d_pose.hpp"
+#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
+#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
 // =================================================================================================
@ -34,6 +32,12 @@ static const float min_bbox_score = 0.4;
 static const float min_bbox_area = 0.1 * 0.1;
 static const bool batch_poses = true;
+static const std::map<std::string, bool> whole_body = {
+{"foots", true},
+{"face", true},
+{"hands", true},
+};
 // =================================================================================================
 // =================================================================================================
@ -43,9 +47,7 @@
 Rpt2DWrapperNode(const std::string &node_name)
 : Node(node_name)
 {
-this->stop_flag = false;
-this->last_input_image = cv::Mat();
-this->last_input_time = 0.0;
+this->is_busy = false;
 // QoS
 rclcpp::QoS qos_profile(1);
@ -55,58 +57,31 @@
 // Setup subscriber
 image_sub_ = this->create_subscription<sensor_msgs::msg::Image>(
 img_input_topic, qos_profile,
-std::bind(&Rpt2DWrapperNode::callbackImages, this, std::placeholders::_1));
+std::bind(&Rpt2DWrapperNode::callback_images, this, std::placeholders::_1));
 // Setup publisher
 pose_pub_ = this->create_publisher<std_msgs::msg::String>(pose_out_topic, qos_profile);
 // Load model
-bool whole_body = test_triangulate::use_whole_body();
+bool use_wb = utils_pipeline::use_whole_body(whole_body);
 this->kpt_model = std::make_unique<utils_2d_pose::PosePredictor>(
-whole_body, min_bbox_score, min_bbox_area, batch_poses);
+use_wb, min_bbox_score, min_bbox_area, batch_poses);
 RCLCPP_INFO(this->get_logger(), "Finished initialization of pose estimator.");
-// Start background prediction thread
-model_thread = std::thread(&Rpt2DWrapperNode::callbackWrapper, this);
-}
-~Rpt2DWrapperNode()
-{
-stop_flag = true;
-if (model_thread.joinable())
-{
-model_thread.join();
-}
 }
 private:
 rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr image_sub_;
 rclcpp::Publisher<std_msgs::msg::String>::SharedPtr pose_pub_;
+std::atomic<bool> is_busy;
 // Pose model pointer
 std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model;
+const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
-// Threading
-std::thread model_thread;
-std::mutex mutex;
-std::atomic<bool> stop_flag;
-cv::Mat last_input_image;
-double last_input_time;
-void callbackImages(const sensor_msgs::msg::Image::SharedPtr msg);
-void callbackModel();
-void callbackWrapper()
-{
-using namespace std::chrono_literals;
-while (!stop_flag)
-{
-callbackModel();
-std::this_thread::sleep_for(std::chrono::microseconds(100));
-}
-}
+void callback_images(const sensor_msgs::msg::Image::SharedPtr msg);
+std::vector<std::vector<std::array<float, 3>>> call_model(const cv::Mat &image);
 void publish(const json &data)
 {
@ -118,8 +93,16 @@
 // =================================================================================================
-void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr msg)
+void Rpt2DWrapperNode::callback_images(const sensor_msgs::msg::Image::SharedPtr msg)
 {
+if (this->is_busy)
+{
+RCLCPP_WARN(this->get_logger(), "Skipping frame, still processing...");
+return;
+}
+this->is_busy = true;
+auto ts_image = std::chrono::high_resolution_clock::now();
 // Load or convert image to Bayer format
 cv::Mat bayer_image;
 try
@ -138,7 +121,7 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
 {
 cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
 cv::Mat color_image = cv_ptr->image;
-bayer_image = test_triangulate::rgb2bayer(color_image);
+bayer_image = utils_pipeline::rgb2bayer(color_image);
 }
 else
 {
@ -151,48 +134,56 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
 return;
 }
-// Get timestamp
-double time_stamp = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;
-// Store in member variables with lock
-{
-std::lock_guard<std::mutex> lk(mutex);
-this->last_input_image = std::move(bayer_image);
-this->last_input_time = time_stamp;
-}
+// Call model
+const auto &valid_poses = this->call_model(bayer_image);
+// Calculate timings
+double time_stamp = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;
+auto ts_image_sec = std::chrono::duration<double>(ts_image.time_since_epoch()).count();
+auto ts_pose = std::chrono::high_resolution_clock::now();
+double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
+double z_trigger_image = ts_image_sec - time_stamp;
+double z_trigger_pose = ts_pose_sec - time_stamp;
+double z_image_pose = ts_pose_sec - ts_image_sec;
+// Build JSON
+json poses_msg;
+poses_msg["bodies2D"] = valid_poses; // shape: persons x keypoints x 3
+poses_msg["joints"] = joint_names_2d;
+poses_msg["num_persons"] = valid_poses.size();
+poses_msg["timestamps"] = {
+{"trigger", time_stamp},
+{"image", ts_image_sec},
+{"pose", ts_pose_sec},
+{"z-trigger-image", z_trigger_image},
+{"z-image-pose", z_image_pose},
+{"z-trigger-pose", z_trigger_pose}};
+// Publish
+publish(poses_msg);
+// Print info
+double elapsed_time = std::chrono::duration<double>(
+std::chrono::high_resolution_clock::now() - ts_image)
+.count();
+std::cout << "Detected persons: " << valid_poses.size()
+<< " - Prediction time: " << elapsed_time << "s" << std::endl;
+this->is_busy = false;
 }
 // =================================================================================================
-void Rpt2DWrapperNode::callbackModel()
+std::vector<std::vector<std::array<float, 3>>> Rpt2DWrapperNode::call_model(const cv::Mat &image)
 {
-auto ptime = std::chrono::high_resolution_clock::now();
-// Check if we have an image
-cv::Mat local_image;
-double local_timestamp = 0.0;
-{
-std::lock_guard<std::mutex> lk(mutex);
-if (last_input_time == 0.0)
-{
-return;
-}
-local_image = std::move(last_input_image);
-local_timestamp = last_input_time;
-last_input_image = cv::Mat();
-last_input_time = 0.0;
-}
 // Create image vector
-cv::Mat rgb_image = test_triangulate::bayer2rgb(local_image);
-std::vector<cv::Mat> images_2d;
-images_2d.push_back(rgb_image);
+cv::Mat rgb_image = utils_pipeline::bayer2rgb(image);
+std::vector<cv::Mat> images_2d = {rgb_image};
 // Predict 2D poses
 auto poses_2d_all = kpt_model->predict(images_2d);
-auto poses_2d_upd = test_triangulate::update_keypoints(
-poses_2d_all, test_triangulate::joint_names_2d);
+auto poses_2d_upd = utils_pipeline::update_keypoints(
+poses_2d_all, joint_names_2d, whole_body);
 auto &poses_2d = poses_2d_upd[0];
 // Drop persons with no joints
@ -221,29 +212,7 @@ void Rpt2DWrapperNode::callbackModel()
 }
 }
-// Build JSON
-auto ts_pose = std::chrono::high_resolution_clock::now();
-double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
-double z_images_pose = ts_pose_sec - local_timestamp;
-json poses_msg;
-poses_msg["bodies2D"] = valid_poses; // shape: persons x keypoints x 3
-poses_msg["joints"] = test_triangulate::joint_names_2d;
-poses_msg["num_persons"] = valid_poses.size();
-poses_msg["timestamps"] = {
-{"image", local_timestamp},
-{"pose", ts_pose_sec},
-{"z-images-pose", z_images_pose}};
-// Publish
-publish(poses_msg);
-// Print info
-double elapsed_time = std::chrono::duration<double>(
-std::chrono::high_resolution_clock::now() - ptime)
-.count();
-std::cout << "Detected persons: " << valid_poses.size()
-<< " - Prediction time: " << elapsed_time << "s" << std::endl;
+return valid_poses;
 }
 // =================================================================================================
@ -253,7 +222,7 @@ int main(int argc, char **argv)
 {
 rclcpp::init(argc, argv);
-auto node = std::make_shared<Rpt2DWrapperNode>("rpt2D_wrapper");
+auto node = std::make_shared<Rpt2DWrapperNode>("rpt2d_wrapper");
 rclcpp::spin(node);
 rclcpp::shutdown();

View File

@ -1,45 +0,0 @@
# ROS-Wrapper
Run pose estimator with ros topics as inputs and publish detected poses.
<br>
- Build container:
```bash
docker build --progress=plain -t rapidposetriangulation_ros -f ros/dockerfile .
```
- Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment
- Run and test:
```bash
xhost +; docker compose -f ros/docker-compose.yml up
docker exec -it ros-test_node-1 bash
export ROS_DOMAIN_ID=18
```
### Debugging
```bash
cd /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/tests/
g++ -std=c++17 -O3 -march=native -Wall -DCOMPILE_EXAMPLE_MAIN \
$(pkg-config --cflags opencv4) \
-I /onnxruntime/include \
-I /onnxruntime/include/onnxruntime/core/session \
-I /onnxruntime/include/onnxruntime/core/providers/tensorrt \
-L /onnxruntime/build/Linux/Release \
my_app.cpp \
-o my_app \
-Wl,--start-group \
-lonnxruntime_providers_tensorrt \
-lonnxruntime_providers_shared \
-lonnxruntime_providers_cuda \
-lonnxruntime \
-Wl,--end-group \
$(pkg-config --libs opencv4) \
-Wl,-rpath,/onnxruntime/build/Linux/Release
./my_app
```

View File

@ -1,18 +0,0 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>pose2D_visualizer</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<test_depend>ament_copyright</test_depend>
<test_depend>ament_flake8</test_depend>
<test_depend>ament_pep257</test_depend>
<test_depend>python3-pytest</test_depend>
<export>
<build_type>ament_python</build_type>
</export>
</package>

View File

@ -1,4 +0,0 @@
[develop]
script_dir=$base/lib/pose2D_visualizer
[install]
install_scripts=$base/lib/pose2D_visualizer

View File

@ -1,296 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
// =================================================================================================
namespace test_triangulate
{
// If any part shall be disabled, also update the joint names list
static const std::map<std::string, bool> whole_body = {
{"foots", true},
{"face", true},
{"hands", true},
};
static const std::vector<std::string> joint_names_2d = {
// coco
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
// foot
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
// face
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
// hand
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
// extras
"hip_middle",
"shoulder_middle",
"head",
};
// =============================================================================================
// =============================================================================================
[[maybe_unused]] static bool use_whole_body()
{
for (const auto &pair : whole_body)
{
if (pair.second)
{
return true;
}
}
return false;
}
// =============================================================================================
cv::Mat bayer2rgb(const cv::Mat &bayer)
{
cv::Mat rgb;
cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB);
return rgb;
}
cv::Mat rgb2bayer(const cv::Mat &img)
{
CV_Assert(img.type() == CV_8UC3);
cv::Mat bayer(img.rows, img.cols, CV_8UC1);
for (int r = 0; r < img.rows; ++r)
{
const uchar *imgData = img.ptr<uchar>(r);
uchar *bayerRowPtr = bayer.ptr<uchar>(r);
for (int c = 0; c < img.cols; ++c)
{
int pixelIndex = 3 * c;
// Use faster bit operation instead of modulo+if
// Even row, even col => R = 0
// Even row, odd col => G = 1
// Odd row, even col => G = 1
// Odd row, odd col => B = 2
int row_mod = r & 1;
int col_mod = c & 1;
int component = row_mod + col_mod;
bayerRowPtr[c] = imgData[pixelIndex + component];
}
}
return bayer;
}
// =============================================================================================
inline int find_index(const std::vector<std::string> &vec, const std::string &key)
{
auto it = std::find(vec.begin(), vec.end(), key);
if (it == vec.end())
{
throw std::runtime_error("Cannot find \"" + key + "\" in joint_names.");
}
return static_cast<int>(std::distance(vec.begin(), it));
}
std::vector<std::vector<std::vector<std::array<float, 3>>>> update_keypoints(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<std::string> &joint_names)
{
std::vector<std::vector<std::vector<std::array<float, 3>>>> new_views;
new_views.reserve(poses_2d.size());
for (const auto &view : poses_2d)
{
// "view" is a list of bodies => each body is Nx3
std::vector<std::vector<std::array<float, 3>>> new_bodies;
new_bodies.reserve(view.size());
for (const auto &body : view)
{
// 1) Copy first 17 keypoints
std::vector<std::array<float, 3>> new_body;
new_body.insert(new_body.end(), body.begin(), body.begin() + 17);
// 2) Optionally append extra keypoints
if (whole_body.at("foots"))
{
new_body.insert(new_body.end(), body.begin() + 17, body.begin() + 23);
}
if (whole_body.at("face"))
{
new_body.insert(new_body.end(), body.begin() + 23, body.begin() + 91);
}
if (whole_body.at("hands"))
{
new_body.insert(new_body.end(), body.begin() + 91, body.end());
}
// 3) Compute mid_hip
int hlid = find_index(joint_names, "hip_left");
int hrid = find_index(joint_names, "hip_right");
float mid_hip_x = 0.5 * (new_body[hlid][0] + new_body[hrid][0]);
float mid_hip_y = 0.5 * (new_body[hlid][1] + new_body[hrid][1]);
float mid_hip_c = std::min(new_body[hlid][2], new_body[hrid][2]);
new_body.push_back({mid_hip_x, mid_hip_y, mid_hip_c});
// 4) Compute mid_shoulder
int slid = find_index(joint_names, "shoulder_left");
int srid = find_index(joint_names, "shoulder_right");
float mid_shoulder_x = 0.5 * (new_body[slid][0] + new_body[srid][0]);
float mid_shoulder_y = 0.5 * (new_body[slid][1] + new_body[srid][1]);
float mid_shoulder_c = std::min(new_body[slid][2], new_body[srid][2]);
new_body.push_back({mid_shoulder_x, mid_shoulder_y, mid_shoulder_c});
// 5) Compute head
int elid = find_index(joint_names, "ear_left");
int erid = find_index(joint_names, "ear_right");
float head_x = 0.5 * (new_body[elid][0] + new_body[erid][0]);
float head_y = 0.5 * (new_body[elid][1] + new_body[erid][1]);
float head_c = std::min(new_body[elid][2], new_body[erid][2]);
new_body.push_back({head_x, head_y, head_c});
// Add this updated body into new_bodies
new_bodies.push_back(new_body);
}
// Add all updated bodies for this view
new_views.push_back(new_bodies);
}
return new_views;
}
}

View File

@ -1 +0,0 @@
my_app

View File

@ -1,196 +0,0 @@
import json
import os
import sys
import threading
import time
import numpy as np
import rclpy
from cv_bridge import CvBridge
from rclpy.qos import QoSHistoryPolicy, QoSProfile, QoSReliabilityPolicy
from sensor_msgs.msg import Image
from std_msgs.msg import String
filepath = os.path.dirname(os.path.realpath(__file__)) + "/"
sys.path.append(filepath + "../../../scripts/")
import test_triangulate
import utils_2d_pose
# ==================================================================================================
bridge = CvBridge()
node = None
publisher_pose = None
cam_id = "camera01"
img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
pose_out_topic = "/poses/" + cam_id
last_input_image = None
last_input_time = 0.0
kpt_model = None
joint_names_2d = test_triangulate.joint_names_2d
lock = threading.Lock()
stop_flag = False
# Model config
min_bbox_score = 0.4
min_bbox_area = 0.1 * 0.1
batch_poses = True
# ==================================================================================================
def callback_images(image_data):
global last_input_image, last_input_time, lock
# Convert into cv images from image string
if image_data.encoding == "bayer_rggb8":
bayer_image = bridge.imgmsg_to_cv2(image_data, "bayer_rggb8")
elif image_data.encoding == "mono8":
bayer_image = bridge.imgmsg_to_cv2(image_data, "mono8")
elif image_data.encoding == "rgb8":
color_image = bridge.imgmsg_to_cv2(image_data, "rgb8")
bayer_image = test_triangulate.rgb2bayer(color_image)
else:
raise ValueError("Unknown image encoding:", image_data.encoding)
time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9
with lock:
last_input_image = bayer_image
last_input_time = time_stamp
# ==================================================================================================
def callback_model():
global last_input_image, last_input_time, kpt_model, lock
ptime = time.time()
if last_input_time == 0.0:
time.sleep(0.0001)
return
# Collect inputs
images_2d = []
timestamps = []
with lock:
img = last_input_image
ts = last_input_time
images_2d.append(img)
timestamps.append(ts)
last_input_image = None
last_input_time = 0.0
# Predict 2D poses
images_2d = [test_triangulate.bayer2rgb(img) for img in images_2d]
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
poses_2d = poses_2d[0]
# Drop persons with no joints
poses_2d = np.asarray(poses_2d)
mask = np.sum(poses_2d[..., 2], axis=1) > 0
poses_2d = poses_2d[mask]
# Round poses
poses2D = [np.array(p).round(3).tolist() for p in poses_2d]
# Publish poses
ts_pose = time.time()
poses = {
"bodies2D": poses2D,
"joints": joint_names_2d,
"num_persons": len(poses2D),
"timestamps": {
"image": timestamps[0],
"pose": ts_pose,
"z-images-pose": ts_pose - timestamps[0],
},
}
publish(poses)
msg = "Detected persons: {} - Prediction time: {:.4f}s"
print(msg.format(poses["num_persons"], time.time() - ptime))
# ==================================================================================================
def callback_wrapper():
global stop_flag
while not stop_flag:
callback_model()
time.sleep(0.001)
# ==================================================================================================
def publish(data):
# Publish json data
msg = String()
msg.data = json.dumps(data)
publisher_pose.publish(msg)
# ==================================================================================================
def main():
global node, publisher_pose, kpt_model, stop_flag
# Start node
rclpy.init(args=sys.argv)
node = rclpy.create_node("rpt2D_wrapper")
# Quality of service settings
qos_profile = QoSProfile(
reliability=QoSReliabilityPolicy.RELIABLE,
history=QoSHistoryPolicy.KEEP_LAST,
depth=1,
)
# Create subscribers
_ = node.create_subscription(
Image,
img_input_topic,
callback_images,
qos_profile,
)
# Create publishers
publisher_pose = node.create_publisher(String, pose_out_topic, qos_profile)
# Load 2D pose model
whole_body = test_triangulate.whole_body
if any((whole_body[k] for k in whole_body)):
kpt_model = utils_2d_pose.load_wb_model(
min_bbox_score, min_bbox_area, batch_poses
)
else:
kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)
node.get_logger().info("Finished initialization of pose estimator")
# Start prediction thread
p1 = threading.Thread(target=callback_wrapper)
p1.start()
# Run ros update thread
rclpy.spin(node)
stop_flag = True
p1.join()
node.destroy_node()
rclpy.shutdown()
# ==================================================================================================
if __name__ == "__main__":
main()

View File

@ -1,4 +0,0 @@
[develop]
script_dir=$base/lib/rpt2D_wrapper_py
[install]
install_scripts=$base/lib/rpt2D_wrapper_py

View File

@ -1,23 +0,0 @@
from setuptools import setup
package_name = "rpt2D_wrapper_py"
setup(
name=package_name,
version="0.0.0",
packages=[package_name],
data_files=[
("share/ament_index/resource_index/packages", ["resource/" + package_name]),
("share/" + package_name, ["package.xml"]),
],
install_requires=["setuptools"],
zip_safe=True,
maintainer="root",
maintainer_email="root@todo.todo",
description="TODO: Package description",
license="TODO: License declaration",
tests_require=["pytest"],
entry_points={
"console_scripts": ["rpt2D_wrapper = rpt2D_wrapper_py.rpt2D_wrapper:main"],
},
)

View File

@ -1,27 +0,0 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_copyright.main import main
# Remove the `skip` decorator once the source file(s) have a copyright header
@pytest.mark.skip(
reason="No copyright header has been placed in the generated source file."
)
@pytest.mark.copyright
@pytest.mark.linter
def test_copyright():
rc = main(argv=[".", "test"])
assert rc == 0, "Found errors"

View File

@ -1,25 +0,0 @@
# Copyright 2017 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_flake8.main import main_with_errors
@pytest.mark.flake8
@pytest.mark.linter
def test_flake8():
rc, errors = main_with_errors(argv=[])
assert rc == 0, "Found %d code style errors / warnings:\n" % len(
errors
) + "\n".join(errors)

View File

@ -1,23 +0,0 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_pep257.main import main
@pytest.mark.linter
@pytest.mark.pep257
def test_pep257():
rc = main(argv=[".", "test"])
assert rc == 0, "Found code style errors / warnings"

View File

@ -581,20 +581,21 @@ void TriangulatorInternal::reset()
 void TriangulatorInternal::print_stats()
 {
-std::cout << "Triangulator statistics:" << std::endl;
-std::cout << " Number of calls: " << num_calls << std::endl;
-std::cout << " Init time: " << init_time / num_calls << std::endl;
-std::cout << " Undistort time: " << undistort_time / num_calls << std::endl;
-std::cout << " Project time: " << project_time / num_calls << std::endl;
-std::cout << " Match time: " << match_time / num_calls << std::endl;
-std::cout << " Pairs time: " << pairs_time / num_calls << std::endl;
-std::cout << " Pair scoring time: " << pair_scoring_time / num_calls << std::endl;
-std::cout << " Grouping time: " << grouping_time / num_calls << std::endl;
-std::cout << " Full time: " << full_time / num_calls << std::endl;
-std::cout << " Merge time: " << merge_time / num_calls << std::endl;
-std::cout << " Post time: " << post_time / num_calls << std::endl;
-std::cout << " Convert time: " << convert_time / num_calls << std::endl;
-std::cout << " Total time: " << total_time / num_calls << std::endl;
+std::cout << "{" << std::endl;
+std::cout << " \"triangulator_calls\": " << num_calls << "," << std::endl;
+std::cout << " \"init_time\": " << init_time / num_calls << "," << std::endl;
+std::cout << " \"undistort_time\": " << undistort_time / num_calls << "," << std::endl;
+std::cout << " \"project_time\": " << project_time / num_calls << "," << std::endl;
+std::cout << " \"match_time\": " << match_time / num_calls << "," << std::endl;
+std::cout << " \"pairs_time\": " << pairs_time / num_calls << "," << std::endl;
+std::cout << " \"pair_scoring_time\": " << pair_scoring_time / num_calls << "," << std::endl;
+std::cout << " \"grouping_time\": " << grouping_time / num_calls << "," << std::endl;
+std::cout << " \"full_time\": " << full_time / num_calls << "," << std::endl;
+std::cout << " \"merge_time\": " << merge_time / num_calls << "," << std::endl;
+std::cout << " \"post_time\": " << post_time / num_calls << "," << std::endl;
+std::cout << " \"convert_time\": " << convert_time / num_calls << "," << std::endl;
+std::cout << " \"total_time\": " << total_time / num_calls << std::endl;
+std::cout << "}" << std::endl;
 }
 // =================================================================================================
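With this change the per-stage timings are emitted as a JSON object on stdout instead of free-form text, so downstream tooling can parse them directly. A sketch of the emitted shape (every value is the accumulated time divided by `num_calls`; the placeholders are illustrative, not real measurements):

```
{
  "triangulator_calls": <num_calls>,
  "init_time": <avg seconds per call>,
  "undistort_time": <avg seconds per call>,
  ...
  "convert_time": <avg seconds per call>,
  "total_time": <avg seconds per call>
}
```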

View File

@ -0,0 +1,267 @@
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
// OpenCV
#include <opencv2/opencv.hpp>
// JSON library
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
using json = nlohmann::json;
#include "/RapidPoseTriangulation/rpt/camera.hpp"
#include "/RapidPoseTriangulation/rpt/interface.hpp"
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
// =================================================================================================
static const std::string path_data = "/tmp/rpt/all.json";
static const std::string path_cfg = "/tmp/rpt/config.json";
// =================================================================================================
std::vector<cv::Mat> load_images(json &item)
{
// Load images
std::vector<cv::Mat> images;
for (size_t j = 0; j < item["imgpaths"].size(); j++)
{
auto ipath = item["imgpaths"][j].get<std::string>();
cv::Mat image = cv::imread(ipath, cv::IMREAD_COLOR);
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
images.push_back(image);
}
if (item["dataset_name"] == "human36m")
{
// Since the images don't have the same shape, rescale some of them
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Size ishape = img.size();
if (ishape != cv::Size(1000, 1000))
{
auto &cam = item["cameras"][i]; // take a reference so the rescaled intrinsics persist in the dataset
cam["K"][1][1] = cam["K"][1][1].get<float>() * (1000.0 / ishape.height);
cam["K"][1][2] = cam["K"][1][2].get<float>() * (1000.0 / ishape.height);
cam["K"][0][0] = cam["K"][0][0].get<float>() * (1000.0 / ishape.width);
cam["K"][0][2] = cam["K"][0][2].get<float>() * (1000.0 / ishape.width);
cv::resize(img, img, cv::Size(1000, 1000));
images[i] = img;
}
}
}
// Convert image format to Bayer encoding to simulate real camera input
// This also resulted in notably better MPJPE results in most cases, presumably since the
// demosaicing algorithm from OpenCV is better than the default one from the cameras
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Mat bayer_image = utils_pipeline::rgb2bayer(img);
images[i] = std::move(bayer_image);
}
return images;
}
// =================================================================================================
std::string read_file(const std::string &path)
{
std::ifstream file_stream(path);
if (!file_stream.is_open())
{
throw std::runtime_error("Unable to open file: " + path);
}
std::stringstream buffer;
buffer << file_stream.rdbuf();
return buffer.str();
}
void write_file(const std::string &path, const std::string &content)
{
std::ofstream file_stream(path, std::ios::out | std::ios::binary);
if (!file_stream.is_open())
{
throw std::runtime_error("Unable to open file for writing: " + path);
}
file_stream << content;
if (!file_stream)
{
throw std::runtime_error("Error occurred while writing to file: " + path);
}
file_stream.close();
}
// =================================================================================================
int main(int argc, char **argv)
{
// Load the files
auto dataset = json::parse(read_file(path_data));
auto config = json::parse(read_file(path_cfg));
// Load the configuration
const std::map<std::string, bool> whole_body = config["whole_body"];
const float min_bbox_score = config["min_bbox_score"];
const float min_bbox_area = config["min_bbox_area"];
const bool batch_poses = config["batch_poses"];
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
const float min_match_score = config["min_match_score"];
const size_t min_group_size = config["min_group_size"];
const int take_interval = config["take_interval"];
// Load 2D model
bool use_wb = utils_pipeline::use_whole_body(whole_body);
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model =
std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses);
// Load 3D model
std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>(
min_match_score, min_group_size);
// Timers
size_t time_count = dataset.size();
double time_image = 0.0;
double time_pose2d = 0.0;
double time_pose3d = 0.0;
size_t print_steps = (size_t)std::floor((float)time_count / 100.0f);
std::cout << "Running predictions: |";
size_t bar_width = (size_t)std::ceil((float)time_count / (float)print_steps) - 2;
for (size_t i = 0; i < bar_width; i++)
{
std::cout << "-";
}
std::cout << "|" << std::endl;
// Calculate 2D poses [items, views, persons, joints, 3]
std::vector<std::vector<std::vector<std::vector<std::array<float, 3>>>>> all_poses_2d;
std::cout << "Calculating 2D poses: ";
for (size_t i = 0; i < dataset.size(); i++)
{
if (i % print_steps == 0)
{
std::cout << "#" << std::flush;
}
std::chrono::duration<float> elapsed;
auto &item = dataset[i];
// Load images
auto stime = std::chrono::high_resolution_clock::now();
std::vector<cv::Mat> images = load_images(item);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_image += elapsed.count();
// Predict 2D poses
stime = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < images.size(); i++)
{
cv::Mat &img = images[i];
cv::Mat rgb = utils_pipeline::bayer2rgb(img);
images[i] = std::move(rgb);
}
auto poses_2d_all = kpt_model->predict(images);
auto poses_2d_upd = utils_pipeline::update_keypoints(
poses_2d_all, joint_names_2d, whole_body);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_pose2d += elapsed.count();
all_poses_2d.push_back(std::move(poses_2d_upd));
}
std::cout << std::endl;
// Calculate 3D poses [items, persons, joints, 4]
std::vector<std::vector<std::vector<std::array<float, 4>>>> all_poses_3d;
std::vector<std::string> all_ids;
std::string old_scene = "";
int old_id = -1;
std::cout << "Calculating 3D poses: ";
for (size_t i = 0; i < dataset.size(); i++)
{
if (i % print_steps == 0)
{
std::cout << "#" << std::flush;
}
std::chrono::duration<float> elapsed;
auto &item = dataset[i];
auto &poses_2d = all_poses_2d[i];
if (old_scene != item["scene"] || old_id + take_interval < item["index"])
{
// Reset last poses if scene changes
tri_model->reset();
old_scene = item["scene"];
}
auto stime = std::chrono::high_resolution_clock::now();
std::vector<Camera> cameras;
for (size_t j = 0; j < item["cameras"].size(); j++)
{
auto &cam = item["cameras"][j];
Camera camera;
camera.name = cam["name"].get<std::string>();
camera.K = cam["K"].get<std::array<std::array<float, 3>, 3>>();
camera.DC = cam["DC"].get<std::vector<float>>();
camera.R = cam["R"].get<std::array<std::array<float, 3>, 3>>();
camera.T = cam["T"].get<std::array<std::array<float, 1>, 3>>();
camera.width = cam["width"].get<int>();
camera.height = cam["height"].get<int>();
camera.type = cam["type"].get<std::string>();
cameras.push_back(camera);
}
std::array<std::array<float, 3>, 2> roomparams = {
item["room_size"].get<std::array<float, 3>>(),
item["room_center"].get<std::array<float, 3>>()};
auto poses_3d = tri_model->triangulate_poses(poses_2d, cameras, roomparams, joint_names_2d);
elapsed = std::chrono::high_resolution_clock::now() - stime;
time_pose3d += elapsed.count();
all_poses_3d.push_back(std::move(poses_3d));
all_ids.push_back(item["id"].get<std::string>());
old_id = item["index"];
}
std::cout << std::endl;
// Print timing stats
std::cout << "\nMetrics:" << std::endl;
size_t warmup = 10;
double avg_time_image = time_image / (time_count - warmup);
double avg_time_pose2d = time_pose2d / (time_count - warmup);
double avg_time_pose3d = time_pose3d / (time_count - warmup);
double fps = 1.0 / (avg_time_pose2d + avg_time_pose3d);
std::cout << "{\n"
<< " \"img_loading\": " << avg_time_image << ",\n"
<< " \"avg_time_2d\": " << avg_time_pose2d << ",\n"
<< " \"avg_time_3d\": " << avg_time_pose3d << ",\n"
<< " \"fps\": " << fps << "\n"
<< "}" << std::endl;
tri_model->print_stats();
// Store the results as json
json all_results;
all_results["all_ids"] = all_ids;
all_results["all_poses_2d"] = all_poses_2d;
all_results["all_poses_3d"] = all_poses_3d;
all_results["joint_names_2d"] = joint_names_2d;
all_results["joint_names_3d"] = joint_names_2d;
// Save the results
std::string path_results = "/tmp/rpt/results.json";
write_file(path_results, all_results.dump(0));
return 0;
}

View File

@ -1,22 +1,18 @@
import json import json
import os import os
import sys
import time
import cv2 import utils_pipeline
import matplotlib
import numpy as np
import tqdm
import test_triangulate
import utils_2d_pose
from skelda import evals from skelda import evals
from skelda.writers import json_writer
sys.path.append("/RapidPoseTriangulation/swig/")
import rpt
# ================================================================================================== # ==================================================================================================
whole_body = {
"foots": False,
"face": False,
"hands": False,
}
dataset_use = "human36m" dataset_use = "human36m"
# dataset_use = "panoptic" # dataset_use = "panoptic"
# dataset_use = "mvor" # dataset_use = "mvor"
@ -33,8 +29,6 @@ dataset_use = "human36m"
# dataset_use = "egohumans_volleyball" # dataset_use = "egohumans_volleyball"
# dataset_use = "egohumans_badminton" # dataset_use = "egohumans_badminton"
# dataset_use = "egohumans_tennis" # dataset_use = "egohumans_tennis"
# dataset_use = "ntu"
# dataset_use = "koarob"
# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered # Describes the minimum area as fraction of the image size for a 2D bounding box to be considered
@ -175,7 +169,7 @@ datasets = {
}, },
} }
joint_names_2d = test_triangulate.joint_names_2d joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d) joint_names_3d = list(joint_names_2d)
eval_joints = [ eval_joints = [
"head", "head",
@ -197,7 +191,7 @@ if dataset_use == "human36m":
if dataset_use == "panoptic": if dataset_use == "panoptic":
eval_joints[eval_joints.index("head")] = "nose" eval_joints[eval_joints.index("head")] = "nose"
if dataset_use == "human36m_wb": if dataset_use == "human36m_wb":
if any((test_triangulate.whole_body.values())): if utils_pipeline.use_whole_body(whole_body):
eval_joints = list(joint_names_2d) eval_joints = list(joint_names_2d)
else: else:
eval_joints[eval_joints.index("head")] = "nose" eval_joints[eval_joints.index("head")] = "nose"
@ -214,6 +208,11 @@ def load_json(path: str):
return data return data
def save_json(data: dict, path: str):
with open(path, "w+", encoding="utf-8") as file:
json.dump(data, file, indent=0)
# ================================================================================================== # ==================================================================================================
@ -304,6 +303,14 @@ def load_labels(dataset: dict):
if take_interval > 1: if take_interval > 1:
labels = [l for i, l in enumerate(labels) if i % take_interval == 0] labels = [l for i, l in enumerate(labels) if i % take_interval == 0]
# Add default values
for label in labels:
if "scene" not in label:
label["scene"] = "default"
for cam in label["cameras"]:
if not "type" in cam:
cam["type"] = "pinhole"
return labels return labels
@ -313,28 +320,6 @@ def load_labels(dataset: dict):
def main(): def main():
global joint_names_3d, eval_joints global joint_names_3d, eval_joints
# Load dataset specific parameters
min_match_score = datasets[dataset_use].get(
"min_match_score", default_min_match_score
)
min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
# Load 2D pose model
whole_body = test_triangulate.whole_body
if any((whole_body[k] for k in whole_body)):
kpt_model = utils_2d_pose.load_wb_model()
else:
kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)
# Manually set matplotlib backend
try:
matplotlib.use("TkAgg")
except ImportError:
print("WARNING: Using headless mode, no visualizations will be shown.")
print("Loading dataset ...") print("Loading dataset ...")
labels = load_labels( labels = load_labels(
{ {
@ -346,120 +331,47 @@ def main():
# Print a dataset sample for debugging # Print a dataset sample for debugging
print(labels[0]) print(labels[0])
print("\nPrefetching images ...") # Save dataset
for label in tqdm.tqdm(labels): tmp_export_dir = "/tmp/rpt/"
# If the images are stored on a HDD, it sometimes takes a while to load them for label in labels:
# Prefetching them results in more stable timings of the following steps if "splits" in label:
# To prevent memory overflow, the code only loads the images, but does not store them label.pop("splits")
try: json_writer.save_dataset(labels, tmp_export_dir)
for i in range(len(label["imgpaths"])):
imgpath = label["imgpaths"][i]
img = test_triangulate.load_image(imgpath)
except cv2.error:
print("One of the paths not found:", label["imgpaths"])
continue
time.sleep(3)
print("\nCalculating 2D predictions ...") # Load dataset specific parameters
all_poses_2d = [] min_match_score = datasets[dataset_use].get(
times = [] "min_match_score", default_min_match_score
for label in tqdm.tqdm(labels):
images_2d = []
start = time.time()
try:
for i in range(len(label["imgpaths"])):
imgpath = label["imgpaths"][i]
img = test_triangulate.load_image(imgpath)
images_2d.append(img)
except cv2.error:
print("One of the paths not found:", label["imgpaths"])
continue
if dataset_use == "human36m":
for i in range(len(images_2d)):
# Since the images don't have the same shape, rescale some of them
img = images_2d[i]
ishape = img.shape
if ishape != (1000, 1000, 3):
cam = label["cameras"][i]
cam["K"][1][1] = cam["K"][1][1] * (1000 / ishape[0])
cam["K"][1][2] = cam["K"][1][2] * (1000 / ishape[0])
cam["K"][0][0] = cam["K"][0][0] * (1000 / ishape[1])
cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
images_2d[i] = cv2.resize(img, (1000, 1000))
# Convert image format to Bayer encoding to simulate real camera input
# This also resulted in notably better MPJPE results in most cases, presumbly since the
# demosaicing algorithm from OpenCV is better than the default one from the cameras
for i in range(len(images_2d)):
images_2d[i] = test_triangulate.rgb2bayer(images_2d[i])
time_imgs = time.time() - start
start = time.time()
for i in range(len(images_2d)):
images_2d[i] = test_triangulate.bayer2rgb(images_2d[i])
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
time_2d = time.time() - start
all_poses_2d.append(poses_2d)
times.append([time_imgs, time_2d, 0])
print("\nCalculating 3D predictions ...")
all_poses_3d = []
all_ids = []
triangulator = rpt.Triangulator(
min_match_score=min_match_score, min_group_size=min_group_size
) )
old_scene = "" min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
old_index = -1 min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
for i in tqdm.tqdm(range(len(labels))): min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
label = labels[i] batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
poses_2d = all_poses_2d[i]
if old_scene != label.get("scene", "") or ( # Save config
old_index + datasets[dataset_use]["take_interval"] < label["index"] config_path = tmp_export_dir + "config.json"
): config = {
# Reset last poses if scene changes "min_match_score": min_match_score,
old_scene = label.get("scene", "") "min_group_size": min_group_size,
triangulator.reset() "min_bbox_score": min_bbox_score,
"min_bbox_area": min_bbox_area,
start = time.time() "batch_poses": batch_poses,
if sum(np.sum(p) for p in poses_2d) == 0: "whole_body": whole_body,
poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist() "take_interval": datasets[dataset_use]["take_interval"],
else:
rpt_cameras = rpt.convert_cameras(label["cameras"])
roomparams = [label["room_size"], label["room_center"]]
poses3D = triangulator.triangulate_poses(
poses_2d, rpt_cameras, roomparams, joint_names_2d
)
time_3d = time.time() - start
old_index = label["index"]
all_poses_3d.append(np.array(poses3D).tolist())
all_ids.append(label["id"])
times[i][2] = time_3d
# Print per-step triangulation timings
print("")
triangulator.print_stats()
warmup_iters = 10
if len(times) > warmup_iters:
times = times[warmup_iters:]
avg_time_im = np.mean([t[0] for t in times])
avg_time_2d = np.mean([t[1] for t in times])
avg_time_3d = np.mean([t[2] for t in times])
tstats = {
"img_loading": avg_time_im,
"avg_time_2d": avg_time_2d,
"avg_time_3d": avg_time_3d,
"avg_fps": 1.0 / (avg_time_2d + avg_time_3d),
} }
print("\nMetrics:") save_json(config, config_path)
print(json.dumps(tstats, indent=2))
# Call the CPP binary
os.system("/RapidPoseTriangulation/scripts/test_skelda_dataset.bin")
# Load the results
print("Loading exports ...")
res_path = tmp_export_dir + "results.json"
results = load_json(res_path)
all_poses_3d = results["all_poses_3d"]
all_ids = results["all_ids"]
joint_names_3d = results["joint_names_3d"]
# Run evaluation
_ = evals.mpjpe.run_eval( _ = evals.mpjpe.run_eval(
labels, labels,
all_poses_3d, all_poses_3d,
@ -477,7 +389,6 @@ def main():
replace_head_with_nose=True, replace_head_with_nose=True,
) )
# ================================================================================================== # ==================================================================================================
if __name__ == "__main__": if __name__ == "__main__":
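The Python script and the compiled `test_skelda_dataset.bin` communicate only through files under `/tmp/rpt/`: the exported dataset, a `config.json` written by the Python side, and a `results.json` written back by the binary. A minimal sketch of that file contract; the concrete values below are assumptions, since the `default_*` constants are defined elsewhere in the script:

```python
import json

# Assumed values for illustration; the real defaults come from the
# default_* constants and the per-dataset overrides in the script.
config = {
    "min_match_score": 0.5,
    "min_group_size": 2,
    "min_bbox_score": 0.3,
    "min_bbox_area": 1000,
    "batch_poses": True,
    "whole_body": {"foots": False, "face": False, "hands": False},
    "take_interval": 1,
}
with open("/tmp/rpt/config.json", "w") as f:
    json.dump(config, f, indent=2)

# After the binary has run, /tmp/rpt/results.json holds the 3D output:
with open("/tmp/rpt/results.json") as f:
    results = json.load(f)
poses_per_frame = results["all_poses_3d"]  # aligned with results["all_ids"]
joint_names = results["joint_names_3d"]
```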
View File
@ -3,13 +3,12 @@
import json
import os
import sys
import time
-from typing import List
-import cv2
import matplotlib
import numpy as np
import utils_2d_pose
+import utils_pipeline
from skelda import utils_pose, utils_view

sys.path.append("/RapidPoseTriangulation/swig/")
@ -25,175 +24,9 @@ whole_body = {
"hands": False, "hands": False,
} }
-joint_names_2d = [
-    "nose",
-    "eye_left",
-    "eye_right",
-    "ear_left",
-    "ear_right",
-    "shoulder_left",
-    "shoulder_right",
-    "elbow_left",
-    "elbow_right",
-    "wrist_left",
-    "wrist_right",
-    "hip_left",
-    "hip_right",
-    "knee_left",
-    "knee_right",
-    "ankle_left",
-    "ankle_right",
-]
-if whole_body["foots"]:
-    joint_names_2d.extend(
-        [
-            "foot_toe_big_left",
-            "foot_toe_small_left",
-            "foot_heel_left",
-            "foot_toe_big_right",
-            "foot_toe_small_right",
-            "foot_heel_right",
-        ]
-    )
-if whole_body["face"]:
-    joint_names_2d.extend(
-        [
-            "face_jaw_right_1",
-            "face_jaw_right_2",
-            "face_jaw_right_3",
-            "face_jaw_right_4",
-            "face_jaw_right_5",
-            "face_jaw_right_6",
-            "face_jaw_right_7",
-            "face_jaw_right_8",
-            "face_jaw_middle",
-            "face_jaw_left_1",
-            "face_jaw_left_2",
-            "face_jaw_left_3",
-            "face_jaw_left_4",
-            "face_jaw_left_5",
-            "face_jaw_left_6",
-            "face_jaw_left_7",
-            "face_jaw_left_8",
-            "face_eyebrow_right_1",
-            "face_eyebrow_right_2",
-            "face_eyebrow_right_3",
-            "face_eyebrow_right_4",
-            "face_eyebrow_right_5",
-            "face_eyebrow_left_1",
-            "face_eyebrow_left_2",
-            "face_eyebrow_left_3",
-            "face_eyebrow_left_4",
-            "face_eyebrow_left_5",
-            "face_nose_1",
-            "face_nose_2",
-            "face_nose_3",
-            "face_nose_4",
-            "face_nose_5",
-            "face_nose_6",
-            "face_nose_7",
-            "face_nose_8",
-            "face_nose_9",
-            "face_eye_right_1",
-            "face_eye_right_2",
-            "face_eye_right_3",
-            "face_eye_right_4",
-            "face_eye_right_5",
-            "face_eye_right_6",
-            "face_eye_left_1",
-            "face_eye_left_2",
-            "face_eye_left_3",
-            "face_eye_left_4",
-            "face_eye_left_5",
-            "face_eye_left_6",
-            "face_mouth_1",
-            "face_mouth_2",
-            "face_mouth_3",
-            "face_mouth_4",
-            "face_mouth_5",
-            "face_mouth_6",
-            "face_mouth_7",
-            "face_mouth_8",
-            "face_mouth_9",
-            "face_mouth_10",
-            "face_mouth_11",
-            "face_mouth_12",
-            "face_mouth_13",
-            "face_mouth_14",
-            "face_mouth_15",
-            "face_mouth_16",
-            "face_mouth_17",
-            "face_mouth_18",
-            "face_mouth_19",
-            "face_mouth_20",
-        ]
-    )
-if whole_body["hands"]:
-    joint_names_2d.extend(
-        [
-            "hand_wrist_left",
-            "hand_finger_thumb_left_1",
-            "hand_finger_thumb_left_2",
-            "hand_finger_thumb_left_3",
-            "hand_finger_thumb_left_4",
-            "hand_finger_index_left_1",
-            "hand_finger_index_left_2",
-            "hand_finger_index_left_3",
-            "hand_finger_index_left_4",
-            "hand_finger_middle_left_1",
-            "hand_finger_middle_left_2",
-            "hand_finger_middle_left_3",
-            "hand_finger_middle_left_4",
-            "hand_finger_ring_left_1",
-            "hand_finger_ring_left_2",
-            "hand_finger_ring_left_3",
-            "hand_finger_ring_left_4",
-            "hand_finger_pinky_left_1",
-            "hand_finger_pinky_left_2",
-            "hand_finger_pinky_left_3",
-            "hand_finger_pinky_left_4",
-            "hand_wrist_right",
-            "hand_finger_thumb_right_1",
-            "hand_finger_thumb_right_2",
-            "hand_finger_thumb_right_3",
-            "hand_finger_thumb_right_4",
-            "hand_finger_index_right_1",
-            "hand_finger_index_right_2",
-            "hand_finger_index_right_3",
-            "hand_finger_index_right_4",
-            "hand_finger_middle_right_1",
-            "hand_finger_middle_right_2",
-            "hand_finger_middle_right_3",
-            "hand_finger_middle_right_4",
-            "hand_finger_ring_right_1",
-            "hand_finger_ring_right_2",
-            "hand_finger_ring_right_3",
-            "hand_finger_ring_right_4",
-            "hand_finger_pinky_right_1",
-            "hand_finger_pinky_right_2",
-            "hand_finger_pinky_right_3",
-            "hand_finger_pinky_right_4",
-        ]
-    )
-joint_names_2d.extend(
-    [
-        "hip_middle",
-        "shoulder_middle",
-        "head",
-    ]
-)
+joint_names_2d = utils_pipeline.get_joint_names(whole_body)

joint_names_3d = list(joint_names_2d)

-main_limbs = [
-    ("shoulder_left", "elbow_left"),
-    ("elbow_left", "wrist_left"),
-    ("shoulder_right", "elbow_right"),
-    ("elbow_right", "wrist_right"),
-    ("hip_left", "knee_left"),
-    ("knee_left", "ankle_left"),
-    ("hip_right", "knee_right"),
-    ("knee_right", "ankle_right"),
-]

# ==================================================================================================
@ -217,85 +50,6 @@ def update_sample(sample, new_dir=""):
# ==================================================================================================

-def load_image(path: str):
-    image = cv2.imread(path, 3)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    image = np.asarray(image, dtype=np.uint8)
-    return image
-
-# ==================================================================================================
-
-def rgb2bayer(img):
-    bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
-    bayer[0::2, 0::2] = img[0::2, 0::2, 0]
-    bayer[0::2, 1::2] = img[0::2, 1::2, 1]
-    bayer[1::2, 0::2] = img[1::2, 0::2, 1]
-    bayer[1::2, 1::2] = img[1::2, 1::2, 2]
-    return bayer
-
-def bayer2rgb(bayer):
-    img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
-    return img
-
-# ==================================================================================================
-
-def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
-    new_views = []
-    for view in poses_2d:
-        new_bodies = []
-        for body in view:
-            body = body.tolist()
-            new_body = body[:17]
-            if whole_body["foots"]:
-                new_body.extend(body[17:23])
-            if whole_body["face"]:
-                new_body.extend(body[23:91])
-            if whole_body["hands"]:
-                new_body.extend(body[91:])
-            body = new_body
-            hlid = joint_names.index("hip_left")
-            hrid = joint_names.index("hip_right")
-            mid_hip = [
-                float(((body[hlid][0] + body[hrid][0]) / 2.0)),
-                float(((body[hlid][1] + body[hrid][1]) / 2.0)),
-                min(body[hlid][2], body[hrid][2]),
-            ]
-            body.append(mid_hip)
-            slid = joint_names.index("shoulder_left")
-            srid = joint_names.index("shoulder_right")
-            mid_shoulder = [
-                float(((body[slid][0] + body[srid][0]) / 2.0)),
-                float(((body[slid][1] + body[srid][1]) / 2.0)),
-                min(body[slid][2], body[srid][2]),
-            ]
-            body.append(mid_shoulder)
-            elid = joint_names.index("ear_left")
-            erid = joint_names.index("ear_right")
-            head = [
-                float(((body[elid][0] + body[erid][0]) / 2.0)),
-                float(((body[elid][1] + body[erid][1]) / 2.0)),
-                min(body[elid][2], body[erid][2]),
-            ]
-            body.append(head)
-            new_bodies.append(body)
-        new_views.append(new_bodies)
-    return new_views
-
-# ==================================================================================================
def main():
    if any((whole_body[k] for k in whole_body)):
        kpt_model = utils_2d_pose.load_wb_model()
@ -330,15 +84,15 @@ def main():
        images_2d = []
        for i in range(len(sample["cameras_color"])):
            imgpath = sample["imgpaths_color"][i]
-            img = load_image(imgpath)
-            img = rgb2bayer(img)
-            img = bayer2rgb(img)
+            img = utils_pipeline.load_image(imgpath)
+            img = utils_pipeline.rgb2bayer(img)
+            img = utils_pipeline.bayer2rgb(img)
            images_2d.append(img)

        # Get 2D poses
        stime = time.time()
        poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
-        poses_2d = update_keypoints(poses_2d, joint_names_2d)
+        poses_2d = utils_pipeline.update_keypoints(poses_2d, joint_names_2d, whole_body)
        print("2D time:", time.time() - stime)
        # print([np.array(p).round(6).tolist() for p in poses_2d])
View File
@ -1,13 +1,13 @@
#pragma once

#include <filesystem>
-#include <vector>
-#include <string>
#include <memory>
+#include <string>
+#include <vector>

-#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
#include <onnxruntime_c_api.h>
+#include <opencv2/opencv.hpp>
#include <tensorrt_provider_options.h>

// =================================================================================================
@ -646,27 +646,10 @@ namespace utils_2d_pose
    std::vector<std::array<float, 5>> RTMDet::call(const cv::Mat &image)
    {
-        auto stime = std::chrono::high_resolution_clock::now();
-        std::chrono::duration<float> elapsed;
-
        cv::Mat preprocessed = preprocess(image);
        std::vector<cv::Mat> inputs = {preprocessed};
-        elapsed = std::chrono::high_resolution_clock::now() - stime;
-        std::cout << "Preprocess time: " << elapsed.count() << "s\n";
-
-        stime = std::chrono::high_resolution_clock::now();
        auto results = call_by_image(inputs);
-        elapsed = std::chrono::high_resolution_clock::now() - stime;
-        std::cout << "Inference time: " << elapsed.count() << "s\n";
-
-        stime = std::chrono::high_resolution_clock::now();
        auto outputs = postprocess(results, image);
-        elapsed = std::chrono::high_resolution_clock::now() - stime;
-        std::cout << "Postprocess time: " << elapsed.count() << "s\n";
-
        return outputs;
    }
@ -818,26 +801,9 @@ namespace utils_2d_pose
    std::vector<std::vector<std::array<float, 3>>> RTMPose::call(
        const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
    {
-        auto stime = std::chrono::high_resolution_clock::now();
-        std::chrono::duration<float> elapsed;
-
        std::vector<cv::Mat> inputs = preprocess(image, bboxes);
-        elapsed = std::chrono::high_resolution_clock::now() - stime;
-        std::cout << "Preprocess time: " << elapsed.count() << "s\n";
-
-        stime = std::chrono::high_resolution_clock::now();
        auto results = call_by_image(inputs);
-        elapsed = std::chrono::high_resolution_clock::now() - stime;
-        std::cout << "Inference time: " << elapsed.count() << "s\n";
-
-        stime = std::chrono::high_resolution_clock::now();
        auto outputs = postprocess(results, image, bboxes);
-        elapsed = std::chrono::high_resolution_clock::now() - stime;
-        std::cout << "Postprocess time: " << elapsed.count() << "s\n";
-
        return outputs;
    }
View File
@ -20,6 +20,7 @@ class BaseModel(ABC):
            raise FileNotFoundError("File not found:", model_path)

        if model_path.endswith(".onnx"):
+            print("Loading model:", model_path)
            self.init_onnxruntime(model_path)
            self.runtime = "ort"
        else:
scripts/utils_pipeline.hpp Normal file
@ -0,0 +1,316 @@
#pragma once
#include <algorithm>  // std::find, std::min
#include <array>
#include <iterator>   // std::distance
#include <map>
#include <stdexcept>  // std::runtime_error
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
// =================================================================================================
namespace utils_pipeline
{
bool use_whole_body(const std::map<std::string, bool> &whole_body)
{
for (const auto &pair : whole_body)
{
if (pair.second)
{
return true;
}
}
return false;
}
// =============================================================================================
std::vector<std::string> get_joint_names(const std::map<std::string, bool> &whole_body)
{
std::vector<std::string> joint_names_2d = {
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
};
if (whole_body.at("foots"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
});
}
if (whole_body.at("face"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
});
}
if (whole_body.at("hands"))
{
joint_names_2d.insert(
joint_names_2d.end(),
{
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
});
}
joint_names_2d.insert(
joint_names_2d.end(),
{
"hip_middle",
"shoulder_middle",
"head",
});
return joint_names_2d;
}
// =============================================================================================
cv::Mat bayer2rgb(const cv::Mat &bayer)
{
cv::Mat rgb;
cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB);
return rgb;
}
cv::Mat rgb2bayer(const cv::Mat &img)
{
CV_Assert(img.type() == CV_8UC3);
cv::Mat bayer(img.rows, img.cols, CV_8UC1);
for (int r = 0; r < img.rows; ++r)
{
const uchar *imgData = img.ptr<uchar>(r);
uchar *bayerRowPtr = bayer.ptr<uchar>(r);
for (int c = 0; c < img.cols; ++c)
{
int pixelIndex = 3 * c;
// Use faster bit operation instead of modulo+if
// Even row, even col => R = 0
// Even row, odd col => G = 1
// Odd row, even col => G = 1
// Odd row, odd col => B = 2
int row_mod = r & 1;
int col_mod = c & 1;
int component = row_mod + col_mod;
bayerRowPtr[c] = imgData[pixelIndex + component];
}
}
return bayer;
}
// =============================================================================================
inline int find_index(const std::vector<std::string> &vec, const std::string &key)
{
auto it = std::find(vec.begin(), vec.end(), key);
if (it == vec.end())
{
throw std::runtime_error("Cannot find \"" + key + "\" in joint_names.");
}
return static_cast<int>(std::distance(vec.begin(), it));
}
std::vector<std::vector<std::vector<std::array<float, 3>>>> update_keypoints(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<std::string> &joint_names,
const std::map<std::string, bool> &whole_body)
{
std::vector<std::vector<std::vector<std::array<float, 3>>>> new_views;
new_views.reserve(poses_2d.size());
for (const auto &view : poses_2d)
{
// "view" is a list of bodies => each body is Nx3
std::vector<std::vector<std::array<float, 3>>> new_bodies;
new_bodies.reserve(view.size());
for (const auto &body : view)
{
// 1) Copy first 17 keypoints
std::vector<std::array<float, 3>> new_body;
new_body.insert(new_body.end(), body.begin(), body.begin() + 17);
// 2) Optionally append extra keypoints
if (whole_body.at("foots"))
{
new_body.insert(new_body.end(), body.begin() + 17, body.begin() + 23);
}
if (whole_body.at("face"))
{
new_body.insert(new_body.end(), body.begin() + 23, body.begin() + 91);
}
if (whole_body.at("hands"))
{
new_body.insert(new_body.end(), body.begin() + 91, body.end());
}
// 3) Compute mid_hip
int hlid = find_index(joint_names, "hip_left");
int hrid = find_index(joint_names, "hip_right");
float mid_hip_x = 0.5 * (new_body[hlid][0] + new_body[hrid][0]);
float mid_hip_y = 0.5 * (new_body[hlid][1] + new_body[hrid][1]);
float mid_hip_c = std::min(new_body[hlid][2], new_body[hrid][2]);
new_body.push_back({mid_hip_x, mid_hip_y, mid_hip_c});
// 4) Compute mid_shoulder
int slid = find_index(joint_names, "shoulder_left");
int srid = find_index(joint_names, "shoulder_right");
float mid_shoulder_x = 0.5 * (new_body[slid][0] + new_body[srid][0]);
float mid_shoulder_y = 0.5 * (new_body[slid][1] + new_body[srid][1]);
float mid_shoulder_c = std::min(new_body[slid][2], new_body[srid][2]);
new_body.push_back({mid_shoulder_x, mid_shoulder_y, mid_shoulder_c});
// 5) Compute head
int elid = find_index(joint_names, "ear_left");
int erid = find_index(joint_names, "ear_right");
float head_x = 0.5 * (new_body[elid][0] + new_body[erid][0]);
float head_y = 0.5 * (new_body[elid][1] + new_body[erid][1]);
float head_c = std::min(new_body[elid][2], new_body[erid][2]);
new_body.push_back({head_x, head_y, head_c});
// Add this updated body into new_bodies
new_bodies.push_back(new_body);
}
// Add all updated bodies for this view
new_views.push_back(new_bodies);
}
return new_views;
}
}
scripts/utils_pipeline.py Normal file
@ -0,0 +1,255 @@
from typing import List
import cv2
import numpy as np
# ==================================================================================================
def use_whole_body(whole_body: dict) -> bool:
return any((whole_body[k] for k in whole_body))
# ==================================================================================================
def get_joint_names(whole_body: dict):
joint_names_2d = [
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
]
if whole_body["foots"]:
joint_names_2d.extend(
[
"foot_toe_big_left",
"foot_toe_small_left",
"foot_heel_left",
"foot_toe_big_right",
"foot_toe_small_right",
"foot_heel_right",
]
)
if whole_body["face"]:
joint_names_2d.extend(
[
"face_jaw_right_1",
"face_jaw_right_2",
"face_jaw_right_3",
"face_jaw_right_4",
"face_jaw_right_5",
"face_jaw_right_6",
"face_jaw_right_7",
"face_jaw_right_8",
"face_jaw_middle",
"face_jaw_left_1",
"face_jaw_left_2",
"face_jaw_left_3",
"face_jaw_left_4",
"face_jaw_left_5",
"face_jaw_left_6",
"face_jaw_left_7",
"face_jaw_left_8",
"face_eyebrow_right_1",
"face_eyebrow_right_2",
"face_eyebrow_right_3",
"face_eyebrow_right_4",
"face_eyebrow_right_5",
"face_eyebrow_left_1",
"face_eyebrow_left_2",
"face_eyebrow_left_3",
"face_eyebrow_left_4",
"face_eyebrow_left_5",
"face_nose_1",
"face_nose_2",
"face_nose_3",
"face_nose_4",
"face_nose_5",
"face_nose_6",
"face_nose_7",
"face_nose_8",
"face_nose_9",
"face_eye_right_1",
"face_eye_right_2",
"face_eye_right_3",
"face_eye_right_4",
"face_eye_right_5",
"face_eye_right_6",
"face_eye_left_1",
"face_eye_left_2",
"face_eye_left_3",
"face_eye_left_4",
"face_eye_left_5",
"face_eye_left_6",
"face_mouth_1",
"face_mouth_2",
"face_mouth_3",
"face_mouth_4",
"face_mouth_5",
"face_mouth_6",
"face_mouth_7",
"face_mouth_8",
"face_mouth_9",
"face_mouth_10",
"face_mouth_11",
"face_mouth_12",
"face_mouth_13",
"face_mouth_14",
"face_mouth_15",
"face_mouth_16",
"face_mouth_17",
"face_mouth_18",
"face_mouth_19",
"face_mouth_20",
]
)
if whole_body["hands"]:
joint_names_2d.extend(
[
"hand_wrist_left",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
]
)
joint_names_2d.extend(
[
"hip_middle",
"shoulder_middle",
"head",
]
)
return joint_names_2d
# ==================================================================================================
def load_image(path: str):
image = cv2.imread(path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = np.asarray(image, dtype=np.uint8)
return image
# ==================================================================================================
def rgb2bayer(img):
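    # Keep one color sample per pixel in a 2x2 mosaic:
    # R at even/even, G at even/odd and odd/even, B at odd/odd,
    # the same layout the C++ version in utils_pipeline.hpp produces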
bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
bayer[0::2, 0::2] = img[0::2, 0::2, 0]
bayer[0::2, 1::2] = img[0::2, 1::2, 1]
bayer[1::2, 0::2] = img[1::2, 0::2, 1]
bayer[1::2, 1::2] = img[1::2, 1::2, 2]
return bayer
def bayer2rgb(bayer):
img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
return img
# ==================================================================================================
def update_keypoints(poses_2d: list, joint_names: List[str], whole_body: dict) -> list:
new_views = []
for view in poses_2d:
new_bodies = []
for body in view:
body = body.tolist()
new_body = body[:17]
if whole_body["foots"]:
new_body.extend(body[17:23])
if whole_body["face"]:
new_body.extend(body[23:91])
if whole_body["hands"]:
new_body.extend(body[91:])
body = new_body
hlid = joint_names.index("hip_left")
hrid = joint_names.index("hip_right")
mid_hip = [
float(((body[hlid][0] + body[hrid][0]) / 2.0)),
float(((body[hlid][1] + body[hrid][1]) / 2.0)),
min(body[hlid][2], body[hrid][2]),
]
body.append(mid_hip)
slid = joint_names.index("shoulder_left")
srid = joint_names.index("shoulder_right")
mid_shoulder = [
float(((body[slid][0] + body[srid][0]) / 2.0)),
float(((body[slid][1] + body[srid][1]) / 2.0)),
min(body[slid][2], body[srid][2]),
]
body.append(mid_shoulder)
elid = joint_names.index("ear_left")
erid = joint_names.index("ear_right")
head = [
float(((body[elid][0] + body[erid][0]) / 2.0)),
float(((body[elid][1] + body[erid][1]) / 2.0)),
min(body[elid][2], body[erid][2]),
]
body.append(head)
new_bodies.append(body)
new_views.append(new_bodies)
return new_views
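A small usage sketch of the helpers above; the pose shapes follow how they are called from the scripts (one `[x, y, score]` row per joint, per body, per view), and the image path is a placeholder:

```python
import numpy as np
import utils_pipeline

whole_body = {"foots": False, "face": False, "hands": False}
joint_names = utils_pipeline.get_joint_names(whole_body)  # 17 base joints + 3 derived names

# One view containing one body with 17 keypoints (random values for illustration)
poses_2d = [[np.random.rand(17, 3)]]
poses_2d = utils_pipeline.update_keypoints(poses_2d, joint_names, whole_body)
assert len(poses_2d[0][0]) == len(joint_names)  # hip_middle, shoulder_middle, head appended

# Bayer round trip as applied before 2D inference
img = utils_pipeline.load_image("/path/to/image.jpg")  # placeholder path
bayer = utils_pipeline.rgb2bayer(img)
rgb = utils_pipeline.bayer2rgb(bayer)
assert rgb.shape == img.shape
```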
tests/README.md Normal file
@ -0,0 +1,34 @@
# Tests
Various module tests.
### Triangulator
```bash
cd /RapidPoseTriangulation/tests/ && python3 test_interface.py && cd ..
```
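The SWIG bindings that `test_interface.py` exercises follow the pattern used by the scripts in this repository; a sketch based on the calls visible there, where `label` is a skelda-style sample and `poses_2d`/`joint_names_2d` come from the 2D stage (both placeholders here):

```python
import sys

sys.path.append("/RapidPoseTriangulation/swig/")
import rpt

# Assumed threshold values; the scripts load them per dataset from a config.
triangulator = rpt.Triangulator(min_match_score=0.5, min_group_size=2)

rpt_cameras = rpt.convert_cameras(label["cameras"])  # label: skelda-style sample (placeholder)
roomparams = [label["room_size"], label["room_center"]]
poses_3d = triangulator.triangulate_poses(poses_2d, rpt_cameras, roomparams, joint_names_2d)

triangulator.reset()        # clear temporal state, e.g. when the scene changes
triangulator.print_stats()  # per-step triangulation timings
```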
### ONNX C++ Interface
```bash
cd /RapidPoseTriangulation/tests/
g++ -std=c++17 -O3 -march=native -Wall \
$(pkg-config --cflags opencv4) \
-I /onnxruntime/include \
-I /onnxruntime/include/onnxruntime/core/session \
-I /onnxruntime/include/onnxruntime/core/providers/tensorrt \
-L /onnxruntime/build/Linux/Release \
test_utils2d.cpp \
-o my_app.bin \
-Wl,--start-group \
-lonnxruntime_providers_tensorrt \
-lonnxruntime_providers_shared \
-lonnxruntime_providers_cuda \
-lonnxruntime \
-Wl,--end-group \
$(pkg-config --libs opencv4) \
-Wl,-rpath,/onnxruntime/build/Linux/Release
./my_app.bin
```
View File
@ -5,7 +5,7 @@
#include <opencv2/opencv.hpp>

-#include "../src/utils_2d_pose.hpp"
+#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"

// =================================================================================================
// =================================================================================================