Reimplemented wrapper in C++.
@@ -18,3 +18,28 @@ Run pose estimator with ros topics as inputs and publish detected poses.
 docker exec -it ros-test_node-1 bash
 export ROS_DOMAIN_ID=18
 ```
+
+### Debugging
+
+```bash
+cd /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/tests/
+
+g++ -std=c++17 -O3 -march=native -DCOMPILE_EXAMPLE_MAIN \
+    $(pkg-config --cflags opencv4) \
+    -I /onnxruntime/include \
+    -I /onnxruntime/include/onnxruntime/core/session \
+    -I /onnxruntime/include/onnxruntime/core/providers/tensorrt \
+    -L /onnxruntime/build/Linux/Release \
+    my_app.cpp \
+    -o my_app \
+    -Wl,--start-group \
+    -lonnxruntime_providers_tensorrt \
+    -lonnxruntime_providers_shared \
+    -lonnxruntime_providers_cuda \
+    -lonnxruntime \
+    -Wl,--end-group \
+    $(pkg-config --libs opencv4) \
+    -Wl,-rpath,/onnxruntime/build/Linux/Release
+
+./my_app
+```
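`my_app.cpp` is not part of this commit, so the snippet below is only a hypothetical stand-in to check that the compile line above works. It includes `test_triangulate.hpp` from `../src/` and round-trips a random image through `rgb2bayer`/`bayer2rgb`; the repo's own example main appears to live behind the `COMPILE_EXAMPLE_MAIN` guard in the test sources.

```cpp
// my_app.cpp -- hypothetical smoke test, not part of this commit.
// Uses only functions shown in src/test_triangulate.hpp so it builds with the g++ command above.
#include <iostream>

#include <opencv2/opencv.hpp>

#include "../src/test_triangulate.hpp"

int main()
{
    // Random RGB test image
    cv::Mat rgb(480, 640, CV_8UC3);
    cv::randu(rgb, cv::Scalar(0, 0, 0), cv::Scalar(256, 256, 256));

    // RGB -> Bayer -> RGB round trip through the helpers used by the wrapper
    cv::Mat bayer = test_triangulate::rgb2bayer(rgb);
    cv::Mat restored = test_triangulate::bayer2rgb(bayer);

    std::cout << "bayer: " << bayer.size() << "  restored: " << restored.size() << std::endl;
    return 0;
}
```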
@@ -35,7 +35,8 @@ services:
       - DISPLAY
       - QT_X11_NO_MITSHM=1
       - "PYTHONUNBUFFERED=1"
-    command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2D_wrapper rpt2D_wrapper'
+    # command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2D_wrapper_py rpt2D_wrapper'
+    command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2D_wrapper_cpp rpt2D_wrapper'

   pose_visualizer:
     image: rapidposetriangulation_ros
@@ -1,4 +1,22 @@
 FROM rapidposetriangulation
+WORKDIR /
+
+# Install ONNX runtime
+# See: https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.tensorrt
+RUN pip3 uninstall -y onnxruntime-gpu
+RUN git clone --recursive --depth=1 --branch=v1.20.1 https://github.com/Microsoft/onnxruntime.git
+ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/cmake-3.30.1-linux-x86_64/bin:${PATH}
+ARG CMAKE_CUDA_ARCHITECTURES=75;80;90
+ENV TRT_VERSION=10.5.0.18
+RUN /bin/sh onnxruntime/dockerfiles/scripts/install_common_deps.sh
+RUN /bin/sh onnxruntime/dockerfiles/scripts/checkout_submodules.sh ${trt_version}
+RUN ls
+RUN cd onnxruntime && \
+    /bin/sh build.sh --allow_running_as_root --parallel --build_shared_lib \
+    --cuda_home /usr/local/cuda --cudnn_home /usr/lib/x86_64-linux-gnu/ --use_tensorrt \
+    --tensorrt_home /usr/lib/x86_64-linux-gnu/ --config Release --build_wheel --skip_tests \
+    --skip_submodule_sync --cmake_extra_defines '"CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'"'
+RUN cd onnxruntime && pip install build/Linux/Release/dist/*.whl

 # Install ROS2
 # https://docs.ros.org/en/humble/Installation/Ubuntu-Install-Debians.html
@@ -43,11 +61,13 @@ RUN pip3 install --no-cache-dir "setuptools<=58.2.0"

 # Copy modules
 COPY ./ros/pose2D_visualizer /RapidPoseTriangulation/ros/pose2D_visualizer/
-COPY ./ros/rpt2D_wrapper /RapidPoseTriangulation/ros/rpt2D_wrapper/
+COPY ./ros/rpt2D_wrapper_py /RapidPoseTriangulation/ros/rpt2D_wrapper_py/
+COPY ./ros/rpt2D_wrapper_cpp /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/

 # Link and build as ros package
 RUN ln -s /RapidPoseTriangulation/ros/pose2D_visualizer/ /project/dev_ws/src/pose2D_visualizer
-RUN ln -s /RapidPoseTriangulation/ros/rpt2D_wrapper/ /project/dev_ws/src/rpt2D_wrapper
+RUN ln -s /RapidPoseTriangulation/ros/rpt2D_wrapper_py/ /project/dev_ws/src/rpt2D_wrapper_py
+RUN ln -s /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/ /project/dev_ws/src/rpt2D_wrapper_cpp
 RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'

 # Update ros packages -> autocompletion and check
@@ -58,4 +78,5 @@ RUN apt-get autoremove -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

+WORKDIR /RapidPoseTriangulation/
 CMD ["/bin/bash"]
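A quick way to confirm that the ONNX Runtime built in this image actually exposes the TensorRT and CUDA execution providers is to list the available providers. The sketch below is not part of the commit and assumes it is compiled with the same include/library flags as the Debugging command in the README.

```cpp
// provider_check.cpp -- hypothetical sanity check, not part of this commit.
// It should print TensorrtExecutionProvider and CUDAExecutionProvider for the image built above.
#include <iostream>
#include <string>
#include <vector>

#include <onnxruntime_cxx_api.h>

int main()
{
    // Lists the execution providers compiled into this onnxruntime build
    for (const std::string &provider : Ort::GetAvailableProviders())
    {
        std::cout << provider << std::endl;
    }
    return 0;
}
```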
@@ -1,4 +0,0 @@
-[develop]
-script_dir=$base/lib/rpt2D_wrapper
-[install]
-install_scripts=$base/lib/rpt2D_wrapper
65  ros/rpt2D_wrapper_cpp/CMakeLists.txt  Normal file
@@ -0,0 +1,65 @@
cmake_minimum_required(VERSION 3.5)
project(rpt2D_wrapper_cpp)

# Default to C99
if(NOT CMAKE_C_STANDARD)
  set(CMAKE_C_STANDARD 99)
endif()

# Default to C++17
if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
endif()

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  add_compile_options(-Wall -Wextra -Wpedantic)
endif()

# find dependencies
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED)
find_package(std_msgs REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(cv_bridge REQUIRED)
find_package(OpenCV REQUIRED)

### 3) ONNX Runtime
include_directories(/onnxruntime/include
                    /onnxruntime/include/onnxruntime/core/session
                    /onnxruntime/include/onnxruntime/core/providers/tensorrt)
link_directories(/onnxruntime/build/Linux/Release)

add_executable(rpt2D_wrapper src/rpt2D_wrapper.cpp)
ament_target_dependencies(rpt2D_wrapper rclcpp std_msgs sensor_msgs cv_bridge)
target_include_directories(rpt2D_wrapper PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  $<INSTALL_INTERFACE:include>)

target_link_libraries(rpt2D_wrapper
  ${OpenCV_LIBS}
  onnxruntime_providers_tensorrt
  onnxruntime_providers_shared
  onnxruntime_providers_cuda
  onnxruntime
)

set_target_properties(rpt2D_wrapper PROPERTIES
  BUILD_WITH_INSTALL_RPATH TRUE
  INSTALL_RPATH "/onnxruntime/build/Linux/Release"
)

install(TARGETS rpt2D_wrapper
  DESTINATION lib/${PROJECT_NAME})

if(BUILD_TESTING)
  find_package(ament_lint_auto REQUIRED)
  # the following line skips the linter which checks for copyrights
  # uncomment the line when a copyright and license is not present in all source files
  #set(ament_cmake_copyright_FOUND TRUE)
  # the following line skips cpplint (only works in a git repo)
  # uncomment the line when this package is not in a git repo
  #set(ament_cmake_cpplint_FOUND TRUE)
  ament_lint_auto_find_test_dependencies()
endif()

ament_package()
24674  ros/rpt2D_wrapper_cpp/include/nlohmann/json.hpp  Normal file
File diff suppressed because it is too large
25  ros/rpt2D_wrapper_cpp/package.xml  Normal file
@@ -0,0 +1,25 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
  <name>rpt2D_wrapper_cpp</name>
  <version>0.0.0</version>
  <description>TODO: Package description</description>
  <maintainer email="root@todo.todo">root</maintainer>
  <license>TODO: License declaration</license>

  <buildtool_depend>ament_cmake</buildtool_depend>

  <depend>rclcpp</depend>
  <depend>std_msgs</depend>
  <depend>sensor_msgs</depend>

  <depend>cv_bridge</depend>
  <depend>OpenCV</depend>

  <test_depend>ament_lint_auto</test_depend>
  <test_depend>ament_lint_common</test_depend>

  <export>
    <build_type>ament_cmake</build_type>
  </export>
</package>
263  ros/rpt2D_wrapper_cpp/src/rpt2D_wrapper.cpp  Normal file
@@ -0,0 +1,263 @@
#include <array>
#include <chrono>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include <mutex>
#include <atomic>
#include <cmath>
#include <iostream>

// ROS2
#include <rclcpp/rclcpp.hpp>
#include <sensor_msgs/msg/image.hpp>
#include <std_msgs/msg/string.hpp>

// OpenCV / cv_bridge
#include <opencv2/opencv.hpp>
#include <cv_bridge/cv_bridge.h>

// JSON library
#include "nlohmann/json.hpp"
using json = nlohmann::json;

#include "test_triangulate.hpp"
#include "utils_2d_pose.hpp"

// =================================================================================================

static const std::string cam_id = "camera01";
static const std::string img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw";
static const std::string pose_out_topic = "/poses/" + cam_id;

static const float min_bbox_score = 0.4;
static const float min_bbox_area = 0.1 * 0.1;
static const bool batch_poses = false;

// =================================================================================================
// =================================================================================================

class Rpt2DWrapperNode : public rclcpp::Node
{
public:
    Rpt2DWrapperNode(const std::string &node_name)
        : Node(node_name)
    {
        this->stop_flag = false;
        this->last_input_image = cv::Mat();
        this->last_input_time = 0.0;

        // QoS
        rclcpp::QoS qos_profile(1);
        qos_profile.reliable();
        qos_profile.keep_last(1);

        // Setup subscriber
        image_sub_ = this->create_subscription<sensor_msgs::msg::Image>(
            img_input_topic, qos_profile,
            std::bind(&Rpt2DWrapperNode::callbackImages, this, std::placeholders::_1));

        // Setup publisher
        pose_pub_ = this->create_publisher<std_msgs::msg::String>(pose_out_topic, qos_profile);

        // Load model
        bool whole_body = test_triangulate::use_whole_body();
        this->kpt_model = std::make_unique<utils_2d_pose::PosePredictor>(
            whole_body, min_bbox_score, min_bbox_area, batch_poses);

        RCLCPP_INFO(this->get_logger(), "Finished initialization of pose estimator.");

        // Start background prediction thread
        model_thread = std::thread(&Rpt2DWrapperNode::callbackWrapper, this);
    }

    ~Rpt2DWrapperNode()
    {
        stop_flag = true;
        if (model_thread.joinable())
        {
            model_thread.join();
        }
    }

private:
    rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr image_sub_;
    rclcpp::Publisher<std_msgs::msg::String>::SharedPtr pose_pub_;

    // Pose model pointer
    std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model;

    // Threading
    std::thread model_thread;
    std::mutex mutex;
    std::atomic<bool> stop_flag;

    cv::Mat last_input_image;
    double last_input_time;

    void callbackImages(const sensor_msgs::msg::Image::SharedPtr msg);
    void callbackModel();

    void callbackWrapper()
    {
        using namespace std::chrono_literals;
        while (!stop_flag)
        {
            callbackModel();
            std::this_thread::sleep_for(std::chrono::microseconds(100));
        }
    }

    void publish(const json &data)
    {
        std_msgs::msg::String msg;
        msg.data = data.dump();
        pose_pub_->publish(msg);
    }
};

// =================================================================================================

void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr msg)
{
    // Load or convert image to Bayer format
    cv::Mat bayer_image;
    try
    {
        if (msg->encoding == "mono8")
        {
            cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
            bayer_image = cv_ptr->image.clone();
        }
        else if (msg->encoding == "bayer_rggb8")
        {
            cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
            bayer_image = cv_ptr->image.clone();
        }
        else if (msg->encoding == "rgb8")
        {
            cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
            cv::Mat color_image = cv_ptr->image.clone();
            bayer_image = test_triangulate::rgb2bayer(color_image);
        }
        else
        {
            throw std::runtime_error("Unknown image encoding: " + msg->encoding);
        }
    }
    catch (const std::exception &e)
    {
        RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what());
        return;
    }

    // Get timestamp
    double time_stamp = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;

    // Store in member variables with lock
    {
        std::lock_guard<std::mutex> lk(mutex);
        this->last_input_image = bayer_image;
        this->last_input_time = time_stamp;
    }
}

// =================================================================================================

void Rpt2DWrapperNode::callbackModel()
{
    auto ptime = std::chrono::high_resolution_clock::now();

    // Check if we have an image
    cv::Mat local_image;
    double local_timestamp = 0.0;

    {
        std::lock_guard<std::mutex> lk(mutex);
        if (last_input_image.empty())
        {
            return;
        }
        local_image = last_input_image.clone();
        local_timestamp = last_input_time;

        // “Consume” the last image so we don’t process it again
        last_input_image = cv::Mat();
        last_input_time = 0.0;
    }

    // Create image vector
    cv::Mat rgb_image = test_triangulate::bayer2rgb(local_image);
    std::vector<cv::Mat> images_2d;
    images_2d.push_back(rgb_image);

    // Predict 2D poses
    auto poses_2d_all = kpt_model->predict(images_2d);
    auto poses_2d_upd = test_triangulate::update_keypoints(
        poses_2d_all, test_triangulate::joint_names_2d);
    auto &poses_2d = poses_2d_upd[0];

    // Drop persons with no joints
    std::vector<std::vector<std::array<float, 3>>> valid_poses;
    for (auto &person : poses_2d)
    {
        float sum_conf = 0.0;
        for (auto &kp : person)
        {
            sum_conf += kp[2];
        }
        if (sum_conf > 0.0)
        {
            valid_poses.push_back(person);
        }
    }

    // Round poses to 3 decimal places
    for (auto &person : valid_poses)
    {
        for (auto &kp : person)
        {
            kp[0] = std::round(kp[0] * 1000.0) / 1000.0;
            kp[1] = std::round(kp[1] * 1000.0) / 1000.0;
            kp[2] = std::round(kp[2] * 1000.0) / 1000.0;
        }
    }

    // Build JSON
    auto ts_pose = std::chrono::high_resolution_clock::now();
    double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
    double z_images_pose = ts_pose_sec - local_timestamp;

    json poses_msg;
    poses_msg["bodies2D"] = valid_poses; // shape: persons x keypoints x 3
    poses_msg["joints"] = test_triangulate::joint_names_2d;
    poses_msg["num_persons"] = valid_poses.size();
    poses_msg["timestamps"] = {
        {"image", local_timestamp},
        {"pose", ts_pose_sec},
        {"z-images-pose", z_images_pose}};

    // Publish
    publish(poses_msg);

    // Print info
    double elapsed_time = std::chrono::duration<double>(
                              std::chrono::high_resolution_clock::now() - ptime)
                              .count();
    std::cout << "Detected persons: " << valid_poses.size()
              << " - Prediction time: " << elapsed_time << "s" << std::endl;
}

// =================================================================================================
// =================================================================================================

int main(int argc, char **argv)
{
    rclcpp::init(argc, argv);

    auto node = std::make_shared<Rpt2DWrapperNode>("rpt2D_wrapper");
    rclcpp::spin(node);

    rclcpp::shutdown();
    return 0;
}
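For reference, a minimal consumer of the `std_msgs/String` JSON published above (not part of this commit). The topic and the field names (`bodies2D`, `joints`, `num_persons`, `timestamps`) follow the message built in `callbackModel()`; the node name is made up.

```cpp
// pose_listener.cpp -- minimal consumer sketch, not part of this commit.
#include <iostream>
#include <memory>

#include <rclcpp/rclcpp.hpp>
#include <std_msgs/msg/string.hpp>

#include "nlohmann/json.hpp"

int main(int argc, char **argv)
{
    rclcpp::init(argc, argv);
    auto node = std::make_shared<rclcpp::Node>("pose_listener");

    auto sub = node->create_subscription<std_msgs::msg::String>(
        "/poses/camera01", 1,
        [](const std_msgs::msg::String::SharedPtr msg)
        {
            const auto data = nlohmann::json::parse(msg->data);
            // "bodies2D" is persons x keypoints x 3 (x, y, confidence)
            std::cout << "persons: " << data["num_persons"]
                      << "  joints per person: " << data["joints"].size()
                      << "  image->pose latency: " << data["timestamps"]["z-images-pose"] << "s"
                      << std::endl;
        });

    rclcpp::spin(node);
    rclcpp::shutdown();
    return 0;
}
```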
306  ros/rpt2D_wrapper_cpp/src/test_triangulate.hpp  Normal file
@@ -0,0 +1,306 @@
#pragma once

#include <algorithm>
#include <array>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

// =================================================================================================

namespace test_triangulate
{
// If any part shall be disabled, also update the joint names list
static const std::map<std::string, bool> whole_body = {
    {"foots", true},
    {"face", true},
    {"hands", true},
};

static const std::vector<std::string> joint_names_2d = {
    // coco
    "nose", "eye_left", "eye_right", "ear_left", "ear_right",
    "shoulder_left", "shoulder_right", "elbow_left", "elbow_right",
    "wrist_left", "wrist_right", "hip_left", "hip_right",
    "knee_left", "knee_right", "ankle_left", "ankle_right",
    // foot
    "foot_toe_big_left", "foot_toe_small_left", "foot_heel_left",
    "foot_toe_big_right", "foot_toe_small_right", "foot_heel_right",
    // face
    "face_jaw_right_1", "face_jaw_right_2", "face_jaw_right_3", "face_jaw_right_4",
    "face_jaw_right_5", "face_jaw_right_6", "face_jaw_right_7", "face_jaw_right_8",
    "face_jaw_middle",
    "face_jaw_left_1", "face_jaw_left_2", "face_jaw_left_3", "face_jaw_left_4",
    "face_jaw_left_5", "face_jaw_left_6", "face_jaw_left_7", "face_jaw_left_8",
    "face_eyebrow_right_1", "face_eyebrow_right_2", "face_eyebrow_right_3",
    "face_eyebrow_right_4", "face_eyebrow_right_5",
    "face_eyebrow_left_1", "face_eyebrow_left_2", "face_eyebrow_left_3",
    "face_eyebrow_left_4", "face_eyebrow_left_5",
    "face_nose_1", "face_nose_2", "face_nose_3", "face_nose_4", "face_nose_5",
    "face_nose_6", "face_nose_7", "face_nose_8", "face_nose_9",
    "face_eye_right_1", "face_eye_right_2", "face_eye_right_3",
    "face_eye_right_4", "face_eye_right_5", "face_eye_right_6",
    "face_eye_left_1", "face_eye_left_2", "face_eye_left_3",
    "face_eye_left_4", "face_eye_left_5", "face_eye_left_6",
    "face_mouth_1", "face_mouth_2", "face_mouth_3", "face_mouth_4", "face_mouth_5",
    "face_mouth_6", "face_mouth_7", "face_mouth_8", "face_mouth_9", "face_mouth_10",
    "face_mouth_11", "face_mouth_12", "face_mouth_13", "face_mouth_14", "face_mouth_15",
    "face_mouth_16", "face_mouth_17", "face_mouth_18", "face_mouth_19", "face_mouth_20",
    // hand
    "hand_wrist_left",
    "hand_finger_thumb_left_1", "hand_finger_thumb_left_2",
    "hand_finger_thumb_left_3", "hand_finger_thumb_left_4",
    "hand_finger_index_left_1", "hand_finger_index_left_2",
    "hand_finger_index_left_3", "hand_finger_index_left_4",
    "hand_finger_middle_left_1", "hand_finger_middle_left_2",
    "hand_finger_middle_left_3", "hand_finger_middle_left_4",
    "hand_finger_ring_left_1", "hand_finger_ring_left_2",
    "hand_finger_ring_left_3", "hand_finger_ring_left_4",
    "hand_finger_pinky_left_1", "hand_finger_pinky_left_2",
    "hand_finger_pinky_left_3", "hand_finger_pinky_left_4",
    "hand_wrist_right",
    "hand_finger_thumb_right_1", "hand_finger_thumb_right_2",
    "hand_finger_thumb_right_3", "hand_finger_thumb_right_4",
    "hand_finger_index_right_1", "hand_finger_index_right_2",
    "hand_finger_index_right_3", "hand_finger_index_right_4",
    "hand_finger_middle_right_1", "hand_finger_middle_right_2",
    "hand_finger_middle_right_3", "hand_finger_middle_right_4",
    "hand_finger_ring_right_1", "hand_finger_ring_right_2",
    "hand_finger_ring_right_3", "hand_finger_ring_right_4",
    "hand_finger_pinky_right_1", "hand_finger_pinky_right_2",
    "hand_finger_pinky_right_3", "hand_finger_pinky_right_4",
    // extras
    "hip_middle", "shoulder_middle", "head",
};

// =============================================================================================
// =============================================================================================

[[maybe_unused]] static bool use_whole_body()
{
    for (const auto &pair : whole_body)
    {
        if (pair.second)
        {
            return true;
        }
    }
    return false;
}

// =============================================================================================

cv::Mat bayer2rgb(const cv::Mat &bayer)
{
    cv::Mat rgb;
    cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB);
    return rgb;
}

cv::Mat rgb2bayer(const cv::Mat &img)
{
    CV_Assert(img.type() == CV_8UC3);
    cv::Mat bayer(img.rows, img.cols, CV_8UC1);

    for (int r = 0; r < img.rows; ++r)
    {
        const cv::Vec3b *imgRowPtr = img.ptr<cv::Vec3b>(r);
        uchar *bayerRowPtr = bayer.ptr<uchar>(r);

        for (int c = 0; c < img.cols; ++c)
        {
            const cv::Vec3b &pixel = imgRowPtr[c];

            if ((r % 2 == 0) && (c % 2 == 0))
            {
                // Even row, even col => R
                bayerRowPtr[c] = pixel[0];
            }
            else if ((r % 2 == 0) && (c % 2 == 1))
            {
                // Even row, odd col => G
                bayerRowPtr[c] = pixel[1];
            }
            else if ((r % 2 == 1) && (c % 2 == 0))
            {
                // Odd row, even col => G
                bayerRowPtr[c] = pixel[1];
            }
            else
            {
                // Odd row, odd col => B
                bayerRowPtr[c] = pixel[2];
            }
        }
    }

    return bayer;
}

// =============================================================================================

inline int find_index(const std::vector<std::string> &vec, const std::string &key)
{
    auto it = std::find(vec.begin(), vec.end(), key);
    if (it == vec.end())
    {
        throw std::runtime_error("Cannot find \"" + key + "\" in joint_names.");
    }
    return static_cast<int>(std::distance(vec.begin(), it));
}

std::vector<std::vector<std::vector<std::array<float, 3>>>> update_keypoints(
    const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
    const std::vector<std::string> &joint_names)
{
    std::vector<std::vector<std::vector<std::array<float, 3>>>> new_views;
    new_views.reserve(poses_2d.size());

    for (const auto &view : poses_2d)
    {
        // "view" is a list of bodies => each body is Nx3
        std::vector<std::vector<std::array<float, 3>>> new_bodies;
        new_bodies.reserve(view.size());

        for (const auto &body : view)
        {
            // 1) Copy first 17 keypoints
            std::vector<std::array<float, 3>> new_body;
            new_body.insert(new_body.end(), body.begin(), body.begin() + 17);

            // 2) Optionally append extra keypoints
            if (whole_body.at("foots"))
            {
                new_body.insert(new_body.end(), body.begin() + 17, body.begin() + 23);
            }
            if (whole_body.at("face"))
            {
                new_body.insert(new_body.end(), body.begin() + 23, body.begin() + 91);
            }
            if (whole_body.at("hands"))
            {
                new_body.insert(new_body.end(), body.begin() + 91, body.end());
            }

            // 3) Compute mid_hip
            int hlid = find_index(joint_names, "hip_left");
            int hrid = find_index(joint_names, "hip_right");
            float mid_hip_x = 0.5 * (new_body[hlid][0] + new_body[hrid][0]);
            float mid_hip_y = 0.5 * (new_body[hlid][1] + new_body[hrid][1]);
            float mid_hip_c = std::min(new_body[hlid][2], new_body[hrid][2]);
            new_body.push_back({mid_hip_x, mid_hip_y, mid_hip_c});

            // 4) Compute mid_shoulder
            int slid = find_index(joint_names, "shoulder_left");
            int srid = find_index(joint_names, "shoulder_right");
            float mid_shoulder_x = 0.5 * (new_body[slid][0] + new_body[srid][0]);
            float mid_shoulder_y = 0.5 * (new_body[slid][1] + new_body[srid][1]);
            float mid_shoulder_c = std::min(new_body[slid][2], new_body[srid][2]);
            new_body.push_back({mid_shoulder_x, mid_shoulder_y, mid_shoulder_c});

            // 5) Compute head
            int elid = find_index(joint_names, "ear_left");
            int erid = find_index(joint_names, "ear_right");
            float head_x = 0.5 * (new_body[elid][0] + new_body[erid][0]);
            float head_y = 0.5 * (new_body[elid][1] + new_body[erid][1]);
            float head_c = std::min(new_body[elid][2], new_body[erid][2]);
            new_body.push_back({head_x, head_y, head_c});

            // Add this updated body into new_bodies
            new_bodies.push_back(new_body);
        }

        // Add all updated bodies for this view
        new_views.push_back(new_bodies);
    }

    return new_views;
}
}
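A small usage sketch (not part of this commit) that makes the expected shapes explicit: with all whole-body parts enabled above, `update_keypoints()` takes views x bodies x 133 keypoints and returns 136 keypoints per body, the same length as `joint_names_2d`.

```cpp
// shapes_check.cpp -- usage sketch, not part of this commit.
#include <array>
#include <iostream>
#include <vector>

#include "test_triangulate.hpp"

int main()
{
    // One view with one dummy body of 133 whole-body keypoints (x, y, confidence)
    std::vector<std::array<float, 3>> body(133, std::array<float, 3>{0.0f, 0.0f, 0.0f});
    std::vector<std::vector<std::vector<std::array<float, 3>>>> views = {{body}};

    auto updated = test_triangulate::update_keypoints(views, test_triangulate::joint_names_2d);

    // hip_middle, shoulder_middle and head are appended -> 136 keypoints per body
    std::cout << updated[0][0].size() << " == "
              << test_triangulate::joint_names_2d.size() << std::endl;
    return 0;
}
```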
984
ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
Normal file
984
ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
Normal file
@ -0,0 +1,984 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <filesystem>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <onnxruntime_cxx_api.h>
|
||||||
|
#include <onnxruntime_c_api.h>
|
||||||
|
#include <tensorrt_provider_options.h>
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
namespace utils_2d_pose
|
||||||
|
{
|
||||||
|
|
||||||
|
class BaseModel
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit BaseModel(const std::string &model_path, int warmup_iterations);
|
||||||
|
virtual ~BaseModel() = default;
|
||||||
|
|
||||||
|
std::vector<Ort::Value> call_by_image(const cv::Mat &img);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
static Ort::Env &get_env()
|
||||||
|
{
|
||||||
|
// Static method to create or retrieve a single global Ort::Env
|
||||||
|
static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "tensorrt_model_env");
|
||||||
|
// static Ort::Env env(ORT_LOGGING_LEVEL_VERBOSE, "tensorrt_model_env");
|
||||||
|
return env;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize ONNX Runtime session with TensorRT provider
|
||||||
|
void init_onnx_runtime(const std::string &model_path);
|
||||||
|
|
||||||
|
// Generate random input data, run forward pass
|
||||||
|
void warmup(int epoch);
|
||||||
|
|
||||||
|
// Actually run the session with given input tensors
|
||||||
|
std::vector<Ort::Value> call_model(const std::vector<Ort::Value> &inputs);
|
||||||
|
|
||||||
|
// Utility to check if a string ends with a given suffix
|
||||||
|
bool ends_with(const std::string &str, const std::string &suffix)
|
||||||
|
{
|
||||||
|
if (str.size() < suffix.size())
|
||||||
|
return false;
|
||||||
|
return (str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Ort::SessionOptions session_options;
|
||||||
|
std::unique_ptr<Ort::Session> session;
|
||||||
|
Ort::AllocatorWithDefaultOptions allocator;
|
||||||
|
|
||||||
|
std::vector<std::string> input_names;
|
||||||
|
std::vector<std::vector<int64_t>> input_shapes;
|
||||||
|
std::vector<ONNXTensorElementDataType> input_types;
|
||||||
|
std::vector<std::string> output_names;
|
||||||
|
};
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
BaseModel::BaseModel(const std::string &model_path, int warmup_iterations)
|
||||||
|
{
|
||||||
|
if (!std::filesystem::exists(model_path))
|
||||||
|
{
|
||||||
|
throw std::runtime_error("File not found: " + model_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ends_with(model_path, ".onnx"))
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Only .onnx models are supported: " + model_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Loading model: " << model_path << std::endl;
|
||||||
|
init_onnx_runtime(model_path);
|
||||||
|
|
||||||
|
if (warmup_iterations > 0)
|
||||||
|
{
|
||||||
|
std::cout << "Running warmup ...\n";
|
||||||
|
warmup((int)warmup_iterations / 2);
|
||||||
|
warmup((int)warmup_iterations / 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
void BaseModel::init_onnx_runtime(const std::string &model_path)
|
||||||
|
{
|
||||||
|
// 0) Create env
|
||||||
|
Ort::Env &env = get_env();
|
||||||
|
|
||||||
|
session_options = Ort::SessionOptions();
|
||||||
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
||||||
|
|
||||||
|
// 1) Create TensorRT provider options
|
||||||
|
OrtTensorRTProviderOptionsV2 *tensorrt_options = nullptr;
|
||||||
|
Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&tensorrt_options));
|
||||||
|
|
||||||
|
// 2) Configure your desired fields
|
||||||
|
// tensorrt_options->trt_fp16_enable = 1; // enable FP16
|
||||||
|
tensorrt_options->trt_engine_cache_enable = 1;
|
||||||
|
tensorrt_options->trt_engine_cache_path = "/RapidPoseTriangulation/data/trt_cache/";
|
||||||
|
|
||||||
|
// 3) Append to session options
|
||||||
|
Ort::ThrowOnError(
|
||||||
|
Ort::GetApi().SessionOptionsAppendExecutionProvider_TensorRT_V2(
|
||||||
|
static_cast<OrtSessionOptions *>(session_options),
|
||||||
|
tensorrt_options));
|
||||||
|
|
||||||
|
// 4) Create the session
|
||||||
|
session = std::make_unique<Ort::Session>(env, model_path.c_str(), session_options);
|
||||||
|
|
||||||
|
// 5) Gather input info
|
||||||
|
size_t num_inputs = session->GetInputCount();
|
||||||
|
for (size_t i = 0; i < num_inputs; i++)
|
||||||
|
{
|
||||||
|
Ort::AllocatedStringPtr iname = session->GetInputNameAllocated(i, allocator);
|
||||||
|
input_names.push_back(iname.get());
|
||||||
|
|
||||||
|
Ort::TypeInfo type_info = session->GetInputTypeInfo(i);
|
||||||
|
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
|
||||||
|
|
||||||
|
input_shapes.push_back(tensor_info.GetShape());
|
||||||
|
input_types.push_back(tensor_info.GetElementType());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gather output info to avoid "At least one output should be requested."
|
||||||
|
size_t num_outputs = session->GetOutputCount();
|
||||||
|
for (size_t i = 0; i < num_outputs; i++)
|
||||||
|
{
|
||||||
|
Ort::AllocatedStringPtr oname = session->GetOutputNameAllocated(i, allocator);
|
||||||
|
output_names.push_back(oname.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
void BaseModel::warmup(int epoch)
|
||||||
|
{
|
||||||
|
std::cout << "Warmup: ";
|
||||||
|
for (int e = 0; e < epoch; e++)
|
||||||
|
{
|
||||||
|
// Extract the 4D shape.
|
||||||
|
auto shape = input_shapes[0];
|
||||||
|
int batch_size = static_cast<int>(shape[0]);
|
||||||
|
int height = static_cast<int>(shape[1]);
|
||||||
|
int width = static_cast<int>(shape[2]);
|
||||||
|
int channels = static_cast<int>(shape[3]);
|
||||||
|
|
||||||
|
// We'll only handle batch=1 in this example
|
||||||
|
if (batch_size != 1)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Warmup only supports batch=1 for demonstration.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a multi-channel Mat of shape (Height, Width, Channels)
|
||||||
|
cv::Mat img(height, width, CV_8UC(channels));
|
||||||
|
|
||||||
|
// For multi-channel images, randu requires matching channels in low/high
|
||||||
|
if (channels == 1)
|
||||||
|
{
|
||||||
|
cv::randu(img, 0, 256);
|
||||||
|
}
|
||||||
|
else if (channels == 3)
|
||||||
|
{
|
||||||
|
cv::randu(img, cv::Scalar(0, 0, 0), cv::Scalar(256, 256, 256));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Unsupported number of channels for warmup.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the model
|
||||||
|
auto outputs = call_by_image(img);
|
||||||
|
std::cout << "#";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
std::vector<Ort::Value> BaseModel::call_by_image(const cv::Mat &img)
|
||||||
|
{
|
||||||
|
size_t height = img.rows;
|
||||||
|
size_t width = img.cols;
|
||||||
|
size_t channels = img.channels();
|
||||||
|
|
||||||
|
// Create a vector of Ort::Value for the input tensors
|
||||||
|
std::vector<Ort::Value> input_tensors;
|
||||||
|
input_tensors.reserve(input_names.size());
|
||||||
|
|
||||||
|
// Flatten and create an ONNX tensor
|
||||||
|
size_t total_elems = static_cast<size_t>(height * width * channels);
|
||||||
|
auto mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
||||||
|
auto shape = input_shapes[0];
|
||||||
|
input_tensors.push_back(
|
||||||
|
Ort::Value::CreateTensor<uint8_t>(
|
||||||
|
mem_info,
|
||||||
|
(uint8_t *)img.data,
|
||||||
|
total_elems,
|
||||||
|
shape.data(),
|
||||||
|
shape.size()));
|
||||||
|
|
||||||
|
auto outputs = call_model(input_tensors);
|
||||||
|
return outputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
std::vector<Ort::Value> BaseModel::call_model(const std::vector<Ort::Value> &inputs)
|
||||||
|
{
|
||||||
|
if (inputs.size() != input_names.size())
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Number of input tensors does not match model's input count.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build array of input name C-strings
|
||||||
|
std::vector<const char *> input_name_ptrs(input_names.size());
|
||||||
|
for (size_t i = 0; i < input_names.size(); i++)
|
||||||
|
{
|
||||||
|
input_name_ptrs[i] = input_names[i].c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build array of output name C-strings
|
||||||
|
std::vector<const char *> output_name_ptrs(output_names.size());
|
||||||
|
for (size_t i = 0; i < output_names.size(); i++)
|
||||||
|
{
|
||||||
|
output_name_ptrs[i] = output_names[i].c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Actually run the model, requesting all known outputs
|
||||||
|
return session->Run(
|
||||||
|
Ort::RunOptions{nullptr},
|
||||||
|
input_name_ptrs.data(),
|
||||||
|
inputs.data(),
|
||||||
|
inputs.size(),
|
||||||
|
output_name_ptrs.data(),
|
||||||
|
output_name_ptrs.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
class LetterBox
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
LetterBox(const std::array<size_t, 2> &target_size, int fill_value = 0)
|
||||||
|
{
|
||||||
|
this->target_size = target_size;
|
||||||
|
this->fill_value = fill_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<std::array<int, 4>, double, std::array<size_t, 2>> calc_params(
|
||||||
|
const cv::Mat &image) const;
|
||||||
|
cv::Mat resize_image(const cv::Mat &image) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::array<size_t, 2> target_size;
|
||||||
|
int fill_value;
|
||||||
|
};
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
std::tuple<std::array<int, 4>, double, std::array<size_t, 2>> LetterBox::calc_params(
|
||||||
|
const cv::Mat &image) const
|
||||||
|
{
|
||||||
|
// Original dimensions
|
||||||
|
int img_h = image.rows;
|
||||||
|
int img_w = image.cols;
|
||||||
|
|
||||||
|
// Target dimensions
|
||||||
|
int target_h = static_cast<int>(target_size[0]);
|
||||||
|
int target_w = static_cast<int>(target_size[1]);
|
||||||
|
|
||||||
|
// Scale factor
|
||||||
|
double scale = std::min(
|
||||||
|
static_cast<double>(target_w) / static_cast<double>(img_w),
|
||||||
|
static_cast<double>(target_h) / static_cast<double>(img_h));
|
||||||
|
|
||||||
|
// Compute new width/height (round to nearest int)
|
||||||
|
int new_w = static_cast<int>(std::round(img_w * scale));
|
||||||
|
int new_h = static_cast<int>(std::round(img_h * scale));
|
||||||
|
|
||||||
|
// Calculate padding
|
||||||
|
int pad_w = target_w - new_w;
|
||||||
|
int pad_h = target_h - new_h;
|
||||||
|
|
||||||
|
int pad_left = pad_w / 2;
|
||||||
|
int pad_top = pad_h / 2;
|
||||||
|
int pad_right = pad_w - pad_left;
|
||||||
|
int pad_bottom = pad_h - pad_top;
|
||||||
|
|
||||||
|
std::array<int, 4> paddings = {pad_left, pad_right, pad_top, pad_bottom};
|
||||||
|
std::array<size_t, 2> new_size = {(size_t)new_w, (size_t)new_h};
|
||||||
|
|
||||||
|
return {paddings, scale, new_size};
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
cv::Mat LetterBox::resize_image(const cv::Mat &image) const
|
||||||
|
{
|
||||||
|
// 1) Compute letterbox params
|
||||||
|
auto [paddings, scale, new_sz] = calc_params(image);
|
||||||
|
int pad_left = paddings[0];
|
||||||
|
int pad_right = paddings[1];
|
||||||
|
int pad_top = paddings[2];
|
||||||
|
int pad_bottom = paddings[3];
|
||||||
|
|
||||||
|
// 2) Resize using nearest-neighbor interpolation
|
||||||
|
cv::Mat resized_img;
|
||||||
|
cv::Size cv_new_sz = cv::Size(new_sz[0], new_sz[1]);
|
||||||
|
cv::resize(image, resized_img, cv_new_sz, /*fx=*/0, /*fy=*/0, cv::INTER_NEAREST);
|
||||||
|
|
||||||
|
// 3) Pad if needed
|
||||||
|
if (pad_left == 0 && pad_right == 0 && pad_top == 0 && pad_bottom == 0)
|
||||||
|
{
|
||||||
|
// No padding required
|
||||||
|
return resized_img;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::Mat final_img;
|
||||||
|
cv::Scalar fill_color(fill_value, fill_value, fill_value);
|
||||||
|
|
||||||
|
cv::copyMakeBorder(
|
||||||
|
resized_img, final_img,
|
||||||
|
pad_top, pad_bottom,
|
||||||
|
pad_left, pad_right,
|
||||||
|
cv::BORDER_CONSTANT,
|
||||||
|
fill_color);
|
||||||
|
return final_img;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
class BoxCrop
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
BoxCrop(const std::array<size_t, 2> &target_size,
|
||||||
|
float padding_scale = 1.0f,
|
||||||
|
int fill_value = 0)
|
||||||
|
{
|
||||||
|
this->target_size = target_size;
|
||||||
|
this->padding_scale = padding_scale;
|
||||||
|
this->fill_value = fill_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>>
|
||||||
|
calc_params(const cv::Mat &image, const std::array<float, 5> &bbox) const;
|
||||||
|
|
||||||
|
cv::Mat crop_resize_box(const cv::Mat &image,
|
||||||
|
const std::array<float, 5> &bbox) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::array<size_t, 2> target_size;
|
||||||
|
float padding_scale;
|
||||||
|
int fill_value;
|
||||||
|
};
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>>
|
||||||
|
BoxCrop::calc_params(const cv::Mat &image, const std::array<float, 5> &bbox) const
|
||||||
|
{
|
||||||
|
// Extract some params
|
||||||
|
int img_h = image.rows;
|
||||||
|
int img_w = image.cols;
|
||||||
|
int target_h = target_size[0];
|
||||||
|
int target_w = target_size[1];
|
||||||
|
|
||||||
|
// Round the bounding box coordinates
|
||||||
|
float start_x = std::floor(bbox[0]);
|
||||||
|
float start_y = std::floor(bbox[1]);
|
||||||
|
float end_x = std::ceil(bbox[2]);
|
||||||
|
float end_y = std::ceil(bbox[3]);
|
||||||
|
|
||||||
|
// Calculate original bounding box center
|
||||||
|
float center_x = 0.5f * (start_x + end_x);
|
||||||
|
float center_y = 0.5f * (start_y + end_y);
|
||||||
|
|
||||||
|
// Scale the bounding box by the padding_scale
|
||||||
|
float bbox_w = end_x - start_x;
|
||||||
|
float bbox_h = end_y - start_y;
|
||||||
|
float scaled_w = bbox_w * padding_scale;
|
||||||
|
float scaled_h = bbox_h * padding_scale;
|
||||||
|
|
||||||
|
// Calculate the aspect ratios
|
||||||
|
float bbox_aspect = scaled_w / scaled_h;
|
||||||
|
float target_aspect = static_cast<float>(target_w) / static_cast<float>(target_h);
|
||||||
|
|
||||||
|
// Adjust the scaled bounding box to match the target aspect ratio
|
||||||
|
float adjusted_w, adjusted_h;
|
||||||
|
if (bbox_aspect > target_aspect)
|
||||||
|
{
|
||||||
|
adjusted_w = scaled_w;
|
||||||
|
adjusted_h = scaled_w / target_aspect;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
adjusted_h = scaled_h;
|
||||||
|
adjusted_w = scaled_h * target_aspect;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate scaled bounding box coordinates
|
||||||
|
float new_start_xf = center_x - 0.5f * adjusted_w;
|
||||||
|
float new_start_yf = center_y - 0.5f * adjusted_h;
|
||||||
|
float new_end_xf = center_x + 0.5f * adjusted_w;
|
||||||
|
float new_end_yf = center_y + 0.5f * adjusted_h;
|
||||||
|
|
||||||
|
// Round the box coordinates
|
||||||
|
int start_x_i = static_cast<int>(std::floor(new_start_xf));
|
||||||
|
int start_y_i = static_cast<int>(std::floor(new_start_yf));
|
||||||
|
int end_x_i = static_cast<int>(std::ceil(new_end_xf));
|
||||||
|
int end_y_i = static_cast<int>(std::ceil(new_end_yf));
|
||||||
|
|
||||||
|
// Define the new box coordinates
|
||||||
|
int new_start_x = std::max(0, start_x_i);
|
||||||
|
int new_start_y = std::max(0, start_y_i);
|
||||||
|
int new_end_x = std::min(img_w - 1, end_x_i);
|
||||||
|
int new_end_y = std::min(img_h - 1, end_y_i);
|
||||||
|
std::array<int, 4> new_box{new_start_x, new_start_y, new_end_x, new_end_y};
|
||||||
|
|
||||||
|
// Calculate resized crop size
|
||||||
|
int clipped_w = new_box[2] - new_box[0];
|
||||||
|
int clipped_h = new_box[3] - new_box[1];
|
||||||
|
float scale_w = static_cast<float>(target_w) / static_cast<float>(clipped_w);
|
||||||
|
float scale_h = static_cast<float>(target_h) / static_cast<float>(clipped_h);
|
||||||
|
float scale = std::min(scale_w, scale_h);
|
||||||
|
int new_w = static_cast<int>(std::round(clipped_w * scale));
|
||||||
|
int new_h = static_cast<int>(std::round(clipped_h * scale));
|
||||||
|
|
||||||
|
// Calculate paddings
|
||||||
|
int pad_w = target_w - new_w;
|
||||||
|
int pad_h = target_h - new_h;
|
||||||
|
int pad_left = 0;
|
||||||
|
int pad_right = 0;
|
||||||
|
int pad_top = 0;
|
||||||
|
int pad_bottom = 0;
|
||||||
|
if (pad_w > 0)
|
||||||
|
{
|
||||||
|
if (start_x_i < 0)
|
||||||
|
{
|
||||||
|
pad_left = pad_w;
|
||||||
|
pad_right = 0;
|
||||||
|
}
|
||||||
|
else if (end_x_i > img_w)
|
||||||
|
{
|
||||||
|
pad_left = 0;
|
||||||
|
pad_right = pad_w;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Can be caused by bbox rounding
|
||||||
|
pad_left = pad_w / 2;
|
||||||
|
pad_right = pad_w - pad_left;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pad_h > 0)
|
||||||
|
{
|
||||||
|
if (start_y_i < 0)
|
||||||
|
{
|
||||||
|
pad_top = pad_h;
|
||||||
|
pad_bottom = 0;
|
||||||
|
}
|
||||||
|
else if (end_y_i > img_h)
|
||||||
|
{
|
||||||
|
pad_top = 0;
|
||||||
|
pad_bottom = pad_h;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Can be caused by bbox rounding
|
||||||
|
pad_top = pad_h / 2;
|
||||||
|
pad_bottom = pad_h - pad_top;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::array<int, 4> paddings{pad_left, pad_right, pad_top, pad_bottom};
|
||||||
|
|
||||||
|
return {paddings, scale, new_box, {(size_t)new_w, (size_t)new_h}};
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
cv::Mat BoxCrop::crop_resize_box(const cv::Mat &image,
|
||||||
|
const std::array<float, 5> &bbox) const
|
||||||
|
{
|
||||||
|
auto [paddings, _, new_box, new_size] = calc_params(image, bbox);
|
||||||
|
|
||||||
|
// Extract the bounding box
|
||||||
|
int x1 = new_box[0];
|
||||||
|
int y1 = new_box[1];
|
||||||
|
int x2 = new_box[2];
|
||||||
|
int y2 = new_box[3];
|
||||||
|
cv::Rect roi(x1, y1, x2 - x1, y2 - y1);
|
||||||
|
cv::Mat cropped_img = image(roi);
|
||||||
|
|
||||||
|
// Resize the image
|
||||||
|
cv::Size new_size_cv = cv::Size(new_size[0], new_size[1]);
|
||||||
|
cv::Mat resized_img;
|
||||||
|
cv::resize(cropped_img, resized_img, new_size_cv, 0, 0, cv::INTER_NEAREST);
|
||||||
|
|
||||||
|
// Optionally pad the image
|
||||||
|
int pad_left = paddings[0];
|
||||||
|
int pad_right = paddings[1];
|
||||||
|
int pad_top = paddings[2];
|
||||||
|
int pad_bottom = paddings[3];
|
||||||
|
if (pad_left == 0 && pad_right == 0 && pad_top == 0 && pad_bottom == 0)
|
||||||
|
{
|
||||||
|
// No padding
|
||||||
|
return resized_img;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::Mat final_img;
|
||||||
|
cv::Scalar fill_color(fill_value, fill_value, fill_value);
|
||||||
|
cv::copyMakeBorder(
|
||||||
|
resized_img,
|
||||||
|
final_img,
|
||||||
|
pad_top,
|
||||||
|
pad_bottom,
|
||||||
|
pad_left,
|
||||||
|
pad_right,
|
||||||
|
cv::BORDER_CONSTANT,
|
||||||
|
fill_color);
|
||||||
|
return final_img;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
class RTMDet : public BaseModel
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
RTMDet(const std::string &model_path,
|
||||||
|
float conf_threshold,
|
||||||
|
float min_area_fraction,
|
||||||
|
int warmup = 30)
|
||||||
|
: BaseModel(model_path, warmup)
|
||||||
|
{
|
||||||
|
this->target_size = {320, 320};
|
||||||
|
this->conf_threshold = conf_threshold;
|
||||||
|
|
||||||
|
float img_area = target_size[0] * target_size[1];
|
||||||
|
this->min_area = img_area * min_area_fraction;
|
||||||
|
|
||||||
|
this->letterbox = std::make_unique<LetterBox>(target_size, 114);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::array<float, 5>> call(const cv::Mat &image);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::array<size_t, 2> target_size;
|
||||||
|
float conf_threshold;
|
||||||
|
float min_area;
|
||||||
|
std::unique_ptr<LetterBox> letterbox;
|
||||||
|
|
||||||
|
cv::Mat preprocess(const cv::Mat &image);
|
||||||
|
std::vector<std::array<float, 5>> postprocess(const std::vector<Ort::Value> &result,
|
||||||
|
const cv::Mat &image);
|
||||||
|
|
||||||
|
void clip_boxes(std::vector<std::array<float, 5>> &boxes,
|
||||||
|
const cv::Mat &image) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
std::vector<std::array<float, 5>> RTMDet::call(const cv::Mat &image)
|
||||||
|
{
|
||||||
|
auto stime = std::chrono::high_resolution_clock::now();
|
||||||
|
std::chrono::duration<float> elapsed;
|
||||||
|
|
||||||
|
cv::Mat preprocessed = preprocess(image);
|
||||||
|
|
||||||
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
||||||
|
std::cout << "Preprocess time: " << elapsed.count() << "s\n";
|
||||||
|
stime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto inputs = call_by_image(preprocessed);
|
||||||
|
|
||||||
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
||||||
|
std::cout << "Inference time: " << elapsed.count() << "s\n";
|
||||||
|
stime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto outputs = postprocess(inputs, image);
|
||||||
|
|
||||||
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
||||||
|
std::cout << "Postprocess time: " << elapsed.count() << "s\n";
|
||||||
|
|
||||||
|
return outputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================================
|
||||||
|
|
||||||
|
cv::Mat RTMDet::preprocess(const cv::Mat &image)
{
    cv::Mat resized = letterbox->resize_image(image);
    return resized;
}

// =============================================================================================

std::vector<std::array<float, 5>> RTMDet::postprocess(
    const std::vector<Ort::Value> &result,
    const cv::Mat &image)
{
    // Expected output shapes:
    // result[0] => shape [1, N, 5] => (x1,y1,x2,y2,score)
    // result[1] => shape [1, N] => classes

    // Get pointer to boxes
    const float *boxes_data = result[0].GetTensorData<float>();
    const float *classes_data = result[1].GetTensorData<float>();
    auto data_info = result[0].GetTensorTypeAndShapeInfo();
    auto shape0 = data_info.GetShape();
    if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 5)
    {
        throw std::runtime_error("parse_outputs: unexpected shape for boxes");
    }
    size_t N = (size_t)shape0[1];

    // Extract human boxes
    std::vector<std::array<float, 5>> boxes;
    boxes.reserve(N);
    for (size_t i = 0; i < N; i++)
    {
        float x1 = boxes_data[i * 5 + 0];
        float y1 = boxes_data[i * 5 + 1];
        float x2 = boxes_data[i * 5 + 2];
        float y2 = boxes_data[i * 5 + 3];
        float score = boxes_data[i * 5 + 4];
        float cls = classes_data[i];

        if (cls == 0)
        {
            boxes.push_back({x1, y1, x2, y2, score});
        }
    }

    if (boxes.empty())
    {
        return {};
    }

    // Filter by confidence + area
    for (int i = boxes.size() - 1; i >= 0; i--)
    {
        auto &b = boxes[i];
        if (b[4] < conf_threshold)
        {
            boxes.erase(boxes.begin() + i);
            continue;
        }

        float area = (b[2] - b[0]) * (b[3] - b[1]);
        if (area < min_area)
        {
            boxes.erase(boxes.begin() + i);
        }
    }

    if (boxes.empty())
    {
        return {};
    }

    // Shift by letterbox padding and scale back
    clip_boxes(boxes, image);

    return boxes;
}

// =============================================================================================

void RTMDet::clip_boxes(std::vector<std::array<float, 5>> &boxes,
                        const cv::Mat &image) const
{
    // Get paddings, scale from letterbox
    auto [paddings, scale, _] = letterbox->calc_params(image);
    int pad_left = paddings[0];
    int pad_right = paddings[1];
    int pad_top = paddings[2];
    int pad_bottom = paddings[3];

    // The effective region in the letterboxed image
    float letter_w = static_cast<float>(target_size[1] - (pad_left + pad_right));
    float letter_h = static_cast<float>(target_size[0] - (pad_top + pad_bottom));

    for (auto &b : boxes)
    {
        float &x1 = b[0];
        float &y1 = b[1];
        float &x2 = b[2];
        float &y2 = b[3];

        // Shift by left/top
        x1 -= pad_left;
        x2 -= pad_left;
        y1 -= pad_top;
        y2 -= pad_top;

        // Clamp to letterbox region
        x1 = std::max(0.f, std::min(x1, letter_w - 1.0f));
        x2 = std::max(0.f, std::min(x2, letter_w - 1.0f));
        y1 = std::max(0.f, std::min(y1, letter_h - 1.0f));
        y2 = std::max(0.f, std::min(y2, letter_h - 1.0f));

        // Scale back to original resolution
        x1 /= scale;
        x2 /= scale;
        y1 /= scale;
        y2 /= scale;
    }
}

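// Illustrative example (not from the original source): assuming a 1280x720 frame letterboxed
// into the 320x320 detector input with centered padding, calc_params would give
// scale = 320/1280 = 0.25 and pad_top = pad_bottom = (320 - 720 * 0.25) / 2 = 70.
// A box corner predicted at (x, y) = (100, 120) in model coordinates then maps back to
// (100 / 0.25, (120 - 70) / 0.25) = (400, 200) in the original frame, which is exactly the
// shift / clamp / scale sequence applied above.
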
// =============================================================================================
// =============================================================================================

class RTMPose : public BaseModel
{
public:
    RTMPose(const std::string &model_path,
            int warmup = 30)
        : BaseModel(model_path, warmup)
    {
        this->target_size = {384, 288};
        this->boxcrop = std::make_unique<BoxCrop>(target_size, 1.25f, 114);
    }

    std::vector<std::array<float, 3>> call(const cv::Mat &image,
                                           const std::vector<std::array<float, 5>> &bboxes);

private:
    std::array<size_t, 2> target_size;
    std::unique_ptr<BoxCrop> boxcrop;

    cv::Mat preprocess(const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes);
    std::vector<std::array<float, 3>> postprocess(
        const std::vector<Ort::Value> &result,
        const cv::Mat &image,
        const std::vector<std::array<float, 5>> &bboxes);

    void clip_keypoints(std::vector<std::array<float, 3>> &kp,
                        const cv::Mat &image,
                        const std::vector<std::array<float, 5>> &bboxes) const;
};

// =============================================================================================

std::vector<std::array<float, 3>> RTMPose::call(
    const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
{
    auto stime = std::chrono::high_resolution_clock::now();
    std::chrono::duration<float> elapsed;

    cv::Mat preprocessed = preprocess(image, bboxes);

    elapsed = std::chrono::high_resolution_clock::now() - stime;
    std::cout << "Preprocess time: " << elapsed.count() << "s\n";
    stime = std::chrono::high_resolution_clock::now();

    auto inputs = call_by_image(preprocessed);

    elapsed = std::chrono::high_resolution_clock::now() - stime;
    std::cout << "Inference time: " << elapsed.count() << "s\n";
    stime = std::chrono::high_resolution_clock::now();

    auto outputs = postprocess(inputs, image, bboxes);

    elapsed = std::chrono::high_resolution_clock::now() - stime;
    std::cout << "Postprocess time: " << elapsed.count() << "s\n";

    return outputs;
}

// =============================================================================================

cv::Mat RTMPose::preprocess(
    const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
{
    // Assert only one box used
    if (bboxes.size() != 1)
    {
        throw std::runtime_error("Only one box is supported!");
    }

    cv::Mat cropped = boxcrop->crop_resize_box(image, bboxes[0]);
    return cropped;
}

// =============================================================================================

std::vector<std::array<float, 3>> RTMPose::postprocess(
    const std::vector<Ort::Value> &result,
    const cv::Mat &image,
    const std::vector<std::array<float, 5>> &bboxes)
{
    // Expected output shapes:
    // result[0] => shape [1, N, 2] => (x,y)
    // result[1] => shape [1, N] => scores

    // Get pointers to keypoints and scores
    const float *kpts_data = result[0].GetTensorData<float>();
    const float *scores_data = result[1].GetTensorData<float>();
    auto data_info = result[0].GetTensorTypeAndShapeInfo();
    auto shape0 = data_info.GetShape();
    if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 2)
    {
        throw std::runtime_error("parse_outputs: unexpected shape for keypoints");
    }
    size_t N = (size_t)shape0[1];

    // Extract human keypoints
    std::vector<std::array<float, 3>> kpts;
    kpts.reserve(N);
    for (size_t i = 0; i < N; i++)
    {
        float x = kpts_data[i * 2 + 0];
        float y = kpts_data[i * 2 + 1];
        float score = scores_data[i];
        kpts.push_back({x, y, score});
    }

    if (kpts.empty())
    {
        return {};
    }

    // Shift by boxcrop padding and scale back
    clip_keypoints(kpts, image, bboxes);

    return kpts;
}

// =============================================================================================

void RTMPose::clip_keypoints(std::vector<std::array<float, 3>> &kpts,
                             const cv::Mat &image,
                             const std::vector<std::array<float, 5>> &bboxes) const
{
    // Get paddings, scale from boxcrop
    auto [paddings, scale, bbox, _] = boxcrop->calc_params(image, bboxes[0]);
    int pad_left = paddings[0];
    int pad_top = paddings[2];
    int box_left = bbox[0];
    int box_top = bbox[1];
    int img_w = image.cols;
    int img_h = image.rows;

    for (auto &kp : kpts)
    {
        float &x = kp[0];
        float &y = kp[1];

        // Shift by left/top
        x -= pad_left;
        y -= pad_top;

        x /= scale;
        y /= scale;

        x += box_left;
        y += box_top;

        // Clamp to image region
        x = std::max(0.0f, std::min(x, img_w - 1.0f));
        y = std::max(0.0f, std::min(y, img_h - 1.0f));
    }
}

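// Illustrative example (not from the original source): assuming BoxCrop::calc_params returns
// a crop with top-left corner (box_left, box_top) = (300, 150), scale = 0.5, pad_left = 10
// and pad_top = 0, a keypoint predicted at (110, 60) in crop coordinates maps back to
// ((110 - 10) / 0.5 + 300, (60 - 0) / 0.5 + 150) = (500, 270) in the original image,
// followed by clamping to the image bounds as done above.
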
// =============================================================================================
// =============================================================================================

class TopDown
{
public:
    TopDown(const std::string &det_model_path,
            const std::string &pose_model_path,
            float box_conf_threshold,
            float box_min_area,
            bool batch_poses = false,
            int warmup = 30)
    {
        this->batch_poses = batch_poses;

        this->det_model = std::make_unique<RTMDet>(
            det_model_path, box_conf_threshold, box_min_area, warmup);
        this->pose_model = std::make_unique<RTMPose>(pose_model_path, warmup);
    }

    std::vector<std::vector<std::array<float, 3>>> predict(const cv::Mat &image);

private:
    std::unique_ptr<RTMDet> det_model;
    std::unique_ptr<RTMPose> pose_model;
    bool batch_poses;
};

// =============================================================================================

std::vector<std::vector<std::array<float, 3>>> TopDown::predict(const cv::Mat &image)
{
    auto bboxes = det_model->call(image);

    std::vector<std::vector<std::array<float, 3>>> keypoints;
    for (auto &bbox : bboxes)
    {
        auto kpts = pose_model->call(image, {bbox});
        keypoints.push_back(std::move(kpts));
    }
    return keypoints;
}

// =============================================================================================
// =============================================================================================

class PosePredictor
{
public:
    PosePredictor(bool whole_body,
                  float min_bbox_score,
                  float min_bbox_area,
                  bool batch_poses = false)
    {
        std::string base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/";
        std::string model_det = base_path + "rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx";
        std::string model_pose1 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
        std::string model_pose2 = base_path + "rtmpose-l_wb_1x384x288x3_fp16_extra-steps.onnx";

        std::string model_pose = whole_body ? model_pose2 : model_pose1;
        this->num_joints = whole_body ? 133 : 17;

        std::cout << "Loading 2D" + std::string(whole_body ? "-WB" : "") + " models ..."
                  << std::endl;

        this->topdown_model = std::make_unique<TopDown>(
            model_det, model_pose, min_bbox_score, min_bbox_area, batch_poses, 30);

        std::cout << "Loaded models \n"
                  << std::endl;
    }

    std::vector<std::vector<std::vector<std::array<float, 3>>>> predict(
        const std::vector<cv::Mat> &images);

private:
    std::unique_ptr<TopDown> topdown_model;
    size_t num_joints;
};

// =============================================================================================

std::vector<std::vector<std::vector<std::array<float, 3>>>> PosePredictor::predict(
    const std::vector<cv::Mat> &images)
{
    std::vector<std::vector<std::vector<std::array<float, 3>>>> keypoints;
    for (auto &img : images)
    {
        auto kpts = topdown_model->predict(img);

        if (!kpts.empty())
        {
            keypoints.push_back(std::move(kpts));
        }
        else
        {
            // Create zero keypoints
            std::vector<std::vector<std::array<float, 3>>> zero_keypoints;
            zero_keypoints.resize(1);
            zero_keypoints[0].resize(num_joints, {0.0f, 0.0f, 0.0f});
            keypoints.push_back(std::move(zero_keypoints));
        }
    }
    return keypoints;
}
} // presumably closes namespace utils_2d_pose (see the using-declaration in tests/my_app.cpp)
ros/rpt2D_wrapper_cpp/tests/.gitignore (new file, vendored, 1 line)
@ -0,0 +1 @@
my_app

ros/rpt2D_wrapper_cpp/tests/my_app.cpp (new file, 73 lines)
@ -0,0 +1,73 @@
#include <filesystem>
#include <vector>
#include <string>
#include <memory>

#include <opencv2/opencv.hpp>

#include "../src/utils_2d_pose.hpp"

// =================================================================================================
// =================================================================================================

int main(int argc, char **argv)
{
    using namespace utils_2d_pose;

    std::string base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/";
    std::string model_path1 = base_path + "rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx";
    std::string model_path2 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";

    std::vector<std::string> img_paths = {
        "/RapidPoseTriangulation/data/h1/54138969-img_003201.jpg",
        "/RapidPoseTriangulation/data/h1/55011271-img_003201.jpg",
        "/RapidPoseTriangulation/data/h1/58860488-img_003201.jpg",
        "/RapidPoseTriangulation/data/h1/60457274-img_003201.jpg",
    };

    // RTMDet model1(model_path1, 0.3, 0.1 * 0.1, 30);
    // RTMPose model2(model_path2, 30);
    // for (size_t i = 0; i < img_paths.size(); i++)
    // {
    //     cv::Mat img = cv::imread(img_paths[i]);
    //     cv::cvtColor(img, img, cv::COLOR_BGR2RGB);

    //     auto outputs1 = model1.call(img);
    //     std::cout << "Model1 outputs: " << outputs1[0][0] << " " << outputs1[0][1] << " "
    //               << outputs1[0][2] << " " << outputs1[0][3] << " " << outputs1[0][4] << " "
    //               << std::endl;

    //     for (size_t j = 0; j < outputs1.size(); j++)
    //     {
    //         std::vector<std::array<float, 5>> bboxes = {outputs1[j]};
    //         auto outputs2 = model2.call(img, bboxes);
    //         std::cout << "Model2 outputs: " << outputs2[0][0] << " " << outputs2[0][1] << " "
    //                   << outputs2[0][2] << " " << std::endl;
    //     }
    // }

    // TopDown model3(model_path1, model_path2, 0.3, 0.1 * 0.1, 30);
    // for (size_t i = 0; i < img_paths.size(); i++)
    // {
    //     cv::Mat img = cv::imread(img_paths[i]);
    //     cv::cvtColor(img, img, cv::COLOR_BGR2RGB);

    //     auto outputs3 = model3.predict(img);
    //     std::cout << "Model3 outputs: " << outputs3[0][0][0] << " " << outputs3[0][0][1] << " "
    //               << outputs3[0][0][2] << " " << std::endl;
    // }

    PosePredictor model4(false, 0.3, 0.1 * 0.1, false);
    std::vector<cv::Mat> images;
    for (size_t i = 0; i < img_paths.size(); i++)
    {
        cv::Mat img = cv::imread(img_paths[i]);
        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
        images.push_back(img);
    }
    auto outputs4 = model4.predict(images);
    std::cout << "Model4 outputs: " << outputs4[0][0][0][0] << " " << outputs4[0][0][0][1] << " "
              << outputs4[0][0][0][2] << " " << std::endl;

    return 0;
}
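For reference, `outputs4` is nested as images × persons × joints × (x, y, score). The snippet below is a minimal, hypothetical helper (not part of this commit) showing one way to consume that structure and overlay the detected joints with OpenCV; the `draw_poses` name, the 0.3 visibility threshold, and the output file names are assumptions.

```cpp
#include <array>
#include <vector>

#include <opencv2/opencv.hpp>

// Hypothetical helper: overlay per-person keypoints onto their source images.
// `poses` follows the layout returned by PosePredictor::predict:
//   poses[image][person][joint] = {x, y, score} in original-image pixels.
void draw_poses(std::vector<cv::Mat> &images,
                const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses,
                float min_score = 0.3f) // assumed visibility threshold
{
    for (size_t i = 0; i < images.size() && i < poses.size(); i++)
    {
        for (const auto &person : poses[i])
        {
            for (const auto &kp : person)
            {
                if (kp[2] < min_score)
                    continue; // skip low-confidence joints (e.g. the all-zero fallback)
                cv::circle(images[i], cv::Point(cvRound(kp[0]), cvRound(kp[1])),
                           3, cv::Scalar(0, 255, 0), cv::FILLED);
            }
        }
        // Images were converted to RGB before inference; convert back for saving.
        cv::Mat bgr;
        cv::cvtColor(images[i], bgr, cv::COLOR_RGB2BGR);
        cv::imwrite("pose_" + std::to_string(i) + ".jpg", bgr);
    }
}
```

In the `main` above it could be called right after `model4.predict(images)` as `draw_poses(images, outputs4);`.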
@ -1,7 +1,7 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
-  <name>rpt2D_wrapper</name>
+  <name>rpt2D_wrapper_py</name>
  <version>0.0.0</version>
  <description>TODO: Package description</description>
  <maintainer email="root@todo.todo">root</maintainer>
ros/rpt2D_wrapper_py/setup.cfg (new file, 4 lines)
@ -0,0 +1,4 @@
[develop]
script_dir=$base/lib/rpt2D_wrapper_py
[install]
install_scripts=$base/lib/rpt2D_wrapper_py
@ -1,6 +1,6 @@
from setuptools import setup

-package_name = "rpt2D_wrapper"
+package_name = "rpt2D_wrapper_py"

setup(
    name=package_name,
@ -18,6 +18,6 @@ setup(
    license="TODO: License declaration",
    tests_require=["pytest"],
    entry_points={
-        "console_scripts": ["rpt2D_wrapper = rpt2D_wrapper.rpt2D_wrapper:main"],
+        "console_scripts": ["rpt2D_wrapper = rpt2D_wrapper_py.rpt2D_wrapper:main"],
    },
)