Remove ONNX and Skelda integration

This commit is contained in:
2026-03-11 22:03:42 +08:00
parent d7769794fb
commit 5bed0f0aaf
53 changed files with 6 additions and 8337 deletions
-145
View File
@@ -1,145 +0,0 @@
# Setup with Nvidia-Jetson-Orin
Initial setup and installation of _RapidPoseTriangulation_ on a _Nvidia Jetson_ device. \
Tested with a _Jetson AGX Orin Developer Kit_ module.
<br>
## Base installation
- Install newest software image: \
(<https://developer.nvidia.com/sdk-manager>)
- Use manual recovery mode setup for first installation
- Find out the _ip-address_ of the _Jetson_ for the runtime component installation with:
```bash
sudo nmap -sn $(ip route get 1 | awk '{print $(NF-2);exit}')/24
```
- Initialize system: \
(<https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit>)
- Connect via _ssh_, because using _screen_ did not work, skip _oem-config_ step
- Skip installation of _nvidia-jetpack_
- Install basic tools:
```bash
sudo apt install -y curl nano wget git
```
- Update hostname:
```bash
sudo nano /etc/hostname
sudo nano /etc/hosts
sudo reboot
```
- Enable maximum performance mode:
```bash
sudo nvpmodel -m 0
sudo jetson_clocks
```
- Test docker is working:
```bash
sudo docker run --rm hello-world
```
- Enable _docker_ without _sudo_: \
(<https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>)
- Enable GPU-access for docker building:
- Run `sudo nano /etc/docker/daemon.json` and add:
```json
{
"runtimes": {
"nvidia": {
"args": [],
"path": "nvidia-container-runtime"
}
},
"default-runtime": "nvidia"
}
```
- Restart docker: `sudo systemctl restart docker`
- Test docker is working:
```bash
docker run --rm hello-world
docker run -it --rm --runtime=nvidia --network=host dustynv/onnxruntime:1.20-r36.4.0
```
<br>
## RPT installation
- Build docker container:
```bash
docker build --progress=plain -f extras/jetson/dockerfile -t rapidposetriangulation .
./run_container.sh
```
- Build _rpt_ package inside container:
```bash
cd /RapidPoseTriangulation/
uv sync --group dev
uv run pytest tests/test_interface.py
uv build
cd /RapidPoseTriangulation/scripts/ && \
g++ -std=c++2a -fPIC -O3 -march=native -Wall -Werror -flto=auto \
-I /RapidPoseTriangulation/rpt/ \
-isystem /usr/include/opencv4/ \
-isystem /usr/local/include/onnxruntime/ \
-L /usr/local/lib/ \
test_skelda_dataset.cpp \
/RapidPoseTriangulation/rpt/*.cpp \
-o test_skelda_dataset.bin \
-Wl,--start-group \
-lonnxruntime_providers_tensorrt \
-lonnxruntime_providers_shared \
-lonnxruntime_providers_cuda \
-lonnxruntime \
-Wl,--end-group \
$(pkg-config --libs opencv4) \
-Wl,-rpath,/onnxruntime/build/Linux/Release/ \
&& cd ..
```
- Test with samples:
```bash
python3 /RapidPoseTriangulation/scripts/test_triangulate.py
```
<br>
## ROS interface
- Build docker container:
```bash
docker build --progress=plain -f extras/ros/dockerfile_2d -t rapidposetriangulation_ros2d .
```
- Run and test:
```bash
export CAMID="camera01" && docker compose -f extras/jetson/docker-compose-2d.yml up
docker exec -it jetson-test_node-1 bash
export ROS_DOMAIN_ID=18
```
File diff suppressed because it is too large Load Diff
-37
View File
@@ -1,37 +0,0 @@
services:
test_node:
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep infinity'
estimator:
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
-25
View File
@@ -1,25 +0,0 @@
FROM dustynv/onnxruntime:1.20-r36.4.0
ARG DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
WORKDIR /
RUN apt-get update && apt-get install -y --no-install-recommends feh
RUN apt-get update && apt-get install -y --no-install-recommends python3-opencv
RUN apt-get update && apt-get install -y --no-install-recommends libatlas-base-dev
# Show matplotlib images
RUN apt-get update && apt-get install -y --no-install-recommends python3-tk
# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends build-essential
RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev
RUN pip3 install --no-cache-dir uv
RUN pip3 install --no-cache-dir scipy
COPY ./skelda/ /skelda/
RUN pip3 install --no-cache-dir -e /skelda/
WORKDIR /RapidPoseTriangulation/
CMD ["/bin/bash"]
-131
View File
@@ -1,131 +0,0 @@
# Exporting MMPose models
```bash
docker build --progress=plain -f extras/mmdeploy/dockerfile -t rpt_mmdeploy .
./extras/mmdeploy/run_container.sh
```
<br>
## ONNX
```bash
cd /mmdeploy/
export withFP16="_fp16"
cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320"$withFP16".py configs/mmdet/detection/
python3 ./tools/deploy.py \
configs/mmdet/detection/detection_onnxruntime_static-320x320"$withFP16".py \
/mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x3x320x320"$withFP16".onnx
python3 ./tools/deploy.py \
configs/mmdet/detection/detection_onnxruntime_static-320x320"$withFP16".py \
/mmpose/projects/rtmpose/rtmdet/person/rtmdet_m_640-8xb32_coco-person.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-m_1x3x320x320"$withFP16".onnx
```
```bash
cd /mmdeploy/
export withFP16="_fp16"
cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py configs/mmpose/
cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py configs/mmpose/
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx3x384x288"$withFP16".onnx
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-ucoco_dw-ucoco_270e-384x288-2438fd99_20230728.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-l_wb_1x3x384x288"$withFP16".onnx
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-ucoco_dw-ucoco_270e-384x288-2438fd99_20230728.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-l_wb_Bx3x384x288"$withFP16".onnx
```
```bash
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs_pt.py
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs_tf.py
```
```bash
python3 /RapidPoseTriangulation/extras/mmdeploy/add_extra_steps.py
```
<br>
## TensorRT
Run this directly in the inference container (the TensorRT versions need to be the same)
```bash
export withFP16="_fp16"
trtexec --fp16 \
--onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.onnx \
--saveEngine=end2end.engine
mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.engine
trtexec --fp16 \
--onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx384x288x3"$withFP16"_extra-steps.onnx \
--saveEngine=end2end.engine \
--minShapes=image_input:1x384x288x3 \
--optShapes=image_input:1x384x288x3 \
--maxShapes=image_input:1x384x288x3
mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x384x288x3"$withFP16"_extra-steps.engine
```
<br>
## Benchmark
```bash
cd /mmdeploy/
export withFP16="_fp16"
python3 ./tools/profiler.py \
configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
/RapidPoseTriangulation/extras/mmdeploy/testimages/ \
--model /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx \
--shape 384x288 \
--device cuda \
--warmup 50 \
--num-iter 200
```
-233
View File
@@ -1,233 +0,0 @@
import re
import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper
# ==================================================================================================
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
det_model_path1 = base_path + "rtmdet-nano_1x3x320x320.onnx"
det_model_path2 = base_path + "rtmdet-m_1x3x320x320.onnx"
pose_model_path1 = base_path + "rtmpose-m_Bx3x384x288.onnx"
pose_model_path2 = base_path + "rtmpose-m_1x3x384x288.onnx"
pose_model_path3 = base_path + "rtmpose-l_wb_Bx3x384x288.onnx"
pose_model_path4 = base_path + "rtmpose-l_wb_1x3x384x288.onnx"
norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)
# ==================================================================================================
def add_steps_to_onnx(model_path):
# Load existing model
model = onnx.load(model_path)
graph = model.graph
mean = norm_mean.astype(np.float32)
std = norm_std.astype(np.float32)
mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32)
std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32)
use_fp16 = bool("fp16" in model_path)
if use_fp16:
mean = mean.astype(np.float16)
std = std.astype(np.float16)
# Add the initializers to the graph
mean_initializer = numpy_helper.from_array(mean, name="norm_mean")
std_initializer = numpy_helper.from_array(std, name="norm_std")
graph.initializer.extend([mean_initializer, std_initializer])
# Define layer names, assuming the first input is the image tensor
input_name = graph.input[0].name
# Cast to internal type
# This has to be the first node, because tensorrt does not support uint8 layers
cast_type = 10 if use_fp16 else 1
casted_output = "casted_output"
cast_node = helper.make_node(
"Cast",
inputs=[input_name],
outputs=[casted_output],
to=cast_type,
name="Cast_Input",
)
# Node to transpose
transpose_output = "transpose_output"
transpose_node = helper.make_node(
"Transpose",
inputs=[casted_output],
outputs=[transpose_output],
perm=[0, 3, 1, 2],
name="Transpose",
)
# Node to add mean
mean_added_output = "mean_added_output"
mean_add_node = helper.make_node(
"Add",
inputs=[transpose_output, "norm_mean"],
outputs=[mean_added_output],
name="Mean_Addition",
)
# Node to multiply by std
std_mult_output = "std_mult_output"
std_mul_node = helper.make_node(
"Mul",
inputs=[mean_added_output, "norm_std"],
outputs=[std_mult_output],
name="Std_Multiplication",
)
# Replace original input of the model with the output of normalization
for node in graph.node:
for idx, input_name_in_node in enumerate(node.input):
if input_name_in_node == input_name:
node.input[idx] = std_mult_output
# Add the new nodes to the graph
graph.node.insert(0, cast_node)
graph.node.insert(1, transpose_node)
graph.node.insert(2, mean_add_node)
graph.node.insert(3, std_mul_node)
# Transpose the input shape
input_shape = graph.input[0].type.tensor_type.shape.dim
dims = [dim.dim_value for dim in input_shape]
for i, j in enumerate([0, 3, 1, 2]):
input_shape[j].dim_value = dims[i]
# Set the batch size to a defined string
input_shape = graph.input[0].type.tensor_type.shape.dim
if input_shape[0].dim_value == 0:
input_shape[0].dim_param = "batch_size"
# Rename the input tensor
main_input_image_name = model.graph.input[0].name
for node in model.graph.node:
for idx, name in enumerate(node.input):
if name == main_input_image_name:
node.input[idx] = "image_input"
model.graph.input[0].name = "image_input"
# Set input image type to int8
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
# Cast all outputs to fp32 to avoid half precision issues in cpp code
for output in graph.output:
orig_output_name = output.name
internal_output_name = orig_output_name + "_internal"
# Rename the output tensor
for node in model.graph.node:
for idx, name in enumerate(node.output):
if name == orig_output_name:
node.output[idx] = internal_output_name
# Insert a Cast node that casts the internal output to fp32
cast_fp32_name = orig_output_name
cast_node_output = helper.make_node(
"Cast",
inputs=[internal_output_name],
outputs=[cast_fp32_name],
to=1,
name="Cast_Output_" + orig_output_name,
)
# Append the cast node to the graph
graph.node.append(cast_node_output)
# Update the output's data type info
output.type.tensor_type.elem_type = TensorProto.FLOAT
# Merge the two outputs
if "det" in model_path:
r1_output = "dets"
r2_output = "labels"
out_name = "bboxes"
out_dim = 6
if "pose" in model_path:
r1_output = "kpts"
r2_output = "scores"
out_name = "keypoints"
out_dim = 3
if "det" in model_path or "pose" in model_path:
# Node to expand
r2_expanded = r2_output + "_expanded"
unsqueeze_node = helper.make_node(
"Unsqueeze",
inputs=[r2_output],
outputs=[r2_expanded],
axes=[2],
name="Unsqueeze",
)
# Node to concatenate
r12_merged = out_name
concat_node = helper.make_node(
"Concat",
inputs=[r1_output, r2_expanded],
outputs=[r12_merged],
axis=2,
name="Merged",
)
# Define the new concatenated output
merged_output = helper.make_tensor_value_info(
r12_merged,
TensorProto.FLOAT,
[
(
graph.input[0].type.tensor_type.shape.dim[0].dim_value
if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
else None
),
(
graph.output[0].type.tensor_type.shape.dim[1].dim_value
if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
else None
),
out_dim,
],
)
# Update the graph
graph.node.append(unsqueeze_node)
graph.node.append(concat_node)
graph.output.pop()
graph.output.pop()
graph.output.append(merged_output)
path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
path = path.replace(".onnx", "_extra-steps.onnx")
onnx.save(model, path)
# ==================================================================================================
def main():
add_steps_to_onnx(det_model_path1)
add_steps_to_onnx(det_model_path2)
add_steps_to_onnx(pose_model_path1)
add_steps_to_onnx(pose_model_path2)
add_steps_to_onnx(pose_model_path3)
add_steps_to_onnx(pose_model_path4)
add_steps_to_onnx(det_model_path1.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(det_model_path2.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(pose_model_path1.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(pose_model_path2.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(pose_model_path3.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(pose_model_path4.replace(".onnx", "_fp16.onnx"))
# ==================================================================================================
if __name__ == "__main__":
main()
@@ -1,18 +0,0 @@
_base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime.py"]
onnx_config = dict(
input_shape=[320, 320],
)
codebase_config = dict(
# For later TensorRT inference, the number of output boxes needs to be as stable as possible,
# because a drop in the box count leads to a re-optimization which takes a lot of time,
# therefore reduce the maximum number of output boxes to the smallest usable value and sort out
# low confidence boxes outside the model.
post_processing=dict(
score_threshold=0.0,
confidence_threshold=0.0,
iou_threshold=0.5,
max_output_boxes_per_class=10,
),
)
@@ -1,18 +0,0 @@
_base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime-fp16.py"]
onnx_config = dict(
input_shape=[320, 320],
)
codebase_config = dict(
# For later TensorRT inference, the number of output boxes needs to be as stable as possible,
# because a drop in the box count leads to a re-optimization which takes a lot of time,
# therefore reduce the maximum number of output boxes to the smallest usable value and sort out
# low confidence boxes outside the model.
post_processing=dict(
score_threshold=0.0,
confidence_threshold=0.0,
iou_threshold=0.5,
max_output_boxes_per_class=10,
),
)
@@ -1,19 +0,0 @@
_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime.py"]
onnx_config = dict(
input_shape=[288, 384],
output_names=["kpts", "scores"],
dynamic_axes={
"input": {
0: "batch",
},
"kpts": {
0: "batch",
},
"scores": {
0: "batch",
},
},
)
codebase_config = dict(export_postprocess=True) # export get_simcc_maximum
@@ -1,19 +0,0 @@
_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"]
onnx_config = dict(
input_shape=[288, 384],
output_names=["kpts", "scores"],
dynamic_axes={
"input": {
0: "batch",
},
"kpts": {
0: "batch",
},
"scores": {
0: "batch",
},
},
)
codebase_config = dict(export_postprocess=True) # export get_simcc_maximum
@@ -1,8 +0,0 @@
_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime.py"]
onnx_config = dict(
input_shape=[288, 384],
output_names=["kpts", "scores"],
)
codebase_config = dict(export_postprocess=True) # export get_simcc_maximum
@@ -1,8 +0,0 @@
_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"]
onnx_config = dict(
input_shape=[288, 384],
output_names=["kpts", "scores"],
)
codebase_config = dict(export_postprocess=True) # export get_simcc_maximum
-41
View File
@@ -1,41 +0,0 @@
FROM openmmlab/mmdeploy:ubuntu20.04-cuda11.8-mmdeploy1.3.1
ARG DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
WORKDIR /
RUN apt-get update && apt-get install -y --no-install-recommends feh
RUN git clone https://github.com/open-mmlab/mmdeploy.git --depth=1
RUN cd mmdeploy/; python3 tools/scripts/build_ubuntu_x64_ort.py
# Install MMPose
ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1
RUN pip3 install --upgrade --no-cache-dir openmim
RUN mim install mmengine
RUN mim install "mmcv>=2,<2.2.0"
RUN mim install "mmdet>=3"
RUN mim install "mmpose>=1.1.0"
# Fix an error when importing mmpose
RUN pip3 install --upgrade --no-cache-dir "numpy<2" scipy
RUN git clone --depth=1 --branch=main https://github.com/open-mmlab/mmpose.git
RUN echo 'export PYTHONPATH=/mmdeploy/build/lib:$PYTHONPATH' >> ~/.bashrc
RUN echo 'export LD_LIBRARY_PATH=/mmdeploy/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1/lib/:$LD_LIBRARY_PATH' >> ~/.bashrc
# Show images
RUN apt-get update && apt-get install -y --no-install-recommends python3-tk
# Tool for fp16 conversion
RUN pip3 install --upgrade --no-cache-dir onnxconverter_common
# Fix an error when profiling
RUN pip3 install --upgrade --no-cache-dir "onnxruntime-gpu<1.17"
RUN pip3 install --upgrade --no-cache-dir tensorflow
RUN pip3 install --upgrade --no-cache-dir tf2onnx
WORKDIR /mmdeploy/
CMD ["/bin/bash"]
-2
View File
@@ -1,2 +0,0 @@
*
!.gitignore
-338
View File
@@ -1,338 +0,0 @@
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.ops import roi_align
# ==================================================================================================
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
det_target_size = (320, 320)
pose_target_size = (384, 288)
# ==================================================================================================
class Letterbox(nn.Module):
def __init__(self, target_size, fill_value=128):
"""Resize and pad image while keeping aspect ratio"""
super(Letterbox, self).__init__()
self.target_size = target_size
self.fill_value = fill_value
def calc_params(self, ishape):
ih, iw = ishape[1], ishape[2]
th, tw = self.target_size
scale = torch.min(tw / iw, th / ih)
nw = torch.round(iw * scale)
nh = torch.round(ih * scale)
pad_w = tw - nw
pad_h = th - nh
pad_left = pad_w // 2
pad_top = pad_h // 2
pad_right = pad_w - pad_left
pad_bottom = pad_h - pad_top
paddings = (pad_left, pad_right, pad_top, pad_bottom)
return paddings, scale, (nw, nh)
def forward(self, img):
paddings, _, (nw, nh) = self.calc_params(img.shape)
# Resize the image
img = img.to(torch.float32)
img = img.permute(0, 3, 1, 2)
img = F.interpolate(
img,
size=(nh, nw),
mode="bilinear",
align_corners=False,
)
img = img.permute(0, 2, 3, 1)
img = img.round()
# Pad the image
img = F.pad(
img.permute(0, 3, 1, 2),
pad=paddings,
mode="constant",
value=self.fill_value,
)
img = img.permute(0, 2, 3, 1)
return img
# ==================================================================================================
class BoxCrop(nn.Module):
def __init__(self, target_size):
"""Crop bounding box from image"""
super(BoxCrop, self).__init__()
self.target_size = target_size
self.padding_scale = 1.25
def calc_params(self, bbox):
start_x, start_y, end_x, end_y = bbox[0, 0], bbox[0, 1], bbox[0, 2], bbox[0, 3]
target_h, target_w = self.target_size
# Calculate original bounding box width, height and center
bbox_w = end_x - start_x
bbox_h = end_y - start_y
center_x = (start_x + end_x) / 2.0
center_y = (start_y + end_y) / 2.0
# Calculate the aspect ratios
bbox_aspect = bbox_w / bbox_h
target_aspect = target_w / target_h
# Adjust the scaled bounding box to match the target aspect ratio
if bbox_aspect > target_aspect:
adjusted_h = bbox_w / target_aspect
adjusted_w = bbox_w
else:
adjusted_w = bbox_h * target_aspect
adjusted_h = bbox_h
# Scale the bounding box by the padding_scale
scaled_bbox_w = adjusted_w * self.padding_scale
scaled_bbox_h = adjusted_h * self.padding_scale
# Calculate scaled bounding box coordinates
new_start_x = center_x - scaled_bbox_w / 2.0
new_start_y = center_y - scaled_bbox_h / 2.0
new_end_x = center_x + scaled_bbox_w / 2.0
new_end_y = center_y + scaled_bbox_h / 2.0
# Define the new box coordinates
new_box = torch.stack((new_start_x, new_start_y, new_end_x, new_end_y), dim=0)
new_box = new_box.unsqueeze(0)
scale = torch.stack(
((target_w / scaled_bbox_w), (target_h / scaled_bbox_h)), dim=0
)
return scale, new_box
def forward(self, img, bbox):
_, bbox = self.calc_params(bbox)
batch_indices = torch.zeros(bbox.shape[0], 1)
rois = torch.cat([batch_indices, bbox], dim=1)
# Resize and crop
img = img.to(torch.float32)
img = img.permute(0, 3, 1, 2)
img = roi_align(
img,
rois,
output_size=self.target_size,
spatial_scale=1.0,
sampling_ratio=0,
)
img = img.permute(0, 2, 3, 1)
img = img.round()
return img
# ==================================================================================================
class DetPreprocess(nn.Module):
def __init__(self, target_size, fill_value=114):
super(DetPreprocess, self).__init__()
self.letterbox = Letterbox(target_size, fill_value)
def forward(self, img):
# img: torch.Tensor of shape [batch, H, W, C], dtype=torch.uint8
img = self.letterbox(img)
return img
# ==================================================================================================
class DetPostprocess(nn.Module):
def __init__(self, target_size):
super(DetPostprocess, self).__init__()
self.target_size = target_size
self.letterbox = Letterbox(target_size)
def forward(self, img, boxes):
paddings, scale, _ = self.letterbox.calc_params(img.shape)
boxes = boxes.float()
boxes[:, :, 0] -= paddings[0]
boxes[:, :, 2] -= paddings[0]
boxes[:, :, 1] -= paddings[2]
boxes[:, :, 3] -= paddings[2]
zero = torch.tensor(0)
boxes = torch.max(boxes, zero)
th, tw = self.target_size
pad_w = paddings[0] + paddings[1]
pad_h = paddings[2] + paddings[3]
max_w = tw - pad_w - 1
max_h = th - pad_h - 1
b0 = boxes[:, :, 0]
b1 = boxes[:, :, 1]
b2 = boxes[:, :, 2]
b3 = boxes[:, :, 3]
b0 = torch.min(b0, max_w)
b1 = torch.min(b1, max_h)
b2 = torch.min(b2, max_w)
b3 = torch.min(b3, max_h)
boxes[:, :, 0] = b0
boxes[:, :, 1] = b1
boxes[:, :, 2] = b2
boxes[:, :, 3] = b3
boxes[:, :, 0:4] /= scale
return boxes
# ==================================================================================================
class PosePreprocess(nn.Module):
def __init__(self, target_size, fill_value=114):
super(PosePreprocess, self).__init__()
self.boxcrop = BoxCrop(target_size)
def forward(self, img, bbox):
# img: torch.Tensor of shape [1, H, W, C], dtype=torch.uint8
# bbox: torch.Tensor of shape [1, 4], dtype=torch.float32
img = self.boxcrop(img, bbox)
return img
# ==================================================================================================
class PosePostprocess(nn.Module):
def __init__(self, target_size):
super(PosePostprocess, self).__init__()
self.boxcrop = BoxCrop(target_size)
self.target_size = target_size
def forward(self, img, bbox, keypoints):
scale, bbox = self.boxcrop.calc_params(bbox)
kp = keypoints.float()
kp[:, :, 0:2] /= scale
kp[:, :, 0] += bbox[0, 0]
kp[:, :, 1] += bbox[0, 1]
zero = torch.tensor(0)
kp = torch.max(kp, zero)
max_w = img.shape[2] - 1
max_h = img.shape[1] - 1
k0 = kp[:, :, 0]
k1 = kp[:, :, 1]
k0 = torch.min(k0, max_w)
k1 = torch.min(k1, max_h)
kp[:, :, 0] = k0
kp[:, :, 1] = k1
return kp
# ==================================================================================================
def main():
img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"
image = cv2.imread(img_path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Initialize the DetPreprocess module
preprocess_model = DetPreprocess(target_size=det_target_size)
det_dummy_input_a0 = torch.from_numpy(image).unsqueeze(0)
# Export to ONNX
torch.onnx.export(
preprocess_model,
det_dummy_input_a0,
base_path + "det_preprocess.onnx",
opset_version=11,
input_names=["input_image"],
output_names=["preprocessed_image"],
dynamic_axes={
"input_image": {0: "batch_size", 1: "height", 2: "width"},
"preprocessed_image": {0: "batch_size"},
},
)
# Initialize the DetPostprocess module
postprocess_model = DetPostprocess(target_size=det_target_size)
det_dummy_input_b0 = torch.from_numpy(image).unsqueeze(0)
det_dummy_input_b1 = torch.rand(1, 10, 5)
# Export to ONNX
torch.onnx.export(
postprocess_model,
(det_dummy_input_b0, det_dummy_input_b1),
base_path + "det_postprocess.onnx",
opset_version=11,
input_names=["input_image", "boxes"],
output_names=["output_boxes"],
dynamic_axes={
"input_image": {0: "batch_size", 1: "height", 2: "width"},
"boxes": {0: "batch_size", 1: "num_boxes"},
"output_boxes": {0: "batch_size", 1: "num_boxes"},
},
)
# Initialize the PosePreprocess module
preprocess_model = PosePreprocess(target_size=pose_target_size)
det_dummy_input_c0 = torch.from_numpy(image).unsqueeze(0)
det_dummy_input_c1 = torch.tensor([[352, 339, 518, 594]]).to(torch.int32)
# Export to ONNX
torch.onnx.export(
preprocess_model,
(det_dummy_input_c0, det_dummy_input_c1),
base_path + "pose_preprocess.onnx",
opset_version=11,
input_names=["input_image", "bbox"],
output_names=["preprocessed_image"],
dynamic_axes={
"input_image": {0: "batch_size", 1: "height", 2: "width"},
"preprocessed_image": {0: "batch_size"},
},
)
# Initialize the PosePostprocess module
postprocess_model = PosePostprocess(target_size=pose_target_size)
det_dummy_input_d0 = torch.from_numpy(image).unsqueeze(0)
det_dummy_input_d1 = torch.tensor([[352, 339, 518, 594]]).to(torch.int32)
det_dummy_input_d2 = torch.rand(1, 17, 2)
# Export to ONNX
torch.onnx.export(
postprocess_model,
(det_dummy_input_d0, det_dummy_input_d1, det_dummy_input_d2),
base_path + "pose_postprocess.onnx",
opset_version=11,
input_names=["input_image", "bbox", "keypoints"],
output_names=["output_keypoints"],
dynamic_axes={
"input_image": {0: "batch_size", 1: "height", 2: "width"},
"output_keypoints": {0: "batch_size"},
},
)
# ==================================================================================================
if __name__ == "__main__":
main()
-276
View File
@@ -1,276 +0,0 @@
import cv2
import numpy as np
import tensorflow as tf
import tf2onnx
# ==================================================================================================
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
det_target_size = (320, 320)
# ==================================================================================================
class BayerToRGB(tf.keras.layers.Layer):
"""Convert Bayer image to RGB
See: https://stanford.edu/class/ee367/reading/Demosaicing_ICASSP04.pdf
See: https://github.com/cheind/pytorch-debayer/blob/master/debayer/modules.py#L231
"""
def __init__(self):
super().__init__()
self.layout = "RGGB"
self.max_val = 255.0
self.kernels = tf.constant(
np.array(
[
# G at R/B locations
[
[0, 0, -1, 0, 0],
[0, 0, 2, 0, 0],
[-1, 2, 4, 2, -1],
[0, 0, 2, 0, 0],
[0, 0, -1, 0, 0],
],
# R/B at G in R/B rows and B/R columns
[
[0, 0, 0.5, 0, 0],
[0, -1, 0, -1, 0],
[-1, 4, 5, 4, -1],
[0, -1, 0, -1, 0],
[0, 0, 0.5, 0, 0],
],
# R/B at G in B/R rows and R/B columns
[
[0, 0, 0.5, 0, 0],
[0, -1, 4, -1, 0],
[-1, 0, 5, 0, -1],
[0, -1, 4, -1, 0],
[0, 0, 0.5, 0, 0],
],
# R/B at B/R in B/R rows and B/R columns
[
[0, 0, -1.5, 0, 0],
[0, 2, 0, 2, 0],
[-1.5, 0, 6, 0, -1.5],
[0, 2, 0, 2, 0],
[0, 0, -1.5, 0, 0],
],
],
dtype=np.float32,
)
.reshape(1, 4, 5, 5)
.transpose(2, 3, 0, 1)
/ 8.0
)
self.index = tf.constant(
np.array(
# Describes the kernel indices that calculate the corresponding RGB values for
# the 2x2 layout (RGGB) sub-structure
[
# Destination R
[
[4, 1], # identity, R at G in R row B column
[2, 3], # R at G in B row R column, R at B in B row R column
],
# Destination G
[
[0, 4],
[4, 0],
],
# Destination B
[
[3, 2],
[1, 4],
],
]
).reshape(1, 3, 2, 2)
)
def call(self, img):
H, W = tf.shape(img)[1], tf.shape(img)[2]
# Pad the image
tpad = img[:, 0:2, :, :]
bpad = img[:, H - 2 : H, :, :]
ipad = tf.concat([tpad, img, bpad], axis=1)
lpad = ipad[:, :, 0:2, :]
rpad = ipad[:, :, W - 2 : W, :]
ipad = tf.concat([lpad, ipad, rpad], axis=2)
# Convolve with kernels
planes = tf.nn.conv2d(ipad, self.kernels, strides=[1, 1, 1, 1], padding="VALID")
# Concatenate identity kernel
planes = tf.concat([planes, img], axis=-1)
# Gather values
index_repeated = tf.tile(self.index, multiples=[1, 1, H // 2, W // 2])
index_repeated = tf.transpose(index_repeated, perm=[0, 2, 3, 1])
row_indices, col_indices = tf.meshgrid(tf.range(H), tf.range(W), indexing="ij")
index_tensor = tf.stack([row_indices, col_indices], axis=-1)
index_tensor = tf.expand_dims(index_tensor, axis=0)
index_tensor = tf.expand_dims(index_tensor, axis=-2)
index_tensor = tf.repeat(index_tensor, repeats=3, axis=-2)
index_repeated = tf.expand_dims(index_repeated, axis=-1)
indices = tf.concat([tf.cast(index_tensor, tf.int64), index_repeated], axis=-1)
rgb = tf.gather_nd(planes, indices, batch_dims=1)
if self.max_val == 255.0:
# Make value range valid again
rgb = tf.round(rgb)
return rgb
# ==================================================================================================
def bayer_resize(img, size):
"""Resize a Bayer image by splitting color channels"""
# Split the image into 4 channels
r = img[:, 0::2, 0::2, 0]
g1 = img[:, 0::2, 1::2, 0]
g2 = img[:, 1::2, 0::2, 0]
b = img[:, 1::2, 1::2, 0]
bsplit = tf.stack([r, g1, g2, b], axis=-1)
# Resize the image
# Make sure the target size is divisible by 2
size = (size[0] // 2, size[1] // 2)
bsized = tf.image.resize(bsplit, size=size, method="bilinear")
# Create a bayer image again
img = tf.nn.depth_to_space(bsized, block_size=2)
return img
# ==================================================================================================
class Letterbox(tf.keras.layers.Layer):
def __init__(self, target_size, fill_value=128):
"""Resize and pad image while keeping aspect ratio"""
super(Letterbox, self).__init__()
self.b2rgb = BayerToRGB()
self.target_size = target_size
self.fill_value = fill_value
def calc_params(self, ishape):
img_h, img_w = ishape[1], ishape[2]
target_h, target_w = self.target_size
scale = tf.minimum(target_w / img_w, target_h / img_h)
new_w = tf.round(tf.cast(img_w, scale.dtype) * scale)
new_h = tf.round(tf.cast(img_h, scale.dtype) * scale)
new_w = tf.cast(new_w, tf.int32)
new_h = tf.cast(new_h, tf.int32)
new_w = new_w - (new_w % 2)
new_h = new_h - (new_h % 2)
pad_w = target_w - new_w
pad_h = target_h - new_h
pad_left = tf.cast(tf.floor(tf.cast(pad_w, tf.float32) / 2.0), tf.int32)
pad_top = tf.cast(tf.floor(tf.cast(pad_h, tf.float32) / 2.0), tf.int32)
pad_right = pad_w - pad_left
pad_bottom = pad_h - pad_top
paddings = [pad_top, pad_bottom, pad_left, pad_right]
return paddings, scale, (new_w, new_h)
def call(self, img):
paddings, _, (nw, nh) = self.calc_params(tf.shape(img))
# Resize the image and convert to RGB
img = bayer_resize(img, (nh, nw))
img = self.b2rgb(img)
# Pad the image
pad_top, pad_bottom, pad_left, pad_right = paddings
img = tf.pad(
img,
paddings=[[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
mode="CONSTANT",
constant_values=self.fill_value,
)
return img
# ==================================================================================================
class DetPreprocess(tf.keras.layers.Layer):
def __init__(self, target_size, fill_value=114):
super(DetPreprocess, self).__init__()
self.letterbox = Letterbox(target_size, fill_value)
def call(self, img):
"""img: tf.Tensor of shape [batch, H, W, C], dtype=tf.uint8"""
# Cast to float32 since TensorRT does not support uint8 layers
img = tf.cast(img, tf.float32)
img = self.letterbox(img)
return img
# ==================================================================================================
def rgb2bayer(img):
bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
bayer[0::2, 0::2] = img[0::2, 0::2, 0]
bayer[0::2, 1::2] = img[0::2, 1::2, 1]
bayer[1::2, 0::2] = img[1::2, 0::2, 1]
bayer[1::2, 1::2] = img[1::2, 1::2, 2]
return bayer
# ==================================================================================================
def main():
img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"
image = cv2.imread(img_path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = rgb2bayer(image)
image = np.expand_dims(image, axis=-1)
image = np.asarray(image, dtype=np.uint8)
# Initialize the DetPreprocess module
preprocess_model = tf.keras.Sequential()
preprocess_model.add(DetPreprocess(target_size=det_target_size))
det_dummy_input_a0 = tf.convert_to_tensor(
np.expand_dims(image, axis=0), dtype=tf.uint8
)
det_dummy_output_a0 = preprocess_model(det_dummy_input_a0)
print("\n", det_dummy_output_a0.shape, "\n")
output_a0 = det_dummy_output_a0.numpy()
output_a0 = np.squeeze(output_a0, axis=0)
output_a0 = np.asarray(output_a0, dtype=np.uint8)
output_a0 = cv2.cvtColor(output_a0, cv2.COLOR_RGB2BGR)
cv2.imwrite(base_path + "det_preprocess.jpg", output_a0)
# Export to ONNX
input_signature = [tf.TensorSpec([None, None, None, 1], tf.uint8, name="x")]
_, _ = tf2onnx.convert.from_keras(
preprocess_model,
input_signature,
opset=11,
output_path=base_path + "det_preprocess.onnx",
target=["tensorrt"],
)
# ==================================================================================================
if __name__ == "__main__":
main()
-9
View File
@@ -1,9 +0,0 @@
#! /bin/bash
xhost +
docker run --privileged --rm --network host -it \
--gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
--volume "$(pwd)"/:/RapidPoseTriangulation/ \
--volume /tmp/.X11-unix:/tmp/.X11-unix \
--env DISPLAY --env QT_X11_NO_MITSHM=1 \
rpt_mmdeploy
Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

-23
View File
@@ -1,23 +0,0 @@
# Finetuning MMPose models
See: <https://mmpose.readthedocs.io/en/latest/user_guides/train_and_test.html>
<br>
```bash
docker build --progress=plain -f extras/mmpose/dockerfile -t rpt_mmpose .
./extras/mmpose/run_container.sh
```
```bash
cd /mmpose/
export CUDA_VISIBLE_DEVICES=0
python3 ./tools/train.py \
/RapidPoseTriangulation/extras/mmpose/configs/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
--amp \
--cfg-options \
load_from=https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
base_lr=0.00004
```
@@ -1,235 +0,0 @@
_base_ = ['mmpose::_base_/default_runtime.py']
val_interval=1
max_epochs = 3
# common setting
num_keypoints = 133
input_size = (288, 384)
# runtime
stage2_num_epochs = 30
base_lr = 4e-3
train_batch_size = 32
val_batch_size = 32
train_cfg = dict(max_epochs=max_epochs, val_interval=val_interval)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
clip_grad=dict(max_norm=35, norm_type=2),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='SimCCLabel',
input_size=input_size,
sigma=(6., 6.93),
simcc_split_ratio=2.0,
normalize=False,
use_dark=False)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
_scope_='mmdet',
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=1.,
widen_factor=1.,
out_indices=(4, ),
channel_attention=True,
norm_cfg=dict(type='SyncBN'),
act_cfg=dict(type='SiLU'),
init_cfg=dict(
type='Pretrained',
prefix='backbone.',
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
)),
head=dict(
type='RTMCCHead',
in_channels=1024,
out_channels=num_keypoints,
input_size=codec['input_size'],
in_featuremap_size=tuple([s // 32 for s in codec['input_size']]),
simcc_split_ratio=codec['simcc_split_ratio'],
final_layer_kernel_size=7,
gau_cfg=dict(
hidden_dims=256,
s=128,
expansion_factor=2,
dropout_rate=0.,
drop_path=0.,
act_fn='SiLU',
use_rel_bias=False,
pos_enc=False),
loss=dict(
type='KLDiscretLoss',
use_target_weight=True,
beta=10.,
label_softmax=True),
decoder=codec),
test_cfg=dict(flip_test=True, ))
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/coco/'
backend_args = dict(backend='local')
# pipelines
train_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=1.0),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform',
shift_factor=0.,
scale_factor=[0.75, 1.25],
rotate_factor=60),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=0.5),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
# data loaders
train_dataloader = dict(
batch_size=train_batch_size,
num_workers=10,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/coco_wholebody_train_v1.0.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=val_batch_size,
num_workers=10,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/coco_wholebody_val_v1.0.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
# bbox_file='data/coco/person_detection_results/'
# 'COCO_val2017_detections_AP_H_56_person.json',
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(
checkpoint=dict(
save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# evaluators
val_evaluator = dict(
type='CocoWholeBodyMetric',
ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator
-9
View File
@@ -1,9 +0,0 @@
FROM rpt_mmdeploy
RUN apt-get update && apt-get install -y --no-install-recommends nano
RUN pip3 install --upgrade --no-cache-dir "albumentations<1.4"
RUN sed -i '94i\ self.runner.val_loop.run()' /usr/local/lib/python3.8/dist-packages/mmengine/runner/loops.py
WORKDIR /mmpose/
CMD ["/bin/bash"]
-11
View File
@@ -1,11 +0,0 @@
#! /bin/bash
xhost +
docker run --privileged --rm --network host -it \
--gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
--volume "$(pwd)"/:/RapidPoseTriangulation/ \
--volume "$(pwd)"/../datasets/coco2017/annotations/:/mmpose/data/coco/annotations/ \
--volume "$(pwd)"/../datasets/coco2017/images/:/mmpose/data/coco/ \
--volume /tmp/.X11-unix:/tmp/.X11-unix \
--env DISPLAY --env QT_X11_NO_MITSHM=1 \
rpt_mmpose
+2 -4
View File
@@ -1,13 +1,12 @@
# ROS-Wrapper
# ROS Wrapper
Run pose estimator with ros topics as inputs and publish detected poses.
Run the 3D triangulator with ROS topics as inputs and publish detected poses.
<br>
- Build container:
```bash
docker build --progress=plain -t rapidposetriangulation_ros2d -f extras/ros/dockerfile_2d .
docker build --progress=plain -t rapidposetriangulation_ros3d -f extras/ros/dockerfile_3d .
```
@@ -16,7 +15,6 @@ Run pose estimator with ros topics as inputs and publish detected poses.
- Run and test:
```bash
xhost + && export CAMID="camera01" && docker compose -f extras/ros/docker-compose-2d.yml up
xhost + && docker compose -f extras/ros/docker-compose-3d.yml up
docker exec -it ros-test_node-1 bash
-71
View File
@@ -1,71 +0,0 @@
services:
test_node:
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep infinity'
estimator:
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
pose_visualizer:
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer $CAMID'
pose_viewer:
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
privileged: true
volumes:
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run image_view image_view --ros-args --remap image:=/$CAMID/img_with_pose -p autosize:=True -p window_name:=MyPoseImage'
-2
View File
@@ -7,7 +7,6 @@ services:
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
@@ -23,7 +22,6 @@ services:
privileged: true
volumes:
- ../../:/RapidPoseTriangulation/
- ../../skelda/:/skelda/
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
-67
View File
@@ -1,67 +0,0 @@
FROM rapidposetriangulation
WORKDIR /
# Install ROS2
# https://docs.ros.org/en/humble/Installation/Ubuntu-Install-Debians.html
RUN apt-get update && apt-get install -y --no-install-recommends locales
RUN locale-gen en_US en_US.UTF-8 && update-locale LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common
RUN add-apt-repository universe
RUN apt-get update && apt-get install -y --no-install-recommends curl
RUN curl -sSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.key -o /usr/share/keyrings/ros-archive-keyring.gpg
RUN echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu $(. /etc/os-release && echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/ros2.list
RUN apt-get update && apt-get install -y --no-install-recommends ros-humble-ros-base python3-argcomplete
RUN apt-get update && apt-get install -y --no-install-recommends ros-dev-tools
RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
# Fix ros package building error
RUN pip3 install --no-cache-dir "setuptools<=58.2.0"
# Create ROS2 workspace for basic packages
RUN mkdir -p /project/base/src/
RUN cd /project/base/; colcon build
RUN echo "source /project/base/install/setup.bash" >> ~/.bashrc
# Install opencv and cv_bridge
RUN apt-get update && apt-get install -y --no-install-recommends libboost-dev
RUN apt-get update && apt-get install -y --no-install-recommends libboost-python-dev
RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev
RUN cd /project/base/src/; git clone --branch humble --depth=1 https://github.com/ros-perception/vision_opencv.git
RUN /bin/bash -i -c 'cd /project/base/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Install ROS2 image viewer
RUN cd /project/base/src/; git clone --branch=humble --depth=1 https://github.com/ros-perception/image_pipeline.git
RUN cd /project/base/src/; git clone --branch=humble --depth=1 https://github.com/ros-perception/image_common.git
RUN /bin/bash -i -c 'cd /project/base/; colcon build --symlink-install --packages-select camera_calibration_parsers image_transport image_view --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Fix module not found error when displaying images
RUN apt-get update && apt-get install -y --no-install-recommends libcanberra-gtk-module libcanberra-gtk3-module
# Create ROS2 workspace for project packages
RUN mkdir -p /project/dev_ws/src/
RUN cd /project/dev_ws/; colcon build
RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
# Copy modules
COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
COPY ./scripts/ /RapidPoseTriangulation/scripts/
COPY ./extras/ros/rpt_msgs/ /RapidPoseTriangulation/extras/ros/rpt_msgs/
COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
# Link and build as ros package
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt_msgs/ /project/dev_ws/src/
RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/
RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Update ros packages -> autocompletion and check
RUN /bin/bash -i -c 'ros2 pkg list'
# Clear cache to save space, only has an effect if image is squashed
RUN apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /RapidPoseTriangulation/
CMD ["/bin/bash"]
-20
View File
@@ -1,20 +0,0 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>pose2d_visualizer</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<exec_depend>rpt_msgs</exec_depend>
<test_depend>ament_copyright</test_depend>
<test_depend>ament_flake8</test_depend>
<test_depend>ament_pep257</test_depend>
<test_depend>python3-pytest</test_depend>
<export>
<build_type>ament_python</build_type>
</export>
</package>
@@ -1,158 +0,0 @@
import sys
import threading
import time
import cv2
from matplotlib import pyplot as plt
import numpy as np
import rclpy
from cv_bridge import CvBridge
from rclpy.qos import QoSHistoryPolicy, QoSProfile, QoSReliabilityPolicy
from sensor_msgs.msg import Image
from rpt_msgs.msg import Poses
from skelda import utils_view
# ==================================================================================================
bridge = CvBridge()
node = None
publisher_img = None
img_input_topic = "/{}/pylon_ros2_camera_node/image_raw"
pose_input_topic = "/poses/{}"
img_output_topic = "/{}/img_with_pose"
last_input_image = None
lock = threading.Lock()
# ==================================================================================================
def bayer2rgb(bayer):
img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
return img
# ==================================================================================================
def callback_images(image_data):
global last_input_image, lock
# Convert into cv images from image string
if image_data.encoding == "bayer_rggb8":
bayer_image = bridge.imgmsg_to_cv2(image_data, "bayer_rggb8")
color_image = bayer2rgb(bayer_image)
elif image_data.encoding == "mono8":
gray_image = bridge.imgmsg_to_cv2(image_data, "mono8")
color_image = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2RGB)
elif image_data.encoding == "rgb8":
color_image = bridge.imgmsg_to_cv2(image_data, "rgb8")
else:
raise ValueError("Unknown image encoding:", image_data.encoding)
time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9
with lock:
last_input_image = (color_image, time_stamp)
# ==================================================================================================
def callback_poses(pose_data):
global last_input_image, lock
ptime = time.time()
if last_input_image is None:
return
# Extract pose data
joint_names = pose_data.joint_names
bodies2D = np.array(pose_data.bodies_flat).reshape(pose_data.bodies_shape).tolist()
# Collect inputs
images_2d = []
timestamps = []
with lock:
img = np.copy(last_input_image[0])
ts = last_input_image[1]
images_2d.append(img)
timestamps.append(ts)
# Visualize 2D poses
colors = plt.cm.hsv(np.linspace(0, 1, len(bodies2D), endpoint=False)).tolist()
colors = [[int(c[0] * 255), int(c[1] * 255), int(c[2] * 255)] for c in colors]
for i, body in enumerate(bodies2D):
color = list(reversed(colors[i]))
img = utils_view.draw_body_in_image(img, body, joint_names, color)
# Publish image with poses
publish(img)
msg = "Visualization time: {:.3f}s"
print(msg.format(time.time() - ptime))
# ==================================================================================================
def publish(img):
# Publish image data
msg = bridge.cv2_to_imgmsg(img, "rgb8")
publisher_img.publish(msg)
# ==================================================================================================
def main():
global node, publisher_img
# Start node
rclpy.init(args=sys.argv)
cam_id = sys.argv[1]
node = rclpy.create_node("pose2d_visualizer")
# Quality of service settings
qos_profile = QoSProfile(
reliability=QoSReliabilityPolicy.RELIABLE,
history=QoSHistoryPolicy.KEEP_LAST,
depth=1,
)
# Create subscribers
_ = node.create_subscription(
Image,
img_input_topic.format(cam_id),
callback_images,
qos_profile,
)
_ = node.create_subscription(
Poses,
pose_input_topic.format(cam_id),
callback_poses,
qos_profile,
)
# Create publishers
publisher_img = node.create_publisher(
Image,
img_output_topic.format(cam_id),
qos_profile,
)
node.get_logger().info("Finished initialization of pose visualizer")
# Run ros update thread
rclpy.spin(node)
node.destroy_node()
rclpy.shutdown()
# ==================================================================================================
if __name__ == "__main__":
main()
-4
View File
@@ -1,4 +0,0 @@
[develop]
script_dir=$base/lib/pose2d_visualizer
[install]
install_scripts=$base/lib/pose2d_visualizer
-23
View File
@@ -1,23 +0,0 @@
from setuptools import setup
package_name = "pose2d_visualizer"
setup(
name=package_name,
version="0.0.0",
packages=[package_name],
data_files=[
("share/ament_index/resource_index/packages", ["resource/" + package_name]),
("share/" + package_name, ["package.xml"]),
],
install_requires=["setuptools"],
zip_safe=True,
maintainer="root",
maintainer_email="root@todo.todo",
description="TODO: Package description",
license="TODO: License declaration",
tests_require=["pytest"],
entry_points={
"console_scripts": ["pose2d_visualizer = pose2d_visualizer.pose2d_visualizer:main"],
},
)
@@ -1,27 +0,0 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_copyright.main import main
# Remove the `skip` decorator once the source file(s) have a copyright header
@pytest.mark.skip(
reason="No copyright header has been placed in the generated source file."
)
@pytest.mark.copyright
@pytest.mark.linter
def test_copyright():
rc = main(argv=[".", "test"])
assert rc == 0, "Found errors"
@@ -1,25 +0,0 @@
# Copyright 2017 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_flake8.main import main_with_errors
@pytest.mark.flake8
@pytest.mark.linter
def test_flake8():
rc, errors = main_with_errors(argv=[])
assert rc == 0, "Found %d code style errors / warnings:\n" % len(
errors
) + "\n".join(errors)
@@ -1,23 +0,0 @@
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from ament_pep257.main import main
@pytest.mark.linter
@pytest.mark.pep257
def test_pep257():
rc = main(argv=[".", "test"])
assert rc == 0, "Found code style errors / warnings"
@@ -1,69 +0,0 @@
cmake_minimum_required(VERSION 3.5)
project(rpt2d_wrapper_cpp)
# Default to C99
if(NOT CMAKE_C_STANDARD)
set(CMAKE_C_STANDARD 99)
endif()
# Default to C++17
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()
# find dependencies
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED)
find_package(rpt_msgs REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(cv_bridge REQUIRED)
find_package(OpenCV REQUIRED)
### 3) ONNX Runtime
# for desktop
include_directories(/onnxruntime/include/
/onnxruntime/include/onnxruntime/core/session/
/onnxruntime/include/onnxruntime/core/providers/tensorrt/)
link_directories(/onnxruntime/build/Linux/Release/)
# for jetson
include_directories(/usr/local/include/onnxruntime/)
link_directories(/usr/local/lib/)
add_executable(rpt2d_wrapper src/rpt2d_wrapper.cpp)
ament_target_dependencies(rpt2d_wrapper rclcpp sensor_msgs rpt_msgs cv_bridge)
target_include_directories(rpt2d_wrapper PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
target_link_libraries(rpt2d_wrapper
${OpenCV_LIBS}
onnxruntime_providers_tensorrt
onnxruntime_providers_shared
onnxruntime_providers_cuda
onnxruntime
)
set_target_properties(rpt2d_wrapper PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH "/onnxruntime/build/Linux/Release"
)
install(TARGETS rpt2d_wrapper
DESTINATION lib/${PROJECT_NAME})
if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
# the following line skips the linter which checks for copyrights
# uncomment the line when a copyright and license is not present in all source files
#set(ament_cmake_copyright_FOUND TRUE)
# the following line skips cpplint (only works in a git repo)
# uncomment the line when this package is not in a git repo
#set(ament_cmake_cpplint_FOUND TRUE)
ament_lint_auto_find_test_dependencies()
endif()
ament_package()
-25
View File
@@ -1,25 +0,0 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>rpt2d_wrapper_cpp</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="root@todo.todo">root</maintainer>
<license>TODO: License declaration</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<depend>rclcpp</depend>
<depend>rpt_msgs</depend>
<depend>sensor_msgs</depend>
<depend>cv_bridge</depend>
<depend>OpenCV</depend>
<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>
<export>
<build_type>ament_cmake</build_type>
</export>
</package>
@@ -1,237 +0,0 @@
#include <atomic>
#include <chrono>
#include <iostream>
#include <string>
#include <vector>
// ROS2
#include <rclcpp/rclcpp.hpp>
#include <sensor_msgs/msg/image.hpp>
// OpenCV / cv_bridge
#include <cv_bridge/cv_bridge.h>
#include <opencv2/opencv.hpp>
// JSON library
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
using json = nlohmann::json;
#include "rpt_msgs/msg/poses.hpp"
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
// =================================================================================================
static const std::string img_input_topic = "/{}/pylon_ros2_camera_node/image_raw";
static const std::string pose_out_topic = "/poses/{}";
static const float min_bbox_score = 0.4;
static const float min_bbox_area = 0.1 * 0.1;
static const bool batch_poses = true;
static const std::map<std::string, bool> whole_body = {
{"foots", true},
{"face", true},
{"hands", true},
};
// =================================================================================================
// =================================================================================================
class Rpt2DWrapperNode : public rclcpp::Node
{
public:
Rpt2DWrapperNode(const std::string &cam_id)
: Node("rpt2d_wrapper_" + cam_id)
{
this->is_busy = false;
std::string img_topic = std::string(img_input_topic)
.replace(img_input_topic.find("{}"), 2, cam_id);
std::string pose_topic = std::string(pose_out_topic)
.replace(pose_out_topic.find("{}"), 2, cam_id);
// QoS
rclcpp::QoS qos_profile(1);
qos_profile.reliable();
qos_profile.keep_last(1);
// Setup subscriber
image_sub_ = this->create_subscription<sensor_msgs::msg::Image>(
img_topic, qos_profile,
std::bind(&Rpt2DWrapperNode::callback_images, this, std::placeholders::_1));
// Setup publisher
pose_pub_ = this->create_publisher<rpt_msgs::msg::Poses>(pose_topic, qos_profile);
// Load model
bool use_wb = utils_pipeline::use_whole_body(whole_body);
this->kpt_model = std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses);
RCLCPP_INFO(this->get_logger(), "Finished initialization of pose estimator.");
}
private:
rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr image_sub_;
rclcpp::Publisher<rpt_msgs::msg::Poses>::SharedPtr pose_pub_;
std::atomic<bool> is_busy;
// Pose model pointer
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model;
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
void callback_images(const sensor_msgs::msg::Image::SharedPtr msg);
std::vector<std::vector<std::array<float, 3>>> call_model(const cv::Mat &image);
};
// =================================================================================================
void Rpt2DWrapperNode::callback_images(const sensor_msgs::msg::Image::SharedPtr msg)
{
if (this->is_busy)
{
RCLCPP_WARN(this->get_logger(), "Skipping frame, still processing...");
return;
}
this->is_busy = true;
auto ts_image = std::chrono::high_resolution_clock::now();
// Load or convert image to Bayer format
cv::Mat bayer_image;
try
{
if (msg->encoding == "mono8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "bayer_rggb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "rgb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
cv::Mat color_image = cv_ptr->image;
bayer_image = utils_pipeline::rgb2bayer(color_image);
}
else
{
throw std::runtime_error("Unknown image encoding: " + msg->encoding);
}
}
catch (const std::exception &e)
{
RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what());
return;
}
// Call model
const auto &valid_poses = this->call_model(bayer_image);
// Calculate timings
double time_stamp = msg->header.stamp.sec + msg->header.stamp.nanosec / 1.0e9;
auto ts_image_sec = std::chrono::duration<double>(ts_image.time_since_epoch()).count();
auto ts_pose = std::chrono::high_resolution_clock::now();
double ts_pose_sec = std::chrono::duration<double>(ts_pose.time_since_epoch()).count();
double z_trigger_image = ts_image_sec - time_stamp;
double z_trigger_pose = ts_pose_sec - time_stamp;
double z_image_pose = ts_pose_sec - ts_image_sec;
json jdata;
jdata["timestamps"] = {
{"trigger", time_stamp},
{"image", ts_image_sec},
{"pose2d", ts_pose_sec},
{"z-trigger-image", z_trigger_image},
{"z-image-pose2d", z_image_pose},
{"z-trigger-pose2d", z_trigger_pose}};
// Publish message
auto pose_msg = rpt_msgs::msg::Poses();
pose_msg.header = msg->header;
std::vector<int32_t> bshape = {(int)valid_poses.size(), (int)joint_names_2d.size(), 3};
pose_msg.bodies_shape = bshape;
pose_msg.bodies_flat.reserve(bshape[0] * bshape[1] * bshape[2]);
for (int32_t i = 0; i < bshape[0]; i++)
{
for (int32_t j = 0; j < bshape[1]; j++)
{
for (int32_t k = 0; k < bshape[2]; k++)
{
pose_msg.bodies_flat.push_back(valid_poses[i][j][k]);
}
}
}
pose_msg.joint_names = joint_names_2d;
pose_msg.extra_data = jdata.dump();
pose_pub_->publish(pose_msg);
// Print info
double elapsed_time = std::chrono::duration<double>(
std::chrono::high_resolution_clock::now() - ts_image)
.count();
std::cout << "Detected persons: " << valid_poses.size()
<< " - Prediction time: " << elapsed_time << "s" << std::endl;
this->is_busy = false;
}
// =================================================================================================
std::vector<std::vector<std::array<float, 3>>> Rpt2DWrapperNode::call_model(const cv::Mat &image)
{
// Create image vector
cv::Mat rgb_image = utils_pipeline::bayer2rgb(image);
std::vector<cv::Mat> images_2d = {rgb_image};
// Predict 2D poses
auto poses_2d_all = kpt_model->predict(images_2d);
auto poses_2d_upd = utils_pipeline::update_keypoints(
poses_2d_all, joint_names_2d, whole_body);
auto &poses_2d = poses_2d_upd[0];
// Drop persons with no joints
std::vector<std::vector<std::array<float, 3>>> valid_poses;
for (auto &person : poses_2d)
{
float sum_conf = 0.0;
for (auto &kp : person)
{
sum_conf += kp[2];
}
if (sum_conf > 0.0)
{
valid_poses.push_back(person);
}
}
// Round poses to 3 decimal places
for (auto &person : valid_poses)
{
for (auto &kp : person)
{
kp[0] = std::round(kp[0] * 1000.0) / 1000.0;
kp[1] = std::round(kp[1] * 1000.0) / 1000.0;
kp[2] = std::round(kp[2] * 1000.0) / 1000.0;
}
}
return valid_poses;
}
// =================================================================================================
// =================================================================================================
int main(int argc, char **argv)
{
rclcpp::init(argc, argv);
const std::string cam_id = std::getenv("CAMID");
auto node = std::make_shared<Rpt2DWrapperNode>(cam_id);
rclcpp::spin(node);
rclcpp::shutdown();
return 0;
}