Merge branch 'mopts' into 'master'

More optimizations

See merge request Percipiote/RapidPoseTriangulation!8
DANBER
2025-03-25 17:34:25 +00:00
18 changed files with 5473 additions and 4330 deletions

@@ -129,7 +129,7 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.
- Build docker container:
```bash
docker build --progress=plain -f extras/ros/dockerfile -t rapidposetriangulation_ros .
docker build --progress=plain -f extras/ros/dockerfile_2d -t rapidposetriangulation_ros2d .
```
- Run and test:

@@ -1,10 +1,7 @@
version: "2.3"
# runtime: nvidia needs version 2 else change standard runtime at host pc
services:
test_node:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
@@ -15,13 +12,14 @@ services:
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep infinity'
estimator:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia

@@ -61,7 +61,7 @@ mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/expor
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-ucoco_dw-ucoco_270e-384x288-2438fd99_20230728.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
@@ -70,7 +70,7 @@ mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/expor
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-ucoco_dw-ucoco_270e-384x288-2438fd99_20230728.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show

extras/mmpose/README.md (new file)

@@ -0,0 +1,23 @@
# Finetuning MMPose models
See: <https://mmpose.readthedocs.io/en/latest/user_guides/train_and_test.html>
<br>
```bash
docker build --progress=plain -f extras/mmpose/dockerfile -t rpt_mmpose .
./extras/mmpose/run_container.sh
```
```bash
cd /mmpose/
export CUDA_VISIBLE_DEVICES=0
python3 ./tools/train.py \
/RapidPoseTriangulation/extras/mmpose/configs/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
--amp \
--cfg-options \
load_from=https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
base_lr=0.00004
```
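As a sanity check after finetuning (not part of this change), the resulting weights could be evaluated on the COCO-WholeBody val split with MMPose's `tools/test.py`. The checkpoint path below is an assumption: mmengine writes checkpoints to `work_dirs/<config-name>/` by default, and with `save_best='coco-wholebody/AP'` the best checkpoint gets a `best_*.pth` style name.
```bash
cd /mmpose/
# Evaluate the finetuned checkpoint; adjust the filename to whatever was
# actually written to work_dirs/ during training.
python3 ./tools/test.py \
    /RapidPoseTriangulation/extras/mmpose/configs/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
    work_dirs/rtmpose-l_8xb32-270e_coco-wholebody-384x288/best_coco-wholebody_AP_epoch_3.pth
```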

@@ -0,0 +1,235 @@
_base_ = ['mmpose::_base_/default_runtime.py']
val_interval=1
max_epochs = 3
# common setting
num_keypoints = 133
input_size = (288, 384)
# runtime
stage2_num_epochs = 30
base_lr = 4e-3
train_batch_size = 32
val_batch_size = 32
train_cfg = dict(max_epochs=max_epochs, val_interval=val_interval)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
clip_grad=dict(max_norm=35, norm_type=2),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='SimCCLabel',
input_size=input_size,
sigma=(6., 6.93),
simcc_split_ratio=2.0,
normalize=False,
use_dark=False)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
_scope_='mmdet',
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=1.,
widen_factor=1.,
out_indices=(4, ),
channel_attention=True,
norm_cfg=dict(type='SyncBN'),
act_cfg=dict(type='SiLU'),
init_cfg=dict(
type='Pretrained',
prefix='backbone.',
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
)),
head=dict(
type='RTMCCHead',
in_channels=1024,
out_channels=num_keypoints,
input_size=codec['input_size'],
in_featuremap_size=tuple([s // 32 for s in codec['input_size']]),
simcc_split_ratio=codec['simcc_split_ratio'],
final_layer_kernel_size=7,
gau_cfg=dict(
hidden_dims=256,
s=128,
expansion_factor=2,
dropout_rate=0.,
drop_path=0.,
act_fn='SiLU',
use_rel_bias=False,
pos_enc=False),
loss=dict(
type='KLDiscretLoss',
use_target_weight=True,
beta=10.,
label_softmax=True),
decoder=codec),
test_cfg=dict(flip_test=True, ))
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/coco/'
backend_args = dict(backend='local')
# pipelines
train_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=1.0),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform',
shift_factor=0.,
scale_factor=[0.75, 1.25],
rotate_factor=60),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=0.5),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
# data loaders
train_dataloader = dict(
batch_size=train_batch_size,
num_workers=10,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/coco_wholebody_train_v1.0.json',
data_prefix=dict(img='train2017/'),
pipeline=train_pipeline,
))
val_dataloader = dict(
batch_size=val_batch_size,
num_workers=10,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/coco_wholebody_val_v1.0.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
# bbox_file='data/coco/person_detection_results/'
# 'COCO_val2017_detections_AP_H_56_person.json',
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(
checkpoint=dict(
save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# evaluators
val_evaluator = dict(
type='CocoWholeBodyMetric',
ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator

extras/mmpose/dockerfile (new file)

@@ -0,0 +1,9 @@
FROM rpt_mmdeploy
RUN apt-get update && apt-get install -y --no-install-recommends nano
RUN pip3 install --upgrade --no-cache-dir "albumentations<1.4"
RUN sed -i '94i\ self.runner.val_loop.run()' /usr/local/lib/python3.8/dist-packages/mmengine/runner/loops.py
WORKDIR /mmpose/
CMD ["/bin/bash"]

extras/mmpose/run_container.sh (new executable file)

@@ -0,0 +1,11 @@
#! /bin/bash
xhost +
docker run --privileged --rm --network host -it \
--gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
--volume "$(pwd)"/:/RapidPoseTriangulation/ \
--volume "$(pwd)"/../datasets/coco2017/annotations/:/mmpose/data/coco/annotations/ \
--volume "$(pwd)"/../datasets/coco2017/images/:/mmpose/data/coco/ \
--volume /tmp/.X11-unix:/tmp/.X11-unix \
--env DISPLAY --env QT_X11_NO_MITSHM=1 \
rpt_mmpose
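The two dataset mounts above have to line up with the `data/coco/` paths used in the training config. A quick way to check the assumed host layout (where exactly the COCO-WholeBody annotation files live is an assumption here) is:
```bash
# Expected layout one level above the repository checkout (sketch):
ls ../datasets/coco2017/annotations/   # coco_wholebody_train_v1.0.json, coco_wholebody_val_v1.0.json
ls ../datasets/coco2017/images/        # train2017/  val2017/  (mounted as /mmpose/data/coco/)
```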

@@ -7,10 +7,11 @@ Run pose estimator with ros topics as inputs and publish detected poses.
- Build container:
```bash
docker build --progress=plain -t rapidposetriangulation_ros -f extras/ros/dockerfile .
docker build --progress=plain -t rapidposetriangulation_ros2d -f extras/ros/dockerfile_2d .
docker build --progress=plain -t rapidposetriangulation_ros3d -f extras/ros/dockerfile_3d .
```
- Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment
- Update or remove the `ROS_DOMAIN_ID` in the `docker-compose.yml` files to fit your environment
- Run and test:
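The `ROS_DOMAIN_ID` mentioned above is hard-coded inside the service `command:` lines of the compose files (it appears as `export ROS_DOMAIN_ID=18` further down in this diff). A quick way to find every occurrence before editing — the `extras/ros/` location and naming of the compose files is an assumption here:
```bash
# List every hard-coded domain id, then edit or remove the export so it fits
# your network (18 is the value shipped in this change).
grep -n "ROS_DOMAIN_ID" extras/ros/docker-compose*.yml
```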

@@ -1,10 +1,7 @@
version: "2.3"
# runtime: nvidia needs version 2 else change standard runtime at host pc
services:
test_node:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
@@ -15,13 +12,14 @@ services:
- /tmp/.X11-unix:/tmp/.X11-unix
- /dev/shm:/dev/shm
environment:
- CAMID
- DISPLAY
- QT_X11_NO_MITSHM=1
- "PYTHONUNBUFFERED=1"
command: /bin/bash -i -c 'sleep infinity'
estimator:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
@@ -39,7 +37,7 @@ services:
command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
pose_visualizer:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia
@@ -57,7 +55,7 @@ services:
command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer $CAMID'
pose_viewer:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros2d
network_mode: "host"
ipc: "host"
runtime: nvidia

@@ -1,10 +1,7 @@
version: "2.3"
# runtime: nvidia needs version 2 else change standard runtime at host pc
services:
test_node:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros3d
network_mode: "host"
ipc: "host"
runtime: nvidia
@@ -21,7 +18,7 @@ services:
command: /bin/bash -i -c 'sleep infinity'
triangulator:
image: rapidposetriangulation_ros
image: rapidposetriangulation_ros3d
network_mode: "host"
ipc: "host"
runtime: nvidia

@@ -45,17 +45,14 @@ RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
# Copy modules
COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
COPY ./scripts/ /RapidPoseTriangulation/scripts/
COPY ./rpt/ /RapidPoseTriangulation/rpt/
COPY ./extras/ros/rpt_msgs/ /RapidPoseTriangulation/extras/ros/rpt_msgs/
COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
COPY ./extras/ros/rpt3d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/
# Link and build as ros package
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt_msgs/ /project/dev_ws/src/
RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/ /project/dev_ws/src/
RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Update ros packages -> autocompletion and check

extras/ros/dockerfile_3d (new file)

@@ -0,0 +1,44 @@
FROM ros:humble-ros-base-jammy
ARG DEBIAN_FRONTEND=noninteractive
# Set the working directory to /project
WORKDIR /project/
# Update and install basic tools
RUN apt-get update && apt-get upgrade -y
RUN apt-get update && apt-get install -y --no-install-recommends git nano wget
RUN apt-get update && apt-get install -y --no-install-recommends python3-pip
RUN pip3 install --upgrade pip
# Fix ros package building error
RUN pip3 install --no-cache-dir "setuptools<=58.2.0"
# Add ROS2 sourcing by default
RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
# Create ROS2 workspace for project packages
RUN mkdir -p /project/dev_ws/src/
RUN cd /project/dev_ws/; colcon build
RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
# Copy modules
COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
COPY ./rpt/ /RapidPoseTriangulation/rpt/
COPY ./extras/ros/rpt_msgs/ /RapidPoseTriangulation/extras/ros/rpt_msgs/
COPY ./extras/ros/rpt3d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/
# Link and build as ros package
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt_msgs/ /project/dev_ws/src/
RUN ln -s /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/ /project/dev_ws/src/
RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
# Update ros packages -> autocompletion and check
RUN /bin/bash -i -c 'ros2 pkg list'
# Clear cache to save space, only has an effect if image is squashed
RUN apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /RapidPoseTriangulation/
CMD ["/bin/bash"]

@@ -36,7 +36,7 @@ static const std::string pose_in_topic = "/poses/{}";
static const std::string cam_info_topic = "/{}/calibration";
static const std::string pose_out_topic = "/poses/humans3d";
static const float min_match_score = 0.95;
static const float min_match_score = 0.92;
static const size_t min_group_size = 1;
static const std::array<std::array<float, 3>, 2> roomparams = {{

File diff suppressed because it is too large.

@@ -525,6 +525,7 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
}
// Convert inputs to internal formats
this->num_cams = cameras.size();
std::vector<std::vector<std::vector<std::array<float, 3>>>> i_poses_2d = poses_2d;
std::vector<CameraInternal> internal_cameras;
for (size_t i = 0; i < cameras.size(); ++i)
@@ -781,10 +782,6 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
all_scored_poses[i] = std::move(std::make_pair(pose3d, score));
}
elapsed = std::chrono::steady_clock::now() - stime;
pair_scoring_time += elapsed.count();
stime = std::chrono::steady_clock::now();
// Drop low scoring poses
size_t num_poses = all_scored_poses.size();
for (size_t i = num_poses; i > 0; --i)
@@ -796,6 +793,10 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
}
}
elapsed = std::chrono::steady_clock::now() - stime;
pair_scoring_time += elapsed.count();
stime = std::chrono::steady_clock::now();
// Group pairs that share a person
std::vector<std::tuple<
std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
@@ -828,6 +829,27 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
auto [pose3d, score] = triangulate_and_score(
pose1, pose2, cam1, cam2, roomparams, {});
// Scale the triangulation scores with the 2D confidences.
// The 2D confidences can improve the merge weighting, but the earlier pair-filtering step
// works better when only per-view consistency is used, so they are only brought in here.
for (size_t j = 0; j < pose3d.size(); ++j)
{
float score1 = pose1[j][2];
float score2 = pose2[j][2];
float min_score = 0.1;
if (score1 > min_score && score2 > min_score)
{
float scoreP = (score1 + score2) * 0.5;
float scoreT = pose3d[j][3];
// Since the triangulation score is less sensitive and generally higher,
// weight it more strongly to balance the two scores.
pose3d[j][3] = 0.9 * scoreT + 0.1 * scoreP;
}
}
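// Illustrative numbers (not taken from real data): with scoreT = 0.8 and scoreP = 0.5
// the blended score is 0.9 * 0.8 + 0.1 * 0.5 = 0.77, so the 2D confidence only nudges
// the triangulation score rather than dominating it.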
all_full_poses[i] = std::move(pose3d);
}
@@ -862,6 +884,7 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
add_extra_joints(final_poses_3d, joint_names);
filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx);
add_missing_joints(final_poses_3d, joint_names);
replace_far_joints(final_poses_3d, joint_names);
// Store final result for use in the next frame.
last_poses_3d = final_poses_3d;
@@ -1429,8 +1452,8 @@ std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triang
{
if (mask[i])
{
float score = 0.5 * (score1[i] + score2[i]);
pose3d[i][3] = score;
float scoreT = 0.5 * (score1[i] + score2[i]);
pose3d[i][3] = scoreT;
}
}
@@ -1787,7 +1810,8 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
// Merge poses to create initial pose
// Use only those triangulations with a high score
std::vector<std::array<float, 4>> sum_poses(num_joints, {0.0, 0.0, 0.0, 0.0});
std::vector<int> sum_mask1(num_joints, 0);
std::vector<float> sum_mask1(num_joints, 0);
float offset1 = (1.0 - min_score) / 2.0;
for (size_t i = 0; i < num_poses; ++i)
{
const auto &pose = poses_3d[i];
@@ -1795,13 +1819,14 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
for (size_t j = 0; j < num_joints; ++j)
{
float score = pose[j][3];
if (score > min_score)
if (score > min_score - offset1)
{
sum_poses[j][0] += pose[j][0];
sum_poses[j][1] += pose[j][1];
sum_poses[j][2] += pose[j][2];
sum_poses[j][3] += score;
sum_mask1[j]++;
float weight = std::pow(score, 2);
sum_poses[j][0] += pose[j][0] * weight;
sum_poses[j][1] += pose[j][1] * weight;
sum_poses[j][2] += pose[j][2] * weight;
sum_poses[j][3] += score * weight;
sum_mask1[j] += weight;
}
}
}
@@ -1825,7 +1850,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
for (size_t j = 0; j < num_joints; ++j)
{
float score = initial_pose_3d[j][3];
if (score > min_score)
if (score > min_score - offset1)
{
jmask[j] = true;
center[0] += initial_pose_3d[j][0];
@@ -1853,7 +1878,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
}
// Drop joints with low scores and filter outlying joints using distance threshold
float offset = 0.1;
float offset2 = (1.0 - min_score) * 2.0;
float max_dist_sq = 1.2 * 1.2;
std::vector<std::vector<bool>> mask(num_poses, std::vector<bool>(num_joints, false));
std::vector<std::vector<float>> distances(num_poses, std::vector<float>(num_joints, 0.0));
@@ -1869,7 +1894,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
distances[i][j] = dist_sq;
float score = pose[j][3];
if (dist_sq <= max_dist_sq && score > (min_score - offset))
if (dist_sq <= max_dist_sq && score > (min_score - offset2))
{
mask[i][j] = true;
}
@@ -1877,7 +1902,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
}
// Select the best-k proposals for each joint that are closest to the initial pose
int keep_best = std::max(3, (int)num_poses / 3);
int keep_best = std::max((int)std::ceil(this->num_cams / 2.0), (int)std::ceil(num_poses / 3.0));
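// Illustrative numbers: with num_cams = 4 and num_poses = 6 this keeps
// max(ceil(4 / 2), ceil(6 / 3)) = 2 proposals per joint, instead of the fixed
// minimum of 3 used by the previous version.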
std::vector<std::vector<bool>> best_k_mask(num_poses, std::vector<bool>(num_joints, false));
for (size_t j = 0; j < num_joints; ++j)
{
@@ -1915,7 +1940,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
// Compute the final pose
std::vector<std::array<float, 4>> sum_poses2(num_joints, {0.0, 0.0, 0.0, 0.0});
std::vector<int> sum_mask2(num_joints, 0);
std::vector<float> sum_mask2(num_joints, 0);
for (size_t i = 0; i < num_poses; ++i)
{
const auto &pose = poses_3d[i];
@@ -1924,11 +1949,13 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
{
if (mask[i][j])
{
sum_poses2[j][0] += pose[j][0];
sum_poses2[j][1] += pose[j][1];
sum_poses2[j][2] += pose[j][2];
sum_poses2[j][3] += pose[j][3];
sum_mask2[j]++;
// Use a power weighting (score^4) to give more importance to higher scores
float weight = std::pow(pose[j][3], 4);
sum_poses2[j][0] += pose[j][0] * weight;
sum_poses2[j][1] += pose[j][1] * weight;
sum_poses2[j][2] += pose[j][2] * weight;
sum_poses2[j][3] += pose[j][3] * weight;
sum_mask2[j] += weight;
}
}
}
@@ -1993,15 +2020,17 @@ void TriangulatorInternal::filter_poses(
for (size_t i = 0; i < poses.size(); ++i)
{
auto &pose = poses[i];
size_t num_joints = pose.size();
size_t num_core_joints = core_joint_idx.size();
size_t num_full_joints = pose.size();
// Collect valid joint indices
std::vector<size_t> valid_joint_idx;
for (size_t j = 0; j < num_joints; ++j)
for (size_t j = 0; j < num_core_joints; ++j)
{
if (pose[j][3] > min_score)
size_t idx = core_joint_idx[j];
if (pose[idx][3] > min_score)
{
valid_joint_idx.push_back(j);
valid_joint_idx.push_back(idx);
}
}
@@ -2099,6 +2128,23 @@ void TriangulatorInternal::filter_poses(
continue;
}
// Set joint scores outside the room to a low value
for (size_t j = 0; j < num_full_joints; ++j)
{
if (pose[j][3] > min_score)
{
for (int k = 0; k < 3; ++k)
{
if (pose[j][k] > room_half_size[k] + room_center[k] + wdist ||
pose[j][k] < -room_half_size[k] + room_center[k] - wdist)
{
pose[j][3] = 0.001;
break;
}
}
}
}
// Calculate total limb length and average limb length
const float max_avg_length = 0.5;
const float min_avg_length = 0.1;
@@ -2179,132 +2225,104 @@ void TriangulatorInternal::add_missing_joints(
{"wrist_right", {"elbow_right"}},
{"wrist_left", {"elbow_left"}},
{"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
{"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
{"foot_*_left_*", {"ankle_left"}},
{"foot_*_right_*", {"ankle_right"}},
{"face_*", {"nose"}},
{"hand_*_left_*", {"wrist_left"}},
{"hand_*_right_*", {"wrist_right"}}};
{"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}}};
// Collect adjacent joint mappings
std::vector<std::vector<size_t>> adjacent_indices;
adjacent_indices.resize(joint_names.size());
for (size_t j = 0; j < joint_names.size(); ++j)
{
std::string adname = "";
const std::string &jname = joint_names[j];
// Determine adjacent joint based on name patterns
if (adjacents.find(jname) != adjacents.end())
{
adname = jname;
}
if (!adname.empty())
{
auto it = adjacents.find(adname);
if (it != adjacents.end())
{
for (const std::string &adj_name : it->second)
{
auto jt = joint_name_to_idx.find(adj_name);
if (jt != joint_name_to_idx.end())
{
adjacent_indices[j].push_back(jt->second);
}
}
}
}
}
for (size_t i = 0; i < poses.size(); ++i)
{
auto &pose = poses[i];
size_t num_joints = pose.size();
// Collect valid joint indices
std::vector<size_t> valid_joint_idx;
// Compute body center as the mean of valid joints
std::array<float, 3> body_center = {0.0, 0.0, 0.0};
size_t valid_count = 0;
for (size_t j = 0; j < num_joints; ++j)
{
if (pose[j][3] > min_match_score)
{
valid_joint_idx.push_back(j);
body_center[0] += pose[j][0];
body_center[1] += pose[j][1];
body_center[2] += pose[j][2];
valid_count++;
}
}
if (valid_joint_idx.empty())
if (valid_count == 0)
{
continue;
}
// Compute body center as the mean of valid joints
std::array<float, 3> body_center = {0.0, 0.0, 0.0};
for (size_t idx : valid_joint_idx)
{
body_center[0] += pose[idx][0];
body_center[1] += pose[idx][1];
body_center[2] += pose[idx][2];
}
float inv_c = 1.0 / static_cast<float>(valid_count);
for (int j = 0; j < 3; ++j)
{
body_center[j] /= static_cast<float>(valid_joint_idx.size());
body_center[j] *= inv_c;
}
// Iterate over each joint
for (size_t j = 0; j < joint_names.size(); ++j)
{
std::string adname = "";
const std::string &jname = joint_names[j];
// Determine adjacent joint based on name patterns
if (jname.substr(0, 5) == "foot_" && jname.find("_left") != std::string::npos)
{
adname = "foot_*_left_*";
}
else if (jname.substr(0, 5) == "foot_" && jname.find("_right") != std::string::npos)
{
adname = "foot_*_right_*";
}
else if (jname.substr(0, 5) == "face_")
{
adname = "face_*";
}
else if (jname.substr(0, 5) == "hand_" && jname.find("_left") != std::string::npos)
{
adname = "hand_*_left_*";
}
else if (jname.substr(0, 5) == "hand_" && jname.find("_right") != std::string::npos)
{
adname = "hand_*_right_*";
}
else if (adjacents.find(jname) != adjacents.end())
{
adname = jname;
}
if (adname == "")
{
// No adjacent joints defined for this joint
continue;
}
if (pose[j][3] == 0.0)
{
if (adjacents.find(adname) != adjacents.end())
// Joint is missing; attempt to estimate its position based on adjacent joints
const std::vector<size_t> &adj_indices = adjacent_indices[j];
std::array<float, 3> adjacent_position = {0.0, 0.0, 0.0};
size_t adjacent_count = 0;
for (size_t adj_idx : adj_indices)
{
// Joint is missing; attempt to estimate its position based on adjacent joints
std::array<float, 3> adjacent_position = {0.0, 0.0, 0.0};
size_t adjacent_count = 0;
const std::vector<std::string> &adjacent_joint_names = adjacents[adname];
for (const std::string &adj_name : adjacent_joint_names)
if (adj_idx < pose.size() && pose[adj_idx][3] > 0.1)
{
auto it = joint_name_to_idx.find(adj_name);
if (it != joint_name_to_idx.end())
{
size_t adj_idx = it->second;
if (adj_idx < pose.size() && pose[adj_idx][3] > 0.1)
{
adjacent_position[0] += pose[adj_idx][0];
adjacent_position[1] += pose[adj_idx][1];
adjacent_position[2] += pose[adj_idx][2];
++adjacent_count;
}
}
adjacent_position[0] += pose[adj_idx][0];
adjacent_position[1] += pose[adj_idx][1];
adjacent_position[2] += pose[adj_idx][2];
++adjacent_count;
}
}
if (adjacent_count > 0)
{
float inv_c = 1.0 / static_cast<float>(adjacent_count);
for (size_t k = 0; k < 3; ++k)
{
adjacent_position[k] *= inv_c;
}
if (adjacent_count > 0)
{
for (size_t k = 0; k < 3; ++k)
{
adjacent_position[k] /= static_cast<float>(adjacent_count);
}
// Update the missing joint's position
pose[j][0] = adjacent_position[0];
pose[j][1] = adjacent_position[1];
pose[j][2] = adjacent_position[2];
}
else
{
// No valid adjacent joints, use body center
pose[j][0] = body_center[0];
pose[j][1] = body_center[1];
pose[j][2] = body_center[2];
}
// Update the missing joint's position
pose[j][0] = adjacent_position[0];
pose[j][1] = adjacent_position[1];
pose[j][2] = adjacent_position[2];
}
else
{
// Adjacent joints not defined, use body center
// No valid adjacent joints, use body center
pose[j][0] = body_center[0];
pose[j][1] = body_center[1];
pose[j][2] = body_center[2];
@@ -2316,3 +2334,170 @@ void TriangulatorInternal::add_missing_joints(
}
}
}
// =================================================================================================
void TriangulatorInternal::replace_far_joints(
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::vector<std::string> &joint_names)
{
for (size_t i = 0; i < poses.size(); ++i)
{
auto &pose = poses[i];
size_t num_full_joints = pose.size();
// Calculate the average position of pose parts
std::array<float, 4> center_head = {0.0, 0.0, 0.0, 0};
std::array<float, 4> center_foot_left = {0.0, 0.0, 0.0, 0};
std::array<float, 4> center_foot_right = {0.0, 0.0, 0.0, 0};
std::array<float, 4> center_hand_left = {0.0, 0.0, 0.0, 0};
std::array<float, 4> center_hand_right = {0.0, 0.0, 0.0, 0};
for (size_t j = 0; j < num_full_joints; ++j)
{
const std::string &jname = joint_names[j];
float offset2 = (1.0 - min_match_score) * 2;
float min_score = this->min_match_score - offset2;
if (pose[j][3] > min_score)
{
if (jname.compare(0, 5, "face_") == 0 || jname == "nose" || jname == "eye_left" ||
jname == "eye_right" || jname == "ear_left" || jname == "ear_right")
{
center_head[0] += pose[j][0];
center_head[1] += pose[j][1];
center_head[2] += pose[j][2];
center_head[3] += 1.0;
}
else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0)
{
if (jname.find("_left") != std::string::npos)
{
center_foot_left[0] += pose[j][0];
center_foot_left[1] += pose[j][1];
center_foot_left[2] += pose[j][2];
center_foot_left[3] += 1.0;
}
else if (jname.find("_right") != std::string::npos)
{
center_foot_right[0] += pose[j][0];
center_foot_right[1] += pose[j][1];
center_foot_right[2] += pose[j][2];
center_foot_right[3] += 1.0;
}
}
else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0)
{
if (jname.find("_left") != std::string::npos)
{
center_hand_left[0] += pose[j][0];
center_hand_left[1] += pose[j][1];
center_hand_left[2] += pose[j][2];
center_hand_left[3] += 1.0;
}
else if (jname.find("_right") != std::string::npos)
{
center_hand_right[0] += pose[j][0];
center_hand_right[1] += pose[j][1];
center_hand_right[2] += pose[j][2];
center_hand_right[3] += 1.0;
}
}
}
}
for (size_t k = 0; k < 3; k++)
{
center_head[k] /= center_head[3];
center_foot_left[k] /= center_foot_left[3];
center_foot_right[k] /= center_foot_right[3];
center_hand_left[k] /= center_hand_left[3];
center_hand_right[k] /= center_hand_right[3];
}
// Drop joints that are too far from the part center
const float max_dist_head_sq = 0.20 * 0.20;
const float max_dist_foot_sq = 0.25 * 0.25;
const float max_dist_hand_sq = 0.20 * 0.20;
for (size_t j = 0; j < num_full_joints; ++j)
{
const std::string &jname = joint_names[j];
if (jname.compare(0, 5, "face_") == 0 || jname == "nose" || jname == "eye_left" ||
jname == "eye_right" || jname == "ear_left" || jname == "ear_right")
{
float dx = pose[j][0] - center_head[0];
float dy = pose[j][1] - center_head[1];
float dz = pose[j][2] - center_head[2];
float dist_sq = dx * dx + dy * dy + dz * dz;
if ((pose[j][3] > 0.0 && dist_sq > max_dist_head_sq) || pose[j][3] == 0.0)
{
pose[j][0] = center_head[0];
pose[j][1] = center_head[1];
pose[j][2] = center_head[2];
pose[j][3] = 0.1;
}
}
else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0)
{
if (jname.find("_left") != std::string::npos)
{
float dx = pose[j][0] - center_foot_left[0];
float dy = pose[j][1] - center_foot_left[1];
float dz = pose[j][2] - center_foot_left[2];
float dist_sq = dx * dx + dy * dy + dz * dz;
if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0)
{
pose[j][0] = center_foot_left[0];
pose[j][1] = center_foot_left[1];
pose[j][2] = center_foot_left[2];
pose[j][3] = 0.1;
}
}
else if (jname.find("_right") != std::string::npos)
{
float dx = pose[j][0] - center_foot_right[0];
float dy = pose[j][1] - center_foot_right[1];
float dz = pose[j][2] - center_foot_right[2];
float dist_sq = dx * dx + dy * dy + dz * dz;
if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0)
{
pose[j][0] = center_foot_right[0];
pose[j][1] = center_foot_right[1];
pose[j][2] = center_foot_right[2];
pose[j][3] = 0.1;
}
}
}
else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0)
{
if (jname.find("_left") != std::string::npos)
{
float dx = pose[j][0] - center_hand_left[0];
float dy = pose[j][1] - center_hand_left[1];
float dz = pose[j][2] - center_hand_left[2];
float dist_sq = dx * dx + dy * dy + dz * dz;
if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0)
{
pose[j][0] = center_hand_left[0];
pose[j][1] = center_hand_left[1];
pose[j][2] = center_hand_left[2];
pose[j][3] = 0.1;
}
}
else if (jname.find("_right") != std::string::npos)
{
float dx = pose[j][0] - center_hand_right[0];
float dy = pose[j][1] - center_hand_right[1];
float dz = pose[j][2] - center_hand_right[2];
float dist_sq = dx * dx + dy * dy + dz * dz;
if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0)
{
pose[j][0] = center_hand_right[0];
pose[j][1] = center_hand_right[1];
pose[j][2] = center_hand_right[2];
pose[j][3] = 0.1;
}
}
}
}
}
}

@@ -54,6 +54,7 @@ public:
private:
float min_match_score;
float min_group_size;
size_t num_cams;
const std::vector<std::string> core_joints = {
"shoulder_left",
@@ -137,6 +138,10 @@ private:
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::vector<std::string> &joint_names);
void replace_far_joints(
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::vector<std::string> &joint_names);
// Statistics
double num_calls = 0;
double total_time = 0;

@@ -1079,8 +1079,8 @@ namespace utils_2d_pose
std::string path_pose_m1 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
std::string path_pose_mb = base_path + "rtmpose-m_Bx384x288x3_fp16_extra-steps.onnx";
std::string path_pose_w1 = base_path + "rtmpose-l_wb_1x384x288x3_extra-steps.onnx";
std::string path_pose_wb = base_path + "rtmpose-l_wb_Bx384x288x3_extra-steps.onnx";
std::string path_pose_w1 = base_path + "rtmpose-l_wb_1x384x288x3_fp16_extra-steps.onnx";
std::string path_pose_wb = base_path + "rtmpose-l_wb_Bx384x288x3_fp16_extra-steps.onnx";
this->num_joints = whole_body ? 133 : 17;
std::string path_det;

@@ -139,47 +139,47 @@ namespace utils_pipeline
joint_names_2d.end(),
{
"hand_wrist_left",
"finger_thumb_left_1",
"finger_thumb_left_2",
"finger_thumb_left_3",
"finger_thumb_left_4",
"finger_index_left_1",
"finger_index_left_2",
"finger_index_left_3",
"finger_index_left_4",
"finger_middle_left_1",
"finger_middle_left_2",
"finger_middle_left_3",
"finger_middle_left_4",
"finger_ring_left_1",
"finger_ring_left_2",
"finger_ring_left_3",
"finger_ring_left_4",
"finger_pinky_left_1",
"finger_pinky_left_2",
"finger_pinky_left_3",
"finger_pinky_left_4",
"hand_finger_thumb_left_1",
"hand_finger_thumb_left_2",
"hand_finger_thumb_left_3",
"hand_finger_thumb_left_4",
"hand_finger_index_left_1",
"hand_finger_index_left_2",
"hand_finger_index_left_3",
"hand_finger_index_left_4",
"hand_finger_middle_left_1",
"hand_finger_middle_left_2",
"hand_finger_middle_left_3",
"hand_finger_middle_left_4",
"hand_finger_ring_left_1",
"hand_finger_ring_left_2",
"hand_finger_ring_left_3",
"hand_finger_ring_left_4",
"hand_finger_pinky_left_1",
"hand_finger_pinky_left_2",
"hand_finger_pinky_left_3",
"hand_finger_pinky_left_4",
"hand_wrist_right",
"finger_thumb_right_1",
"finger_thumb_right_2",
"finger_thumb_right_3",
"finger_thumb_right_4",
"finger_index_right_1",
"finger_index_right_2",
"finger_index_right_3",
"finger_index_right_4",
"finger_middle_right_1",
"finger_middle_right_2",
"finger_middle_right_3",
"finger_middle_right_4",
"finger_ring_right_1",
"finger_ring_right_2",
"finger_ring_right_3",
"finger_ring_right_4",
"finger_pinky_right_1",
"finger_pinky_right_2",
"finger_pinky_right_3",
"finger_pinky_right_4",
"hand_finger_thumb_right_1",
"hand_finger_thumb_right_2",
"hand_finger_thumb_right_3",
"hand_finger_thumb_right_4",
"hand_finger_index_right_1",
"hand_finger_index_right_2",
"hand_finger_index_right_3",
"hand_finger_index_right_4",
"hand_finger_middle_right_1",
"hand_finger_middle_right_2",
"hand_finger_middle_right_3",
"hand_finger_middle_right_4",
"hand_finger_ring_right_1",
"hand_finger_ring_right_2",
"hand_finger_ring_right_3",
"hand_finger_ring_right_4",
"hand_finger_pinky_right_1",
"hand_finger_pinky_right_2",
"hand_finger_pinky_right_3",
"hand_finger_pinky_right_4",
});
}