Merge branch 'mopts' into 'master'

More optimizations See merge request Percipiote/RapidPoseTriangulation!8
2025-03-25 17:34:25 +00:00
parent 63f373e7ec 64c3f3d09a
commit fb6ecf943f
18 changed files with 5473 additions and 4330 deletions
--- a/extras/jetson/README.md
+++ b/extras/jetson/README.md
@ -129,7 +129,7 @@ Tested with a _Jetson AGX Orin Developer Kit_ module.
 - Build docker container:
  ```bash
-  docker build --progress=plain -f extras/ros/dockerfile -t rapidposetriangulation_ros .
+  docker build --progress=plain -f extras/ros/dockerfile_2d -t rapidposetriangulation_ros2d .
  ```
 - Run and test:
--- a/extras/jetson/docker-compose-2d.yml
+++ b/extras/jetson/docker-compose-2d.yml
@ -1,10 +1,7 @@
 version: "2.3"
 # runtime: nvidia needs version 2 else change standard runtime at host pc
 services:
    test_node:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros2d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
@ -15,13 +12,14 @@ services:
            - /tmp/.X11-unix:/tmp/.X11-unix
            - /dev/shm:/dev/shm
        environment:
            - CAMID
            - DISPLAY
            - QT_X11_NO_MITSHM=1
            - "PYTHONUNBUFFERED=1"
        command: /bin/bash -i -c 'sleep infinity'
    estimator:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros2d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
--- a/extras/mmdeploy/README.md
+++ b/extras/mmdeploy/README.md
@ -61,7 +61,7 @@ mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/expor
 python3 ./tools/deploy.py \
    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
    /mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
-    https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
+    https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-ucoco_dw-ucoco_270e-384x288-2438fd99_20230728.pth \
    /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
    --work-dir work_dir \
    --show
@ -70,7 +70,7 @@ mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/expor
 python3 ./tools/deploy.py \
    configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \
    /mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
-    https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
+    https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-ucoco_dw-ucoco_270e-384x288-2438fd99_20230728.pth \
    /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
    --work-dir work_dir \
    --show
--- a/extras/mmpose/README.md
+++ b/extras/mmpose/README.md
@ -0,0 +1,23 @@
 # Finetuning MMPose models
 See: <https://mmpose.readthedocs.io/en/latest/user_guides/train_and_test.html>
 <br>
 ```bash
 docker build --progress=plain -f extras/mmpose/dockerfile -t rpt_mmpose .
 ./extras/mmpose/run_container.sh
 ```
 ```bash
 cd /mmpose/
 export CUDA_VISIBLE_DEVICES=0
 python3 ./tools/train.py \
    /RapidPoseTriangulation/extras/mmpose/configs/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \
    --amp \
    --cfg-options \
      load_from=https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \
      base_lr=0.00004
 ```
--- a/extras/mmpose/configs/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
+++ b/extras/mmpose/configs/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@ -0,0 +1,235 @@
 _base_ = ['mmpose::_base_/default_runtime.py']
 val_interval=1
 max_epochs = 3
 # common setting
 num_keypoints = 133
 input_size = (288, 384)
 # runtime
 stage2_num_epochs = 30
 base_lr = 4e-3
 train_batch_size = 32
 val_batch_size = 32
 train_cfg = dict(max_epochs=max_epochs, val_interval=val_interval)
 randomness = dict(seed=21)
 # optimizer
 optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    clip_grad=dict(max_norm=35, norm_type=2),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
 # learning rate
 param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
 ]
 # automatically scaling LR based on the actual training batch size
 auto_scale_lr = dict(base_batch_size=512)
 # codec settings
 codec = dict(
    type='SimCCLabel',
    input_size=input_size,
    sigma=(6., 6.93),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)
 # model settings
 model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1.,
        widen_factor=1.,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=1024,
        out_channels=num_keypoints,
        input_size=codec['input_size'],
        in_featuremap_size=tuple([s // 32 for s in codec['input_size']]),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True, ))
 # base dataset settings
 dataset_type = 'CocoWholeBodyDataset'
 data_mode = 'topdown'
 data_root = 'data/coco/'
 backend_args = dict(backend='local')
 # pipelines
 train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
 ]
 val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
 ]
 train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
 ]
 # data loaders
 train_dataloader = dict(
    batch_size=train_batch_size,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/coco_wholebody_train_v1.0.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
 val_dataloader = dict(
    batch_size=val_batch_size,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/coco_wholebody_val_v1.0.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        # bbox_file='data/coco/person_detection_results/'
        # 'COCO_val2017_detections_AP_H_56_person.json',
        pipeline=val_pipeline,
    ))
 test_dataloader = val_dataloader
 # hooks
 default_hooks = dict(
    checkpoint=dict(
        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
 custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
 ]
 # evaluators
 val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
 test_evaluator = val_evaluator
--- a/extras/mmpose/dockerfile
+++ b/extras/mmpose/dockerfile
@ -0,0 +1,9 @@
 FROM rpt_mmdeploy
 RUN apt-get update && apt-get install -y --no-install-recommends nano
 RUN pip3 install --upgrade --no-cache-dir "albumentations<1.4"
 RUN sed -i '94i\        self.runner.val_loop.run()' /usr/local/lib/python3.8/dist-packages/mmengine/runner/loops.py
 WORKDIR /mmpose/
 CMD ["/bin/bash"]
--- a/extras/mmpose/run_container.sh
+++ b/extras/mmpose/run_container.sh
@ -0,0 +1,11 @@
 #! /bin/bash
 xhost +
 docker run --privileged --rm --network host -it \
  --gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
  --volume "$(pwd)"/:/RapidPoseTriangulation/ \
  --volume "$(pwd)"/../datasets/coco2017/annotations/:/mmpose/data/coco/annotations/ \
  --volume "$(pwd)"/../datasets/coco2017/images/:/mmpose/data/coco/ \
  --volume /tmp/.X11-unix:/tmp/.X11-unix \
  --env DISPLAY --env QT_X11_NO_MITSHM=1 \
  rpt_mmpose
--- a/extras/ros/README.md
+++ b/extras/ros/README.md
@ -7,10 +7,11 @@ Run pose estimator with ros topics as inputs and publish detected poses.
 - Build container:
  ```bash
-  docker build --progress=plain -t rapidposetriangulation_ros -f extras/ros/dockerfile .
+  docker build --progress=plain -t rapidposetriangulation_ros2d -f extras/ros/dockerfile_2d .
  docker build --progress=plain -t rapidposetriangulation_ros3d -f extras/ros/dockerfile_3d .
  ```
- Update or remove the `ROS_DOMAIN_ID` in `docker-compose.yml` to fit your environment
+- Update or remove the `ROS_DOMAIN_ID` in the `docker-compose.yml` files to fit your environment
 - Run and test:
--- a/extras/ros/docker-compose-2d.yml
+++ b/extras/ros/docker-compose-2d.yml
@ -1,10 +1,7 @@
 version: "2.3"
 # runtime: nvidia needs version 2 else change standard runtime at host pc
 services:
    test_node:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros2d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
@ -15,13 +12,14 @@ services:
            - /tmp/.X11-unix:/tmp/.X11-unix
            - /dev/shm:/dev/shm
        environment:
            - CAMID
            - DISPLAY
            - QT_X11_NO_MITSHM=1
            - "PYTHONUNBUFFERED=1"
        command: /bin/bash -i -c 'sleep infinity'
    estimator:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros2d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
@ -39,7 +37,7 @@ services:
        command: /bin/bash -i -c 'export ROS_DOMAIN_ID=18 && ros2 run rpt2d_wrapper_cpp rpt2d_wrapper'
    pose_visualizer:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros2d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
@ -57,7 +55,7 @@ services:
        command: /bin/bash -i -c 'sleep 2 && export ROS_DOMAIN_ID=18 && ros2 run pose2d_visualizer pose2d_visualizer $CAMID'
    pose_viewer:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros2d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
--- a/extras/ros/docker-compose-3d.yml
+++ b/extras/ros/docker-compose-3d.yml
@ -1,10 +1,7 @@
 version: "2.3"
 # runtime: nvidia needs version 2 else change standard runtime at host pc
 services:
    test_node:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros3d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
@ -21,7 +18,7 @@ services:
        command: /bin/bash -i -c 'sleep infinity'
    triangulator:
-        image: rapidposetriangulation_ros
+        image: rapidposetriangulation_ros3d
        network_mode: "host"
        ipc: "host"
        runtime: nvidia
--- a/extras/ros/dockerfile_2d
+++ b/extras/ros/dockerfile_2d
@ -45,17 +45,14 @@ RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
 # Copy modules
 COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
 COPY ./scripts/ /RapidPoseTriangulation/scripts/
 COPY ./rpt/ /RapidPoseTriangulation/rpt/
 COPY ./extras/ros/rpt_msgs/ /RapidPoseTriangulation/extras/ros/rpt_msgs/
 COPY ./extras/ros/pose2d_visualizer/ /RapidPoseTriangulation/extras/ros/pose2d_visualizer/
 COPY ./extras/ros/rpt2d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/
 COPY ./extras/ros/rpt3d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/
 # Link and build as ros package
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt_msgs/ /project/dev_ws/src/
 RUN ln -s /RapidPoseTriangulation/extras/ros/pose2d_visualizer/ /project/dev_ws/src/
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt2d_wrapper_cpp/ /project/dev_ws/src/
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/ /project/dev_ws/src/
 RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
 # Update ros packages -> autocompletion and check
--- a/extras/ros/dockerfile_3d
+++ b/extras/ros/dockerfile_3d
@ -0,0 +1,44 @@
 FROM ros:humble-ros-base-jammy
 ARG DEBIAN_FRONTEND=noninteractive
 # Set the working directory to /project
 WORKDIR /project/
 # Update and install basic tools
 RUN apt-get update && apt-get upgrade -y
 RUN apt-get update && apt-get install -y --no-install-recommends git nano wget
 RUN apt-get update && apt-get install -y --no-install-recommends python3-pip
 RUN pip3 install --upgrade pip
 # Fix ros package building error
 RUN pip3 install --no-cache-dir "setuptools<=58.2.0"
 # Add ROS2 sourcing by default
 RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
 # Create ROS2 workspace for project packages
 RUN mkdir -p /project/dev_ws/src/
 RUN cd /project/dev_ws/; colcon build
 RUN echo "source /project/dev_ws/install/setup.bash" >> ~/.bashrc
 # Copy modules
 COPY ./extras/include/ /RapidPoseTriangulation/extras/include/
 COPY ./rpt/ /RapidPoseTriangulation/rpt/
 COPY ./extras/ros/rpt_msgs/ /RapidPoseTriangulation/extras/ros/rpt_msgs/
 COPY ./extras/ros/rpt3d_wrapper_cpp/ /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/
 # Link and build as ros package
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt_msgs/ /project/dev_ws/src/
 RUN ln -s /RapidPoseTriangulation/extras/ros/rpt3d_wrapper_cpp/ /project/dev_ws/src/
 RUN /bin/bash -i -c 'cd /project/dev_ws/; colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release'
 # Update ros packages -> autocompletion and check
 RUN /bin/bash -i -c 'ros2 pkg list'
 # Clear cache to save space, only has an effect if image is squashed
 RUN apt-get autoremove -y \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*
 WORKDIR /RapidPoseTriangulation/
 CMD ["/bin/bash"]
--- a/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp
+++ b/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp
@ -36,7 +36,7 @@ static const std::string pose_in_topic = "/poses/{}";
 static const std::string cam_info_topic = "/{}/calibration";
 static const std::string pose_out_topic = "/poses/humans3d";
-static const float min_match_score = 0.95;
+static const float min_match_score = 0.92;
 static const size_t min_group_size = 1;
 static const std::array<std::array<float, 3>, 2> roomparams = {{
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
--- a/rpt/triangulator.cpp
+++ b/rpt/triangulator.cpp
@ -525,6 +525,7 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
    }
    // Convert inputs to internal formats
    this->num_cams = cameras.size();
    std::vector<std::vector<std::vector<std::array<float, 3>>>> i_poses_2d = poses_2d;
    std::vector<CameraInternal> internal_cameras;
    for (size_t i = 0; i < cameras.size(); ++i)
@ -781,10 +782,6 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
        all_scored_poses[i] = std::move(std::make_pair(pose3d, score));
    }
    elapsed = std::chrono::steady_clock::now() - stime;
    pair_scoring_time += elapsed.count();
    stime = std::chrono::steady_clock::now();
    // Drop low scoring poses
    size_t num_poses = all_scored_poses.size();
    for (size_t i = num_poses; i > 0; --i)
@ -796,6 +793,10 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
        }
    }
    elapsed = std::chrono::steady_clock::now() - stime;
    pair_scoring_time += elapsed.count();
    stime = std::chrono::steady_clock::now();
    // Group pairs that share a person
    std::vector<std::tuple<
        std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
@ -828,6 +829,27 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
        auto [pose3d, score] = triangulate_and_score(
            pose1, pose2, cam1, cam2, roomparams, {});
        // Scale scores with 2D confidences
        // They can improve the merge weighting, but especially the earlier step of pair-filtering
        // works better if only per-view consistency is used, so they are not included before.
        for (size_t j = 0; j < pose3d.size(); ++j)
        {
            float score1 = pose1[j][2];
            float score2 = pose2[j][2];
            float min_score = 0.1;
            if (score1 > min_score && score2 > min_score)
            {
                float scoreP = (score1 + score2) * 0.5;
                float scoreT = pose3d[j][3];
                // Since the triangulation score is less sensitive and generally higher,
                // weight it stronger to balance the two scores.
                pose3d[j][3] = 0.9 * scoreT + 0.1 * scoreP;
            }
        }
        all_full_poses[i] = std::move(pose3d);
    }
@ -862,6 +884,7 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
    add_extra_joints(final_poses_3d, joint_names);
    filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx);
    add_missing_joints(final_poses_3d, joint_names);
    replace_far_joints(final_poses_3d, joint_names);
    // Store final result for use in the next frame.
    last_poses_3d = final_poses_3d;
@ -1429,8 +1452,8 @@ std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triang
    {
        if (mask[i])
        {
-            float score = 0.5 * (score1[i] + score2[i]);
+            float scoreT = 0.5 * (score1[i] + score2[i]);
-            pose3d[i][3] = score;
+            pose3d[i][3] = scoreT;
        }
    }
@ -1787,7 +1810,8 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
    // Merge poses to create initial pose
    // Use only those triangulations with a high score
    std::vector<std::array<float, 4>> sum_poses(num_joints, {0.0, 0.0, 0.0, 0.0});
-    std::vector<int> sum_mask1(num_joints, 0);
+    std::vector<float> sum_mask1(num_joints, 0);
    float offset1 = (1.0 - min_score) / 2.0;
    for (size_t i = 0; i < num_poses; ++i)
    {
        const auto &pose = poses_3d[i];
@ -1795,13 +1819,14 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
        for (size_t j = 0; j < num_joints; ++j)
        {
            float score = pose[j][3];
-            if (score > min_score)
+            if (score > min_score - offset1)
            {
-                sum_poses[j][0] += pose[j][0];
+                float weight = std::pow(score, 2);
-                sum_poses[j][1] += pose[j][1];
+                sum_poses[j][0] += pose[j][0] * weight;
-                sum_poses[j][2] += pose[j][2];
+                sum_poses[j][1] += pose[j][1] * weight;
-                sum_poses[j][3] += score;
+                sum_poses[j][2] += pose[j][2] * weight;
-                sum_mask1[j]++;
+                sum_poses[j][3] += score * weight;
                sum_mask1[j] += weight;
            }
        }
    }
@ -1825,7 +1850,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
    for (size_t j = 0; j < num_joints; ++j)
    {
        float score = initial_pose_3d[j][3];
-        if (score > min_score)
+        if (score > min_score - offset1)
        {
            jmask[j] = true;
            center[0] += initial_pose_3d[j][0];
@ -1853,7 +1878,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
    }
    // Drop joints with low scores and filter outlying joints using distance threshold
-    float offset = 0.1;
+    float offset2 = (1.0 - min_score) * 2.0;
    float max_dist_sq = 1.2 * 1.2;
    std::vector<std::vector<bool>> mask(num_poses, std::vector<bool>(num_joints, false));
    std::vector<std::vector<float>> distances(num_poses, std::vector<float>(num_joints, 0.0));
@ -1869,7 +1894,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
            distances[i][j] = dist_sq;
            float score = pose[j][3];
-            if (dist_sq <= max_dist_sq && score > (min_score - offset))
+            if (dist_sq <= max_dist_sq && score > (min_score - offset2))
            {
                mask[i][j] = true;
            }
@ -1877,7 +1902,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
    }
    // Select the best-k proposals for each joint that are closest to the initial pose
-    int keep_best = std::max(3, (int)num_poses / 3);
+    int keep_best = std::max((int)std::ceil(this->num_cams / 2.0), (int)std::ceil(num_poses / 3.0));
    std::vector<std::vector<bool>> best_k_mask(num_poses, std::vector<bool>(num_joints, false));
    for (size_t j = 0; j < num_joints; ++j)
    {
@ -1915,7 +1940,7 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
    // Compute the final pose
    std::vector<std::array<float, 4>> sum_poses2(num_joints, {0.0, 0.0, 0.0, 0.0});
-    std::vector<int> sum_mask2(num_joints, 0);
+    std::vector<float> sum_mask2(num_joints, 0);
    for (size_t i = 0; i < num_poses; ++i)
    {
        const auto &pose = poses_3d[i];
@ -1924,11 +1949,13 @@ std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
        {
            if (mask[i][j])
            {
-                sum_poses2[j][0] += pose[j][0];
+                // Use an exponential weighting to give more importance to higher scores
-                sum_poses2[j][1] += pose[j][1];
+                float weight = std::pow(pose[j][3], 4);
-                sum_poses2[j][2] += pose[j][2];
+                sum_poses2[j][0] += pose[j][0] * weight;
-                sum_poses2[j][3] += pose[j][3];
+                sum_poses2[j][1] += pose[j][1] * weight;
-                sum_mask2[j]++;
+                sum_poses2[j][2] += pose[j][2] * weight;
                sum_poses2[j][3] += pose[j][3] * weight;
                sum_mask2[j] += weight;
            }
        }
    }
@ -1993,15 +2020,17 @@ void TriangulatorInternal::filter_poses(
    for (size_t i = 0; i < poses.size(); ++i)
    {
        auto &pose = poses[i];
-        size_t num_joints = pose.size();
+        size_t num_core_joints = core_joint_idx.size();
        size_t num_full_joints = pose.size();
        // Collect valid joint indices
        std::vector<size_t> valid_joint_idx;
-        for (size_t j = 0; j < num_joints; ++j)
+        for (size_t j = 0; j < num_core_joints; ++j)
        {
-            if (pose[j][3] > min_score)
+            size_t idx = core_joint_idx[j];
            if (pose[idx][3] > min_score)
            {
-                valid_joint_idx.push_back(j);
+                valid_joint_idx.push_back(idx);
            }
        }
@ -2099,6 +2128,23 @@ void TriangulatorInternal::filter_poses(
            continue;
        }
        // Set joint scores outside the room to a low value
        for (size_t j = 0; j < num_full_joints; ++j)
        {
            if (pose[j][3] > min_score)
            {
                for (int k = 0; k < 3; ++k)
                {
                    if (pose[j][k] > room_half_size[k] + room_center[k] + wdist ||
                        pose[j][k] < -room_half_size[k] + room_center[k] - wdist)
                    {
                        pose[j][3] = 0.001;
                        break;
                    }
                }
            }
        }
        // Calculate total limb length and average limb length
        const float max_avg_length = 0.5;
        const float min_avg_length = 0.1;
@ -2179,99 +2225,79 @@ void TriangulatorInternal::add_missing_joints(
        {"wrist_right", {"elbow_right"}},
        {"wrist_left", {"elbow_left"}},
        {"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
-        {"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
+        {"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}}};
        {"foot_*_left_*", {"ankle_left"}},
        {"foot_*_right_*", {"ankle_right"}},
        {"face_*", {"nose"}},
        {"hand_*_left_*", {"wrist_left"}},
        {"hand_*_right_*", {"wrist_right"}}};
-    for (size_t i = 0; i < poses.size(); ++i)
+    // Collect adjacent joint mappings
-    {
+    std::vector<std::vector<size_t>> adjacent_indices;
-        auto &pose = poses[i];
+    adjacent_indices.resize(joint_names.size());
        size_t num_joints = pose.size();
        // Collect valid joint indices
        std::vector<size_t> valid_joint_idx;
        for (size_t j = 0; j < num_joints; ++j)
        {
            if (pose[j][3] > min_match_score)
            {
                valid_joint_idx.push_back(j);
            }
        }
        if (valid_joint_idx.empty())
        {
            continue;
        }
        // Compute body center as the mean of valid joints
        std::array<float, 3> body_center = {0.0, 0.0, 0.0};
        for (size_t idx : valid_joint_idx)
        {
            body_center[0] += pose[idx][0];
            body_center[1] += pose[idx][1];
            body_center[2] += pose[idx][2];
        }
        for (int j = 0; j < 3; ++j)
        {
            body_center[j] /= static_cast<float>(valid_joint_idx.size());
        }
        // Iterate over each joint
    for (size_t j = 0; j < joint_names.size(); ++j)
    {
        std::string adname = "";
        const std::string &jname = joint_names[j];
        // Determine adjacent joint based on name patterns
-            if (jname.substr(0, 5) == "foot_" && jname.find("_left") != std::string::npos)
+        if (adjacents.find(jname) != adjacents.end())
            {
                adname = "foot_*_left_*";
            }
            else if (jname.substr(0, 5) == "foot_" && jname.find("_right") != std::string::npos)
            {
                adname = "foot_*_right_*";
            }
            else if (jname.substr(0, 5) == "face_")
            {
                adname = "face_*";
            }
            else if (jname.substr(0, 5) == "hand_" && jname.find("_left") != std::string::npos)
            {
                adname = "hand_*_left_*";
            }
            else if (jname.substr(0, 5) == "hand_" && jname.find("_right") != std::string::npos)
            {
                adname = "hand_*_right_*";
            }
            else if (adjacents.find(jname) != adjacents.end())
        {
            adname = jname;
        }
-            if (adname == "")
+        if (!adname.empty())
        {
-                // No adjacent joints defined for this joint
+            auto it = adjacents.find(adname);
-                continue;
+            if (it != adjacents.end())
            {
                for (const std::string &adj_name : it->second)
                {
                    auto jt = joint_name_to_idx.find(adj_name);
                    if (jt != joint_name_to_idx.end())
                    {
                        adjacent_indices[j].push_back(jt->second);
                    }
                }
            }
        }
    }
    for (size_t i = 0; i < poses.size(); ++i)
    {
        auto &pose = poses[i];
        size_t num_joints = pose.size();
        // Compute body center as the mean of valid joints
        std::array<float, 3> body_center = {0.0, 0.0, 0.0};
        size_t valid_count = 0;
        for (size_t j = 0; j < num_joints; ++j)
        {
            if (pose[j][3] > min_match_score)
            {
                body_center[0] += pose[j][0];
                body_center[1] += pose[j][1];
                body_center[2] += pose[j][2];
                valid_count++;
            }
        }
        if (valid_count == 0)
        {
            continue;
        }
        float inv_c = 1.0 / static_cast<float>(valid_count);
        for (int j = 0; j < 3; ++j)
        {
            body_center[j] *= inv_c;
        }
        // Iterate over each joint
        for (size_t j = 0; j < joint_names.size(); ++j)
        {
            if (pose[j][3] == 0.0)
            {
                if (adjacents.find(adname) != adjacents.end())
            {
                // Joint is missing; attempt to estimate its position based on adjacent joints
                const std::vector<size_t> &adj_indices = adjacent_indices[j];
                std::array<float, 3> adjacent_position = {0.0, 0.0, 0.0};
                size_t adjacent_count = 0;
-                    const std::vector<std::string> &adjacent_joint_names = adjacents[adname];
+                for (size_t adj_idx : adj_indices)
                    for (const std::string &adj_name : adjacent_joint_names)
                {
                        auto it = joint_name_to_idx.find(adj_name);
                        if (it != joint_name_to_idx.end())
                        {
                            size_t adj_idx = it->second;
                    if (adj_idx < pose.size() && pose[adj_idx][3] > 0.1)
                    {
                        adjacent_position[0] += pose[adj_idx][0];
@ -2280,13 +2306,13 @@ void TriangulatorInternal::add_missing_joints(
                        ++adjacent_count;
                    }
                }
                    }
                if (adjacent_count > 0)
                {
                    float inv_c = 1.0 / static_cast<float>(adjacent_count);
                    for (size_t k = 0; k < 3; ++k)
                    {
-                            adjacent_position[k] /= static_cast<float>(adjacent_count);
+                        adjacent_position[k] *= inv_c;
                    }
                    // Update the missing joint's position
@ -2301,14 +2327,6 @@ void TriangulatorInternal::add_missing_joints(
                    pose[j][1] = body_center[1];
                    pose[j][2] = body_center[2];
                }
                }
                else
                {
                    // Adjacent joints not defined, use body center
                    pose[j][0] = body_center[0];
                    pose[j][1] = body_center[1];
                    pose[j][2] = body_center[2];
                }
                // Set a low confidence score
                pose[j][3] = 0.1;
@ -2316,3 +2334,170 @@ void TriangulatorInternal::add_missing_joints(
        }
    }
 }
 // =================================================================================================
 void TriangulatorInternal::replace_far_joints(
    std::vector<std::vector<std::array<float, 4>>> &poses,
    const std::vector<std::string> &joint_names)
 {
    for (size_t i = 0; i < poses.size(); ++i)
    {
        auto &pose = poses[i];
        size_t num_full_joints = pose.size();
        // Calculate the average position of pose parts
        std::array<float, 4> center_head = {0.0, 0.0, 0.0, 0};
        std::array<float, 4> center_foot_left = {0.0, 0.0, 0.0, 0};
        std::array<float, 4> center_foot_right = {0.0, 0.0, 0.0, 0};
        std::array<float, 4> center_hand_left = {0.0, 0.0, 0.0, 0};
        std::array<float, 4> center_hand_right = {0.0, 0.0, 0.0, 0};
        for (size_t j = 0; j < num_full_joints; ++j)
        {
            const std::string &jname = joint_names[j];
            float offset2 = (1.0 - min_match_score) * 2;
            float min_score = this->min_match_score - offset2;
            if (pose[j][3] > min_score)
            {
                if (jname.compare(0, 4, "face_") == 0 || jname == "nose" || jname == "eye_left" ||
                    jname == "eye_right" || jname == "ear_left" || jname == "ear_right")
                {
                    center_head[0] += pose[j][0];
                    center_head[1] += pose[j][1];
                    center_head[2] += pose[j][2];
                    center_head[3] += 1.0;
                }
                else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0)
                {
                    if (jname.find("_left") != std::string::npos)
                    {
                        center_foot_left[0] += pose[j][0];
                        center_foot_left[1] += pose[j][1];
                        center_foot_left[2] += pose[j][2];
                        center_foot_left[3] += 1.0;
                    }
                    else if (jname.find("_right") != std::string::npos)
                    {
                        center_foot_right[0] += pose[j][0];
                        center_foot_right[1] += pose[j][1];
                        center_foot_right[2] += pose[j][2];
                        center_foot_right[3] += 1.0;
                    }
                }
                else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0)
                {
                    if (jname.find("_left") != std::string::npos)
                    {
                        center_hand_left[0] += pose[j][0];
                        center_hand_left[1] += pose[j][1];
                        center_hand_left[2] += pose[j][2];
                        center_hand_left[3] += 1.0;
                    }
                    else if (jname.find("_right") != std::string::npos)
                    {
                        center_hand_right[0] += pose[j][0];
                        center_hand_right[1] += pose[j][1];
                        center_hand_right[2] += pose[j][2];
                        center_hand_right[3] += 1.0;
                    }
                }
            }
        }
        for (size_t k = 0; k < 3; k++)
        {
            center_head[k] /= center_head[3];
            center_foot_left[k] /= center_foot_left[3];
            center_foot_right[k] /= center_foot_right[3];
            center_hand_left[k] /= center_hand_left[3];
            center_hand_right[k] /= center_hand_right[3];
        }
        // Drop joints that are too far from the part center
        const float max_dist_head_sq = 0.20 * 0.20;
        const float max_dist_foot_sq = 0.25 * 0.25;
        const float max_dist_hand_sq = 0.20 * 0.20;
        for (size_t j = 0; j < num_full_joints; ++j)
        {
            const std::string &jname = joint_names[j];
            if (jname.compare(0, 4, "face_") == 0 || jname == "nose" || jname == "eye_left" ||
                jname == "eye_right" || jname == "ear_left" || jname == "ear_right")
            {
                float dx = pose[j][0] - center_head[0];
                float dy = pose[j][1] - center_head[1];
                float dz = pose[j][2] - center_head[2];
                float dist_sq = dx * dx + dy * dy + dz * dz;
                if ((pose[j][3] > 0.0 && dist_sq > max_dist_head_sq) || pose[j][3] == 0.0)
                {
                    pose[j][0] = center_head[0];
                    pose[j][1] = center_head[1];
                    pose[j][2] = center_head[2];
                    pose[j][3] = 0.1;
                }
            }
            else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0)
            {
                if (jname.find("_left") != std::string::npos)
                {
                    float dx = pose[j][0] - center_foot_left[0];
                    float dy = pose[j][1] - center_foot_left[1];
                    float dz = pose[j][2] - center_foot_left[2];
                    float dist_sq = dx * dx + dy * dy + dz * dz;
                    if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0)
                    {
                        pose[j][0] = center_foot_left[0];
                        pose[j][1] = center_foot_left[1];
                        pose[j][2] = center_foot_left[2];
                        pose[j][3] = 0.1;
                    }
                }
                else if (jname.find("_right") != std::string::npos)
                {
                    float dx = pose[j][0] - center_foot_right[0];
                    float dy = pose[j][1] - center_foot_right[1];
                    float dz = pose[j][2] - center_foot_right[2];
                    float dist_sq = dx * dx + dy * dy + dz * dz;
                    if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0)
                    {
                        pose[j][0] = center_foot_right[0];
                        pose[j][1] = center_foot_right[1];
                        pose[j][2] = center_foot_right[2];
                        pose[j][3] = 0.1;
                    }
                }
            }
            else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0)
            {
                if (jname.find("_left") != std::string::npos)
                {
                    float dx = pose[j][0] - center_hand_left[0];
                    float dy = pose[j][1] - center_hand_left[1];
                    float dz = pose[j][2] - center_hand_left[2];
                    float dist_sq = dx * dx + dy * dy + dz * dz;
                    if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0)
                    {
                        pose[j][0] = center_hand_left[0];
                        pose[j][1] = center_hand_left[1];
                        pose[j][2] = center_hand_left[2];
                        pose[j][3] = 0.1;
                    }
                }
                else if (jname.find("_right") != std::string::npos)
                {
                    float dx = pose[j][0] - center_hand_right[0];
                    float dy = pose[j][1] - center_hand_right[1];
                    float dz = pose[j][2] - center_hand_right[2];
                    float dist_sq = dx * dx + dy * dy + dz * dz;
                    if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0)
                    {
                        pose[j][0] = center_hand_right[0];
                        pose[j][1] = center_hand_right[1];
                        pose[j][2] = center_hand_right[2];
                        pose[j][3] = 0.1;
                    }
                }
            }
        }
    }
 }
--- a/rpt/triangulator.hpp
+++ b/rpt/triangulator.hpp
@ -54,6 +54,7 @@ public:
 private:
    float min_match_score;
    float min_group_size;
    size_t num_cams;
    const std::vector<std::string> core_joints = {
        "shoulder_left",
@ -137,6 +138,10 @@ private:
        std::vector<std::vector<std::array<float, 4>>> &poses,
        const std::vector<std::string> &joint_names);
    void replace_far_joints(
        std::vector<std::vector<std::array<float, 4>>> &poses,
        const std::vector<std::string> &joint_names);
    // Statistics
    double num_calls = 0;
    double total_time = 0;
--- a/scripts/utils_2d_pose.hpp
+++ b/scripts/utils_2d_pose.hpp
@ -1079,8 +1079,8 @@ namespace utils_2d_pose
            std::string path_pose_m1 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
            std::string path_pose_mb = base_path + "rtmpose-m_Bx384x288x3_fp16_extra-steps.onnx";
-            std::string path_pose_w1 = base_path + "rtmpose-l_wb_1x384x288x3_extra-steps.onnx";
+            std::string path_pose_w1 = base_path + "rtmpose-l_wb_1x384x288x3_fp16_extra-steps.onnx";
-            std::string path_pose_wb = base_path + "rtmpose-l_wb_Bx384x288x3_extra-steps.onnx";
+            std::string path_pose_wb = base_path + "rtmpose-l_wb_Bx384x288x3_fp16_extra-steps.onnx";
            this->num_joints = whole_body ? 133 : 17;
            std::string path_det;
--- a/scripts/utils_pipeline.hpp
+++ b/scripts/utils_pipeline.hpp
@ -139,47 +139,47 @@ namespace utils_pipeline
                joint_names_2d.end(),
                {
                    "hand_wrist_left",
-                    "finger_thumb_left_1",
+                    "hand_finger_thumb_left_1",
-                    "finger_thumb_left_2",
+                    "hand_finger_thumb_left_2",
-                    "finger_thumb_left_3",
+                    "hand_finger_thumb_left_3",
-                    "finger_thumb_left_4",
+                    "hand_finger_thumb_left_4",
-                    "finger_index_left_1",
+                    "hand_finger_index_left_1",
-                    "finger_index_left_2",
+                    "hand_finger_index_left_2",
-                    "finger_index_left_3",
+                    "hand_finger_index_left_3",
-                    "finger_index_left_4",
+                    "hand_finger_index_left_4",
-                    "finger_middle_left_1",
+                    "hand_finger_middle_left_1",
-                    "finger_middle_left_2",
+                    "hand_finger_middle_left_2",
-                    "finger_middle_left_3",
+                    "hand_finger_middle_left_3",
-                    "finger_middle_left_4",
+                    "hand_finger_middle_left_4",
-                    "finger_ring_left_1",
+                    "hand_finger_ring_left_1",
-                    "finger_ring_left_2",
+                    "hand_finger_ring_left_2",
-                    "finger_ring_left_3",
+                    "hand_finger_ring_left_3",
-                    "finger_ring_left_4",
+                    "hand_finger_ring_left_4",
-                    "finger_pinky_left_1",
+                    "hand_finger_pinky_left_1",
-                    "finger_pinky_left_2",
+                    "hand_finger_pinky_left_2",
-                    "finger_pinky_left_3",
+                    "hand_finger_pinky_left_3",
-                    "finger_pinky_left_4",
+                    "hand_finger_pinky_left_4",
                    "hand_wrist_right",
-                    "finger_thumb_right_1",
+                    "hand_finger_thumb_right_1",
-                    "finger_thumb_right_2",
+                    "hand_finger_thumb_right_2",
-                    "finger_thumb_right_3",
+                    "hand_finger_thumb_right_3",
-                    "finger_thumb_right_4",
+                    "hand_finger_thumb_right_4",
-                    "finger_index_right_1",
+                    "hand_finger_index_right_1",
-                    "finger_index_right_2",
+                    "hand_finger_index_right_2",
-                    "finger_index_right_3",
+                    "hand_finger_index_right_3",
-                    "finger_index_right_4",
+                    "hand_finger_index_right_4",
-                    "finger_middle_right_1",
+                    "hand_finger_middle_right_1",
-                    "finger_middle_right_2",
+                    "hand_finger_middle_right_2",
-                    "finger_middle_right_3",
+                    "hand_finger_middle_right_3",
-                    "finger_middle_right_4",
+                    "hand_finger_middle_right_4",
-                    "finger_ring_right_1",
+                    "hand_finger_ring_right_1",
-                    "finger_ring_right_2",
+                    "hand_finger_ring_right_2",
-                    "finger_ring_right_3",
+                    "hand_finger_ring_right_3",
-                    "finger_ring_right_4",
+                    "hand_finger_ring_right_4",
-                    "finger_pinky_right_1",
+                    "hand_finger_pinky_right_1",
-                    "finger_pinky_right_2",
+                    "hand_finger_pinky_right_2",
-                    "finger_pinky_right_3",
+                    "hand_finger_pinky_right_3",
-                    "finger_pinky_right_4",
+                    "hand_finger_pinky_right_4",
                });
        }