Make triangulation a zero-copy pure function

2026-03-11 22:29:21 +08:00
parent 5bed0f0aaf
commit 24f74c87f1
10 changed files with 596 additions and 947 deletions
@@ -1,7 +1,5 @@
 #include <algorithm>
 #include <cstdint>
-#include <cstring>
-#include <memory>
 #include <stdexcept>

 #include <nanobind/nanobind.h>
@@ -23,31 +21,28 @@ using CountArray = nb::ndarray<nb::numpy, const uint32_t, nb::shape<-1>, nb::c_c
 using RoomArray = nb::ndarray<nb::numpy, const float, nb::shape<2, 3>, nb::c_contig>;
 using PoseArray3D = nb::ndarray<nb::numpy, float, nb::shape<-1, -1, 4>, nb::c_contig>;

-PoseBatch2D pose_batch_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
+PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
 {
    if (poses_2d.shape(0) != person_counts.shape(0))
    {
        throw std::invalid_argument("poses_2d and person_counts must have the same number of views.");
    }

-    PoseBatch2D batch;
-    batch.num_views = static_cast<size_t>(poses_2d.shape(0));
-    batch.max_persons = static_cast<size_t>(poses_2d.shape(1));
-    batch.num_joints = static_cast<size_t>(poses_2d.shape(2));
-    batch.person_counts.assign(person_counts.data(), person_counts.data() + batch.num_views);
-
-    for (size_t i = 0; i < batch.person_counts.size(); ++i)
+    for (size_t i = 0; i < static_cast<size_t>(person_counts.shape(0)); ++i)
    {
-        if (batch.person_counts[i] > batch.max_persons)
+        if (person_counts(i) > poses_2d.shape(1))
        {
            throw std::invalid_argument("person_counts entries must not exceed the padded person dimension.");
        }
    }

-    const size_t total_size = batch.num_views * batch.max_persons * batch.num_joints * 3;
-    batch.data.resize(total_size);
-    std::memcpy(batch.data.data(), poses_2d.data(), total_size * sizeof(float));
-    return batch;
+    return PoseBatch2DView {
+        poses_2d.data(),
+        person_counts.data(),
+        static_cast<size_t>(poses_2d.shape(0)),
+        static_cast<size_t>(poses_2d.shape(1)),
+        static_cast<size_t>(poses_2d.shape(2)),
+    };
 }

 std::array<std::array<float, 3>, 2> roomparams_from_numpy(const RoomArray &roomparams)
@@ -93,29 +88,27 @@ NB_MODULE(_core, m)
            return camera.to_string();
        });

-    nb::class_<Triangulator>(m, "Triangulator")
-        .def(nb::init<float, size_t>(),
-             "min_match_score"_a = 0.95f,
-             "min_group_size"_a = 1)
-        .def(
-            "triangulate_poses",
-            [](Triangulator &self,
-               const PoseArray2D &poses_2d,
-               const CountArray &person_counts,
-               const std::vector<Camera> &cameras,
-               const RoomArray &roomparams,
-               const std::vector<std::string> &joint_names)
-            {
-                PoseBatch2D pose_batch = pose_batch_from_numpy(poses_2d, person_counts);
-                auto room = roomparams_from_numpy(roomparams);
-                PoseBatch3D poses_3d = self.triangulate_poses(pose_batch, cameras, room, joint_names);
-                return pose_batch_to_numpy(std::move(poses_3d));
-            },
-            "poses_2d"_a,
-            "person_counts"_a,
-            "cameras"_a,
-            "roomparams"_a,
-            "joint_names"_a)
-        .def("reset", &Triangulator::reset)
-        .def("print_stats", &Triangulator::print_stats);
+    m.def(
+        "triangulate_poses",
+        [](const PoseArray2D &poses_2d,
+           const CountArray &person_counts,
+           const std::vector<Camera> &cameras,
+           const RoomArray &roomparams,
+           const std::vector<std::string> &joint_names,
+           float min_match_score,
+           size_t min_group_size)
+        {
+            const PoseBatch2DView pose_batch = pose_batch_view_from_numpy(poses_2d, person_counts);
+            const auto room = roomparams_from_numpy(roomparams);
+            PoseBatch3D poses_3d = triangulate_poses(
+                pose_batch, cameras, room, joint_names, min_match_score, min_group_size);
+            return pose_batch_to_numpy(std::move(poses_3d));
+        },
+        "poses_2d"_a,
+        "person_counts"_a,
+        "cameras"_a,
+        "roomparams"_a,
+        "joint_names"_a,
+        "min_match_score"_a = 0.95f,
+        "min_group_size"_a = 1);
 }
@@ -66,8 +66,6 @@ public:
        this->all_poses_set.resize(cam_ids.size(), false);

        // Load 3D models
-        tri_model = std::make_unique<Triangulator>(
-            min_match_score, min_group_size);
        pose_tracker = std::make_unique<PoseTracker>(
            max_movement_speed, max_track_distance);

@@ -120,7 +118,6 @@ private:
    std::vector<rclcpp::Subscription<std_msgs::msg::String>::SharedPtr> sub_cam_list_;
    rclcpp::Publisher<rpt_msgs::msg::Poses>::SharedPtr pose_pub_;

-    std::unique_ptr<Triangulator> tri_model;
    std::unique_ptr<PoseTracker> pose_tracker;
    std::vector<Camera> all_cameras;
    std::mutex cams_mutex, pose_mutex, model_mutex;
@@ -240,8 +237,9 @@ void Rpt3DWrapperNode::call_model()
    cams_mutex.lock();
    pose_mutex.lock();
    PoseBatch2D pose_batch_2d = PoseBatch2D::from_nested(all_poses);
-    auto poses_3d = tri_model->triangulate_poses(
-        pose_batch_2d, all_cameras, roomparams, joint_names).to_nested();
+    auto poses_3d = triangulate_poses(
+        pose_batch_2d, all_cameras, roomparams, joint_names, min_match_score, min_group_size)
+                        .to_nested();

    double min_ts = *std::min_element(all_timestamps.begin(), all_timestamps.end());
    this->all_poses_set = std::vector<bool>(cam_ids.size(), false);
@@ -1,7 +1,7 @@
+#include <algorithm>
 #include <stdexcept>
 #include <utility>

-#include "triangulator.hpp"
 #include "interface.hpp"

 // =================================================================================================
@@ -33,36 +33,19 @@ float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord)
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
 }

+const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_t coord) const
+{
+    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
+}
+
 const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
 {
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
 }

-RaggedPoses2D PoseBatch2D::to_nested() const
+PoseBatch2DView PoseBatch2D::view() const
 {
-    if (person_counts.size() != num_views)
-    {
-        throw std::invalid_argument("PoseBatch2D person_counts size must match num_views.");
-    }
-
-    RaggedPoses2D poses_2d(num_views);
-    for (size_t view = 0; view < num_views; ++view)
-    {
-        const size_t num_persons = person_counts[view];
-        poses_2d[view].resize(num_persons);
-        for (size_t person = 0; person < num_persons; ++person)
-        {
-            poses_2d[view][person].resize(num_joints);
-            for (size_t joint = 0; joint < num_joints; ++joint)
-            {
-                for (size_t coord = 0; coord < 3; ++coord)
-                {
-                    poses_2d[view][person][joint][coord] = at(view, person, joint, coord);
-                }
-            }
-        }
-    }
-    return poses_2d;
+    return PoseBatch2DView {data.data(), person_counts.data(), num_views, max_persons, num_joints};
 }

 PoseBatch2D PoseBatch2D::from_nested(const RaggedPoses2D &poses_2d)
@@ -169,48 +152,3 @@ PoseBatch3D PoseBatch3D::from_nested(const NestedPoses3D &poses_3d)
    }
    return batch;
 }
-
-// =================================================================================================
-// =================================================================================================
-
-Triangulator::Triangulator(float min_match_score, size_t min_group_size)
-    : triangulator(std::make_unique<TriangulatorInternal>(min_match_score, min_group_size))
-{
-}
-
-Triangulator::~Triangulator() = default;
-
-// =================================================================================================
-
-PoseBatch3D Triangulator::triangulate_poses(
-    const PoseBatch2D &poses_2d,
-    const std::vector<Camera> &cameras,
-    const std::array<std::array<float, 3>, 2> &roomparams,
-    const std::vector<std::string> &joint_names)
-{
-    return PoseBatch3D::from_nested(
-        triangulator->triangulate_poses(poses_2d.to_nested(), cameras, roomparams, joint_names));
-}
-
-NestedPoses3D Triangulator::triangulate_poses(
-    const RaggedPoses2D &poses_2d,
-    const std::vector<Camera> &cameras,
-    const std::array<std::array<float, 3>, 2> &roomparams,
-    const std::vector<std::string> &joint_names)
-{
-    return triangulator->triangulate_poses(poses_2d, cameras, roomparams, joint_names);
-}
-
-// =================================================================================================
-
-void Triangulator::reset()
-{
-    triangulator->reset();
-}
-
-// =================================================================================================
-
-void Triangulator::print_stats()
-{
-    triangulator->print_stats();
-}
@@ -1,7 +1,7 @@
 #pragma once

+#include <array>
 #include <cstdint>
-#include <memory>
 #include <string>
 #include <vector>

@@ -14,6 +14,17 @@ using NestedPoses3D = std::vector<std::vector<std::array<float, 4>>>;

 // =================================================================================================

+struct PoseBatch2DView
+{
+    const float *data = nullptr;
+    const uint32_t *person_counts = nullptr;
+    size_t num_views = 0;
+    size_t max_persons = 0;
+    size_t num_joints = 0;
+
+    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
+};
+
 struct PoseBatch2D
 {
    std::vector<float> data;
@@ -24,8 +35,8 @@ struct PoseBatch2D

    float &at(size_t view, size_t person, size_t joint, size_t coord);
    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
+    PoseBatch2DView view() const;

-    RaggedPoses2D to_nested() const;
    static PoseBatch2D from_nested(const RaggedPoses2D &poses_2d);
 };

@@ -44,56 +55,34 @@ struct PoseBatch3D

 // =================================================================================================

-class TriangulatorInternal;
+/**
+ * Calculate a triangulation using a padded pose tensor.
+ *
+ * @param poses_2d Padded poses of shape [views, max_persons, joints, 3].
+ * @param cameras List of cameras.
+ * @param roomparams Room parameters (room size, room center).
+ * @param joint_names List of 2D joint names.
+ * @param min_match_score Minimum score to consider a triangulated joint as valid.
+ * @param min_group_size Minimum number of camera pairs that need to see a person.
+ *
+ * @return Pose tensor of shape [persons, joints, 4].
+ */
+PoseBatch3D triangulate_poses(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::array<std::array<float, 3>, 2> &roomparams,
+    const std::vector<std::string> &joint_names,
+    float min_match_score = 0.95f,
+    size_t min_group_size = 1);

-// =================================================================================================
-
-class Triangulator
+inline PoseBatch3D triangulate_poses(
+    const PoseBatch2D &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::array<std::array<float, 3>, 2> &roomparams,
+    const std::vector<std::string> &joint_names,
+    float min_match_score = 0.95f,
+    size_t min_group_size = 1)
 {
-public:
-    /**
-     * Triangulator to predict poses from multiple views.
-     *
-     * @param min_match_score Minimum score to consider a triangulated joint as valid.
-     * @param min_group_size Minimum number of camera pairs that need to see a person.
-     */
-    Triangulator(
-        float min_match_score = 0.95,
-        size_t min_group_size = 1);
-
-    ~Triangulator();
-
-    /**
-     * Calculate a triangulation using a padded pose tensor.
-     *
-     * @param poses_2d Padded poses of shape [views, max_persons, joints, 3].
-     * @param cameras List of cameras.
-     * @param roomparams Room parameters (room size, room center).
-     * @param joint_names List of 2D joint names.
-     *
-     * @return Pose tensor of shape [persons, joints, 4].
-     */
-    PoseBatch3D triangulate_poses(
-        const PoseBatch2D &poses_2d,
-        const std::vector<Camera> &cameras,
-        const std::array<std::array<float, 3>, 2> &roomparams,
-        const std::vector<std::string> &joint_names);
-
-    /**
-     * Compatibility overload for callers that still produce ragged nested vectors.
-     */
-    NestedPoses3D triangulate_poses(
-        const RaggedPoses2D &poses_2d,
-        const std::vector<Camera> &cameras,
-        const std::array<std::array<float, 3>, 2> &roomparams,
-        const std::vector<std::string> &joint_names);
-
-    /** Reset the triangulator. */
-    void reset();
-
-    /** Print triangulation statistics. */
-    void print_stats();
-
-private:
-    std::unique_ptr<TriangulatorInternal> triangulator;
-};
+    return triangulate_poses(
+        poses_2d.view(), cameras, roomparams, joint_names, min_match_score, min_group_size);
+}
@@ -1,130 +0,0 @@
-#pragma once
-
-#include <array>
-#include <string>
-#include <vector>
-
-#include "camera.hpp"
-
-// =================================================================================================
-
-class TriangulatorInternal
-{
-public:
-    TriangulatorInternal(float min_match_score, size_t min_group_size);
-
-    std::vector<std::vector<std::array<float, 4>>> triangulate_poses(
-        const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
-        const std::vector<Camera> &cameras,
-        const std::array<std::array<float, 3>, 2> &roomparams,
-        const std::vector<std::string> &joint_names);
-
-    void reset();
-    void print_stats();
-
-private:
-    float min_match_score;
-    float min_group_size;
-    size_t num_cams;
-
-    const std::vector<std::string> core_joints = {
-        "shoulder_left",
-        "shoulder_right",
-        "hip_left",
-        "hip_right",
-        "elbow_left",
-        "elbow_right",
-        "knee_left",
-        "knee_right",
-        "wrist_left",
-        "wrist_right",
-        "ankle_left",
-        "ankle_right",
-    };
-    const std::vector<std::pair<std::string, std::string>> core_limbs = {
-        {"knee_left", "ankle_left"},
-        {"hip_left", "knee_left"},
-        {"hip_right", "knee_right"},
-        {"knee_right", "ankle_right"},
-        {"elbow_left", "wrist_left"},
-        {"elbow_right", "wrist_right"},
-        {"shoulder_left", "elbow_left"},
-        {"shoulder_right", "elbow_right"},
-    };
-    std::vector<std::vector<std::array<float, 4>>> last_poses_3d;
-
-    void undistort_poses(
-        std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam);
-
-    std::tuple<std::vector<std::vector<std::array<float, 3>>>, std::vector<std::vector<float>>>
-    project_poses(
-        const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
-        const CameraInternal &icam,
-        bool calc_dists);
-
-    float calc_pose_score(
-        const std::vector<std::array<float, 3>> &pose1,
-        const std::vector<std::array<float, 3>> &pose2,
-        const std::vector<float> &dist1,
-        const CameraInternal &icam);
-
-    std::vector<float> score_projection(
-        const std::vector<std::array<float, 3>> &pose,
-        const std::vector<std::array<float, 3>> &repro,
-        const std::vector<float> &dists,
-        const std::vector<bool> &mask,
-        float iscale);
-
-    std::pair<std::vector<std::array<float, 4>>, float> triangulate_and_score(
-        const std::vector<std::array<float, 3>> &pose1,
-        const std::vector<std::array<float, 3>> &pose2,
-        const CameraInternal &cam1,
-        const CameraInternal &cam2,
-        const std::array<std::array<float, 3>, 2> &roomparams,
-        const std::vector<std::array<size_t, 2>> &core_limbs_idx);
-
-    std::vector<std::tuple<
-        std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
-    calc_grouping(
-        const std::vector<std::pair<
-            std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
-        const std::vector<std::pair<std::vector<std::array<float, 4>>, float>> &all_scored_poses,
-        float min_score);
-
-    std::vector<std::array<float, 4>> merge_group(
-        const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
-        float min_score);
-
-    void add_extra_joints(
-        std::vector<std::vector<std::array<float, 4>>> &poses,
-        const std::vector<std::string> &joint_names);
-
-    void filter_poses(
-        std::vector<std::vector<std::array<float, 4>>> &poses,
-        const std::array<std::array<float, 3>, 2> &roomparams,
-        const std::vector<size_t> &core_joint_idx,
-        const std::vector<std::array<size_t, 2>> &core_limbs_idx);
-
-    void add_missing_joints(
-        std::vector<std::vector<std::array<float, 4>>> &poses,
-        const std::vector<std::string> &joint_names);
-
-    void replace_far_joints(
-        std::vector<std::vector<std::array<float, 4>>> &poses,
-        const std::vector<std::string> &joint_names);
-
-    // Statistics
-    double num_calls = 0;
-    double total_time = 0;
-    double init_time = 0;
-    double undistort_time = 0;
-    double project_time = 0;
-    double match_time = 0;
-    double pairs_time = 0;
-    double pair_scoring_time = 0;
-    double grouping_time = 0;
-    double full_time = 0;
-    double merge_time = 0;
-    double post_time = 0;
-    double convert_time = 0;
-};
@@ -3,7 +3,7 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from ._core import Camera, Triangulator
+from ._core import Camera, triangulate_poses

 if TYPE_CHECKING:
    import numpy as np
@@ -28,7 +28,7 @@ def pack_poses_2d(

 __all__ = [
    "Camera",
-    "Triangulator",
    "convert_cameras",
    "pack_poses_2d",
+    "triangulate_poses",
 ]
@@ -1,19 +1,30 @@
 from __future__ import annotations

-from collections.abc import Mapping, Sequence
-from typing import Any, TypeAlias
+from collections.abc import Sequence
+from typing import TypeAlias, TypedDict

 import numpy as np
 import numpy.typing as npt

 from ._core import Camera

-CameraLike = Camera | Mapping[str, Any]
-PoseViewLike: TypeAlias = (
-    npt.NDArray[np.generic]
-    | Sequence[Sequence[Sequence[float]]]
-    | Sequence[Sequence[float]]
-)
+Matrix3x3Like: TypeAlias = Sequence[Sequence[float]]
+VectorLike: TypeAlias = Sequence[float]
+PoseViewLike: TypeAlias = npt.NDArray[np.generic] | Sequence[Sequence[Sequence[float]]] | Sequence[Sequence[float]]
+
+
+class CameraDict(TypedDict, total=False):
+    name: str
+    K: Matrix3x3Like
+    DC: VectorLike
+    R: Matrix3x3Like
+    T: Sequence[Sequence[float]]
+    width: int
+    height: int
+    type: str
+
+
+CameraLike = Camera | CameraDict


 def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
@@ -2,7 +2,7 @@

 Various module tests

-### Triangulator
+### Python Interface

  ```bash
  cd /RapidPoseTriangulation/
@@ -69,13 +69,13 @@ def test_camera_structure_repr():
 )
 def test_triangulate_samples(camera_path: str, pose_path: str, roomparams):
    poses_2d, person_counts, cameras = load_case(camera_path, pose_path)
-    triangulator = rpt.Triangulator(min_match_score=0.95)
-    poses_3d = triangulator.triangulate_poses(
+    poses_3d = rpt.triangulate_poses(
        poses_2d,
        person_counts,
        cameras,
        np.asarray(roomparams, dtype=np.float32),
        JOINT_NAMES,
+        min_match_score=0.95,
    )

    assert isinstance(poses_3d, np.ndarray)
@@ -86,18 +86,33 @@ def test_triangulate_samples(camera_path: str, pose_path: str, roomparams):
    assert np.isfinite(poses_3d).all()


-def test_triangulate_repeatability_after_reset():
+def test_triangulate_repeatability():
    poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json")
    roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32)
-    triangulator = rpt.Triangulator(min_match_score=0.95)

-    first = triangulator.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
-    triangulator.reset()
-    second = triangulator.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
+    first = rpt.triangulate_poses(
+        poses_2d, person_counts, cameras, roomparams, JOINT_NAMES, min_match_score=0.95
+    )
+    second = rpt.triangulate_poses(
+        poses_2d, person_counts, cameras, roomparams, JOINT_NAMES, min_match_score=0.95
+    )

    np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5)


+def test_triangulate_does_not_mutate_inputs():
+    poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
+    roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)
+
+    poses_before = poses_2d.copy()
+    counts_before = person_counts.copy()
+
+    rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
+
+    np.testing.assert_array_equal(poses_2d, poses_before)
+    np.testing.assert_array_equal(person_counts, counts_before)
+
+
 def test_pack_poses_2d_from_ragged_inputs():
    packed, counts = rpt.pack_poses_2d(
        [