Make triangulation a zero-copy pure function

This commit is contained in:
2026-03-11 22:29:21 +08:00
parent 5bed0f0aaf
commit 24f74c87f1
10 changed files with 596 additions and 947 deletions
+33 -40
View File
@@ -1,7 +1,5 @@
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <nanobind/nanobind.h>
@@ -23,31 +21,28 @@ using CountArray = nb::ndarray<nb::numpy, const uint32_t, nb::shape<-1>, nb::c_c
using RoomArray = nb::ndarray<nb::numpy, const float, nb::shape<2, 3>, nb::c_contig>;
using PoseArray3D = nb::ndarray<nb::numpy, float, nb::shape<-1, -1, 4>, nb::c_contig>;
PoseBatch2D pose_batch_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
{
if (poses_2d.shape(0) != person_counts.shape(0))
{
throw std::invalid_argument("poses_2d and person_counts must have the same number of views.");
}
PoseBatch2D batch;
batch.num_views = static_cast<size_t>(poses_2d.shape(0));
batch.max_persons = static_cast<size_t>(poses_2d.shape(1));
batch.num_joints = static_cast<size_t>(poses_2d.shape(2));
batch.person_counts.assign(person_counts.data(), person_counts.data() + batch.num_views);
for (size_t i = 0; i < batch.person_counts.size(); ++i)
for (size_t i = 0; i < static_cast<size_t>(person_counts.shape(0)); ++i)
{
if (batch.person_counts[i] > batch.max_persons)
if (person_counts(i) > poses_2d.shape(1))
{
throw std::invalid_argument("person_counts entries must not exceed the padded person dimension.");
}
}
const size_t total_size = batch.num_views * batch.max_persons * batch.num_joints * 3;
batch.data.resize(total_size);
std::memcpy(batch.data.data(), poses_2d.data(), total_size * sizeof(float));
return batch;
return PoseBatch2DView {
poses_2d.data(),
person_counts.data(),
static_cast<size_t>(poses_2d.shape(0)),
static_cast<size_t>(poses_2d.shape(1)),
static_cast<size_t>(poses_2d.shape(2)),
};
}
std::array<std::array<float, 3>, 2> roomparams_from_numpy(const RoomArray &roomparams)
@@ -93,29 +88,27 @@ NB_MODULE(_core, m)
return camera.to_string();
});
nb::class_<Triangulator>(m, "Triangulator")
.def(nb::init<float, size_t>(),
"min_match_score"_a = 0.95f,
"min_group_size"_a = 1)
.def(
"triangulate_poses",
[](Triangulator &self,
const PoseArray2D &poses_2d,
const CountArray &person_counts,
const std::vector<Camera> &cameras,
const RoomArray &roomparams,
const std::vector<std::string> &joint_names)
{
PoseBatch2D pose_batch = pose_batch_from_numpy(poses_2d, person_counts);
auto room = roomparams_from_numpy(roomparams);
PoseBatch3D poses_3d = self.triangulate_poses(pose_batch, cameras, room, joint_names);
return pose_batch_to_numpy(std::move(poses_3d));
},
"poses_2d"_a,
"person_counts"_a,
"cameras"_a,
"roomparams"_a,
"joint_names"_a)
.def("reset", &Triangulator::reset)
.def("print_stats", &Triangulator::print_stats);
m.def(
"triangulate_poses",
[](const PoseArray2D &poses_2d,
const CountArray &person_counts,
const std::vector<Camera> &cameras,
const RoomArray &roomparams,
const std::vector<std::string> &joint_names,
float min_match_score,
size_t min_group_size)
{
const PoseBatch2DView pose_batch = pose_batch_view_from_numpy(poses_2d, person_counts);
const auto room = roomparams_from_numpy(roomparams);
PoseBatch3D poses_3d = triangulate_poses(
pose_batch, cameras, room, joint_names, min_match_score, min_group_size);
return pose_batch_to_numpy(std::move(poses_3d));
},
"poses_2d"_a,
"person_counts"_a,
"cameras"_a,
"roomparams"_a,
"joint_names"_a,
"min_match_score"_a = 0.95f,
"min_group_size"_a = 1);
}
@@ -66,8 +66,6 @@ public:
this->all_poses_set.resize(cam_ids.size(), false);
// Load 3D models
tri_model = std::make_unique<Triangulator>(
min_match_score, min_group_size);
pose_tracker = std::make_unique<PoseTracker>(
max_movement_speed, max_track_distance);
@@ -120,7 +118,6 @@ private:
std::vector<rclcpp::Subscription<std_msgs::msg::String>::SharedPtr> sub_cam_list_;
rclcpp::Publisher<rpt_msgs::msg::Poses>::SharedPtr pose_pub_;
std::unique_ptr<Triangulator> tri_model;
std::unique_ptr<PoseTracker> pose_tracker;
std::vector<Camera> all_cameras;
std::mutex cams_mutex, pose_mutex, model_mutex;
@@ -240,8 +237,9 @@ void Rpt3DWrapperNode::call_model()
cams_mutex.lock();
pose_mutex.lock();
PoseBatch2D pose_batch_2d = PoseBatch2D::from_nested(all_poses);
auto poses_3d = tri_model->triangulate_poses(
pose_batch_2d, all_cameras, roomparams, joint_names).to_nested();
auto poses_3d = triangulate_poses(
pose_batch_2d, all_cameras, roomparams, joint_names, min_match_score, min_group_size)
.to_nested();
double min_ts = *std::min_element(all_timestamps.begin(), all_timestamps.end());
this->all_poses_set = std::vector<bool>(cam_ids.size(), false);
+8 -70
View File
@@ -1,7 +1,7 @@
#include <algorithm>
#include <stdexcept>
#include <utility>
#include "triangulator.hpp"
#include "interface.hpp"
// =================================================================================================
@@ -33,36 +33,19 @@ float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord)
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
}
const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_t coord) const
{
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
}
const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
{
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
}
RaggedPoses2D PoseBatch2D::to_nested() const
PoseBatch2DView PoseBatch2D::view() const
{
if (person_counts.size() != num_views)
{
throw std::invalid_argument("PoseBatch2D person_counts size must match num_views.");
}
RaggedPoses2D poses_2d(num_views);
for (size_t view = 0; view < num_views; ++view)
{
const size_t num_persons = person_counts[view];
poses_2d[view].resize(num_persons);
for (size_t person = 0; person < num_persons; ++person)
{
poses_2d[view][person].resize(num_joints);
for (size_t joint = 0; joint < num_joints; ++joint)
{
for (size_t coord = 0; coord < 3; ++coord)
{
poses_2d[view][person][joint][coord] = at(view, person, joint, coord);
}
}
}
}
return poses_2d;
return PoseBatch2DView {data.data(), person_counts.data(), num_views, max_persons, num_joints};
}
PoseBatch2D PoseBatch2D::from_nested(const RaggedPoses2D &poses_2d)
@@ -169,48 +152,3 @@ PoseBatch3D PoseBatch3D::from_nested(const NestedPoses3D &poses_3d)
}
return batch;
}
// =================================================================================================
// =================================================================================================
Triangulator::Triangulator(float min_match_score, size_t min_group_size)
: triangulator(std::make_unique<TriangulatorInternal>(min_match_score, min_group_size))
{
}
Triangulator::~Triangulator() = default;
// =================================================================================================
PoseBatch3D Triangulator::triangulate_poses(
const PoseBatch2D &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names)
{
return PoseBatch3D::from_nested(
triangulator->triangulate_poses(poses_2d.to_nested(), cameras, roomparams, joint_names));
}
NestedPoses3D Triangulator::triangulate_poses(
const RaggedPoses2D &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names)
{
return triangulator->triangulate_poses(poses_2d, cameras, roomparams, joint_names);
}
// =================================================================================================
void Triangulator::reset()
{
triangulator->reset();
}
// =================================================================================================
void Triangulator::print_stats()
{
triangulator->print_stats();
}
+42 -53
View File
@@ -1,7 +1,7 @@
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
@@ -14,6 +14,17 @@ using NestedPoses3D = std::vector<std::vector<std::array<float, 4>>>;
// =================================================================================================
struct PoseBatch2DView
{
const float *data = nullptr;
const uint32_t *person_counts = nullptr;
size_t num_views = 0;
size_t max_persons = 0;
size_t num_joints = 0;
const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
};
struct PoseBatch2D
{
std::vector<float> data;
@@ -24,8 +35,8 @@ struct PoseBatch2D
float &at(size_t view, size_t person, size_t joint, size_t coord);
const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
PoseBatch2DView view() const;
RaggedPoses2D to_nested() const;
static PoseBatch2D from_nested(const RaggedPoses2D &poses_2d);
};
@@ -44,56 +55,34 @@ struct PoseBatch3D
// =================================================================================================
class TriangulatorInternal;
/**
* Calculate a triangulation using a padded pose tensor.
*
* @param poses_2d Padded poses of shape [views, max_persons, joints, 3].
* @param cameras List of cameras.
* @param roomparams Room parameters (room size, room center).
* @param joint_names List of 2D joint names.
* @param min_match_score Minimum score to consider a triangulated joint as valid.
* @param min_group_size Minimum number of camera pairs that need to see a person.
*
* @return Pose tensor of shape [persons, joints, 4].
*/
PoseBatch3D triangulate_poses(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names,
float min_match_score = 0.95f,
size_t min_group_size = 1);
// =================================================================================================
class Triangulator
inline PoseBatch3D triangulate_poses(
const PoseBatch2D &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names,
float min_match_score = 0.95f,
size_t min_group_size = 1)
{
public:
/**
* Triangulator to predict poses from multiple views.
*
* @param min_match_score Minimum score to consider a triangulated joint as valid.
* @param min_group_size Minimum number of camera pairs that need to see a person.
*/
Triangulator(
float min_match_score = 0.95,
size_t min_group_size = 1);
~Triangulator();
/**
* Calculate a triangulation using a padded pose tensor.
*
* @param poses_2d Padded poses of shape [views, max_persons, joints, 3].
* @param cameras List of cameras.
* @param roomparams Room parameters (room size, room center).
* @param joint_names List of 2D joint names.
*
* @return Pose tensor of shape [persons, joints, 4].
*/
PoseBatch3D triangulate_poses(
const PoseBatch2D &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
/**
* Compatibility overload for callers that still produce ragged nested vectors.
*/
NestedPoses3D triangulate_poses(
const RaggedPoses2D &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
/** Reset the triangulator. */
void reset();
/** Print triangulation statistics. */
void print_stats();
private:
std::unique_ptr<TriangulatorInternal> triangulator;
};
return triangulate_poses(
poses_2d.view(), cameras, roomparams, joint_names, min_match_score, min_group_size);
}
+466 -631
View File
File diff suppressed because it is too large Load Diff
-130
View File
@@ -1,130 +0,0 @@
#pragma once
#include <array>
#include <string>
#include <vector>
#include "camera.hpp"
// =================================================================================================
class TriangulatorInternal
{
public:
TriangulatorInternal(float min_match_score, size_t min_group_size);
std::vector<std::vector<std::array<float, 4>>> triangulate_poses(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
void reset();
void print_stats();
private:
float min_match_score;
float min_group_size;
size_t num_cams;
const std::vector<std::string> core_joints = {
"shoulder_left",
"shoulder_right",
"hip_left",
"hip_right",
"elbow_left",
"elbow_right",
"knee_left",
"knee_right",
"wrist_left",
"wrist_right",
"ankle_left",
"ankle_right",
};
const std::vector<std::pair<std::string, std::string>> core_limbs = {
{"knee_left", "ankle_left"},
{"hip_left", "knee_left"},
{"hip_right", "knee_right"},
{"knee_right", "ankle_right"},
{"elbow_left", "wrist_left"},
{"elbow_right", "wrist_right"},
{"shoulder_left", "elbow_left"},
{"shoulder_right", "elbow_right"},
};
std::vector<std::vector<std::array<float, 4>>> last_poses_3d;
void undistort_poses(
std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam);
std::tuple<std::vector<std::vector<std::array<float, 3>>>, std::vector<std::vector<float>>>
project_poses(
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
const CameraInternal &icam,
bool calc_dists);
float calc_pose_score(
const std::vector<std::array<float, 3>> &pose1,
const std::vector<std::array<float, 3>> &pose2,
const std::vector<float> &dist1,
const CameraInternal &icam);
std::vector<float> score_projection(
const std::vector<std::array<float, 3>> &pose,
const std::vector<std::array<float, 3>> &repro,
const std::vector<float> &dists,
const std::vector<bool> &mask,
float iscale);
std::pair<std::vector<std::array<float, 4>>, float> triangulate_and_score(
const std::vector<std::array<float, 3>> &pose1,
const std::vector<std::array<float, 3>> &pose2,
const CameraInternal &cam1,
const CameraInternal &cam2,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
std::vector<std::tuple<
std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
calc_grouping(
const std::vector<std::pair<
std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
const std::vector<std::pair<std::vector<std::array<float, 4>>, float>> &all_scored_poses,
float min_score);
std::vector<std::array<float, 4>> merge_group(
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
float min_score);
void add_extra_joints(
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::vector<std::string> &joint_names);
void filter_poses(
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<size_t> &core_joint_idx,
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
void add_missing_joints(
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::vector<std::string> &joint_names);
void replace_far_joints(
std::vector<std::vector<std::array<float, 4>>> &poses,
const std::vector<std::string> &joint_names);
// Statistics
double num_calls = 0;
double total_time = 0;
double init_time = 0;
double undistort_time = 0;
double project_time = 0;
double match_time = 0;
double pairs_time = 0;
double pair_scoring_time = 0;
double grouping_time = 0;
double full_time = 0;
double merge_time = 0;
double post_time = 0;
double convert_time = 0;
};
+2 -2
View File
@@ -3,7 +3,7 @@ from __future__ import annotations
from collections.abc import Sequence
from typing import TYPE_CHECKING
from ._core import Camera, Triangulator
from ._core import Camera, triangulate_poses
if TYPE_CHECKING:
import numpy as np
@@ -28,7 +28,7 @@ def pack_poses_2d(
__all__ = [
"Camera",
"Triangulator",
"convert_cameras",
"pack_poses_2d",
"triangulate_poses",
]
+19 -8
View File
@@ -1,19 +1,30 @@
from __future__ import annotations
from collections.abc import Mapping, Sequence
from typing import Any, TypeAlias
from collections.abc import Sequence
from typing import TypeAlias, TypedDict
import numpy as np
import numpy.typing as npt
from ._core import Camera
CameraLike = Camera | Mapping[str, Any]
PoseViewLike: TypeAlias = (
npt.NDArray[np.generic]
| Sequence[Sequence[Sequence[float]]]
| Sequence[Sequence[float]]
)
Matrix3x3Like: TypeAlias = Sequence[Sequence[float]]
VectorLike: TypeAlias = Sequence[float]
PoseViewLike: TypeAlias = npt.NDArray[np.generic] | Sequence[Sequence[Sequence[float]]] | Sequence[Sequence[float]]
class CameraDict(TypedDict, total=False):
name: str
K: Matrix3x3Like
DC: VectorLike
R: Matrix3x3Like
T: Sequence[Sequence[float]]
width: int
height: int
type: str
CameraLike = Camera | CameraDict
def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
+1 -1
View File
@@ -2,7 +2,7 @@
Various module tests
### Triangulator
### Python Interface
```bash
cd /RapidPoseTriangulation/
+22 -7
View File
@@ -69,13 +69,13 @@ def test_camera_structure_repr():
)
def test_triangulate_samples(camera_path: str, pose_path: str, roomparams):
poses_2d, person_counts, cameras = load_case(camera_path, pose_path)
triangulator = rpt.Triangulator(min_match_score=0.95)
poses_3d = triangulator.triangulate_poses(
poses_3d = rpt.triangulate_poses(
poses_2d,
person_counts,
cameras,
np.asarray(roomparams, dtype=np.float32),
JOINT_NAMES,
min_match_score=0.95,
)
assert isinstance(poses_3d, np.ndarray)
@@ -86,18 +86,33 @@ def test_triangulate_samples(camera_path: str, pose_path: str, roomparams):
assert np.isfinite(poses_3d).all()
def test_triangulate_repeatability_after_reset():
def test_triangulate_repeatability():
poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json")
roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32)
triangulator = rpt.Triangulator(min_match_score=0.95)
first = triangulator.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
triangulator.reset()
second = triangulator.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
first = rpt.triangulate_poses(
poses_2d, person_counts, cameras, roomparams, JOINT_NAMES, min_match_score=0.95
)
second = rpt.triangulate_poses(
poses_2d, person_counts, cameras, roomparams, JOINT_NAMES, min_match_score=0.95
)
np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5)
def test_triangulate_does_not_mutate_inputs():
poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)
poses_before = poses_2d.copy()
counts_before = person_counts.copy()
rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
np.testing.assert_array_equal(poses_2d, poses_before)
np.testing.assert_array_equal(person_counts, counts_before)
def test_pack_poses_2d_from_ragged_inputs():
packed, counts = rpt.pack_poses_2d(
[