From 31c4690121d619ed66c2ea1c4b35990a0b32f1c7 Mon Sep 17 00:00:00 2001 From: crosstyan Date: Thu, 12 Mar 2026 00:24:02 +0800 Subject: [PATCH] Unify camera model into immutable factory-built type --- bindings/rpt_module.cpp | 69 +++++++++++++++--- rpt/cached_camera.hpp | 19 ----- rpt/camera.cpp | 40 ++++++++--- rpt/camera.hpp | 21 +++++- rpt/triangulator.cpp | 155 ++++++++++++---------------------------- src/rpt/__init__.py | 2 + src/rpt/_helpers.py | 24 ++++--- tests/test_interface.py | 19 ++--- 8 files changed, 177 insertions(+), 172 deletions(-) delete mode 100644 rpt/cached_camera.hpp diff --git a/bindings/rpt_module.cpp b/bindings/rpt_module.cpp index b6e4f1a..39aedc2 100644 --- a/bindings/rpt_module.cpp +++ b/bindings/rpt_module.cpp @@ -139,15 +139,54 @@ NB_MODULE(_core, m) .value("FISHEYE", CameraModel::Fisheye); nb::class_(m, "Camera") - .def(nb::init<>()) - .def_rw("name", &Camera::name) - .def_rw("K", &Camera::K) - .def_rw("DC", &Camera::DC) - .def_rw("R", &Camera::R) - .def_rw("T", &Camera::T) - .def_rw("width", &Camera::width) - .def_rw("height", &Camera::height) - .def_rw("model", &Camera::model) + .def_prop_ro("name", [](const Camera &camera) + { + return camera.name; + }) + .def_prop_ro("K", [](const Camera &camera) + { + return camera.K; + }) + .def_prop_ro("DC", [](const Camera &camera) + { + return camera.DC; + }) + .def_prop_ro("R", [](const Camera &camera) + { + return camera.R; + }) + .def_prop_ro("T", [](const Camera &camera) + { + return camera.T; + }) + .def_prop_ro("width", [](const Camera &camera) + { + return camera.width; + }) + .def_prop_ro("height", [](const Camera &camera) + { + return camera.height; + }) + .def_prop_ro("model", [](const Camera &camera) + { + return camera.model; + }) + .def_prop_ro("invR", [](const Camera &camera) + { + return camera.invR; + }) + .def_prop_ro("center", [](const Camera &camera) + { + return camera.center; + }) + .def_prop_ro("newK", [](const Camera &camera) + { + return camera.newK; + }) + .def_prop_ro("invK", [](const Camera &camera) + { + return camera.invK; + }) .def("__repr__", [](const Camera &camera) { return camera.to_string(); @@ -248,6 +287,18 @@ NB_MODULE(_core, m) return pose_batch_to_numpy_copy(trace.final_poses); }, nb::rv_policy::move); + m.def( + "make_camera", + &make_camera, + "name"_a, + "K"_a, + "DC"_a, + "R"_a, + "T"_a, + "width"_a, + "height"_a, + "model"_a); + m.def( "build_pair_candidates", [](const PoseArray2D &poses_2d, const CountArray &person_counts) diff --git a/rpt/cached_camera.hpp b/rpt/cached_camera.hpp deleted file mode 100644 index f9d51dc..0000000 --- a/rpt/cached_camera.hpp +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include - -#include "camera.hpp" - -struct CachedCamera -{ - const Camera cam; - const std::array, 3> invR; - const std::array center; - const std::array, 3> newK; - const std::array, 3> invK; -}; - -CachedCamera cache_camera(const Camera &camera); - -void undistort_point_pinhole(std::array &point, const std::array &distortion); -void undistort_point_fisheye(std::array &point, const std::array &distortion); diff --git a/rpt/camera.cpp b/rpt/camera.cpp index 5b2ffad..c89718c 100644 --- a/rpt/camera.cpp +++ b/rpt/camera.cpp @@ -1,12 +1,12 @@ #include #include #include -#include #include +#include +#include #include -#include -#include "cached_camera.hpp" +#include "camera.hpp" namespace { @@ -156,28 +156,46 @@ std::ostream &operator<<(std::ostream &out, const Camera &cam) // ================================================================================================= // ================================================================================================= -CachedCamera cache_camera(const Camera &cam) +Camera make_camera( + std::string name, + std::array, 3> K, + std::array DC, + std::array, 3> R, + std::array, 3> T, + int width, + int height, + CameraModel model) { - const std::array, 3> invR = transpose3x3(cam.R); + Camera cam { + .name = std::move(name), + .K = K, + .DC = DC, + .R = R, + .T = T, + .width = width, + .height = height, + .model = model, + }; + + cam.invR = transpose3x3(cam.R); // Camera center: // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T - const std::array center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]}; + cam.center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]}; // Undistort camera matrix // As with the undistortion, the own implementation avoids some overhead compared to OpenCV - std::array, 3> newK; if (cam.model == CameraModel::Fisheye) { - newK = calc_optimal_camera_matrix_fisheye(cam, 1.0f, {cam.width, cam.height}); + cam.newK = calc_optimal_camera_matrix_fisheye(cam, 1.0f, {cam.width, cam.height}); } else { - newK = calc_optimal_camera_matrix_pinhole(cam, 1.0f, {cam.width, cam.height}); + cam.newK = calc_optimal_camera_matrix_pinhole(cam, 1.0f, {cam.width, cam.height}); } - const std::array, 3> invK = invert3x3(newK); + cam.invK = invert3x3(cam.newK); - return CachedCamera {cam, invR, center, newK, invK}; + return cam; } // ================================================================================================= diff --git a/rpt/camera.hpp b/rpt/camera.hpp index 3247b67..64a5ca7 100644 --- a/rpt/camera.hpp +++ b/rpt/camera.hpp @@ -25,10 +25,27 @@ struct Camera std::array DC = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; std::array, 3> R; std::array, 3> T; - int width; - int height; + int width = 0; + int height = 0; CameraModel model = CameraModel::Pinhole; + std::array, 3> invR {}; + std::array center {}; + std::array, 3> newK {}; + std::array, 3> invK {}; friend std::ostream &operator<<(std::ostream &out, Camera const &camera); std::string to_string() const; }; + +Camera make_camera( + std::string name, + std::array, 3> K, + std::array DC, + std::array, 3> R, + std::array, 3> T, + int width, + int height, + CameraModel model); + +void undistort_point_pinhole(std::array &point, const std::array &distortion); +void undistort_point_fisheye(std::array &point, const std::array &distortion); diff --git a/rpt/triangulator.cpp b/rpt/triangulator.cpp index b066768..077bd8c 100644 --- a/rpt/triangulator.cpp +++ b/rpt/triangulator.cpp @@ -11,7 +11,6 @@ #include #include "interface.hpp" -#include "cached_camera.hpp" namespace { @@ -25,73 +24,14 @@ size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_jo return (((pose * num_joints) + joint) * 3) + coord; } -[[maybe_unused]] static void print_2d_poses(const Pose2D &poses) -{ - std::cout << "Poses: (" << poses.size() << ", 3)[" << std::endl; - for (const auto &pose : poses) - { - std::cout << " ["; - for (size_t i = 0; i < 3; ++i) - { - std::cout << std::fixed << std::setprecision(3) << pose[i]; - if (i < 2) - { - std::cout << ", "; - } - } - std::cout << "]" << std::endl; - } - std::cout << "]" << std::endl; -} - -[[maybe_unused]] static void print_3d_poses(const Pose3D &poses) -{ - std::cout << "Poses: (" << poses.size() << ", 4)[" << std::endl; - for (const auto &pose : poses) - { - std::cout << " ["; - for (size_t i = 0; i < 4; ++i) - { - std::cout << std::fixed << std::setprecision(3) << pose[i]; - if (i < 3) - { - std::cout << ", "; - } - } - std::cout << "]" << std::endl; - } - std::cout << "]" << std::endl; -} - -[[maybe_unused]] static void print_allpairs(const std::vector &all_pairs) -{ - std::cout << "All pairs: [" << std::endl; - for (const auto &pair : all_pairs) - { - const auto &tuple_part = pair.first; - const auto &pair_part = pair.second; - - std::cout << "(" - << std::get<0>(tuple_part) << ", " - << std::get<1>(tuple_part) << ", " - << std::get<2>(tuple_part) << ", " - << std::get<3>(tuple_part) << ")"; - std::cout << ", (" - << pair_part.first << ", " - << pair_part.second << ")" - << std::endl; - } - std::cout << "]" << std::endl; -} - std::array undistort_point( - const std::array &raw_point, const CachedCamera &icam) + const std::array &raw_point, const Camera &icam) { std::array point = raw_point; - const float ifx_old = 1.0f / icam.cam.K[0][0]; - const float ify_old = 1.0f / icam.cam.K[1][1]; - const float cx_old = icam.cam.K[0][2]; - const float cy_old = icam.cam.K[1][2]; + const float ifx_old = 1.0f / icam.K[0][0]; + const float ify_old = 1.0f / icam.K[1][1]; + const float cx_old = icam.K[0][2]; + const float cy_old = icam.K[1][2]; const float fx_new = icam.newK[0][0]; const float fy_new = icam.newK[1][1]; const float cx_new = icam.newK[0][2]; @@ -100,21 +40,21 @@ std::array undistort_point( point[0] = (point[0] - cx_old) * ifx_old; point[1] = (point[1] - cy_old) * ify_old; - if (icam.cam.model == CameraModel::Fisheye) + if (icam.model == CameraModel::Fisheye) { - undistort_point_fisheye(point, icam.cam.DC); + undistort_point_fisheye(point, icam.DC); } else { - undistort_point_pinhole(point, icam.cam.DC); + undistort_point_pinhole(point, icam.DC); } point[0] = (point[0] * fx_new) + cx_new; point[1] = (point[1] * fy_new) + cy_new; - const float mask_offset = (icam.cam.width + icam.cam.height) / 20.0f; - if (point[0] < -mask_offset || point[0] >= icam.cam.width + mask_offset || - point[1] < -mask_offset || point[1] >= icam.cam.height + mask_offset) + const float mask_offset = (icam.width + icam.height) / 20.0f; + if (point[0] < -mask_offset || point[0] >= icam.width + mask_offset || + point[1] < -mask_offset || point[1] >= icam.height + mask_offset) { point = {0.0f, 0.0f, 0.0f}; } @@ -127,7 +67,7 @@ class PackedPoseStore2D public: PackedPoseStore2D( const PoseBatch2DView &source, - const std::vector &internal_cameras) + const std::vector &internal_cameras) : person_counts(source.num_views), view_offsets(source.num_views), num_views(source.num_views), @@ -225,17 +165,17 @@ private: struct PreparedInputs { - std::vector internal_cameras; + const std::vector *internal_cameras = nullptr; std::vector core_joint_idx; std::vector> core_limbs_idx; PackedPoseStore2D packed_poses; PreparedInputs( - std::vector cameras_in, + const std::vector &cameras_in, std::vector core_joint_idx_in, std::vector> core_limbs_idx_in, PackedPoseStore2D packed_poses_in) - : internal_cameras(std::move(cameras_in)), + : internal_cameras(&cameras_in), core_joint_idx(std::move(core_joint_idx_in)), core_limbs_idx(std::move(core_limbs_idx_in)), packed_poses(std::move(packed_poses_in)) @@ -279,11 +219,11 @@ constexpr std::array, 8> kCoreLimb std::vector build_pair_candidates_from_packed(const PackedPoseStore2D &packed_poses); PreviousProjectionCache project_previous_poses( const PoseBatch3DView &previous_poses_3d, - const std::vector &internal_cameras, + const std::vector &internal_cameras, const std::vector &core_joint_idx); PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( const PackedPoseStore2D &packed_poses, - const std::vector &internal_cameras, + const std::vector &internal_cameras, const std::vector &core_joint_idx, const std::vector &pairs, const TriangulationOptions &options, @@ -292,10 +232,10 @@ float calc_pose_score( const Pose2D &pose, const Pose2D &reference_pose, const std::vector &dists, - const CachedCamera &icam); + const Camera &icam); std::tuple, std::vector>> project_poses( const std::vector &poses_3d, - const CachedCamera &icam, + const Camera &icam, bool calc_dists); std::vector score_projection( const Pose2D &pose, @@ -306,8 +246,8 @@ std::vector score_projection( std::pair triangulate_and_score( const Pose2D &pose1, const Pose2D &pose2, - const CachedCamera &cam1, - const CachedCamera &cam2, + const Camera &cam1, + const Camera &cam2, const std::array, 2> &roomparams, const std::vector> &core_limbs_idx); std::vector calc_grouping( @@ -379,13 +319,6 @@ PreparedInputs prepare_inputs( } } - std::vector internal_cameras; - internal_cameras.reserve(cameras.size()); - for (const auto &camera : cameras) - { - internal_cameras.push_back(cache_camera(camera)); - } - std::vector core_joint_idx; core_joint_idx.reserve(kCoreJoints.size()); for (const auto &joint : kCoreJoints) @@ -405,9 +338,9 @@ PreparedInputs prepare_inputs( static_cast(std::distance(kCoreJoints.begin(), it2))}); } - PackedPoseStore2D packed_poses(poses_2d, internal_cameras); + PackedPoseStore2D packed_poses(poses_2d, cameras); return PreparedInputs( - std::move(internal_cameras), + cameras, std::move(core_joint_idx), std::move(core_limbs_idx), std::move(packed_poses)); @@ -441,7 +374,7 @@ std::vector build_pair_candidates_from_packed(const PackedPoseSto PreviousProjectionCache project_previous_poses( const PoseBatch3DView &previous_poses_3d, - const std::vector &internal_cameras, + const std::vector &internal_cameras, const std::vector &core_joint_idx) { PreviousProjectionCache cache; @@ -476,7 +409,7 @@ float calc_pose_score( const Pose2D &pose, const Pose2D &reference_pose, const std::vector &dists, - const CachedCamera &icam) + const Camera &icam) { const float min_score = 0.1f; std::vector mask(pose.size(), false); @@ -494,7 +427,7 @@ float calc_pose_score( return 0.0f; } - const float iscale = (icam.cam.width + icam.cam.height) * 0.5f; + const float iscale = (icam.width + icam.height) * 0.5f; std::vector scores = score_projection(pose, reference_pose, dists, mask, iscale); const size_t drop_k = static_cast(pose.size() * 0.2f); if (drop_k > 0) @@ -512,7 +445,7 @@ float calc_pose_score( PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( const PackedPoseStore2D &packed_poses, - const std::vector &internal_cameras, + const std::vector &internal_cameras, const std::vector &core_joint_idx, const std::vector &pairs, const TriangulationOptions &options, @@ -643,7 +576,7 @@ TriangulationTrace triangulate_debug_impl( { trace.previous_filter = filter_pairs_with_previous_poses_impl( prepared.packed_poses, - prepared.internal_cameras, + *prepared.internal_cameras, prepared.core_joint_idx, trace.pairs, options, @@ -663,8 +596,8 @@ TriangulationTrace triangulate_debug_impl( for (size_t i = 0; i < active_pairs.size(); ++i) { const PairCandidate &pair = active_pairs[i]; - const CachedCamera &cam1 = prepared.internal_cameras[static_cast(pair.view1)]; - const CachedCamera &cam2 = prepared.internal_cameras[static_cast(pair.view2)]; + const Camera &cam1 = (*prepared.internal_cameras)[static_cast(pair.view1)]; + const Camera &cam2 = (*prepared.internal_cameras)[static_cast(pair.view2)]; const Pose2D pose1_core = prepared.packed_poses.pose( static_cast(pair.view1), static_cast(pair.person1), prepared.core_joint_idx); const Pose2D pose2_core = prepared.packed_poses.pose( @@ -849,8 +782,8 @@ TriangulationTrace triangulate_debug_impl( kept_pose_pairs[i].second.first, kept_pose_pairs[i].second.second, }; - const CachedCamera &cam1 = prepared.internal_cameras[static_cast(pair.view1)]; - const CachedCamera &cam2 = prepared.internal_cameras[static_cast(pair.view2)]; + const Camera &cam1 = (*prepared.internal_cameras)[static_cast(pair.view1)]; + const Camera &cam2 = (*prepared.internal_cameras)[static_cast(pair.view2)]; Pose2D pose1 = prepared.packed_poses.pose(static_cast(pair.view1), static_cast(pair.person1)); Pose2D pose2 = prepared.packed_poses.pose(static_cast(pair.view2), static_cast(pair.person2)); @@ -893,7 +826,7 @@ TriangulationTrace triangulate_debug_impl( poses.push_back(full_pose_buffer[static_cast(index)]); source_core_indices.push_back(kept_core_indices[static_cast(index)]); } - final_poses_3d[i] = merge_group(poses, options.min_match_score, prepared.internal_cameras.size()); + final_poses_3d[i] = merge_group(poses, options.min_match_score, prepared.internal_cameras->size()); trace.merge.group_proposal_indices.push_back(std::move(source_core_indices)); trace.merge.merged_poses.push_back(final_poses_3d[i]); } @@ -973,7 +906,7 @@ PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( return filter_pairs_with_previous_poses_impl( prepared.packed_poses, - prepared.internal_cameras, + *prepared.internal_cameras, prepared.core_joint_idx, pairs, options, @@ -982,7 +915,7 @@ PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( std::tuple, std::vector>> project_poses( const std::vector &poses_3d, - const CachedCamera &icam, + const Camera &icam, bool calc_dists) { if (poses_3d.empty()) @@ -998,7 +931,7 @@ std::tuple, std::vector>> project_poses( const std::array, 3> &K = icam.newK; const std::array, 3> &R = icam.invR; - const std::array, 3> &T = icam.cam.T; + const std::array, 3> &T = icam.T; for (size_t i = 0; i < num_persons; ++i) { @@ -1037,7 +970,7 @@ std::tuple, std::vector>> project_poses( v = (K[1][0] * x_cam + K[1][1] * y_cam + K[1][2] * z_cam) / z_cam; } - if (u < 0.0f || u >= icam.cam.width || v < 0.0f || v >= icam.cam.height) + if (u < 0.0f || u >= icam.width || v < 0.0f || v >= icam.height) { u = 0.0f; v = 0.0f; @@ -1155,19 +1088,19 @@ std::array mat_mul_vec( return res; } -std::array calc_ray_dir(const CachedCamera &icam, const std::array &pt) +std::array calc_ray_dir(const Camera &icam, const std::array &pt) { // Compute normalized ray direction from the point std::array uv1 = {pt[0], pt[1], 1.0}; - auto d = mat_mul_vec(icam.cam.R, mat_mul_vec(icam.invK, uv1)); + auto d = mat_mul_vec(icam.R, mat_mul_vec(icam.invK, uv1)); auto ray_dir = normalize(d); return ray_dir; } std::array triangulate_midpoint( - const CachedCamera &icam1, - const CachedCamera &icam2, + const Camera &icam1, + const Camera &icam2, const std::array &pt1, const std::array &pt2) { @@ -1206,8 +1139,8 @@ std::array triangulate_midpoint( std::pair>, float> triangulate_and_score( const std::vector> &pose1, const std::vector> &pose2, - const CachedCamera &cam1, - const CachedCamera &cam2, + const Camera &cam1, + const Camera &cam2, const std::array, 2> &roomparams, const std::vector> &core_limbs_idx) { @@ -1307,7 +1240,7 @@ std::pair>, float> triangulate_and_score( auto dists2 = dists2s[0]; // Calculate scores for each view - float iscale = (cam1.cam.width + cam1.cam.height) / 2.0; + float iscale = (cam1.width + cam1.height) / 2.0f; std::vector score1 = score_projection(pose1, repro1, dists1, mask, iscale); std::vector score2 = score_projection(pose2, repro2, dists2, mask, iscale); diff --git a/src/rpt/__init__.py b/src/rpt/__init__.py index 352e2f9..f8560bb 100644 --- a/src/rpt/__init__.py +++ b/src/rpt/__init__.py @@ -19,6 +19,7 @@ from ._core import ( TriangulationTrace, build_pair_candidates, filter_pairs_with_previous_poses, + make_camera, triangulate_debug, triangulate_poses, ) @@ -80,6 +81,7 @@ __all__ = [ "build_pair_candidates", "convert_cameras", "filter_pairs_with_previous_poses", + "make_camera", "make_triangulation_config", "pack_poses_2d", "triangulate_debug", diff --git a/src/rpt/_helpers.py b/src/rpt/_helpers.py index a9581f6..76b213e 100644 --- a/src/rpt/_helpers.py +++ b/src/rpt/_helpers.py @@ -6,7 +6,7 @@ from typing import Literal, TypeAlias, TypedDict import numpy as np import numpy.typing as npt -from ._core import Camera, CameraModel, TriangulationConfig, TriangulationOptions +from ._core import Camera, CameraModel, TriangulationConfig, TriangulationOptions, make_camera Matrix3x3Like: TypeAlias = Sequence[Sequence[float]] VectorLike: TypeAlias = Sequence[float] @@ -64,17 +64,19 @@ def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]: converted.append(cam) continue - camera = Camera() - camera.name = str(cam["name"]) - camera.K = cam["K"] camera_model = _coerce_camera_model(cam.get("model", cam.get("type", "pinhole"))) - camera.DC = _coerce_distortion(cam["DC"], camera_model) - camera.R = cam["R"] - camera.T = cam["T"] - camera.width = int(cam["width"]) - camera.height = int(cam["height"]) - camera.model = camera_model - converted.append(camera) + converted.append( + make_camera( + str(cam["name"]), + cam["K"], + _coerce_distortion(cam["DC"], camera_model), + cam["R"], + cam["T"], + int(cam["width"]), + int(cam["height"]), + camera_model, + ) + ) return converted diff --git a/tests/test_interface.py b/tests/test_interface.py index 2fb2508..27c0e9a 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -51,15 +51,16 @@ def make_config(cameras, roomparams) -> rpt.TriangulationConfig: def test_camera_structure_repr(): - camera = rpt.Camera() - camera.name = "Camera 1" - camera.K = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] - camera.DC = [0, 0, 0, 0, 0] - camera.R = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] - camera.T = [[1], [2], [3]] - camera.width = 640 - camera.height = 480 - camera.model = rpt.CameraModel.PINHOLE + camera = rpt.make_camera( + "Camera 1", + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [0, 0, 0, 0, 0], + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[1], [2], [3]], + 640, + 480, + rpt.CameraModel.PINHOLE, + ) rendered = repr(camera) assert "Camera 1" in rendered