From 24f74c87f195de9c64e669fc3a1cd571e212ee68 Mon Sep 17 00:00:00 2001 From: crosstyan Date: Wed, 11 Mar 2026 22:29:21 +0800 Subject: [PATCH] Make triangulation a zero-copy pure function --- bindings/rpt_module.cpp | 73 +- .../rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp | 8 +- rpt/interface.cpp | 78 +- rpt/interface.hpp | 95 +- rpt/triangulator.cpp | 1097 +++++++---------- rpt/triangulator.hpp | 130 -- src/rpt/__init__.py | 4 +- src/rpt/_helpers.py | 27 +- tests/README.md | 2 +- tests/test_interface.py | 29 +- 10 files changed, 596 insertions(+), 947 deletions(-) delete mode 100644 rpt/triangulator.hpp diff --git a/bindings/rpt_module.cpp b/bindings/rpt_module.cpp index 627c81d..c1279df 100644 --- a/bindings/rpt_module.cpp +++ b/bindings/rpt_module.cpp @@ -1,7 +1,5 @@ #include #include -#include -#include #include #include @@ -23,31 +21,28 @@ using CountArray = nb::ndarray, nb::c_c using RoomArray = nb::ndarray, nb::c_contig>; using PoseArray3D = nb::ndarray, nb::c_contig>; -PoseBatch2D pose_batch_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts) +PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts) { if (poses_2d.shape(0) != person_counts.shape(0)) { throw std::invalid_argument("poses_2d and person_counts must have the same number of views."); } - PoseBatch2D batch; - batch.num_views = static_cast(poses_2d.shape(0)); - batch.max_persons = static_cast(poses_2d.shape(1)); - batch.num_joints = static_cast(poses_2d.shape(2)); - batch.person_counts.assign(person_counts.data(), person_counts.data() + batch.num_views); - - for (size_t i = 0; i < batch.person_counts.size(); ++i) + for (size_t i = 0; i < static_cast(person_counts.shape(0)); ++i) { - if (batch.person_counts[i] > batch.max_persons) + if (person_counts(i) > poses_2d.shape(1)) { throw std::invalid_argument("person_counts entries must not exceed the padded person dimension."); } } - const size_t total_size = batch.num_views * batch.max_persons * batch.num_joints * 3; - batch.data.resize(total_size); - std::memcpy(batch.data.data(), poses_2d.data(), total_size * sizeof(float)); - return batch; + return PoseBatch2DView { + poses_2d.data(), + person_counts.data(), + static_cast(poses_2d.shape(0)), + static_cast(poses_2d.shape(1)), + static_cast(poses_2d.shape(2)), + }; } std::array, 2> roomparams_from_numpy(const RoomArray &roomparams) @@ -93,29 +88,27 @@ NB_MODULE(_core, m) return camera.to_string(); }); - nb::class_(m, "Triangulator") - .def(nb::init(), - "min_match_score"_a = 0.95f, - "min_group_size"_a = 1) - .def( - "triangulate_poses", - [](Triangulator &self, - const PoseArray2D &poses_2d, - const CountArray &person_counts, - const std::vector &cameras, - const RoomArray &roomparams, - const std::vector &joint_names) - { - PoseBatch2D pose_batch = pose_batch_from_numpy(poses_2d, person_counts); - auto room = roomparams_from_numpy(roomparams); - PoseBatch3D poses_3d = self.triangulate_poses(pose_batch, cameras, room, joint_names); - return pose_batch_to_numpy(std::move(poses_3d)); - }, - "poses_2d"_a, - "person_counts"_a, - "cameras"_a, - "roomparams"_a, - "joint_names"_a) - .def("reset", &Triangulator::reset) - .def("print_stats", &Triangulator::print_stats); + m.def( + "triangulate_poses", + [](const PoseArray2D &poses_2d, + const CountArray &person_counts, + const std::vector &cameras, + const RoomArray &roomparams, + const std::vector &joint_names, + float min_match_score, + size_t min_group_size) + { + const PoseBatch2DView pose_batch = pose_batch_view_from_numpy(poses_2d, person_counts); + const auto room = roomparams_from_numpy(roomparams); + PoseBatch3D poses_3d = triangulate_poses( + pose_batch, cameras, room, joint_names, min_match_score, min_group_size); + return pose_batch_to_numpy(std::move(poses_3d)); + }, + "poses_2d"_a, + "person_counts"_a, + "cameras"_a, + "roomparams"_a, + "joint_names"_a, + "min_match_score"_a = 0.95f, + "min_group_size"_a = 1); } diff --git a/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp b/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp index 3a60178..be9104d 100644 --- a/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp +++ b/extras/ros/rpt3d_wrapper_cpp/src/rpt3d_wrapper.cpp @@ -66,8 +66,6 @@ public: this->all_poses_set.resize(cam_ids.size(), false); // Load 3D models - tri_model = std::make_unique( - min_match_score, min_group_size); pose_tracker = std::make_unique( max_movement_speed, max_track_distance); @@ -120,7 +118,6 @@ private: std::vector::SharedPtr> sub_cam_list_; rclcpp::Publisher::SharedPtr pose_pub_; - std::unique_ptr tri_model; std::unique_ptr pose_tracker; std::vector all_cameras; std::mutex cams_mutex, pose_mutex, model_mutex; @@ -240,8 +237,9 @@ void Rpt3DWrapperNode::call_model() cams_mutex.lock(); pose_mutex.lock(); PoseBatch2D pose_batch_2d = PoseBatch2D::from_nested(all_poses); - auto poses_3d = tri_model->triangulate_poses( - pose_batch_2d, all_cameras, roomparams, joint_names).to_nested(); + auto poses_3d = triangulate_poses( + pose_batch_2d, all_cameras, roomparams, joint_names, min_match_score, min_group_size) + .to_nested(); double min_ts = *std::min_element(all_timestamps.begin(), all_timestamps.end()); this->all_poses_set = std::vector(cam_ids.size(), false); diff --git a/rpt/interface.cpp b/rpt/interface.cpp index e8b9ca2..477cc69 100644 --- a/rpt/interface.cpp +++ b/rpt/interface.cpp @@ -1,7 +1,7 @@ +#include #include #include -#include "triangulator.hpp" #include "interface.hpp" // ================================================================================================= @@ -33,36 +33,19 @@ float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)]; } +const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_t coord) const +{ + return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)]; +} + const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const { return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)]; } -RaggedPoses2D PoseBatch2D::to_nested() const +PoseBatch2DView PoseBatch2D::view() const { - if (person_counts.size() != num_views) - { - throw std::invalid_argument("PoseBatch2D person_counts size must match num_views."); - } - - RaggedPoses2D poses_2d(num_views); - for (size_t view = 0; view < num_views; ++view) - { - const size_t num_persons = person_counts[view]; - poses_2d[view].resize(num_persons); - for (size_t person = 0; person < num_persons; ++person) - { - poses_2d[view][person].resize(num_joints); - for (size_t joint = 0; joint < num_joints; ++joint) - { - for (size_t coord = 0; coord < 3; ++coord) - { - poses_2d[view][person][joint][coord] = at(view, person, joint, coord); - } - } - } - } - return poses_2d; + return PoseBatch2DView {data.data(), person_counts.data(), num_views, max_persons, num_joints}; } PoseBatch2D PoseBatch2D::from_nested(const RaggedPoses2D &poses_2d) @@ -169,48 +152,3 @@ PoseBatch3D PoseBatch3D::from_nested(const NestedPoses3D &poses_3d) } return batch; } - -// ================================================================================================= -// ================================================================================================= - -Triangulator::Triangulator(float min_match_score, size_t min_group_size) - : triangulator(std::make_unique(min_match_score, min_group_size)) -{ -} - -Triangulator::~Triangulator() = default; - -// ================================================================================================= - -PoseBatch3D Triangulator::triangulate_poses( - const PoseBatch2D &poses_2d, - const std::vector &cameras, - const std::array, 2> &roomparams, - const std::vector &joint_names) -{ - return PoseBatch3D::from_nested( - triangulator->triangulate_poses(poses_2d.to_nested(), cameras, roomparams, joint_names)); -} - -NestedPoses3D Triangulator::triangulate_poses( - const RaggedPoses2D &poses_2d, - const std::vector &cameras, - const std::array, 2> &roomparams, - const std::vector &joint_names) -{ - return triangulator->triangulate_poses(poses_2d, cameras, roomparams, joint_names); -} - -// ================================================================================================= - -void Triangulator::reset() -{ - triangulator->reset(); -} - -// ================================================================================================= - -void Triangulator::print_stats() -{ - triangulator->print_stats(); -} diff --git a/rpt/interface.hpp b/rpt/interface.hpp index d3f369a..f64f932 100644 --- a/rpt/interface.hpp +++ b/rpt/interface.hpp @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include #include @@ -14,6 +14,17 @@ using NestedPoses3D = std::vector>>; // ================================================================================================= +struct PoseBatch2DView +{ + const float *data = nullptr; + const uint32_t *person_counts = nullptr; + size_t num_views = 0; + size_t max_persons = 0; + size_t num_joints = 0; + + const float &at(size_t view, size_t person, size_t joint, size_t coord) const; +}; + struct PoseBatch2D { std::vector data; @@ -24,8 +35,8 @@ struct PoseBatch2D float &at(size_t view, size_t person, size_t joint, size_t coord); const float &at(size_t view, size_t person, size_t joint, size_t coord) const; + PoseBatch2DView view() const; - RaggedPoses2D to_nested() const; static PoseBatch2D from_nested(const RaggedPoses2D &poses_2d); }; @@ -44,56 +55,34 @@ struct PoseBatch3D // ================================================================================================= -class TriangulatorInternal; +/** + * Calculate a triangulation using a padded pose tensor. + * + * @param poses_2d Padded poses of shape [views, max_persons, joints, 3]. + * @param cameras List of cameras. + * @param roomparams Room parameters (room size, room center). + * @param joint_names List of 2D joint names. + * @param min_match_score Minimum score to consider a triangulated joint as valid. + * @param min_group_size Minimum number of camera pairs that need to see a person. + * + * @return Pose tensor of shape [persons, joints, 4]. + */ +PoseBatch3D triangulate_poses( + const PoseBatch2DView &poses_2d, + const std::vector &cameras, + const std::array, 2> &roomparams, + const std::vector &joint_names, + float min_match_score = 0.95f, + size_t min_group_size = 1); -// ================================================================================================= - -class Triangulator +inline PoseBatch3D triangulate_poses( + const PoseBatch2D &poses_2d, + const std::vector &cameras, + const std::array, 2> &roomparams, + const std::vector &joint_names, + float min_match_score = 0.95f, + size_t min_group_size = 1) { -public: - /** - * Triangulator to predict poses from multiple views. - * - * @param min_match_score Minimum score to consider a triangulated joint as valid. - * @param min_group_size Minimum number of camera pairs that need to see a person. - */ - Triangulator( - float min_match_score = 0.95, - size_t min_group_size = 1); - - ~Triangulator(); - - /** - * Calculate a triangulation using a padded pose tensor. - * - * @param poses_2d Padded poses of shape [views, max_persons, joints, 3]. - * @param cameras List of cameras. - * @param roomparams Room parameters (room size, room center). - * @param joint_names List of 2D joint names. - * - * @return Pose tensor of shape [persons, joints, 4]. - */ - PoseBatch3D triangulate_poses( - const PoseBatch2D &poses_2d, - const std::vector &cameras, - const std::array, 2> &roomparams, - const std::vector &joint_names); - - /** - * Compatibility overload for callers that still produce ragged nested vectors. - */ - NestedPoses3D triangulate_poses( - const RaggedPoses2D &poses_2d, - const std::vector &cameras, - const std::array, 2> &roomparams, - const std::vector &joint_names); - - /** Reset the triangulator. */ - void reset(); - - /** Print triangulation statistics. */ - void print_stats(); - -private: - std::unique_ptr triangulator; -}; + return triangulate_poses( + poses_2d.view(), cameras, roomparams, joint_names, min_match_score, min_group_size); +} diff --git a/rpt/triangulator.cpp b/rpt/triangulator.cpp index 9746d5d..e70140b 100644 --- a/rpt/triangulator.cpp +++ b/rpt/triangulator.cpp @@ -1,19 +1,29 @@ #include -#include #include #include +#include +#include #include #include #include +#include #include -#include "camera.hpp" -#include "triangulator.hpp" +#include "interface.hpp" -// ================================================================================================= -// ================================================================================================= +namespace +{ +using Pose2D = std::vector>; +using Pose3D = std::vector>; +using PosePair = std::pair, std::pair>; +using GroupedPose = std::tuple, Pose3D, std::vector>; -[[maybe_unused]] static void print_2d_poses(const std::vector> &poses) +size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_joints) +{ + return (((pose * num_joints) + joint) * 3) + coord; +} + +[[maybe_unused]] static void print_2d_poses(const Pose2D &poses) { std::cout << "Poses: (" << poses.size() << ", 3)[" << std::endl; for (const auto &pose : poses) @@ -32,9 +42,7 @@ std::cout << "]" << std::endl; } -// ================================================================================================= - -[[maybe_unused]] static void print_3d_poses(const std::vector> &poses) +[[maybe_unused]] static void print_3d_poses(const Pose3D &poses) { std::cout << "Poses: (" << poses.size() << ", 4)[" << std::endl; for (const auto &pose : poses) @@ -53,10 +61,7 @@ std::cout << "]" << std::endl; } -// ================================================================================================= - -[[maybe_unused]] static void print_allpairs( - const std::vector, std::pair>> &all_pairs) +[[maybe_unused]] static void print_allpairs(const std::vector &all_pairs) { std::cout << "All pairs: [" << std::endl; for (const auto &pair : all_pairs) @@ -64,14 +69,11 @@ const auto &tuple_part = pair.first; const auto &pair_part = pair.second; - // Print the tuple part std::cout << "(" << std::get<0>(tuple_part) << ", " << std::get<1>(tuple_part) << ", " << std::get<2>(tuple_part) << ", " << std::get<3>(tuple_part) << ")"; - - // Print the pair part std::cout << ", (" << pair_part.first << ", " << pair_part.second << ")" @@ -80,40 +82,270 @@ std::cout << "]" << std::endl; } -// ================================================================================================= -// ================================================================================================= - -TriangulatorInternal::TriangulatorInternal(float min_match_score, size_t min_group_size) +std::array undistort_point( + const std::array &raw_point, const CameraInternal &icam) { - this->min_match_score = min_match_score; - this->min_group_size = min_group_size; + std::array point = raw_point; + const float ifx_old = 1.0f / icam.cam.K[0][0]; + const float ify_old = 1.0f / icam.cam.K[1][1]; + const float cx_old = icam.cam.K[0][2]; + const float cy_old = icam.cam.K[1][2]; + const float fx_new = icam.newK[0][0]; + const float fy_new = icam.newK[1][1]; + const float cx_new = icam.newK[0][2]; + const float cy_new = icam.newK[1][2]; + + point[0] = (point[0] - cx_old) * ifx_old; + point[1] = (point[1] - cy_old) * ify_old; + + if (icam.cam.type == "fisheye") + { + CameraInternal::undistort_point_fisheye(point, icam.cam.DC); + } + else + { + CameraInternal::undistort_point_pinhole(point, icam.cam.DC); + } + + point[0] = (point[0] * fx_new) + cx_new; + point[1] = (point[1] * fy_new) + cy_new; + + const float mask_offset = (icam.cam.width + icam.cam.height) / 20.0f; + if (point[0] < -mask_offset || point[0] >= icam.cam.width + mask_offset || + point[1] < -mask_offset || point[1] >= icam.cam.height + mask_offset) + { + point = {0.0f, 0.0f, 0.0f}; + } + + return point; } -// ================================================================================================= +class PackedPoseStore2D +{ +public: + PackedPoseStore2D( + const PoseBatch2DView &source, + const std::vector &internal_cameras) + : person_counts(source.num_views), + view_offsets(source.num_views), + num_views(source.num_views), + num_joints(source.num_joints) + { + for (size_t view = 0; view < num_views; ++view) + { + person_counts[view] = source.person_counts[view]; + if (person_counts[view] > source.max_persons) + { + throw std::invalid_argument( + "person_counts entries must not exceed the padded person dimension."); + } + view_offsets[view] = total_persons; + total_persons += person_counts[view]; + } -std::vector>> TriangulatorInternal::triangulate_poses( - const std::vector>>> &poses_2d, + data.resize(total_persons * num_joints * 3); + for (size_t view = 0; view < num_views; ++view) + { + for (size_t person = 0; person < person_counts[view]; ++person) + { + const size_t packed_pose = pose_index(view, person); + for (size_t joint = 0; joint < num_joints; ++joint) + { + const std::array undistorted = undistort_point( + { + source.at(view, person, joint, 0), + source.at(view, person, joint, 1), + source.at(view, person, joint, 2), + }, + internal_cameras[view]); + for (size_t coord = 0; coord < 3; ++coord) + { + at(packed_pose, joint, coord) = undistorted[coord]; + } + } + } + } + } + + size_t pose_index(size_t view, size_t person) const + { + return view_offsets[view] + person; + } + + float &at(size_t pose, size_t joint, size_t coord) + { + return data[packed_pose_offset(pose, joint, coord, num_joints)]; + } + + const float &at(size_t pose, size_t joint, size_t coord) const + { + return data[packed_pose_offset(pose, joint, coord, num_joints)]; + } + + Pose2D pose(size_t view, size_t person) const + { + const size_t packed_pose = pose_index(view, person); + Pose2D result(num_joints); + for (size_t joint = 0; joint < num_joints; ++joint) + { + for (size_t coord = 0; coord < 3; ++coord) + { + result[joint][coord] = at(packed_pose, joint, coord); + } + } + return result; + } + + Pose2D pose(size_t view, size_t person, const std::vector &joint_indices) const + { + const size_t packed_pose = pose_index(view, person); + Pose2D result(joint_indices.size()); + for (size_t joint = 0; joint < joint_indices.size(); ++joint) + { + const size_t source_joint = joint_indices[joint]; + for (size_t coord = 0; coord < 3; ++coord) + { + result[joint][coord] = at(packed_pose, source_joint, coord); + } + } + return result; + } + + std::vector person_counts; + std::vector view_offsets; + size_t num_views = 0; + size_t num_joints = 0; + size_t total_persons = 0; + +private: + std::vector data; +}; + +class TriangulatorInternal +{ +public: + TriangulatorInternal(float min_match_score, size_t min_group_size) + : min_match_score(min_match_score), min_group_size(min_group_size) + { + } + + PoseBatch3D triangulate_poses( + const PoseBatch2DView &poses_2d, + const std::vector &cameras, + const std::array, 2> &roomparams, + const std::vector &joint_names); + +private: + std::tuple, std::vector>> project_poses( + const std::vector &poses_3d, + const CameraInternal &icam, + bool calc_dists); + + std::vector score_projection( + const Pose2D &pose, + const Pose2D &repro, + const std::vector &dists, + const std::vector &mask, + float iscale); + + std::pair triangulate_and_score( + const Pose2D &pose1, + const Pose2D &pose2, + const CameraInternal &cam1, + const CameraInternal &cam2, + const std::array, 2> &roomparams, + const std::vector> &core_limbs_idx); + + std::vector calc_grouping( + const std::vector &all_pairs, + const std::vector> &all_scored_poses, + float min_score); + + Pose3D merge_group( + const std::vector &poses_3d, + float min_score); + + void add_extra_joints( + std::vector &poses, + const std::vector &joint_names); + + void filter_poses( + std::vector &poses, + const std::array, 2> &roomparams, + const std::vector &core_joint_idx, + const std::vector> &core_limbs_idx); + + void add_missing_joints( + std::vector &poses, + const std::vector &joint_names); + + void replace_far_joints( + std::vector &poses, + const std::vector &joint_names); + + float min_match_score; + size_t min_group_size; + size_t num_cams = 0; + + const std::vector core_joints = { + "shoulder_left", + "shoulder_right", + "hip_left", + "hip_right", + "elbow_left", + "elbow_right", + "knee_left", + "knee_right", + "wrist_left", + "wrist_right", + "ankle_left", + "ankle_right", + }; + const std::vector> core_limbs = { + {"knee_left", "ankle_left"}, + {"hip_left", "knee_left"}, + {"hip_right", "knee_right"}, + {"knee_right", "ankle_right"}, + {"elbow_left", "wrist_left"}, + {"elbow_right", "wrist_right"}, + {"shoulder_left", "elbow_left"}, + {"shoulder_right", "elbow_right"}, + }; +}; + +PoseBatch3D empty_pose_batch3d(size_t num_joints) +{ + PoseBatch3D poses_3d; + poses_3d.num_joints = num_joints; + return poses_3d; +} + +PoseBatch3D TriangulatorInternal::triangulate_poses( + const PoseBatch2DView &poses_2d, const std::vector &cameras, const std::array, 2> &roomparams, const std::vector &joint_names) { - auto ttime = std::chrono::steady_clock::now(); - auto stime = std::chrono::steady_clock::now(); - std::chrono::duration elapsed; - - // Check inputs - if (poses_2d.empty()) + if (poses_2d.num_views == 0) { throw std::invalid_argument("No 2D poses provided."); } - if (poses_2d.size() != cameras.size()) + if (poses_2d.person_counts == nullptr) + { + throw std::invalid_argument("person_counts must not be null."); + } + if (poses_2d.num_views != cameras.size()) { throw std::invalid_argument("Number of cameras and 2D poses must be the same."); } - if (joint_names.size() != poses_2d[0][0].size()) + if (joint_names.size() != poses_2d.num_joints) { throw std::invalid_argument("Number of joint names and 2D poses must be the same."); } + if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr) + { + throw std::invalid_argument("poses_2d data must not be null."); + } for (const auto &joint : core_joints) { if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end()) @@ -122,632 +354,315 @@ std::vector>> TriangulatorInternal::triangulate } } - // Convert inputs to internal formats - this->num_cams = cameras.size(); - std::vector>>> i_poses_2d = poses_2d; + num_cams = cameras.size(); + std::vector internal_cameras; - for (size_t i = 0; i < cameras.size(); ++i) + internal_cameras.reserve(cameras.size()); + for (const auto &camera : cameras) { - internal_cameras.emplace_back(cameras[i]); + internal_cameras.emplace_back(camera); } + std::vector core_joint_idx; + core_joint_idx.reserve(core_joints.size()); for (const auto &joint : core_joints) { auto it = std::find(joint_names.begin(), joint_names.end(), joint); - core_joint_idx.push_back(std::distance(joint_names.begin(), it)); + core_joint_idx.push_back(static_cast(std::distance(joint_names.begin(), it))); } + std::vector> core_limbs_idx; + core_limbs_idx.reserve(core_limbs.size()); for (const auto &limb : core_limbs) { auto it1 = std::find(core_joints.begin(), core_joints.end(), limb.first); auto it2 = std::find(core_joints.begin(), core_joints.end(), limb.second); - core_limbs_idx.push_back({(size_t)std::distance(core_joints.begin(), it1), - (size_t)std::distance(core_joints.begin(), it2)}); + core_limbs_idx.push_back( + {static_cast(std::distance(core_joints.begin(), it1)), + static_cast(std::distance(core_joints.begin(), it2))}); } - elapsed = std::chrono::steady_clock::now() - stime; - init_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); + PackedPoseStore2D packed_poses(poses_2d, internal_cameras); - // Undistort 2D poses - for (size_t i = 0; i < cameras.size(); ++i) + std::vector all_pairs; + for (size_t view1 = 0; view1 < cameras.size(); ++view1) { - undistort_poses(i_poses_2d[i], internal_cameras[i]); - } - - elapsed = std::chrono::steady_clock::now() - stime; - undistort_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Project last 3D poses to 2D - std::vector>>, - std::vector>>> - last_poses_2d; - if (!last_poses_3d.empty()) - { - // Select core joints - std::vector>> last_core_poses; - last_core_poses.resize(last_poses_3d.size()); - for (size_t i = 0; i < last_poses_3d.size(); ++i) + for (size_t view2 = view1 + 1; view2 < cameras.size(); ++view2) { - last_core_poses[i].resize(core_joint_idx.size()); - for (size_t j = 0; j < core_joint_idx.size(); ++j) + for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1) { - for (size_t k = 0; k < 4; ++k) + for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2) { - last_core_poses[i][j][k] = last_poses_3d[i][core_joint_idx[j]][k]; - } - } - } - - // Project - last_poses_2d.resize(cameras.size()); - for (size_t i = 0; i < cameras.size(); ++i) - { - auto [poses2d, dists] = project_poses(last_core_poses, internal_cameras[i], true); - last_poses_2d[i] = std::make_tuple(poses2d, dists); - } - } - - elapsed = std::chrono::steady_clock::now() - stime; - project_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Check matches to old poses - float threshold = min_match_score - 0.2; - std::map>> scored_pasts; - if (!last_poses_3d.empty()) - { - // Calculate index pairs and initialize vectors - std::vector> indices_ijk; - for (size_t i = 0; i < cameras.size(); ++i) - { - size_t num_last_persons = std::get<0>(last_poses_2d[i]).size(); - scored_pasts[i] = std::map>(); - - for (size_t j = 0; j < num_last_persons; ++j) - { - size_t num_new_persons = poses_2d[i].size(); - scored_pasts[i][j] = std::vector(); - - for (size_t k = 0; k < num_new_persons; ++k) - { - indices_ijk.push_back({i, j, k}); - } - } - } - std::vector> indices_ik; - for (size_t i = 0; i < cameras.size(); ++i) - { - size_t num_new_persons = poses_2d[i].size(); - for (size_t k = 0; k < num_new_persons; ++k) - { - indices_ik.push_back({i, k}); - } - } - - // Precalculate core poses - std::vector>> poses_2d_core_list; - poses_2d_core_list.resize(indices_ik.size()); - std::vector> mats_core_map; - mats_core_map.resize(cameras.size()); - for (size_t i = 0; i < cameras.size(); ++i) - { - size_t num_new_persons = poses_2d[i].size(); - for (size_t k = 0; k < num_new_persons; ++k) - { - mats_core_map[i].push_back(0); - } - } - for (size_t e = 0; e < indices_ik.size(); ++e) - { - const auto [i, k] = indices_ik[e]; - const auto &pose = i_poses_2d[i][k]; - std::vector> pose_core; - pose_core.resize(core_joint_idx.size()); - for (size_t j = 0; j < core_joint_idx.size(); ++j) - { - size_t idx = core_joint_idx[j]; - pose_core[j] = pose[idx]; - } - poses_2d_core_list[e] = pose_core; - mats_core_map[i][k] = e; - } - - // Calculate matching score - for (size_t e = 0; e < indices_ijk.size(); ++e) - { - const auto [i, j, k] = indices_ijk[e]; - - const auto &last_pose = std::get<0>(last_poses_2d[i])[j]; - const auto &last_dist = std::get<1>(last_poses_2d[i])[j]; - const auto &new_pose = poses_2d_core_list[mats_core_map[i][k]]; - - float score = calc_pose_score(new_pose, last_pose, last_dist, internal_cameras[i]); - if (score > threshold) - { - { - scored_pasts[i][j].push_back(k); + all_pairs.emplace_back( + std::make_tuple( + static_cast(view1), + static_cast(view2), + static_cast(person1), + static_cast(person2)), + std::make_pair( + static_cast(packed_poses.pose_index(view1, person1)), + static_cast(packed_poses.pose_index(view2, person2)))); } } } } - elapsed = std::chrono::steady_clock::now() - stime; - match_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Create pairs of persons - // Checks if the person was already matched to the last frame and if so only creates pairs - // with those, else it creates all possible pairs - std::vector num_persons_sum; - for (size_t i = 0; i < cameras.size(); ++i) + if (all_pairs.empty()) { - int nsum = poses_2d[i].size(); - if (i > 0) - { - nsum += num_persons_sum[i - 1]; - } - num_persons_sum.push_back(nsum); - } - std::vector, std::pair>> all_pairs; - std::vector> indices; - for (size_t i = 0; i < cameras.size(); ++i) - { - for (size_t j = i + 1; j < cameras.size(); ++j) - { - for (size_t k = 0; k < poses_2d[i].size(); ++k) - { - for (size_t l = 0; l < poses_2d[j].size(); ++l) - { - indices.push_back({i, j, k, l}); - } - } - } - } - for (size_t e = 0; e < indices.size(); ++e) - { - const auto [i, j, k, l] = indices[e]; - - int pid1 = (i > 0 ? num_persons_sum[i - 1] : 0) + k; - int pid2 = (j > 0 ? num_persons_sum[j - 1] : 0) + l; - bool match = false; - - if (!last_poses_3d.empty()) - { - for (size_t m = 0; m < last_poses_3d.size(); ++m) - { - auto &smi = scored_pasts[i][m]; - auto &smj = scored_pasts[j][m]; - bool in_smi = std::find(smi.begin(), smi.end(), k) != smi.end(); - bool in_smj = std::find(smj.begin(), smj.end(), l) != smj.end(); - - if (in_smi && in_smj) - { - match = true; - auto item = std::make_pair( - std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2)); - all_pairs.push_back(item); - } - else if (in_smi || in_smj) - { - match = true; - } - } - } - - if (!match) - { - auto item = std::make_pair( - std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2)); - all_pairs.push_back(item); - } + return empty_pose_batch3d(joint_names.size()); } - elapsed = std::chrono::steady_clock::now() - stime; - pairs_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Calculate pair scores - std::vector>, float>> all_scored_poses; - all_scored_poses.resize(all_pairs.size()); + std::vector> all_scored_poses(all_pairs.size()); for (size_t i = 0; i < all_pairs.size(); ++i) { const auto &pids = all_pairs[i].first; - - // Extract camera parameters const auto &cam1 = internal_cameras[std::get<0>(pids)]; const auto &cam2 = internal_cameras[std::get<1>(pids)]; + Pose2D pose1_core = packed_poses.pose(std::get<0>(pids), std::get<2>(pids), core_joint_idx); + Pose2D pose2_core = packed_poses.pose(std::get<1>(pids), std::get<3>(pids), core_joint_idx); - // Extract 2D poses - const auto &pose1 = i_poses_2d[std::get<0>(pids)][std::get<2>(pids)]; - const auto &pose2 = i_poses_2d[std::get<1>(pids)][std::get<3>(pids)]; - - // Select core joints - std::vector> pose1_core, pose2_core; - pose1_core.resize(core_joint_idx.size()); - pose2_core.resize(core_joint_idx.size()); - for (size_t j = 0; j < core_joint_idx.size(); ++j) - { - size_t idx = core_joint_idx[j]; - pose1_core[j] = pose1[idx]; - pose2_core[j] = pose2[idx]; - } - - // Triangulate and score auto [pose3d, score] = triangulate_and_score( pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx); all_scored_poses[i] = std::make_pair(std::move(pose3d), score); } - // Drop low scoring poses - size_t num_poses = all_scored_poses.size(); - for (size_t i = num_poses; i > 0; --i) + for (size_t i = all_scored_poses.size(); i > 0; --i) { if (all_scored_poses[i - 1].second < min_match_score) { - all_scored_poses.erase(all_scored_poses.begin() + i - 1); - all_pairs.erase(all_pairs.begin() + i - 1); + all_scored_poses.erase(all_scored_poses.begin() + static_cast(i - 1)); + all_pairs.erase(all_pairs.begin() + static_cast(i - 1)); } } - elapsed = std::chrono::steady_clock::now() - stime; - pair_scoring_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Group pairs that share a person - std::vector, std::vector>, std::vector>> - groups = calc_grouping(all_pairs, all_scored_poses, min_match_score); - - // Drop groups with too few matches - size_t num_groups_1 = groups.size(); - for (size_t i = num_groups_1; i > 0; --i) + if (all_pairs.empty()) { - if (std::get<2>(groups[i - 1]).size() < this->min_group_size) + return empty_pose_batch3d(joint_names.size()); + } + + std::vector groups = calc_grouping(all_pairs, all_scored_poses, min_match_score); + for (size_t i = groups.size(); i > 0; --i) + { + if (std::get<2>(groups[i - 1]).size() < min_group_size) { - groups.erase(groups.begin() + i - 1); + groups.erase(groups.begin() + static_cast(i - 1)); } } - // Drop pairs with double associations, where the same 2D person appears in multiple pairs. - // If two not-same persons have relatively similar poses, the triangulation could create a - // false positive virtual person. This can lead to a single 2D skeleton being associated with - // multiple 3D skeletons. To avoid this, check if the same 2D person appears in multiple valid - // pairs. If so, drop the pairs in less populated groups or with the lower scores. + if (groups.empty()) + { + return empty_pose_batch3d(joint_names.size()); + } + std::map, std::vector> pairs_map; for (size_t i = 0; i < all_pairs.size(); ++i) { - const auto &p = all_pairs[i]; + const auto &pair = all_pairs[i]; const auto &mid1 = std::make_tuple( - std::get<0>(p.first), std::get<1>(p.first), std::get<0>(p.second)); + std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second)); const auto &mid2 = std::make_tuple( - std::get<0>(p.first), std::get<1>(p.first), std::get<1>(p.second)); + std::get<0>(pair.first), std::get<1>(pair.first), std::get<1>(pair.second)); pairs_map[mid1].push_back(i); pairs_map[mid2].push_back(i); } - std::vector group_map; - group_map.resize(all_pairs.size()); + + std::vector group_map(all_pairs.size()); for (size_t i = 0; i < groups.size(); ++i) { - const auto &group = groups[i]; - const auto &indices = std::get<2>(group); - for (const auto &idx : indices) + for (const auto &idx : std::get<2>(groups[i])) { group_map[idx] = i; } } + std::set drop_indices; for (auto &pair : pairs_map) { auto &indices = pair.second; - if (indices.size() > 1) + if (indices.size() <= 1) { - std::vector group_sizes; - std::vector pair_scores; - for (auto &idx : indices) - { - group_sizes.push_back(std::get<2>(groups[group_map[idx]]).size()); - pair_scores.push_back(all_scored_poses[idx].second); - } + continue; + } - // Sort indices by group size (prio-1) and pair score (prio-2) - std::vector indices_sorted(indices.size()); - std::iota(indices_sorted.begin(), indices_sorted.end(), 0); - std::sort(indices_sorted.begin(), indices_sorted.end(), - [&group_sizes, &pair_scores](size_t a, size_t b) + std::vector group_sizes; + std::vector pair_scores; + group_sizes.reserve(indices.size()); + pair_scores.reserve(indices.size()); + for (auto idx : indices) + { + group_sizes.push_back(std::get<2>(groups[group_map[idx]]).size()); + pair_scores.push_back(all_scored_poses[idx].second); + } + + std::vector indices_sorted(indices.size()); + std::iota(indices_sorted.begin(), indices_sorted.end(), 0); + std::sort(indices_sorted.begin(), indices_sorted.end(), + [&group_sizes, &pair_scores](size_t a, size_t b) + { + if (group_sizes[a] != group_sizes[b]) { - if (group_sizes[a] != group_sizes[b]) - { - return group_sizes[a] > group_sizes[b]; - } - return pair_scores[a] > pair_scores[b]; - }); + return group_sizes[a] > group_sizes[b]; + } + return pair_scores[a] > pair_scores[b]; + }); - // Drop all but the first index - for (size_t j = 1; j < indices_sorted.size(); ++j) - { - size_t drop_idx = indices[indices_sorted[j]]; - drop_indices.insert(drop_idx); - } + for (size_t j = 1; j < indices_sorted.size(); ++j) + { + drop_indices.insert(indices[indices_sorted[j]]); } } + std::vector drop_list(drop_indices.begin(), drop_indices.end()); std::sort(drop_list.begin(), drop_list.end(), std::greater()); - for (size_t i = 0; i < drop_list.size(); ++i) + for (size_t drop_idx : drop_list) { - all_scored_poses.erase(all_scored_poses.begin() + drop_list[i]); - all_pairs.erase(all_pairs.begin() + drop_list[i]); + all_scored_poses.erase(all_scored_poses.begin() + static_cast(drop_idx)); + all_pairs.erase(all_pairs.begin() + static_cast(drop_idx)); - // Remove pairs from groups and update indices of the remaining pairs - for (size_t j = 0; j < groups.size(); ++j) + for (auto &group : groups) { - auto &indices = std::get<2>(groups[j]); - auto it = std::find(indices.begin(), indices.end(), drop_list[i]); + auto &indices = std::get<2>(group); + auto it = std::find(indices.begin(), indices.end(), static_cast(drop_idx)); if (it != indices.end()) { indices.erase(it); } - for (size_t k = 0; k < std::get<2>(groups[j]).size(); ++k) + for (auto &index : indices) { - if ((size_t)std::get<2>(groups[j])[k] > drop_list[i]) + if (static_cast(index) > drop_idx) { - std::get<2>(groups[j])[k] -= 1; + index -= 1; } } } } - // Drop groups with too few matches again - size_t num_groups_2 = groups.size(); - for (size_t i = num_groups_2; i > 0; --i) + for (size_t i = groups.size(); i > 0; --i) { - if (std::get<2>(groups[i - 1]).size() < this->min_group_size) + if (std::get<2>(groups[i - 1]).size() < min_group_size) { - groups.erase(groups.begin() + i - 1); + groups.erase(groups.begin() + static_cast(i - 1)); } } - elapsed = std::chrono::steady_clock::now() - stime; - grouping_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); + if (groups.empty()) + { + return empty_pose_batch3d(joint_names.size()); + } - // Calculate full 3D poses - std::vector>> all_full_poses; - all_full_poses.resize(all_pairs.size()); + std::vector all_full_poses(all_pairs.size()); for (size_t i = 0; i < all_pairs.size(); ++i) { const auto &pids = all_pairs[i].first; const auto &cam1 = internal_cameras[std::get<0>(pids)]; const auto &cam2 = internal_cameras[std::get<1>(pids)]; - const auto &pose1 = i_poses_2d[std::get<0>(pids)][std::get<2>(pids)]; - const auto &pose2 = i_poses_2d[std::get<1>(pids)][std::get<3>(pids)]; + Pose2D pose1 = packed_poses.pose(std::get<0>(pids), std::get<2>(pids)); + Pose2D pose2 = packed_poses.pose(std::get<1>(pids), std::get<3>(pids)); auto [pose3d, score] = triangulate_and_score( pose1, pose2, cam1, cam2, roomparams, {}); - // Scale scores with 2D confidences - // They can improve the merge weighting, but especially the earlier step of pair-filtering - // works better if only per-view consistency is used, so they are not included before. - for (size_t j = 0; j < pose3d.size(); ++j) + for (size_t joint = 0; joint < pose3d.size(); ++joint) { - float score1 = pose1[j][2]; - float score2 = pose2[j][2]; - float min_score = 0.1; + const float score1 = pose1[joint][2]; + const float score2 = pose2[joint][2]; + const float min_score = 0.1f; if (score1 > min_score && score2 > min_score) { - float scoreP = (score1 + score2) * 0.5; - float scoreT = pose3d[j][3]; - - // Since the triangulation score is less sensitive and generally higher, - // weight it stronger to balance the two scores. - pose3d[j][3] = 0.9 * scoreT + 0.1 * scoreP; + const float score_projection = (score1 + score2) * 0.5f; + const float score_triangulation = pose3d[joint][3]; + pose3d[joint][3] = 0.9f * score_triangulation + 0.1f * score_projection; } } all_full_poses[i] = std::move(pose3d); } - elapsed = std::chrono::steady_clock::now() - stime; - full_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Merge groups - std::vector>> all_merged_poses; - all_merged_poses.resize(groups.size()); + std::vector final_poses_3d(groups.size()); for (size_t i = 0; i < groups.size(); ++i) { - const auto &group = groups[i]; - std::vector>> poses; - poses.reserve(std::get<2>(group).size()); - - for (const auto &idx : std::get<2>(group)) + std::vector poses; + poses.reserve(std::get<2>(groups[i]).size()); + for (const auto &idx : std::get<2>(groups[i])) { poses.push_back(all_full_poses[idx]); } - - auto merged_pose = merge_group(poses, min_match_score); - all_merged_poses[i] = std::move(merged_pose); + final_poses_3d[i] = merge_group(poses, min_match_score); } - elapsed = std::chrono::steady_clock::now() - stime; - merge_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Run post-processing steps - std::vector>> final_poses_3d = std::move(all_merged_poses); add_extra_joints(final_poses_3d, joint_names); filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx); add_missing_joints(final_poses_3d, joint_names); replace_far_joints(final_poses_3d, joint_names); - // Store final result for use in the next frame. - last_poses_3d = final_poses_3d; + PoseBatch3D poses_3d; + poses_3d.num_joints = joint_names.size(); - elapsed = std::chrono::steady_clock::now() - stime; - post_time += elapsed.count(); - stime = std::chrono::steady_clock::now(); - - // Convert to output format - std::vector>> poses_3d; - poses_3d.reserve(final_poses_3d.size()); - for (size_t i = 0; i < final_poses_3d.size(); ++i) + for (const auto &pose : final_poses_3d) { - const size_t num_joints = final_poses_3d[i].size(); - std::vector> pose; - pose.reserve(num_joints); - size_t num_valid = 0; - - for (size_t j = 0; j < num_joints; ++j) - { - std::array point; - for (size_t k = 0; k < 4; ++k) + const auto is_valid = std::any_of( + pose.begin(), + pose.end(), + [this](const std::array &joint) { - point[k] = final_poses_3d[i][j][k]; - } - pose.push_back(point); - - if (point[3] > min_match_score) - { - num_valid++; - } - } - - if (num_valid > 0) + return joint[3] > min_match_score; + }); + if (is_valid) { - poses_3d.push_back(std::move(pose)); + ++poses_3d.num_persons; } } - elapsed = std::chrono::steady_clock::now() - stime; - convert_time += elapsed.count(); + poses_3d.data.resize(poses_3d.num_persons * poses_3d.num_joints * 4); + size_t person_index = 0; + for (const auto &pose : final_poses_3d) + { + const auto is_valid = std::any_of( + pose.begin(), + pose.end(), + [this](const std::array &joint) + { + return joint[3] > min_match_score; + }); + if (!is_valid) + { + continue; + } - elapsed = std::chrono::steady_clock::now() - ttime; - total_time += elapsed.count(); - num_calls++; + for (size_t joint = 0; joint < poses_3d.num_joints; ++joint) + { + for (size_t coord = 0; coord < 4; ++coord) + { + poses_3d.at(person_index, joint, coord) = pose[joint][coord]; + } + } + ++person_index; + } return poses_3d; } -// ================================================================================================= - -void TriangulatorInternal::reset() -{ - last_poses_3d.clear(); -} - -// ================================================================================================= - -void TriangulatorInternal::print_stats() -{ - std::cout << "{" << std::endl; - std::cout << " \"triangulator_calls\": " << num_calls << "," << std::endl; - std::cout << " \"init_time\": " << init_time / num_calls << "," << std::endl; - std::cout << " \"undistort_time\": " << undistort_time / num_calls << "," << std::endl; - std::cout << " \"project_time\": " << project_time / num_calls << "," << std::endl; - std::cout << " \"match_time\": " << match_time / num_calls << "," << std::endl; - std::cout << " \"pairs_time\": " << pairs_time / num_calls << "," << std::endl; - std::cout << " \"pair_scoring_time\": " << pair_scoring_time / num_calls << "," << std::endl; - std::cout << " \"grouping_time\": " << grouping_time / num_calls << "," << std::endl; - std::cout << " \"full_time\": " << full_time / num_calls << "," << std::endl; - std::cout << " \"merge_time\": " << merge_time / num_calls << "," << std::endl; - std::cout << " \"post_time\": " << post_time / num_calls << "," << std::endl; - std::cout << " \"convert_time\": " << convert_time / num_calls << "," << std::endl; - std::cout << " \"total_time\": " << total_time / num_calls << std::endl; - std::cout << "}" << std::endl; -} - -// ================================================================================================= - -void TriangulatorInternal::undistort_poses( - std::vector>> &poses_2d, CameraInternal &icam) -{ - float ifx_old = 1.0 / icam.cam.K[0][0]; - float ify_old = 1.0 / icam.cam.K[1][1]; - float cx_old = icam.cam.K[0][2]; - float cy_old = icam.cam.K[1][2]; - float fx_new = icam.newK[0][0]; - float fy_new = icam.newK[1][1]; - float cx_new = icam.newK[0][2]; - float cy_new = icam.newK[1][2]; - - // Undistort all the points - size_t num_persons = poses_2d.size(); - size_t num_joints = poses_2d[0].size(); - for (size_t i = 0; i < num_persons; ++i) - { - for (size_t j = 0; j < num_joints; ++j) - { - // Normalize the point using the original camera matrix - poses_2d[i][j][0] = (poses_2d[i][j][0] - cx_old) * ifx_old; - poses_2d[i][j][1] = (poses_2d[i][j][1] - cy_old) * ify_old; - - // Undistort - // Using own implementation is faster than using OpenCV, because it avoids the - // overhead of creating cv::Mat objects and further unnecessary calculations for - // additional distortion parameters and identity rotations in this usecase. - if (icam.cam.type == "fisheye") - { - CameraInternal::undistort_point_fisheye(poses_2d[i][j], icam.cam.DC); - } - else - { - CameraInternal::undistort_point_pinhole(poses_2d[i][j], icam.cam.DC); - } - - // Map the undistorted normalized point to the new image coordinates - poses_2d[i][j][0] = (poses_2d[i][j][0] * fx_new) + cx_new; - poses_2d[i][j][1] = (poses_2d[i][j][1] * fy_new) + cy_new; - } - } - - // Mask out points that are far outside the image (points slightly outside are still valid) - int width = icam.cam.width; - int height = icam.cam.height; - float mask_offset = (width + height) / 20.0; - for (size_t i = 0; i < num_persons; ++i) - { - for (size_t j = 0; j < num_joints; ++j) - { - if (poses_2d[i][j][0] < -mask_offset || poses_2d[i][j][0] >= width + mask_offset || - poses_2d[i][j][1] < -mask_offset || poses_2d[i][j][1] >= height + mask_offset) - { - poses_2d[i][j][0] = 0.0; - poses_2d[i][j][1] = 0.0; - poses_2d[i][j][2] = 0.0; - } - } - } -} - -// ================================================================================================= - -std::tuple>>, std::vector>> -TriangulatorInternal::project_poses( - const std::vector>> &poses_3d, +std::tuple, std::vector>> TriangulatorInternal::project_poses( + const std::vector &poses_3d, const CameraInternal &icam, bool calc_dists) { const size_t num_persons = poses_3d.size(); const size_t num_joints = poses_3d[0].size(); - // Prepare output vectors - std::vector>> poses_2d; - std::vector> all_dists; - poses_2d.resize(num_persons); - all_dists.resize(num_persons); + std::vector poses_2d(num_persons); + std::vector> all_dists(num_persons); - // Get camera parameters const std::array, 3> &K = icam.newK; const std::array, 3> &R = icam.invR; const std::array, 3> &T = icam.cam.T; for (size_t i = 0; i < num_persons; ++i) { - const std::vector> &body3D = poses_3d[i]; - std::vector> body2D(num_joints, std::array{0.0, 0.0, 0.0}); - std::vector dists(num_joints, 0.0); + const Pose3D &body3D = poses_3d[i]; + Pose2D body2D(num_joints, std::array{0.0f, 0.0f, 0.0f}); + std::vector dists(num_joints, 0.0f); for (size_t j = 0; j < num_joints; ++j) { @@ -756,47 +671,40 @@ TriangulatorInternal::project_poses( float z = body3D[j][2]; float score = body3D[j][3]; - // Project from world to camera coordinate system - float xt = x - T[0][0]; - float yt = y - T[1][0]; - float zt = z - T[2][0]; + const float xt = x - T[0][0]; + const float yt = y - T[1][0]; + const float zt = z - T[2][0]; float x_cam = xt * R[0][0] + yt * R[0][1] + zt * R[0][2]; float y_cam = xt * R[1][0] + yt * R[1][1] + zt * R[1][2]; float z_cam = xt * R[2][0] + yt * R[2][1] + zt * R[2][2]; - // Set points behind the camera to zero - if (z_cam <= 0.0) + if (z_cam <= 0.0f) { - x_cam = y_cam = z_cam = 0.0; + x_cam = 0.0f; + y_cam = 0.0f; + z_cam = 0.0f; } - // Calculate distance from camera center if required - if (calc_dists) - dists[j] = std::sqrt(x_cam * x_cam + y_cam * y_cam + z_cam * z_cam); - else - dists[j] = 0.0; + dists[j] = calc_dists ? std::sqrt(x_cam * x_cam + y_cam * y_cam + z_cam * z_cam) : 0.0f; - // Project points to image plane - // Since images are already undistorted, use the pinhole projection for all camera types - float u = 0.0, v = 0.0; - if (z_cam > 0.0) + float u = 0.0f; + float v = 0.0f; + if (z_cam > 0.0f) { u = (K[0][0] * x_cam + K[0][1] * y_cam + K[0][2] * z_cam) / z_cam; v = (K[1][0] * x_cam + K[1][1] * y_cam + K[1][2] * z_cam) / z_cam; } - // Filter invalid projections - if (u < 0.0 || u >= icam.cam.width || v < 0.0 || v >= icam.cam.height) + if (u < 0.0f || u >= icam.cam.width || v < 0.0f || v >= icam.cam.height) { - u = 0.0; - v = 0.0; - score = 0.0; + u = 0.0f; + v = 0.0f; + score = 0.0f; } body2D[j] = {u, v, score}; } - // Store results poses_2d[i] = std::move(body2D); all_dists[i] = std::move(dists); } @@ -804,105 +712,24 @@ TriangulatorInternal::project_poses( return std::make_tuple(poses_2d, all_dists); } -// ================================================================================================= - -float TriangulatorInternal::calc_pose_score( - const std::vector> &pose1, - const std::vector> &pose2, - const std::vector &dist1, - const CameraInternal &icam) -{ - const float min_score = 0.1; - const size_t num_joints = pose1.size(); - - // Create mask for valid points - std::vector mask; - mask.resize(num_joints); - for (size_t i = 0; i < num_joints; ++i) - { - if (pose1[i][2] <= min_score || pose2[i][2] <= min_score) - { - mask[i] = false; - } - else - { - mask[i] = true; - } - } - - // Drop if not enough valid points - int valid_count = 0; - for (size_t i = 0; i < num_joints; ++i) - { - if (mask[i]) - { - valid_count++; - } - } - if (valid_count < 3) - { - return 0.0; - } - - // Calculate scores - float iscale = (icam.cam.width + icam.cam.height) / 2; - std::vector scores = score_projection(pose1, pose2, dist1, mask, iscale); - - // Drop lowest scores - size_t drop_k = static_cast(num_joints * 0.2); - const size_t min_k = 3; - std::vector scores_vec; - scores_vec.reserve(valid_count); - for (size_t i = 0; i < scores.size(); ++i) - { - if (mask[i] > 0) - { - scores_vec.push_back(scores[i]); - } - } - size_t scores_size = scores_vec.size(); - if (scores_size >= min_k) - { - drop_k = std::min(drop_k, scores_size - min_k); - std::partial_sort(scores_vec.begin(), scores_vec.begin() + drop_k, scores_vec.end()); - scores_vec.erase(scores_vec.begin(), scores_vec.begin() + drop_k); - } - - // Calculate final score - float score = 0.0; - size_t n_items = scores_vec.size(); - if (n_items > 0) - { - float sum_scores = std::accumulate(scores_vec.begin(), scores_vec.end(), 0.0); - score = sum_scores / static_cast(n_items); - } - - return score; -} - -// ================================================================================================= - std::vector TriangulatorInternal::score_projection( - const std::vector> &pose, - const std::vector> &repro, + const Pose2D &pose, + const Pose2D &repro, const std::vector &dists, const std::vector &mask, float iscale) { - const float min_score = 0.1; + const float min_score = 0.1f; const size_t num_joints = pose.size(); - // Calculate error - std::vector error(num_joints, 0.0); + std::vector error(num_joints, 0.0f); for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { - float dx = pose[i][0] - repro[i][0]; - float dy = pose[i][1] - repro[i][1]; + const float dx = pose[i][0] - repro[i][0]; + const float dy = pose[i][1] - repro[i][1]; float err = std::sqrt(dx * dx + dy * dy); - - // Set errors of invisible reprojections to a high value if (repro[i][2] < min_score) { err = iscale; @@ -911,18 +738,14 @@ std::vector TriangulatorInternal::score_projection( } } - // Scale error by image size - const float inv_iscale = 1.0 / iscale; - const float iscale_quarter = iscale / 4.0; + const float inv_iscale = 1.0f / iscale; + const float iscale_quarter = iscale / 4.0f; for (size_t i = 0; i < num_joints; ++i) { - float err = error[i]; - err = std::max(0.0f, std::min(err, iscale_quarter)) * inv_iscale; - error[i] = err; + error[i] = std::max(0.0f, std::min(error[i], iscale_quarter)) * inv_iscale; } - // Scale error by distance to camera - float mean_dist = 0.0; + float mean_dist = 0.0f; int count = 0; for (size_t i = 0; i < num_joints; ++i) { @@ -934,26 +757,24 @@ std::vector TriangulatorInternal::score_projection( } if (count > 0) { - mean_dist /= count; + mean_dist /= static_cast(count); } - const float dscale = std::sqrt(mean_dist / 3.5); + + const float dscale = std::sqrt(mean_dist / 3.5f); for (size_t i = 0; i < num_joints; ++i) { error[i] *= dscale; } - // Convert error to score - std::vector score(num_joints, 0.0); + std::vector score(num_joints, 0.0f); for (size_t i = 0; i < num_joints; ++i) { - score[i] = 1.0 / (1.0 + error[i] * 10.0); + score[i] = 1.0f / (1.0f + error[i] * 10.0f); } return score; } -// ================================================================================================= - float dot(const std::array &a, const std::array &b) { return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; @@ -2202,3 +2023,17 @@ void TriangulatorInternal::replace_far_joints( } } } + +} // namespace + +PoseBatch3D triangulate_poses( + const PoseBatch2DView &poses_2d, + const std::vector &cameras, + const std::array, 2> &roomparams, + const std::vector &joint_names, + float min_match_score, + size_t min_group_size) +{ + TriangulatorInternal triangulator(min_match_score, min_group_size); + return triangulator.triangulate_poses(poses_2d, cameras, roomparams, joint_names); +} diff --git a/rpt/triangulator.hpp b/rpt/triangulator.hpp deleted file mode 100644 index a3db205..0000000 --- a/rpt/triangulator.hpp +++ /dev/null @@ -1,130 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "camera.hpp" - -// ================================================================================================= - -class TriangulatorInternal -{ -public: - TriangulatorInternal(float min_match_score, size_t min_group_size); - - std::vector>> triangulate_poses( - const std::vector>>> &poses_2d, - const std::vector &cameras, - const std::array, 2> &roomparams, - const std::vector &joint_names); - - void reset(); - void print_stats(); - -private: - float min_match_score; - float min_group_size; - size_t num_cams; - - const std::vector core_joints = { - "shoulder_left", - "shoulder_right", - "hip_left", - "hip_right", - "elbow_left", - "elbow_right", - "knee_left", - "knee_right", - "wrist_left", - "wrist_right", - "ankle_left", - "ankle_right", - }; - const std::vector> core_limbs = { - {"knee_left", "ankle_left"}, - {"hip_left", "knee_left"}, - {"hip_right", "knee_right"}, - {"knee_right", "ankle_right"}, - {"elbow_left", "wrist_left"}, - {"elbow_right", "wrist_right"}, - {"shoulder_left", "elbow_left"}, - {"shoulder_right", "elbow_right"}, - }; - std::vector>> last_poses_3d; - - void undistort_poses( - std::vector>> &poses_2d, CameraInternal &icam); - - std::tuple>>, std::vector>> - project_poses( - const std::vector>> &poses_3d, - const CameraInternal &icam, - bool calc_dists); - - float calc_pose_score( - const std::vector> &pose1, - const std::vector> &pose2, - const std::vector &dist1, - const CameraInternal &icam); - - std::vector score_projection( - const std::vector> &pose, - const std::vector> &repro, - const std::vector &dists, - const std::vector &mask, - float iscale); - - std::pair>, float> triangulate_and_score( - const std::vector> &pose1, - const std::vector> &pose2, - const CameraInternal &cam1, - const CameraInternal &cam2, - const std::array, 2> &roomparams, - const std::vector> &core_limbs_idx); - - std::vector, std::vector>, std::vector>> - calc_grouping( - const std::vector, std::pair>> &all_pairs, - const std::vector>, float>> &all_scored_poses, - float min_score); - - std::vector> merge_group( - const std::vector>> &poses_3d, - float min_score); - - void add_extra_joints( - std::vector>> &poses, - const std::vector &joint_names); - - void filter_poses( - std::vector>> &poses, - const std::array, 2> &roomparams, - const std::vector &core_joint_idx, - const std::vector> &core_limbs_idx); - - void add_missing_joints( - std::vector>> &poses, - const std::vector &joint_names); - - void replace_far_joints( - std::vector>> &poses, - const std::vector &joint_names); - - // Statistics - double num_calls = 0; - double total_time = 0; - double init_time = 0; - double undistort_time = 0; - double project_time = 0; - double match_time = 0; - double pairs_time = 0; - double pair_scoring_time = 0; - double grouping_time = 0; - double full_time = 0; - double merge_time = 0; - double post_time = 0; - double convert_time = 0; -}; diff --git a/src/rpt/__init__.py b/src/rpt/__init__.py index 3efae1f..093ba17 100644 --- a/src/rpt/__init__.py +++ b/src/rpt/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations from collections.abc import Sequence from typing import TYPE_CHECKING -from ._core import Camera, Triangulator +from ._core import Camera, triangulate_poses if TYPE_CHECKING: import numpy as np @@ -28,7 +28,7 @@ def pack_poses_2d( __all__ = [ "Camera", - "Triangulator", "convert_cameras", "pack_poses_2d", + "triangulate_poses", ] diff --git a/src/rpt/_helpers.py b/src/rpt/_helpers.py index 207b3fa..82c4505 100644 --- a/src/rpt/_helpers.py +++ b/src/rpt/_helpers.py @@ -1,19 +1,30 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence -from typing import Any, TypeAlias +from collections.abc import Sequence +from typing import TypeAlias, TypedDict import numpy as np import numpy.typing as npt from ._core import Camera -CameraLike = Camera | Mapping[str, Any] -PoseViewLike: TypeAlias = ( - npt.NDArray[np.generic] - | Sequence[Sequence[Sequence[float]]] - | Sequence[Sequence[float]] -) +Matrix3x3Like: TypeAlias = Sequence[Sequence[float]] +VectorLike: TypeAlias = Sequence[float] +PoseViewLike: TypeAlias = npt.NDArray[np.generic] | Sequence[Sequence[Sequence[float]]] | Sequence[Sequence[float]] + + +class CameraDict(TypedDict, total=False): + name: str + K: Matrix3x3Like + DC: VectorLike + R: Matrix3x3Like + T: Sequence[Sequence[float]] + width: int + height: int + type: str + + +CameraLike = Camera | CameraDict def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]: diff --git a/tests/README.md b/tests/README.md index e25d5c4..5333d8d 100644 --- a/tests/README.md +++ b/tests/README.md @@ -2,7 +2,7 @@ Various module tests -### Triangulator +### Python Interface ```bash cd /RapidPoseTriangulation/ diff --git a/tests/test_interface.py b/tests/test_interface.py index dd55d23..27e7b65 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -69,13 +69,13 @@ def test_camera_structure_repr(): ) def test_triangulate_samples(camera_path: str, pose_path: str, roomparams): poses_2d, person_counts, cameras = load_case(camera_path, pose_path) - triangulator = rpt.Triangulator(min_match_score=0.95) - poses_3d = triangulator.triangulate_poses( + poses_3d = rpt.triangulate_poses( poses_2d, person_counts, cameras, np.asarray(roomparams, dtype=np.float32), JOINT_NAMES, + min_match_score=0.95, ) assert isinstance(poses_3d, np.ndarray) @@ -86,18 +86,33 @@ def test_triangulate_samples(camera_path: str, pose_path: str, roomparams): assert np.isfinite(poses_3d).all() -def test_triangulate_repeatability_after_reset(): +def test_triangulate_repeatability(): poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json") roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32) - triangulator = rpt.Triangulator(min_match_score=0.95) - first = triangulator.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES) - triangulator.reset() - second = triangulator.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES) + first = rpt.triangulate_poses( + poses_2d, person_counts, cameras, roomparams, JOINT_NAMES, min_match_score=0.95 + ) + second = rpt.triangulate_poses( + poses_2d, person_counts, cameras, roomparams, JOINT_NAMES, min_match_score=0.95 + ) np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5) +def test_triangulate_does_not_mutate_inputs(): + poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json") + roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32) + + poses_before = poses_2d.copy() + counts_before = person_counts.copy() + + rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES) + + np.testing.assert_array_equal(poses_2d, poses_before) + np.testing.assert_array_equal(person_counts, counts_before) + + def test_pack_poses_2d_from_ragged_inputs(): packed, counts = rpt.pack_poses_2d( [