Refactor triangulation stages and camera model

2026-03-11 23:42:07 +08:00
parent 24f74c87f1
commit 103aeb5887
10 changed files with 1268 additions and 232 deletions
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <array>
+
+#include "camera.hpp"
+
+struct CachedCamera
+{
+    const Camera cam;
+    const std::array<std::array<float, 3>, 3> invR;
+    const std::array<float, 3> center;
+    const std::array<std::array<float, 3>, 3> newK;
+    const std::array<std::array<float, 3>, 3> invK;
+};
+
+CachedCamera cache_camera(const Camera &camera);
+
+void undistort_point_pinhole(std::array<float, 3> &point, const std::array<float, 5> &distortion);
+void undistort_point_fisheye(std::array<float, 3> &point, const std::array<float, 5> &distortion);
@@ -1,11 +1,60 @@
 #include <array>
 #include <cmath>
 #include <iomanip>
+#include <stdexcept>
 #include <sstream>
 #include <vector>
 #include <cmath>

-#include "camera.hpp"
+#include "cached_camera.hpp"
+
+namespace
+{
+std::array<std::array<float, 3>, 3> transpose3x3(const std::array<std::array<float, 3>, 3> &M)
+{
+    return {{{M[0][0], M[1][0], M[2][0]},
+             {M[0][1], M[1][1], M[2][1]},
+             {M[0][2], M[1][2], M[2][2]}}};
+}
+
+std::array<std::array<float, 3>, 3> invert3x3(const std::array<std::array<float, 3>, 3> &M)
+{
+    std::array<std::array<float, 3>, 3> adj = {
+        {{
+             M[1][1] * M[2][2] - M[1][2] * M[2][1],
+             M[0][2] * M[2][1] - M[0][1] * M[2][2],
+             M[0][1] * M[1][2] - M[0][2] * M[1][1],
+         },
+         {
+             M[1][2] * M[2][0] - M[1][0] * M[2][2],
+             M[0][0] * M[2][2] - M[0][2] * M[2][0],
+             M[0][2] * M[1][0] - M[0][0] * M[1][2],
+         },
+         {
+             M[1][0] * M[2][1] - M[1][1] * M[2][0],
+             M[0][1] * M[2][0] - M[0][0] * M[2][1],
+             M[0][0] * M[1][1] - M[0][1] * M[1][0],
+         }}};
+
+    float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
+    if (std::fabs(det) < 1e-6f)
+    {
+        return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}};
+    }
+
+    float idet = 1.0f / det;
+    return {{
+        {{adj[0][0] * idet, adj[0][1] * idet, adj[0][2] * idet}},
+        {{adj[1][0] * idet, adj[1][1] * idet, adj[1][2] * idet}},
+        {{adj[2][0] * idet, adj[2][1] * idet, adj[2][2] * idet}},
+    }};
+}
+
+std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
+    const Camera &cam, float balance, std::pair<int, int> new_size);
+std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
+    const Camera &cam, float alpha, std::pair<int, int> new_size);
+} // namespace

 // =================================================================================================
 // =================================================================================================
@@ -63,7 +112,7 @@ std::string Camera::to_string() const

    out << "'width': " << width << ", ";
    out << "'height': " << height << ", ";
-    out << "'type': " << type;
+    out << "'model': '" << camera_model_name(model) << "'";

    out << "}";
    return out.str();
@@ -71,6 +120,33 @@ std::string Camera::to_string() const

 // =================================================================================================

+const char *camera_model_name(CameraModel model)
+{
+    switch (model)
+    {
+    case CameraModel::Pinhole:
+        return "pinhole";
+    case CameraModel::Fisheye:
+        return "fisheye";
+    }
+    return "unknown";
+}
+
+CameraModel parse_camera_model(const std::string &value)
+{
+    if (value == "pinhole")
+    {
+        return CameraModel::Pinhole;
+    }
+    if (value == "fisheye")
+    {
+        return CameraModel::Fisheye;
+    }
+    throw std::invalid_argument("Unsupported camera model: " + value);
+}
+
+// =================================================================================================
+
 std::ostream &operator<<(std::ostream &out, const Camera &cam)
 {
    out << cam.to_string();
@@ -80,93 +156,33 @@ std::ostream &operator<<(std::ostream &out, const Camera &cam)
 // =================================================================================================
 // =================================================================================================

-CameraInternal::CameraInternal(const Camera &cam)
+CachedCamera cache_camera(const Camera &cam)
 {
-    this->cam = cam;
-    this->invR = transpose3x3(cam.R);
+    const std::array<std::array<float, 3>, 3> invR = transpose3x3(cam.R);

    // Camera center:
    // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
-    this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
+    const std::array<float, 3> center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};

    // Undistort camera matrix
    // As with the undistortion, the own implementation avoids some overhead compared to OpenCV
-    if (cam.type == "fisheye")
+    std::array<std::array<float, 3>, 3> newK;
+    if (cam.model == CameraModel::Fisheye)
    {
-        newK = calc_optimal_camera_matrix_fisheye(1.0, {cam.width, cam.height});
+        newK = calc_optimal_camera_matrix_fisheye(cam, 1.0f, {cam.width, cam.height});
    }
    else
    {
-        newK = calc_optimal_camera_matrix_pinhole(1.0, {cam.width, cam.height});
+        newK = calc_optimal_camera_matrix_pinhole(cam, 1.0f, {cam.width, cam.height});
    }
-    this->invK = invert3x3(newK);
+    const std::array<std::array<float, 3>, 3> invK = invert3x3(newK);
+
+    return CachedCamera {cam, invR, center, newK, invK};
 }

 // =================================================================================================

-std::array<std::array<float, 3>, 3> CameraInternal::transpose3x3(
-    const std::array<std::array<float, 3>, 3> &M)
-{
-    return {{{M[0][0], M[1][0], M[2][0]},
-             {M[0][1], M[1][1], M[2][1]},
-             {M[0][2], M[1][2], M[2][2]}}};
-}
-
-// =================================================================================================
-
-std::array<std::array<float, 3>, 3> CameraInternal::invert3x3(
-    const std::array<std::array<float, 3>, 3> &M)
-{
-    // Compute the inverse using the adjugate method
-    // See: https://scicomp.stackexchange.com/a/29206
-
-    std::array<std::array<float, 3>, 3> adj = {
-        {{
-             M[1][1] * M[2][2] - M[1][2] * M[2][1],
-             M[0][2] * M[2][1] - M[0][1] * M[2][2],
-             M[0][1] * M[1][2] - M[0][2] * M[1][1],
-         },
-         {
-             M[1][2] * M[2][0] - M[1][0] * M[2][2],
-             M[0][0] * M[2][2] - M[0][2] * M[2][0],
-             M[0][2] * M[1][0] - M[0][0] * M[1][2],
-         },
-         {
-             M[1][0] * M[2][1] - M[1][1] * M[2][0],
-             M[0][1] * M[2][0] - M[0][0] * M[2][1],
-             M[0][0] * M[1][1] - M[0][1] * M[1][0],
-         }}};
-
-    float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
-    if (std::fabs(det) < 1e-6f)
-    {
-        return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}};
-    }
-
-    float idet = 1.0f / det;
-    std::array<std::array<float, 3>, 3> inv = {
-        {{
-             adj[0][0] * idet,
-             adj[0][1] * idet,
-             adj[0][2] * idet,
-         },
-         {
-             adj[1][0] * idet,
-             adj[1][1] * idet,
-             adj[1][2] * idet,
-         },
-         {
-             adj[2][0] * idet,
-             adj[2][1] * idet,
-             adj[2][2] * idet,
-         }}};
-
-    return inv;
-}
-
-// =================================================================================================
-
-void CameraInternal::undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k)
+void undistort_point_pinhole(std::array<float, 3> &p, const std::array<float, 5> &k)
 {
    // Following: cv::cvUndistortPointsInternal
    // Uses only the distortion coefficients: [k1, k2, p1, p2, k3]
@@ -202,7 +218,7 @@ void CameraInternal::undistort_point_pinhole(std::array<float, 3> &p, const std:

 // =================================================================================================

-void CameraInternal::undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k)
+void undistort_point_fisheye(std::array<float, 3> &p, const std::array<float, 5> &k)
 {
    // Following: cv::fisheye::undistortPoints
    // Uses only the distortion coefficients: [k1, k2, k3, k4]
@@ -250,8 +266,10 @@ void CameraInternal::undistort_point_fisheye(std::array<float, 3> &p, const std:

 // =================================================================================================

-std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_fisheye(
-    float balance, std::pair<int, int> new_size)
+namespace
+{
+std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
+    const Camera &cam, float balance, std::pair<int, int> new_size)
 {
    // Following: cv::fisheye::estimateNewCameraMatrixForUndistortRectify
    // https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L630
@@ -355,8 +373,8 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_f

 // =================================================================================================

-std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_pinhole(
-    float alpha, std::pair<int, int> new_size)
+std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
+    const Camera &cam, float alpha, std::pair<int, int> new_size)
 {
    // Following: cv::getOptimalNewCameraMatrix
    // https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/calibration_base.cpp#L1565
@@ -479,3 +497,4 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
         {0.0, 0.0, 1.0}}};
    return newK;
 }
+} // namespace
@@ -7,46 +7,28 @@

 // =================================================================================================

+enum class CameraModel
+{
+    Pinhole,
+    Fisheye,
+};
+
+const char *camera_model_name(CameraModel model);
+CameraModel parse_camera_model(const std::string &value);
+
+// =================================================================================================
+
 struct Camera
 {
    std::string name;
    std::array<std::array<float, 3>, 3> K;
-    std::vector<float> DC;
+    std::array<float, 5> DC = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
    std::array<std::array<float, 3>, 3> R;
    std::array<std::array<float, 1>, 3> T;
    int width;
    int height;
-    std::string type;
+    CameraModel model = CameraModel::Pinhole;

    friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
    std::string to_string() const;
 };
-
-// =================================================================================================
-
-class CameraInternal
-{
-public:
-    CameraInternal(const Camera &cam);
-
-    Camera cam;
-
-    std::array<std::array<float, 3>, 3> invR;
-    std::array<float, 3> center;
-    std::array<std::array<float, 3>, 3> newK;
-    std::array<std::array<float, 3>, 3> invK;
-
-    static std::array<std::array<float, 3>, 3> transpose3x3(
-        const std::array<std::array<float, 3>, 3> &M);
-
-    static std::array<std::array<float, 3>, 3> invert3x3(
-        const std::array<std::array<float, 3>, 3> &M);
-
-    static void undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k);
-    static void undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k);
-
-    std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
-        float balance, std::pair<int, int> new_size);
-    std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
-        float alpha, std::pair<int, int> new_size);
-};
@@ -38,6 +38,11 @@ const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
 }

+const float &PoseBatch3DView::at(size_t person, size_t joint, size_t coord) const
+{
+    return data[pose3d_offset(person, joint, coord, num_joints)];
+}
+
 const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
 {
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
@@ -109,6 +114,11 @@ const float &PoseBatch3D::at(size_t person, size_t joint, size_t coord) const
    return data[pose3d_offset(person, joint, coord, num_joints)];
 }

+PoseBatch3DView PoseBatch3D::view() const
+{
+    return PoseBatch3DView {data.data(), num_persons, num_joints};
+}
+
 NestedPoses3D PoseBatch3D::to_nested() const
 {
    NestedPoses3D poses_3d(num_persons);
@@ -25,6 +25,15 @@ struct PoseBatch2DView
    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
 };

+struct PoseBatch3DView
+{
+    const float *data = nullptr;
+    size_t num_persons = 0;
+    size_t num_joints = 0;
+
+    const float &at(size_t person, size_t joint, size_t coord) const;
+};
+
 struct PoseBatch2D
 {
    std::vector<float> data;
@@ -48,6 +57,7 @@ struct PoseBatch3D

    float &at(size_t person, size_t joint, size_t coord);
    const float &at(size_t person, size_t joint, size_t coord) const;
+    PoseBatch3DView view() const;

    NestedPoses3D to_nested() const;
    static PoseBatch3D from_nested(const NestedPoses3D &poses_3d);
@@ -55,6 +65,136 @@ struct PoseBatch3D

 // =================================================================================================

+struct PairCandidate
+{
+    int view1 = -1;
+    int view2 = -1;
+    int person1 = -1;
+    int person2 = -1;
+    int global_person1 = -1;
+    int global_person2 = -1;
+};
+
+struct PreviousPoseMatch
+{
+    int previous_pose_index = -1;
+    float score_view1 = 0.0f;
+    float score_view2 = 0.0f;
+    bool matched_view1 = false;
+    bool matched_view2 = false;
+    bool kept = true;
+    std::string decision = "unfiltered";
+};
+
+struct PreviousPoseFilterDebug
+{
+    bool used_previous_poses = false;
+    std::vector<PreviousPoseMatch> matches;
+    std::vector<int> kept_pair_indices;
+    std::vector<PairCandidate> kept_pairs;
+};
+
+struct CoreProposalDebug
+{
+    int pair_index = -1;
+    PairCandidate pair;
+    std::vector<std::array<float, 4>> pose_3d;
+    float score = 0.0f;
+    bool kept = false;
+    std::string drop_reason = "uninitialized";
+};
+
+struct ProposalGroupDebug
+{
+    std::array<float, 3> center = {0.0f, 0.0f, 0.0f};
+    std::vector<std::array<float, 4>> pose_3d;
+    std::vector<int> proposal_indices;
+};
+
+struct GroupingDebug
+{
+    std::vector<ProposalGroupDebug> initial_groups;
+    std::vector<int> duplicate_pair_drops;
+    std::vector<ProposalGroupDebug> groups;
+};
+
+struct FullProposalDebug
+{
+    int source_core_proposal_index = -1;
+    PairCandidate pair;
+    std::vector<std::array<float, 4>> pose_3d;
+};
+
+struct MergeDebug
+{
+    std::vector<std::vector<std::array<float, 4>>> merged_poses;
+    std::vector<std::vector<int>> group_proposal_indices;
+};
+
+struct TriangulationTrace
+{
+    std::vector<PairCandidate> pairs;
+    PreviousPoseFilterDebug previous_filter;
+    std::vector<CoreProposalDebug> core_proposals;
+    GroupingDebug grouping;
+    std::vector<FullProposalDebug> full_proposals;
+    MergeDebug merge;
+    PoseBatch3D final_poses;
+};
+
+// =================================================================================================
+
+std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d);
+
+PreviousPoseFilterDebug filter_pairs_with_previous_poses(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView &previous_poses_3d,
+    float min_match_score = 0.95f);
+
+inline PreviousPoseFilterDebug filter_pairs_with_previous_poses(
+    const PoseBatch2D &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3D &previous_poses_3d,
+    float min_match_score = 0.95f)
+{
+    return filter_pairs_with_previous_poses(
+        poses_2d.view(), cameras, joint_names, previous_poses_3d.view(), min_match_score);
+}
+
+TriangulationTrace triangulate_debug(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::array<std::array<float, 3>, 2> &roomparams,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView *previous_poses_3d = nullptr,
+    float min_match_score = 0.95f,
+    size_t min_group_size = 1);
+
+inline TriangulationTrace triangulate_debug(
+    const PoseBatch2D &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::array<std::array<float, 3>, 2> &roomparams,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3D *previous_poses_3d = nullptr,
+    float min_match_score = 0.95f,
+    size_t min_group_size = 1)
+{
+    PoseBatch3DView previous_view_storage;
+    const PoseBatch3DView *previous_view = nullptr;
+    if (previous_poses_3d != nullptr)
+    {
+        previous_view_storage = previous_poses_3d->view();
+        previous_view = &previous_view_storage;
+    }
+    return triangulate_debug(
+        poses_2d.view(), cameras, roomparams, joint_names, previous_view, min_match_score, min_group_size);
+}
+
+// =================================================================================================
+
 /**
 * Calculate a triangulation using a padded pose tensor.
 *
@@ -72,6 +212,7 @@ PoseBatch3D triangulate_poses(
    const std::vector<Camera> &cameras,
    const std::array<std::array<float, 3>, 2> &roomparams,
    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView *previous_poses_3d = nullptr,
    float min_match_score = 0.95f,
    size_t min_group_size = 1);

@@ -80,9 +221,17 @@ inline PoseBatch3D triangulate_poses(
    const std::vector<Camera> &cameras,
    const std::array<std::array<float, 3>, 2> &roomparams,
    const std::vector<std::string> &joint_names,
+    const PoseBatch3D *previous_poses_3d = nullptr,
    float min_match_score = 0.95f,
    size_t min_group_size = 1)
 {
+    PoseBatch3DView previous_view_storage;
+    const PoseBatch3DView *previous_view = nullptr;
+    if (previous_poses_3d != nullptr)
+    {
+        previous_view_storage = previous_poses_3d->view();
+        previous_view = &previous_view_storage;
+    }
    return triangulate_poses(
-        poses_2d.view(), cameras, roomparams, joint_names, min_match_score, min_group_size);
+        poses_2d.view(), cameras, roomparams, joint_names, previous_view, min_match_score, min_group_size);
 }
@@ -10,6 +10,7 @@
 #include <unordered_map>

 #include "interface.hpp"
+#include "cached_camera.hpp"

 namespace
 {
@@ -83,7 +84,7 @@ size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_jo
 }

 std::array<float, 3> undistort_point(
-    const std::array<float, 3> &raw_point, const CameraInternal &icam)
+    const std::array<float, 3> &raw_point, const CachedCamera &icam)
 {
    std::array<float, 3> point = raw_point;
    const float ifx_old = 1.0f / icam.cam.K[0][0];
@@ -98,13 +99,13 @@ std::array<float, 3> undistort_point(
    point[0] = (point[0] - cx_old) * ifx_old;
    point[1] = (point[1] - cy_old) * ify_old;

-    if (icam.cam.type == "fisheye")
+    if (icam.cam.model == CameraModel::Fisheye)
    {
-        CameraInternal::undistort_point_fisheye(point, icam.cam.DC);
+        undistort_point_fisheye(point, icam.cam.DC);
    }
    else
    {
-        CameraInternal::undistort_point_pinhole(point, icam.cam.DC);
+        undistort_point_pinhole(point, icam.cam.DC);
    }

    point[0] = (point[0] * fx_new) + cx_new;
@@ -125,7 +126,7 @@ class PackedPoseStore2D
 public:
    PackedPoseStore2D(
        const PoseBatch2DView &source,
-        const std::vector<CameraInternal> &internal_cameras)
+        const std::vector<CachedCamera> &internal_cameras)
        : person_counts(source.num_views),
          view_offsets(source.num_views),
          num_views(source.num_views),
@@ -229,16 +230,75 @@ public:
    {
    }

-    PoseBatch3D triangulate_poses(
+    TriangulationTrace triangulate_debug(
        const PoseBatch2DView &poses_2d,
        const std::vector<Camera> &cameras,
        const std::array<std::array<float, 3>, 2> &roomparams,
-        const std::vector<std::string> &joint_names);
+        const std::vector<std::string> &joint_names,
+        const PoseBatch3DView *previous_poses_3d);
+
+    PreviousPoseFilterDebug filter_pairs_with_previous_poses(
+        const PoseBatch2DView &poses_2d,
+        const std::vector<Camera> &cameras,
+        const std::vector<std::string> &joint_names,
+        const PoseBatch3DView &previous_poses_3d);

 private:
+    struct PreparedInputs
+    {
+        std::vector<CachedCamera> internal_cameras;
+        std::vector<size_t> core_joint_idx;
+        std::vector<std::array<size_t, 2>> core_limbs_idx;
+        PackedPoseStore2D packed_poses;
+
+        PreparedInputs(
+            std::vector<CachedCamera> cameras_in,
+            std::vector<size_t> core_joint_idx_in,
+            std::vector<std::array<size_t, 2>> core_limbs_idx_in,
+            PackedPoseStore2D packed_poses_in)
+            : internal_cameras(std::move(cameras_in)),
+              core_joint_idx(std::move(core_joint_idx_in)),
+              core_limbs_idx(std::move(core_limbs_idx_in)),
+              packed_poses(std::move(packed_poses_in))
+        {
+        }
+    };
+
+    struct PreviousProjectionCache
+    {
+        std::vector<std::vector<Pose2D>> projected_poses;
+        std::vector<std::vector<std::vector<float>>> projected_dists;
+        std::vector<Pose3D> core_poses;
+    };
+
+    PreparedInputs prepare_inputs(
+        const PoseBatch2DView &poses_2d,
+        const std::vector<Camera> &cameras,
+        const std::vector<std::string> &joint_names);
+
+    std::vector<PairCandidate> build_pair_candidates(const PackedPoseStore2D &packed_poses) const;
+
+    PreviousProjectionCache project_previous_poses(
+        const PoseBatch3DView &previous_poses_3d,
+        const std::vector<CachedCamera> &internal_cameras,
+        const std::vector<size_t> &core_joint_idx);
+
+    PreviousPoseFilterDebug filter_pairs_with_previous_poses(
+        const PackedPoseStore2D &packed_poses,
+        const std::vector<CachedCamera> &internal_cameras,
+        const std::vector<size_t> &core_joint_idx,
+        const std::vector<PairCandidate> &pairs,
+        const PoseBatch3DView &previous_poses_3d);
+
+    float calc_pose_score(
+        const Pose2D &pose,
+        const Pose2D &reference_pose,
+        const std::vector<float> &dists,
+        const CachedCamera &icam);
+
    std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses(
        const std::vector<Pose3D> &poses_3d,
-        const CameraInternal &icam,
+        const CachedCamera &icam,
        bool calc_dists);

    std::vector<float> score_projection(
@@ -251,8 +311,8 @@ private:
    std::pair<Pose3D, float> triangulate_and_score(
        const Pose2D &pose1,
        const Pose2D &pose2,
-        const CameraInternal &cam1,
-        const CameraInternal &cam2,
+        const CachedCamera &cam1,
+        const CachedCamera &cam2,
        const std::array<std::array<float, 3>, 2> &roomparams,
        const std::vector<std::array<size_t, 2>> &core_limbs_idx);

@@ -313,17 +373,9 @@ private:
    };
 };

-PoseBatch3D empty_pose_batch3d(size_t num_joints)
-{
-    PoseBatch3D poses_3d;
-    poses_3d.num_joints = num_joints;
-    return poses_3d;
-}
-
-PoseBatch3D TriangulatorInternal::triangulate_poses(
+TriangulatorInternal::PreparedInputs TriangulatorInternal::prepare_inputs(
    const PoseBatch2DView &poses_2d,
    const std::vector<Camera> &cameras,
-    const std::array<std::array<float, 3>, 2> &roomparams,
    const std::vector<std::string> &joint_names)
 {
    if (poses_2d.num_views == 0)
@@ -342,6 +394,10 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
    {
        throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
    }
+    if (poses_2d.num_joints == 0)
+    {
+        throw std::invalid_argument("At least one joint is required.");
+    }
    if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr)
    {
        throw std::invalid_argument("poses_2d data must not be null.");
@@ -356,11 +412,11 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(

    num_cams = cameras.size();

-    std::vector<CameraInternal> internal_cameras;
+    std::vector<CachedCamera> internal_cameras;
    internal_cameras.reserve(cameras.size());
    for (const auto &camera : cameras)
    {
-        internal_cameras.emplace_back(camera);
+        internal_cameras.push_back(cache_camera(camera));
    }

    std::vector<size_t> core_joint_idx;
@@ -383,64 +439,310 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
    }

    PackedPoseStore2D packed_poses(poses_2d, internal_cameras);
+    return PreparedInputs(
+        std::move(internal_cameras),
+        std::move(core_joint_idx),
+        std::move(core_limbs_idx),
+        std::move(packed_poses));
+}

-    std::vector<PosePair> all_pairs;
-    for (size_t view1 = 0; view1 < cameras.size(); ++view1)
+std::vector<PairCandidate> TriangulatorInternal::build_pair_candidates(const PackedPoseStore2D &packed_poses) const
+{
+    std::vector<PairCandidate> pairs;
+    for (size_t view1 = 0; view1 < packed_poses.num_views; ++view1)
    {
-        for (size_t view2 = view1 + 1; view2 < cameras.size(); ++view2)
+        for (size_t view2 = view1 + 1; view2 < packed_poses.num_views; ++view2)
        {
            for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1)
            {
                for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2)
                {
-                    all_pairs.emplace_back(
-                        std::make_tuple(
-                            static_cast<int>(view1),
-                            static_cast<int>(view2),
-                            static_cast<int>(person1),
-                            static_cast<int>(person2)),
-                        std::make_pair(
-                            static_cast<int>(packed_poses.pose_index(view1, person1)),
-                            static_cast<int>(packed_poses.pose_index(view2, person2))));
+                    pairs.push_back(PairCandidate {
+                        static_cast<int>(view1),
+                        static_cast<int>(view2),
+                        static_cast<int>(person1),
+                        static_cast<int>(person2),
+                        static_cast<int>(packed_poses.pose_index(view1, person1)),
+                        static_cast<int>(packed_poses.pose_index(view2, person2)),
+                    });
                }
            }
        }
    }
+    return pairs;
+}

-    if (all_pairs.empty())
+TriangulatorInternal::PreviousProjectionCache TriangulatorInternal::project_previous_poses(
+    const PoseBatch3DView &previous_poses_3d,
+    const std::vector<CachedCamera> &internal_cameras,
+    const std::vector<size_t> &core_joint_idx)
+{
+    PreviousProjectionCache cache;
+    cache.core_poses.resize(previous_poses_3d.num_persons);
+    for (size_t person = 0; person < previous_poses_3d.num_persons; ++person)
    {
-        return empty_pose_batch3d(joint_names.size());
-    }
-
-    std::vector<std::pair<Pose3D, float>> all_scored_poses(all_pairs.size());
-    for (size_t i = 0; i < all_pairs.size(); ++i)
-    {
-        const auto &pids = all_pairs[i].first;
-        const auto &cam1 = internal_cameras[std::get<0>(pids)];
-        const auto &cam2 = internal_cameras[std::get<1>(pids)];
-        Pose2D pose1_core = packed_poses.pose(std::get<0>(pids), std::get<2>(pids), core_joint_idx);
-        Pose2D pose2_core = packed_poses.pose(std::get<1>(pids), std::get<3>(pids), core_joint_idx);
-
-        auto [pose3d, score] = triangulate_and_score(
-            pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx);
-        all_scored_poses[i] = std::make_pair(std::move(pose3d), score);
-    }
-
-    for (size_t i = all_scored_poses.size(); i > 0; --i)
-    {
-        if (all_scored_poses[i - 1].second < min_match_score)
+        Pose3D pose(core_joint_idx.size(), {0.0f, 0.0f, 0.0f, 0.0f});
+        for (size_t joint = 0; joint < core_joint_idx.size(); ++joint)
        {
-            all_scored_poses.erase(all_scored_poses.begin() + static_cast<std::ptrdiff_t>(i - 1));
-            all_pairs.erase(all_pairs.begin() + static_cast<std::ptrdiff_t>(i - 1));
+            const size_t source_joint = core_joint_idx[joint];
+            for (size_t coord = 0; coord < 4; ++coord)
+            {
+                pose[joint][coord] = previous_poses_3d.at(person, source_joint, coord);
+            }
+        }
+        cache.core_poses[person] = std::move(pose);
+    }
+
+    cache.projected_poses.resize(internal_cameras.size());
+    cache.projected_dists.resize(internal_cameras.size());
+    for (size_t view = 0; view < internal_cameras.size(); ++view)
+    {
+        auto [projected_poses, projected_dists] = project_poses(cache.core_poses, internal_cameras[view], true);
+        cache.projected_poses[view] = std::move(projected_poses);
+        cache.projected_dists[view] = std::move(projected_dists);
+    }
+
+    return cache;
+}
+
+float TriangulatorInternal::calc_pose_score(
+    const Pose2D &pose,
+    const Pose2D &reference_pose,
+    const std::vector<float> &dists,
+    const CachedCamera &icam)
+{
+    const float min_score = 0.1f;
+    std::vector<bool> mask(pose.size(), false);
+    size_t valid_count = 0;
+    for (size_t joint = 0; joint < pose.size(); ++joint)
+    {
+        mask[joint] = pose[joint][2] > min_score && reference_pose[joint][2] > min_score;
+        if (mask[joint])
+        {
+            ++valid_count;
+        }
+    }
+    if (valid_count < 3)
+    {
+        return 0.0f;
+    }
+
+    const float iscale = (icam.cam.width + icam.cam.height) * 0.5f;
+    std::vector<float> scores = score_projection(pose, reference_pose, dists, mask, iscale);
+    const size_t drop_k = static_cast<size_t>(pose.size() * 0.2f);
+    if (drop_k > 0)
+    {
+        std::partial_sort(scores.begin(), scores.begin() + drop_k, scores.end());
+    }
+
+    float total = 0.0f;
+    for (size_t i = drop_k; i < scores.size(); ++i)
+    {
+        total += scores[i];
+    }
+    return total / static_cast<float>(scores.size() - drop_k);
+}
+
+PreviousPoseFilterDebug TriangulatorInternal::filter_pairs_with_previous_poses(
+    const PackedPoseStore2D &packed_poses,
+    const std::vector<CachedCamera> &internal_cameras,
+    const std::vector<size_t> &core_joint_idx,
+    const std::vector<PairCandidate> &pairs,
+    const PoseBatch3DView &previous_poses_3d)
+{
+    PreviousPoseFilterDebug debug;
+    debug.used_previous_poses = true;
+    debug.matches.resize(pairs.size());
+
+    if (previous_poses_3d.num_persons == 0)
+    {
+        debug.kept_pair_indices.resize(pairs.size());
+        std::iota(debug.kept_pair_indices.begin(), debug.kept_pair_indices.end(), 0);
+        debug.kept_pairs = pairs;
+        for (auto &match : debug.matches)
+        {
+            match.decision = "no_previous_poses";
+        }
+        return debug;
+    }
+
+    const PreviousProjectionCache projected_previous =
+        project_previous_poses(previous_poses_3d, internal_cameras, core_joint_idx);
+    const float threshold = min_match_score;
+
+    for (size_t pair_index = 0; pair_index < pairs.size(); ++pair_index)
+    {
+        const PairCandidate &pair = pairs[pair_index];
+        const Pose2D pose1_core = packed_poses.pose(
+            static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), core_joint_idx);
+        const Pose2D pose2_core = packed_poses.pose(
+            static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), core_joint_idx);
+
+        PreviousPoseMatch best_match;
+        bool found_decision = false;
+        for (size_t previous_index = 0; previous_index < previous_poses_3d.num_persons; ++previous_index)
+        {
+            const float score_view1 = calc_pose_score(
+                pose1_core,
+                projected_previous.projected_poses[static_cast<size_t>(pair.view1)][previous_index],
+                projected_previous.projected_dists[static_cast<size_t>(pair.view1)][previous_index],
+                internal_cameras[static_cast<size_t>(pair.view1)]);
+            const float score_view2 = calc_pose_score(
+                pose2_core,
+                projected_previous.projected_poses[static_cast<size_t>(pair.view2)][previous_index],
+                projected_previous.projected_dists[static_cast<size_t>(pair.view2)][previous_index],
+                internal_cameras[static_cast<size_t>(pair.view2)]);
+            const bool matched_view1 = score_view1 > threshold;
+            const bool matched_view2 = score_view2 > threshold;
+
+            if (matched_view1 && matched_view2)
+            {
+                best_match = PreviousPoseMatch {
+                    static_cast<int>(previous_index),
+                    score_view1,
+                    score_view2,
+                    true,
+                    true,
+                    true,
+                    "matched_previous_pose",
+                };
+                found_decision = true;
+                break;
+            }
+
+            if (matched_view1 || matched_view2)
+            {
+                best_match = PreviousPoseMatch {
+                    static_cast<int>(previous_index),
+                    score_view1,
+                    score_view2,
+                    matched_view1,
+                    matched_view2,
+                    false,
+                    "dropped_single_view_match",
+                };
+                found_decision = true;
+                break;
+            }
+        }
+
+        if (!found_decision)
+        {
+            best_match.decision = "new_person_candidate";
+            best_match.kept = true;
+        }
+
+        debug.matches[pair_index] = best_match;
+        if (best_match.kept)
+        {
+            debug.kept_pair_indices.push_back(static_cast<int>(pair_index));
+            debug.kept_pairs.push_back(pair);
        }
    }

-    if (all_pairs.empty())
+    return debug;
+}
+
+TriangulationTrace TriangulatorInternal::triangulate_debug(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::array<std::array<float, 3>, 2> &roomparams,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView *previous_poses_3d)
+{
+    TriangulationTrace trace;
+    trace.final_poses.num_joints = joint_names.size();
+
+    if (previous_poses_3d != nullptr && previous_poses_3d->num_persons > 0 &&
+        previous_poses_3d->num_joints != joint_names.size())
    {
-        return empty_pose_batch3d(joint_names.size());
+        throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
+    }
+
+    PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
+    trace.pairs = build_pair_candidates(prepared.packed_poses);
+    if (trace.pairs.empty())
+    {
+        return trace;
+    }
+
+    std::vector<int> active_pair_indices(trace.pairs.size());
+    std::iota(active_pair_indices.begin(), active_pair_indices.end(), 0);
+    std::vector<PairCandidate> active_pairs = trace.pairs;
+
+    if (previous_poses_3d != nullptr)
+    {
+        trace.previous_filter = filter_pairs_with_previous_poses(
+            prepared.packed_poses,
+            prepared.internal_cameras,
+            prepared.core_joint_idx,
+            trace.pairs,
+            *previous_poses_3d);
+        active_pair_indices = trace.previous_filter.kept_pair_indices;
+        active_pairs = trace.previous_filter.kept_pairs;
+        if (active_pairs.empty())
+        {
+            return trace;
+        }
+    }
+
+    trace.core_proposals.reserve(active_pairs.size());
+    std::vector<PosePair> kept_pose_pairs;
+    std::vector<std::pair<Pose3D, float>> kept_scored_poses;
+    std::vector<int> kept_core_indices;
+    for (size_t i = 0; i < active_pairs.size(); ++i)
+    {
+        const PairCandidate &pair = active_pairs[i];
+        const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
+        const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
+        const Pose2D pose1_core = prepared.packed_poses.pose(
+            static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), prepared.core_joint_idx);
+        const Pose2D pose2_core = prepared.packed_poses.pose(
+            static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), prepared.core_joint_idx);
+
+        auto [pose3d, score] = triangulate_and_score(
+            pose1_core, pose2_core, cam1, cam2, roomparams, prepared.core_limbs_idx);
+
+        CoreProposalDebug proposal;
+        proposal.pair_index = active_pair_indices[i];
+        proposal.pair = pair;
+        proposal.pose_3d = pose3d;
+        proposal.score = score;
+        proposal.kept = score >= min_match_score;
+        proposal.drop_reason = proposal.kept ? "kept" : "below_min_match_score";
+        trace.core_proposals.push_back(proposal);
+
+        if (!proposal.kept)
+        {
+            continue;
+        }
+
+        kept_core_indices.push_back(static_cast<int>(trace.core_proposals.size() - 1));
+        kept_scored_poses.emplace_back(std::move(pose3d), score);
+        kept_pose_pairs.emplace_back(
+            std::make_tuple(pair.view1, pair.view2, pair.person1, pair.person2),
+            std::make_pair(pair.global_person1, pair.global_person2));
+    }
+
+    if (kept_pose_pairs.empty())
+    {
+        return trace;
+    }
+
+    auto groups = calc_grouping(kept_pose_pairs, kept_scored_poses, min_match_score);
+    trace.grouping.initial_groups.reserve(groups.size());
+    for (const auto &group : groups)
+    {
+        ProposalGroupDebug debug_group;
+        debug_group.center = std::get<0>(group);
+        debug_group.pose_3d = std::get<1>(group);
+        for (const int index : std::get<2>(group))
+        {
+            debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
+        }
+        trace.grouping.initial_groups.push_back(std::move(debug_group));
    }

-    std::vector<GroupedPose> groups = calc_grouping(all_pairs, all_scored_poses, min_match_score);
    for (size_t i = groups.size(); i > 0; --i)
    {
        if (std::get<2>(groups[i - 1]).size() < min_group_size)
@@ -448,16 +750,15 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
            groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
        }
    }
-
    if (groups.empty())
    {
-        return empty_pose_batch3d(joint_names.size());
+        return trace;
    }

    std::map<std::tuple<int, int, int>, std::vector<size_t>> pairs_map;
-    for (size_t i = 0; i < all_pairs.size(); ++i)
+    for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
    {
-        const auto &pair = all_pairs[i];
+        const auto &pair = kept_pose_pairs[i];
        const auto &mid1 = std::make_tuple(
            std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second));
        const auto &mid2 = std::make_tuple(
@@ -466,19 +767,19 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
        pairs_map[mid2].push_back(i);
    }

-    std::vector<size_t> group_map(all_pairs.size());
+    std::vector<size_t> group_map(kept_pose_pairs.size());
    for (size_t i = 0; i < groups.size(); ++i)
    {
-        for (const auto &idx : std::get<2>(groups[i]))
+        for (const int index : std::get<2>(groups[i]))
        {
-            group_map[idx] = i;
+            group_map[static_cast<size_t>(index)] = i;
        }
    }

    std::set<size_t> drop_indices;
-    for (auto &pair : pairs_map)
+    for (auto &pair_entry : pairs_map)
    {
-        auto &indices = pair.second;
+        auto &indices = pair_entry.second;
        if (indices.size() <= 1)
        {
            continue;
@@ -488,10 +789,10 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
        std::vector<float> pair_scores;
        group_sizes.reserve(indices.size());
        pair_scores.reserve(indices.size());
-        for (auto idx : indices)
+        for (const size_t index : indices)
        {
-            group_sizes.push_back(std::get<2>(groups[group_map[idx]]).size());
-            pair_scores.push_back(all_scored_poses[idx].second);
+            group_sizes.push_back(std::get<2>(groups[group_map[index]]).size());
+            pair_scores.push_back(kept_scored_poses[index].second);
        }

        std::vector<size_t> indices_sorted(indices.size());
@@ -514,10 +815,12 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(

    std::vector<size_t> drop_list(drop_indices.begin(), drop_indices.end());
    std::sort(drop_list.begin(), drop_list.end(), std::greater<size_t>());
-    for (size_t drop_idx : drop_list)
+    for (const size_t drop_idx : drop_list)
    {
-        all_scored_poses.erase(all_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx));
-        all_pairs.erase(all_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx));
+        trace.grouping.duplicate_pair_drops.push_back(kept_core_indices[drop_idx]);
+        kept_scored_poses.erase(kept_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx));
+        kept_pose_pairs.erase(kept_pose_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx));
+        kept_core_indices.erase(kept_core_indices.begin() + static_cast<std::ptrdiff_t>(drop_idx));

        for (auto &group : groups)
        {
@@ -527,11 +830,11 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
            {
                indices.erase(it);
            }
-            for (auto &index : indices)
+            for (int &index : indices)
            {
                if (static_cast<size_t>(index) > drop_idx)
                {
-                    index -= 1;
+                    --index;
                }
            }
        }
@@ -544,24 +847,45 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
            groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
        }
    }
-
    if (groups.empty())
    {
-        return empty_pose_batch3d(joint_names.size());
+        return trace;
    }

-    std::vector<Pose3D> all_full_poses(all_pairs.size());
-    for (size_t i = 0; i < all_pairs.size(); ++i)
+    trace.grouping.groups.reserve(groups.size());
+    for (const auto &group : groups)
    {
-        const auto &pids = all_pairs[i].first;
-        const auto &cam1 = internal_cameras[std::get<0>(pids)];
-        const auto &cam2 = internal_cameras[std::get<1>(pids)];
-        Pose2D pose1 = packed_poses.pose(std::get<0>(pids), std::get<2>(pids));
-        Pose2D pose2 = packed_poses.pose(std::get<1>(pids), std::get<3>(pids));
+        ProposalGroupDebug debug_group;
+        debug_group.center = std::get<0>(group);
+        debug_group.pose_3d = std::get<1>(group);
+        for (const int index : std::get<2>(group))
+        {
+            debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
+        }
+        trace.grouping.groups.push_back(std::move(debug_group));
+    }

-        auto [pose3d, score] = triangulate_and_score(
-            pose1, pose2, cam1, cam2, roomparams, {});
+    std::vector<Pose3D> full_pose_buffer(kept_pose_pairs.size());
+    std::vector<int> full_proposal_index_by_core(trace.core_proposals.size(), -1);
+    trace.full_proposals.reserve(kept_pose_pairs.size());
+    for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
+    {
+        const auto &pair_ids = kept_pose_pairs[i].first;
+        const PairCandidate pair = {
+            std::get<0>(pair_ids),
+            std::get<1>(pair_ids),
+            std::get<2>(pair_ids),
+            std::get<3>(pair_ids),
+            kept_pose_pairs[i].second.first,
+            kept_pose_pairs[i].second.second,
+        };
+        const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
+        const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
+        Pose2D pose1 = prepared.packed_poses.pose(static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1));
+        Pose2D pose2 = prepared.packed_poses.pose(static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2));

+        auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams, {});
+        (void)score;
        for (size_t joint = 0; joint < pose3d.size(); ++joint)
        {
            const float score1 = pose1[joint][2];
@@ -576,32 +900,43 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
            }
        }

-        all_full_poses[i] = std::move(pose3d);
+        full_pose_buffer[i] = pose3d;
+        full_proposal_index_by_core[kept_core_indices[i]] = static_cast<int>(trace.full_proposals.size());
+        trace.full_proposals.push_back(FullProposalDebug {
+            kept_core_indices[i],
+            pair,
+            std::move(pose3d),
+        });
    }

    std::vector<Pose3D> final_poses_3d(groups.size());
+    trace.merge.group_proposal_indices.reserve(groups.size());
+    trace.merge.merged_poses.reserve(groups.size());
    for (size_t i = 0; i < groups.size(); ++i)
    {
        std::vector<Pose3D> poses;
+        std::vector<int> source_core_indices;
        poses.reserve(std::get<2>(groups[i]).size());
-        for (const auto &idx : std::get<2>(groups[i]))
+        source_core_indices.reserve(std::get<2>(groups[i]).size());
+        for (const int index : std::get<2>(groups[i]))
        {
-            poses.push_back(all_full_poses[idx]);
+            poses.push_back(full_pose_buffer[static_cast<size_t>(index)]);
+            source_core_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
        }
        final_poses_3d[i] = merge_group(poses, min_match_score);
+        trace.merge.group_proposal_indices.push_back(std::move(source_core_indices));
+        trace.merge.merged_poses.push_back(final_poses_3d[i]);
    }

    add_extra_joints(final_poses_3d, joint_names);
-    filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx);
+    filter_poses(final_poses_3d, roomparams, prepared.core_joint_idx, prepared.core_limbs_idx);
    add_missing_joints(final_poses_3d, joint_names);
    replace_far_joints(final_poses_3d, joint_names);

-    PoseBatch3D poses_3d;
-    poses_3d.num_joints = joint_names.size();
-
+    size_t valid_persons = 0;
    for (const auto &pose : final_poses_3d)
    {
-        const auto is_valid = std::any_of(
+        const bool is_valid = std::any_of(
            pose.begin(),
            pose.end(),
            [this](const std::array<float, 4> &joint)
@@ -610,15 +945,17 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
            });
        if (is_valid)
        {
-            ++poses_3d.num_persons;
+            ++valid_persons;
        }
    }

-    poses_3d.data.resize(poses_3d.num_persons * poses_3d.num_joints * 4);
+    trace.final_poses.num_persons = valid_persons;
+    trace.final_poses.data.resize(valid_persons * trace.final_poses.num_joints * 4);
+
    size_t person_index = 0;
    for (const auto &pose : final_poses_3d)
    {
-        const auto is_valid = std::any_of(
+        const bool is_valid = std::any_of(
            pose.begin(),
            pose.end(),
            [this](const std::array<float, 4> &joint)
@@ -630,24 +967,57 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
            continue;
        }

-        for (size_t joint = 0; joint < poses_3d.num_joints; ++joint)
+        for (size_t joint = 0; joint < trace.final_poses.num_joints; ++joint)
        {
            for (size_t coord = 0; coord < 4; ++coord)
            {
-                poses_3d.at(person_index, joint, coord) = pose[joint][coord];
+                trace.final_poses.at(person_index, joint, coord) = pose[joint][coord];
            }
        }
        ++person_index;
    }

-    return poses_3d;
+    return trace;
+}
+
+PreviousPoseFilterDebug TriangulatorInternal::filter_pairs_with_previous_poses(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView &previous_poses_3d)
+{
+    if (previous_poses_3d.num_persons > 0 && previous_poses_3d.num_joints != joint_names.size())
+    {
+        throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
+    }
+
+    PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
+    const std::vector<PairCandidate> pairs = build_pair_candidates(prepared.packed_poses);
+    if (pairs.empty())
+    {
+        PreviousPoseFilterDebug empty;
+        empty.used_previous_poses = true;
+        return empty;
+    }
+
+    return filter_pairs_with_previous_poses(
+        prepared.packed_poses,
+        prepared.internal_cameras,
+        prepared.core_joint_idx,
+        pairs,
+        previous_poses_3d);
 }

 std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> TriangulatorInternal::project_poses(
    const std::vector<Pose3D> &poses_3d,
-    const CameraInternal &icam,
+    const CachedCamera &icam,
    bool calc_dists)
 {
+    if (poses_3d.empty())
+    {
+        return std::make_tuple(std::vector<Pose2D> {}, std::vector<std::vector<float>> {});
+    }
+
    const size_t num_persons = poses_3d.size();
    const size_t num_joints = poses_3d[0].size();

@@ -813,7 +1183,7 @@ std::array<float, 3> mat_mul_vec(
    return res;
 }

-std::array<float, 3> calc_ray_dir(const CameraInternal &icam, const std::array<float, 2> &pt)
+std::array<float, 3> calc_ray_dir(const CachedCamera &icam, const std::array<float, 2> &pt)
 {
    // Compute normalized ray direction from the point
    std::array<float, 3> uv1 = {pt[0], pt[1], 1.0};
@@ -824,8 +1194,8 @@ std::array<float, 3> calc_ray_dir(const CameraInternal &icam, const std::array<f
 }

 std::array<float, 4> triangulate_midpoint(
-    const CameraInternal &icam1,
-    const CameraInternal &icam2,
+    const CachedCamera &icam1,
+    const CachedCamera &icam2,
    const std::array<float, 2> &pt1,
    const std::array<float, 2> &pt2)
 {
@@ -864,8 +1234,8 @@ std::array<float, 4> triangulate_midpoint(
 std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
    const std::vector<std::array<float, 3>> &pose1,
    const std::vector<std::array<float, 3>> &pose2,
-    const CameraInternal &cam1,
-    const CameraInternal &cam2,
+    const CachedCamera &cam1,
+    const CachedCamera &cam2,
    const std::array<std::array<float, 3>, 2> &roomparams,
    const std::vector<std::array<size_t, 2>> &core_limbs_idx)
 {
@@ -2026,14 +2396,89 @@ void TriangulatorInternal::replace_far_joints(

 } // namespace

+std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d)
+{
+    if (poses_2d.person_counts == nullptr)
+    {
+        throw std::invalid_argument("person_counts must not be null.");
+    }
+
+    std::vector<PairCandidate> pairs;
+    std::vector<int> offsets(poses_2d.num_views, 0);
+    int total = 0;
+    for (size_t view = 0; view < poses_2d.num_views; ++view)
+    {
+        if (poses_2d.person_counts[view] > poses_2d.max_persons)
+        {
+            throw std::invalid_argument("person_counts entries must not exceed the padded person dimension.");
+        }
+        offsets[view] = total;
+        total += static_cast<int>(poses_2d.person_counts[view]);
+    }
+
+    for (size_t view1 = 0; view1 < poses_2d.num_views; ++view1)
+    {
+        for (size_t view2 = view1 + 1; view2 < poses_2d.num_views; ++view2)
+        {
+            for (size_t person1 = 0; person1 < poses_2d.person_counts[view1]; ++person1)
+            {
+                for (size_t person2 = 0; person2 < poses_2d.person_counts[view2]; ++person2)
+                {
+                    pairs.push_back(PairCandidate {
+                        static_cast<int>(view1),
+                        static_cast<int>(view2),
+                        static_cast<int>(person1),
+                        static_cast<int>(person2),
+                        offsets[view1] + static_cast<int>(person1),
+                        offsets[view2] + static_cast<int>(person2),
+                    });
+                }
+            }
+        }
+    }
+    return pairs;
+}
+
+PreviousPoseFilterDebug filter_pairs_with_previous_poses(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView &previous_poses_3d,
+    float min_match_score)
+{
+    TriangulatorInternal triangulator(min_match_score, 1);
+    return triangulator.filter_pairs_with_previous_poses(poses_2d, cameras, joint_names, previous_poses_3d);
+}
+
+TriangulationTrace triangulate_debug(
+    const PoseBatch2DView &poses_2d,
+    const std::vector<Camera> &cameras,
+    const std::array<std::array<float, 3>, 2> &roomparams,
+    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView *previous_poses_3d,
+    float min_match_score,
+    size_t min_group_size)
+{
+    TriangulatorInternal triangulator(min_match_score, min_group_size);
+    return triangulator.triangulate_debug(poses_2d, cameras, roomparams, joint_names, previous_poses_3d);
+}
+
 PoseBatch3D triangulate_poses(
    const PoseBatch2DView &poses_2d,
    const std::vector<Camera> &cameras,
    const std::array<std::array<float, 3>, 2> &roomparams,
    const std::vector<std::string> &joint_names,
+    const PoseBatch3DView *previous_poses_3d,
    float min_match_score,
    size_t min_group_size)
 {
-    TriangulatorInternal triangulator(min_match_score, min_group_size);
-    return triangulator.triangulate_poses(poses_2d, cameras, roomparams, joint_names);
+    return triangulate_debug(
+               poses_2d,
+               cameras,
+               roomparams,
+               joint_names,
+               previous_poses_3d,
+               min_match_score,
+               min_group_size)
+        .final_poses;
 }