Clean up nanobind typing and source layout

This commit is contained in:
2026-03-27 11:19:53 +08:00
parent 9d63177de0
commit 69e83d8430
19 changed files with 792 additions and 47 deletions
+25
View File
@@ -0,0 +1,25 @@
# RPT Core Library
set(RPT_SOURCES
    camera.cpp
    interface.cpp
    rgbd_merger.cpp
    triangulator.cpp
)
add_library(rpt_core STATIC ${RPT_SOURCES})

# Consumers include the headers relative to this directory.
target_include_directories(rpt_core PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
)

# Position-independent code via the portable property instead of a raw -fPIC
# flag (required when this static lib is linked into a shared object, e.g. a
# Python extension; the property also works on non-GCC toolchains).
set_target_properties(rpt_core PROPERTIES POSITION_INDEPENDENT_CODE ON)

target_compile_options(rpt_core PRIVATE
    # NOTE: -march=native ties the binary to the build host's CPU; drop or
    # override this for distributable/portable builds.
    -march=native
    -Wall
)

# Release mode optimizations
target_compile_options(rpt_core PRIVATE
    $<$<CONFIG:Release>:-O3;-flto=auto>
)
+518
View File
@@ -0,0 +1,518 @@
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <vector>
#include "camera.hpp"
namespace
{
// Return the transpose of a row-major 3x3 matrix.
std::array<std::array<float, 3>, 3> transpose3x3(const std::array<std::array<float, 3>, 3> &M)
{
    std::array<std::array<float, 3>, 3> out {};
    for (size_t row = 0; row < 3; ++row)
    {
        for (size_t col = 0; col < 3; ++col)
        {
            out[row][col] = M[col][row];
        }
    }
    return out;
}
// Invert a row-major 3x3 matrix via the adjugate/determinant formula.
// A (near-)singular input (|det| < 1e-6) yields the all-zero matrix.
std::array<std::array<float, 3>, 3> invert3x3(const std::array<std::array<float, 3>, 3> &M)
{
    // 2x2 sub-determinant of rows {r0, r1} and columns {c0, c1}.
    auto minor_det = [&M](size_t r0, size_t r1, size_t c0, size_t c1) {
        return M[r0][c0] * M[r1][c1] - M[r0][c1] * M[r1][c0];
    };
    // Adjugate: signed cofactors, already laid out transposed.
    std::array<std::array<float, 3>, 3> adj {};
    adj[0][0] = minor_det(1, 2, 1, 2);
    adj[0][1] = -minor_det(0, 2, 1, 2);
    adj[0][2] = minor_det(0, 1, 1, 2);
    adj[1][0] = -minor_det(1, 2, 0, 2);
    adj[1][1] = minor_det(0, 2, 0, 2);
    adj[1][2] = -minor_det(0, 1, 0, 2);
    adj[2][0] = minor_det(1, 2, 0, 1);
    adj[2][1] = -minor_det(0, 2, 0, 1);
    adj[2][2] = minor_det(0, 1, 0, 1);
    // Laplace expansion along the first row.
    const float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
    if (std::fabs(det) < 1e-6f)
    {
        return {};  // value-initialized: all zeros
    }
    const float idet = 1.0f / det;
    for (auto &row : adj)
    {
        for (auto &value : row)
        {
            value *= idet;
        }
    }
    return adj;
}
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
const Camera &cam, float balance, std::pair<int, int> new_size);
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
const Camera &cam, float alpha, std::pair<int, int> new_size);
} // namespace
// =================================================================================================
// =================================================================================================
/// Format an MxN row-major float matrix as a nested list, e.g. "[[1, 2], [3, 4]]".
///
/// Uses a fresh stream, so values are printed with default iostream float
/// formatting — flags set on a caller's stream do not apply here.
///
/// Fix: return a non-const value; a `const` return type disabled the implicit
/// move of the result and forced a copy at every call site.
template <size_t M, size_t N>
static std::string print_matrix(const std::array<std::array<float, N>, M> &matrix)
{
    std::ostringstream out;
    out << "[";
    for (size_t row = 0; row < M; ++row)
    {
        out << "[";
        for (size_t col = 0; col < N; ++col)
        {
            out << matrix[row][col];
            if (col + 1 < N)
            {
                out << ", ";
            }
        }
        out << "]";
        if (row + 1 < M)
        {
            out << ", ";
        }
    }
    out << "]";
    return out.str();
}
// =================================================================================================
// =================================================================================================
// Render the camera as a Python-dict style string for logging / debugging.
// The fixed 6-digit precision applies only to values streamed directly into
// this stream (the DC list); print_matrix formats K/R/T on its own stream.
std::string Camera::to_string() const
{
    std::ostringstream out;
    out << std::fixed << std::setprecision(6);
    out << "{"
        << "'name': '" << name << "', "
        << "'K': " << print_matrix(K) << ", "
        << "'DC': [";
    const char *sep = "";
    for (float coeff : DC)
    {
        out << sep << coeff;
        sep = ", ";
    }
    out << "], "
        << "'R': " << print_matrix(R) << ", "
        << "'T': " << print_matrix(T) << ", "
        << "'width': " << width << ", "
        << "'height': " << height << ", "
        << "'model': '" << camera_model_name(model) << "'"
        << "}";
    return out.str();
}
// =================================================================================================
// Map a CameraModel value to its lowercase serialization name.
// Out-of-range values (e.g. after a bad cast) map to "unknown".
const char *camera_model_name(CameraModel model)
{
    if (model == CameraModel::Pinhole)
    {
        return "pinhole";
    }
    if (model == CameraModel::Fisheye)
    {
        return "fisheye";
    }
    return "unknown";
}
// Inverse of camera_model_name(): parse a lowercase model name.
// @throws std::invalid_argument for any unrecognized value.
CameraModel parse_camera_model(const std::string &value)
{
    if (value == "pinhole")
        return CameraModel::Pinhole;
    if (value == "fisheye")
        return CameraModel::Fisheye;
    throw std::invalid_argument("Unsupported camera model: " + value);
}
// =================================================================================================
// Stream insertion simply delegates to Camera::to_string().
std::ostream &operator<<(std::ostream &out, const Camera &cam)
{
    return out << cam.to_string();
}
// =================================================================================================
// =================================================================================================
/// Build a fully-initialized Camera: store the raw calibration and precompute
/// the derived members (invR, center, newK, invK) used by the triangulation
/// code.
///
/// @param name   Camera identifier.
/// @param K      3x3 intrinsic matrix.
/// @param DC     Distortion coefficients (layout depends on `model`).
/// @param R, T   Extrinsic rotation and 3x1 translation.
/// @param width, height  Image size in pixels.
/// @param model  Lens projection model (selects the undistortion path).
Camera make_camera(
    std::string name,
    std::array<std::array<float, 3>, 3> K,
    std::array<float, 5> DC,
    std::array<std::array<float, 3>, 3> R,
    std::array<std::array<float, 1>, 3> T,
    int width,
    int height,
    CameraModel model)
{
    Camera cam {
        .name = std::move(name),
        .K = K,
        .DC = DC,
        .R = R,
        .T = T,
        .width = width,
        .height = height,
        .model = model,
    };
    // Assumes R is orthonormal (a proper rotation), so its inverse is its
    // transpose — TODO confirm the calibration pipeline guarantees this.
    cam.invR = transpose3x3(cam.R);
    // Camera center:
    // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
    cam.center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
    // Undistort camera matrix
    // As with the undistortion, the own implementation avoids some overhead compared to OpenCV
    if (cam.model == CameraModel::Fisheye)
    {
        cam.newK = calc_optimal_camera_matrix_fisheye(cam, 1.0f, {cam.width, cam.height});
    }
    else
    {
        cam.newK = calc_optimal_camera_matrix_pinhole(cam, 1.0f, {cam.width, cam.height});
    }
    // NOTE: invK is the inverse of the *undistorted* matrix newK, not of the
    // raw K above.
    cam.invK = invert3x3(cam.newK);
    return cam;
}
// =================================================================================================
// Undistort a normalized image point in place (pinhole model).
//
// Mirrors cv::cvUndistortPointsInternal using only the distortion
// coefficients [k1, k2, p1, p2, k3]:
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/undistort.dispatch.cpp#L432
//
// @param p Normalized point (p[0], p[1]); p[2] is left untouched.
// @param k Distortion coefficients [k1, k2, p1, p2, k3].
void undistort_point_pinhole(std::array<float, 3> &p, const std::array<float, 5> &k)
{
    const float x0 = p[0];
    const float y0 = p[1];
    float x = x0;
    float y = y0;
    // Fixed-point iteration: repeatedly divide out the radial factor and
    // subtract the tangential term, starting from the distorted point.
    constexpr int kMaxIterations = 5;
    for (int iter = 0; iter < kMaxIterations; ++iter)
    {
        const float r2 = x * x + y * y;
        const double icdist = 1.0 / (1 + ((k[4] * r2 + k[1]) * r2 + k[0]) * r2);
        if (icdist < 0)
        {
            // Diverged (negative radial factor): fall back to the input point.
            x = x0;
            y = y0;
            break;
        }
        const float tangential_x = 2 * k[2] * x * y + k[3] * (r2 + 2 * x * x);
        const float tangential_y = k[2] * (r2 + 2 * y * y) + 2 * k[3] * x * y;
        x = (x0 - tangential_x) * icdist;
        y = (y0 - tangential_y) * icdist;
    }
    p[0] = x;
    p[1] = y;
}
// =================================================================================================
// Undistort a normalized fisheye point in place.
//
// Mirrors cv::fisheye::undistortPoints using only the coefficients
// [k1, k2, k3, k4] (the fifth entry of `k` is unused):
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L429
//
// @param p Normalized point (p[0], p[1]); p[2] is left untouched.
// @param k Distortion coefficients [k1, k2, k3, k4, unused].
void undistort_point_fisheye(std::array<float, 3> &p, const std::array<float, 5> &k)
{
    const float pi_half = M_PI * 0.5;
    float theta_d = std::sqrt(p[0] * p[0] + p[1] * p[1]);
    theta_d = std::min(std::max(-pi_half, theta_d), pi_half);
    if (theta_d < 1e-6)
    {
        // Point is (numerically) at the distortion center: nothing to do.
        return;
    }
    // Newton iteration solving
    //   theta * (1 + k1*t^2 + k2*t^4 + k3*t^6 + k4*t^8) = theta_d  for theta.
    float theta = theta_d;
    constexpr int kMaxIterations = 5;
    for (int iter = 0; iter < kMaxIterations; ++iter)
    {
        const float theta2 = theta * theta;
        const float theta4 = theta2 * theta2;
        const float t1 = k[0] * theta2;
        const float t2 = k[1] * theta4;
        const float t3 = k[2] * (theta4 * theta2);
        const float t4 = k[3] * (theta4 * theta4);
        const float step = (theta * (1 + t1 + t2 + t3 + t4) - theta_d) /
                           (1 + 3 * t1 + 5 * t2 + 7 * t3 + 9 * t4);
        theta -= step;
        if (std::fabs(step) < 1e-6)
        {
            break;
        }
    }
    const float scale = std::tan(theta) / theta_d;
    p[0] *= scale;
    p[1] *= scale;
}
// =================================================================================================
namespace
{
/// Compute an "optimal" undistorted camera matrix for a fisheye camera.
///
/// @param cam      Calibrated fisheye camera (reads K, DC, width, height).
/// @param balance  Blend in [0, 1] between the largest candidate focal length
///                 (0.0) and the smallest (1.0); clamped to that range.
/// @param new_size Target image size; when both components are positive the
///                 resulting intrinsics are rescaled to it.
/// @return         New 3x3 camera matrix for the undistorted image.
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
    const Camera &cam, float balance, std::pair<int, int> new_size)
{
    // Following: cv::fisheye::estimateNewCameraMatrixForUndistortRectify
    // https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L630
    float fov_scale = 1.0;
    float w = static_cast<float>(cam.width);
    float h = static_cast<float>(cam.height);
    balance = std::min(std::max(balance, 0.0f), 1.0f);
    // Define four key points at the middle of each edge
    std::vector<std ::array<float, 2>> pts = {
        {w * 0.5f, 0.0},
        {w, h * 0.5f},
        {w * 0.5f, h},
        {0.0, h * 0.5f}};
    // Extract intrinsic parameters
    float fx = cam.K[0][0];
    float fy = cam.K[1][1];
    float cx = cam.K[0][2];
    float cy = cam.K[1][2];
    // Undistort the edge points
    for (auto &pt : pts)
    {
        std::array<float, 3> p_normed = {(pt[0] - cx) / fx, (pt[1] - cy) / fy, 1.0};
        undistort_point_fisheye(p_normed, cam.DC);
        pt[0] = p_normed[0];
        pt[1] = p_normed[1];
    }
    // Compute center mass of the undistorted edge points
    float sum_x = 0.0, sum_y = 0.0;
    for (const auto &pt : pts)
    {
        sum_x += pt[0];
        sum_y += pt[1];
    }
    float cn_x = sum_x / pts.size();
    float cn_y = sum_y / pts.size();
    // Convert to identity ratio
    float aspect_ratio = fx / fy;
    cn_y *= aspect_ratio;
    for (auto &pt : pts)
        pt[1] *= aspect_ratio;
    // Find the bounding box of the undistorted points
    float minx = std::numeric_limits<float>::max();
    float miny = std::numeric_limits<float>::max();
    float maxx = -std::numeric_limits<float>::max();
    float maxy = -std::numeric_limits<float>::max();
    for (const auto &pt : pts)
    {
        minx = std::min(minx, pt[0]);
        maxx = std::max(maxx, pt[0]);
        miny = std::min(miny, pt[1]);
        maxy = std::max(maxy, pt[1]);
    }
    // Calculate candidate focal lengths
    // NOTE(review): a degenerate undistortion where cn_x == minx (etc.) would
    // divide by zero here — presumed excluded by valid calibrations; confirm.
    float f1 = (w * 0.5) / (cn_x - minx);
    float f2 = (w * 0.5) / (maxx - cn_x);
    float f3 = (h * 0.5 * aspect_ratio) / (cn_y - miny);
    float f4 = (h * 0.5 * aspect_ratio) / (maxy - cn_y);
    float fmin = std::min({f1, f2, f3, f4});
    float fmax = std::max({f1, f2, f3, f4});
    // Blend the candidate focal lengths
    float f_val = balance * fmin + (1.0f - balance) * fmax;
    if (fov_scale > 0.0f)
        f_val /= fov_scale;
    // Compute new intrinsic parameters
    float new_fx = f_val;
    float new_fy = f_val;
    float new_cx = -cn_x * f_val + (w * 0.5);
    float new_cy = -cn_y * f_val + (h * aspect_ratio * 0.5);
    // Restore aspect ratio
    new_fy /= aspect_ratio;
    new_cy /= aspect_ratio;
    // Optionally scale parameters to new new image size
    if (new_size.first > 0 && new_size.second > 0)
    {
        float rx = static_cast<float>(new_size.first) / w;
        float ry = static_cast<float>(new_size.second) / h;
        new_fx *= rx;
        new_fy *= ry;
        new_cx *= rx;
        new_cy *= ry;
    }
    std::array<std::array<float, 3>, 3> newK = {
        {{new_fx, 0.0, new_cx},
        {0.0, new_fy, new_cy},
        {0.0, 0.0, 1.0}}};
    return newK;
}
// =================================================================================================
/// Compute an "optimal" undistorted camera matrix for a pinhole camera.
///
/// @param cam      Calibrated pinhole camera (reads K, DC, width, height).
/// @param alpha    Blend in [0, 1] between projecting the inscribed rectangle
///                 (0.0) and the circumscribed rectangle (1.0) of the
///                 undistorted image contour; clamped to that range.
/// @param new_size Target viewport size in pixels.
/// @return         New 3x3 camera matrix for the undistorted image.
/// @throws std::logic_error for the unported centered-principal-point /
///         pixel-ROI modes of the OpenCV original.
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
    const Camera &cam, float alpha, std::pair<int, int> new_size)
{
    // Following: cv::getOptimalNewCameraMatrix
    // https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/calibration_base.cpp#L1565
    bool center_principal_point = false;
    bool use_pix_roi = false;
    float w = static_cast<float>(cam.width);
    float h = static_cast<float>(cam.height);
    alpha = std::min(std::max(alpha, 0.0f), 1.0f);
    if (center_principal_point || use_pix_roi)
    {
        // Fix: throw instead of exit(1) — killing the whole process from a
        // library helper is hostile, and <cstdlib> was not even included.
        throw std::logic_error("calc_optimal_camera_matrix_pinhole: mode not implemented");
    }
    // Define key points
    // Calculate only the contour points of the image, and use less points,
    // the edges and centers should be enough if the camera has no strange distortions
    const size_t N = 3;
    std::vector<std::array<float, 2>> pts;
    pts.reserve(4 * (N - 1));
    for (size_t y = 0; y < N; ++y)
    {
        for (size_t x = 0; x < N; ++x)
        {
            // Skip interior grid points; keep only the image contour.
            if (x != 0 && x != N - 1 && y != 0 && y != N - 1)
            {
                continue;
            }
            pts.push_back({x * (w - 1) / (N - 1), y * (h - 1) / (N - 1)});
        }
    }
    // Extract intrinsic parameters
    float fx = cam.K[0][0];
    float fy = cam.K[1][1];
    float cx = cam.K[0][2];
    float cy = cam.K[1][2];
    // Undistort the key points (into normalized coordinates)
    for (auto &pt : pts)
    {
        std::array<float, 3> p_normed = {(pt[0] - cx) / fx, (pt[1] - cy) / fy, 1.0};
        undistort_point_pinhole(p_normed, cam.DC);
        pt[0] = p_normed[0];
        pt[1] = p_normed[1];
    }
    // Get inscribed (i*) and circumscribed (o*) rectangles in normalized coordinates
    float iX0 = -std::numeric_limits<float>::max();
    float iX1 = std::numeric_limits<float>::max();
    float iY0 = -std::numeric_limits<float>::max();
    float iY1 = std::numeric_limits<float>::max();
    float oX0 = std::numeric_limits<float>::max();
    float oX1 = -std::numeric_limits<float>::max();
    float oY0 = std::numeric_limits<float>::max();
    float oY1 = -std::numeric_limits<float>::max();
    size_t k = 0;
    for (size_t y = 0; y < N; ++y)
    {
        for (size_t x = 0; x < N; ++x)
        {
            // Must mirror the skip pattern above so `k` stays in sync with `pts`.
            if (x != 0 && x != N - 1 && y != 0 && y != N - 1)
            {
                continue;
            }
            auto &pt = pts[k];
            k += 1;
            if (x == 0)
            {
                oX0 = std::min(oX0, pt[0]);
                iX0 = std::max(iX0, pt[0]);
            }
            if (x == N - 1)
            {
                oX1 = std::max(oX1, pt[0]);
                iX1 = std::min(iX1, pt[0]);
            }
            if (y == 0)
            {
                oY0 = std::min(oY0, pt[1]);
                iY0 = std::max(iY0, pt[1]);
            }
            if (y == N - 1)
            {
                oY1 = std::max(oY1, pt[1]);
                iY1 = std::min(iY1, pt[1]);
            }
        }
    }
    float inner_width = iX1 - iX0;
    float inner_height = iY1 - iY0;
    float outer_width = oX1 - oX0;
    float outer_height = oY1 - oY0;
    // Projection mapping inner rectangle to viewport
    float fx0 = (new_size.first - 1) / inner_width;
    float fy0 = (new_size.second - 1) / inner_height;
    float cx0 = -fx0 * iX0;
    float cy0 = -fy0 * iY0;
    // Projection mapping outer rectangle to viewport
    float fx1 = (new_size.first - 1) / outer_width;
    float fy1 = (new_size.second - 1) / outer_height;
    float cx1 = -fx1 * oX0;
    float cy1 = -fy1 * oY0;
    // Interpolate between the two optimal projections
    float new_fx = fx0 * (1 - alpha) + fx1 * alpha;
    float new_fy = fy0 * (1 - alpha) + fy1 * alpha;
    float new_cx = cx0 * (1 - alpha) + cx1 * alpha;
    float new_cy = cy0 * (1 - alpha) + cy1 * alpha;
    std::array<std::array<float, 3>, 3> newK = {
        {{new_fx, 0.0, new_cx},
        {0.0, new_fy, new_cy},
        {0.0, 0.0, 1.0}}};
    return newK;
}
} // namespace
+51
View File
@@ -0,0 +1,51 @@
#pragma once
#include <array>
#include <iostream>
#include <string>
#include <vector>
// =================================================================================================
// Supported lens projection models. The lowercase serialization names are
// handled by camera_model_name() / parse_camera_model() below.
enum class CameraModel
{
    Pinhole,
    Fisheye,
};
const char *camera_model_name(CameraModel model);
CameraModel parse_camera_model(const std::string &value);
// =================================================================================================
// Calibrated camera description plus precomputed helpers.
// The first group of members is the raw calibration; the members from invR
// down are derived values filled in by make_camera().
struct Camera
{
    std::string name;
    // 3x3 intrinsic matrix.
    std::array<std::array<float, 3>, 3> K;
    // Distortion coefficients: [k1, k2, p1, p2, k3] for the pinhole model,
    // [k1, k2, k3, k4, unused] for the fisheye model.
    std::array<float, 5> DC = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
    // 3x3 extrinsic rotation — presumably world-to-camera; confirm against
    // the calibration producer.
    std::array<std::array<float, 3>, 3> R;
    // 3x1 translation column vector.
    std::array<std::array<float, 1>, 3> T;
    int width = 0;   // image width in pixels
    int height = 0;  // image height in pixels
    CameraModel model = CameraModel::Pinhole;
    // Derived members, populated by make_camera():
    std::array<std::array<float, 3>, 3> invR {};  // transpose of R
    std::array<float, 3> center {};               // camera center (copied from T)
    std::array<std::array<float, 3>, 3> newK {};  // optimal undistorted intrinsics
    std::array<std::array<float, 3>, 3> invK {};  // inverse of newK (not of K)
    friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
    std::string to_string() const;
};
Camera make_camera(
std::string name,
std::array<std::array<float, 3>, 3> K,
std::array<float, 5> DC,
std::array<std::array<float, 3>, 3> R,
std::array<std::array<float, 1>, 3> T,
int width,
int height,
CameraModel model);
void undistort_point_pinhole(std::array<float, 3> &point, const std::array<float, 5> &distortion);
void undistort_point_fisheye(std::array<float, 3> &point, const std::array<float, 5> &distortion);
+211
View File
@@ -0,0 +1,211 @@
#include <algorithm>
#include <stdexcept>
#include <utility>
#include "interface.hpp"
// =================================================================================================
namespace
{
// Flatten (view, person, joint, coord) into an index for the packed 2D pose
// buffer of shape [num_views, max_persons, num_joints, 3].
size_t pose2d_offset(
    size_t view,
    size_t person,
    size_t joint,
    size_t coord,
    size_t max_persons,
    size_t num_joints)
{
    const size_t person_index = view * max_persons + person;
    const size_t joint_index = person_index * num_joints + joint;
    return joint_index * 3 + coord;
}
// Flatten (person, joint, coord) into an index for the packed 3D pose buffer
// of shape [num_persons, num_joints, 4].
size_t pose3d_offset(size_t person, size_t joint, size_t coord, size_t num_joints)
{
    return 4 * (person * num_joints + joint) + coord;
}
// Flatten (view, person, joint, coord) into an index for the per-view packed
// 3D pose buffer of shape [num_views, max_persons, num_joints, 4].
size_t pose3d_by_view_offset(
    size_t view,
    size_t person,
    size_t joint,
    size_t coord,
    size_t max_persons,
    size_t num_joints)
{
    const size_t person_index = view * max_persons + person;
    const size_t joint_index = person_index * num_joints + joint;
    return joint_index * 4 + coord;
}
} // namespace
// =================================================================================================
// =================================================================================================
// ---- Bounds-unchecked element accessors over the packed pose buffers. ----

// Mutable (view, person, joint, coord) access into the packed 2D batch.
float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord)
{
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
}
// Read-only access through a non-owning 2D batch view.
const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_t coord) const
{
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
}
// Read-only (person, joint, coord) access through a non-owning 3D batch view.
const float &PoseBatch3DView::at(size_t person, size_t joint, size_t coord) const
{
    return data[pose3d_offset(person, joint, coord, num_joints)];
}
// Track id associated with a person slot.
int64_t TrackedPoseBatch3DView::track_id(size_t person) const
{
    return track_ids[person];
}
// Read-only access through a non-owning tracked 3D batch view.
const float &TrackedPoseBatch3DView::at(size_t person, size_t joint, size_t coord) const
{
    return data[pose3d_offset(person, joint, coord, num_joints)];
}
// Read-only access through a non-owning per-view 3D batch view.
const float &PoseBatch3DByViewView::at(size_t view, size_t person, size_t joint, size_t coord) const
{
    return data[pose3d_by_view_offset(view, person, joint, coord, max_persons, num_joints)];
}
// Read-only (view, person, joint, coord) access into the packed 2D batch.
const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
{
    return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
}
// Create a non-owning view; valid only while this batch is alive and unresized.
PoseBatch2DView PoseBatch2D::view() const
{
    return PoseBatch2DView {data.data(), person_counts.data(), num_views, max_persons, num_joints};
}
/// Pack ragged per-view 2D poses into a zero-padded flat batch.
///
/// First pass validates that every non-empty view and every person agree on a
/// single joint count and determines max_persons; second pass copies the
/// per-joint triples (presumably x, y, confidence — confirm with producers)
/// into the [num_views, max_persons, num_joints, 3] buffer. Missing person
/// slots stay zero-filled.
///
/// @throws std::invalid_argument on inconsistent joint counts.
PoseBatch2D PoseBatch2D::from_nested(const RaggedPoses2D &poses_2d)
{
    PoseBatch2D batch;
    batch.num_views = poses_2d.size();
    for (const auto &view : poses_2d)
    {
        batch.max_persons = std::max(batch.max_persons, view.size());
        if (!view.empty())
        {
            if (batch.num_joints == 0)
            {
                // The first non-empty view fixes the joint count for the batch.
                batch.num_joints = view[0].size();
            }
            else if (batch.num_joints != view[0].size())
            {
                throw std::invalid_argument("All views must use the same joint count.");
            }
            for (const auto &person : view)
            {
                if (person.size() != batch.num_joints)
                {
                    throw std::invalid_argument("All persons must use the same joint count.");
                }
            }
        }
    }
    batch.person_counts.resize(batch.num_views);
    // assign() both sizes the buffer and zero-fills the padding slots.
    batch.data.assign(batch.num_views * batch.max_persons * batch.num_joints * 3, 0.0f);
    for (size_t view = 0; view < batch.num_views; ++view)
    {
        batch.person_counts[view] = static_cast<uint32_t>(poses_2d[view].size());
        for (size_t person = 0; person < poses_2d[view].size(); ++person)
        {
            for (size_t joint = 0; joint < batch.num_joints; ++joint)
            {
                for (size_t coord = 0; coord < 3; ++coord)
                {
                    batch.at(view, person, joint, coord) = poses_2d[view][person][joint][coord];
                }
            }
        }
    }
    return batch;
}
// =================================================================================================
// Mutable (person, joint, coord) access into the packed 3D batch.
float &PoseBatch3D::at(size_t person, size_t joint, size_t coord)
{
    return data[pose3d_offset(person, joint, coord, num_joints)];
}
// Read-only (person, joint, coord) access into the packed 3D batch.
const float &PoseBatch3D::at(size_t person, size_t joint, size_t coord) const
{
    return data[pose3d_offset(person, joint, coord, num_joints)];
}
// Create a non-owning view; valid only while this batch is alive and unresized.
PoseBatch3DView PoseBatch3D::view() const
{
    return PoseBatch3DView {data.data(), num_persons, num_joints};
}
// Mutable (view, person, joint, coord) access into the per-view 3D batch.
float &PoseBatch3DByView::at(size_t view, size_t person, size_t joint, size_t coord)
{
    return data[pose3d_by_view_offset(view, person, joint, coord, max_persons, num_joints)];
}
// Read-only (view, person, joint, coord) access into the per-view 3D batch.
const float &PoseBatch3DByView::at(size_t view, size_t person, size_t joint, size_t coord) const
{
    return data[pose3d_by_view_offset(view, person, joint, coord, max_persons, num_joints)];
}
// Create a non-owning view; valid only while this batch is alive and unresized.
PoseBatch3DByViewView PoseBatch3DByView::view() const
{
    return PoseBatch3DByViewView {
        data.data(),
        person_counts.data(),
        num_views,
        max_persons,
        num_joints,
    };
}
// Unpack the flat [num_persons, num_joints, 4] buffer into nested vectors.
NestedPoses3D PoseBatch3D::to_nested() const
{
    NestedPoses3D nested(num_persons);
    for (size_t person = 0; person < num_persons; ++person)
    {
        auto &joints = nested[person];
        joints.resize(num_joints);
        for (size_t joint = 0; joint < num_joints; ++joint)
        {
            auto &coords = joints[joint];
            for (size_t coord = 0; coord < 4; ++coord)
            {
                coords[coord] = at(person, joint, coord);
            }
        }
    }
    return nested;
}
// Pack nested 3D poses into a flat [num_persons, num_joints, 4] batch.
// The first person fixes the joint count; an empty input yields an empty batch.
// @throws std::invalid_argument if persons disagree on the joint count.
PoseBatch3D PoseBatch3D::from_nested(const NestedPoses3D &poses_3d)
{
    PoseBatch3D batch;
    batch.num_persons = poses_3d.size();
    batch.num_joints = poses_3d.empty() ? 0 : poses_3d[0].size();
    batch.data.resize(batch.num_persons * batch.num_joints * 4);
    for (size_t person = 0; person < batch.num_persons; ++person)
    {
        const auto &joints = poses_3d[person];
        if (joints.size() != batch.num_joints)
        {
            throw std::invalid_argument("All 3D poses must use the same joint count.");
        }
        for (size_t joint = 0; joint < batch.num_joints; ++joint)
        {
            for (size_t coord = 0; coord < 4; ++coord)
            {
                batch.at(person, joint, coord) = joints[joint][coord];
            }
        }
    }
    return batch;
}
+311
View File
@@ -0,0 +1,311 @@
#pragma once
#include <array>
#include <cstdint>
#include <string>
#include <vector>
#include "camera.hpp"
// =================================================================================================
using RaggedPoses2D = std::vector<std::vector<std::vector<std::array<float, 3>>>>;
using NestedPoses3D = std::vector<std::vector<std::array<float, 4>>>;
// =================================================================================================
// ---- Non-owning views over packed pose buffers. ----
// Views borrow the pointers from an owning batch; they are valid only while
// that batch is alive and unresized.

// View over a packed 2D batch of shape [num_views, max_persons, num_joints, 3].
struct PoseBatch2DView
{
    const float *data = nullptr;
    // Actual persons per view (the remaining slots up to max_persons are padding).
    const uint32_t *person_counts = nullptr;
    size_t num_views = 0;
    size_t max_persons = 0;
    size_t num_joints = 0;
    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
};
// View over a packed 3D batch of shape [num_persons, num_joints, 4].
struct PoseBatch3DView
{
    const float *data = nullptr;
    size_t num_persons = 0;
    size_t num_joints = 0;
    const float &at(size_t person, size_t joint, size_t coord) const;
};
// 3D batch view with one track id per person slot.
struct TrackedPoseBatch3DView
{
    const int64_t *track_ids = nullptr;
    const float *data = nullptr;
    size_t num_persons = 0;
    size_t num_joints = 0;
    int64_t track_id(size_t person) const;
    const float &at(size_t person, size_t joint, size_t coord) const;
};
// View over a per-view packed 3D batch of shape [num_views, max_persons, num_joints, 4].
struct PoseBatch3DByViewView
{
    const float *data = nullptr;
    const uint32_t *person_counts = nullptr;
    size_t num_views = 0;
    size_t max_persons = 0;
    size_t num_joints = 0;
    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
};
// ---- Owning pose batches (flat float storage + shape metadata). ----

// Owning 2D batch; data layout [num_views, max_persons, num_joints, 3].
struct PoseBatch2D
{
    std::vector<float> data;
    std::vector<uint32_t> person_counts;  // actual persons per view
    size_t num_views = 0;
    size_t max_persons = 0;
    size_t num_joints = 0;
    float &at(size_t view, size_t person, size_t joint, size_t coord);
    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
    // Non-owning view; valid while this batch is alive and unresized.
    PoseBatch2DView view() const;
    // Build from ragged per-view poses; throws on inconsistent joint counts.
    static PoseBatch2D from_nested(const RaggedPoses2D &poses_2d);
};
// Owning 3D batch; data layout [num_persons, num_joints, 4].
struct PoseBatch3D
{
    std::vector<float> data;
    size_t num_persons = 0;
    size_t num_joints = 0;
    float &at(size_t person, size_t joint, size_t coord);
    const float &at(size_t person, size_t joint, size_t coord) const;
    PoseBatch3DView view() const;
    // Conversions to/from nested std::vector form.
    NestedPoses3D to_nested() const;
    static PoseBatch3D from_nested(const NestedPoses3D &poses_3d);
};
// Owning per-view 3D batch; data layout [num_views, max_persons, num_joints, 4].
struct PoseBatch3DByView
{
    std::vector<float> data;
    std::vector<uint32_t> person_counts;  // actual persons per view
    size_t num_views = 0;
    size_t max_persons = 0;
    size_t num_joints = 0;
    float &at(size_t view, size_t person, size_t joint, size_t coord);
    const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
    PoseBatch3DByViewView view() const;
};
// =================================================================================================
// Candidate pairing of one detected person in each of two views.
// Indices default to -1 (unset).
struct PairCandidate
{
    int view1 = -1;
    int view2 = -1;
    int person1 = -1;  // person index within view1
    int person2 = -1;  // person index within view2
    int global_person1 = -1;
    int global_person2 = -1;
};
// Per-match record of how a pair scored against a previous-frame pose.
struct PreviousPoseMatch
{
    int previous_pose_index = -1;
    int64_t previous_track_id = -1;
    float score_view1 = 0.0f;
    float score_view2 = 0.0f;
    bool matched_view1 = false;
    bool matched_view2 = false;
    bool kept = true;
    // Human-readable filter outcome; "unfiltered" until a decision is made.
    std::string decision = "unfiltered";
};
// Debug output of the previous-pose pair filter.
struct PreviousPoseFilterDebug
{
    bool used_previous_poses = false;
    std::vector<PreviousPoseMatch> matches;
    std::vector<int> kept_pair_indices;  // indices into the original pair list
    std::vector<PairCandidate> kept_pairs;
};
// Debug record for one triangulated pair proposal.
struct CoreProposalDebug
{
    int pair_index = -1;         // index into the candidate pair list
    PairCandidate pair;
    std::vector<std::array<float, 4>> pose_3d;  // one (x, y, z, score?) entry per joint — confirm
    float score = 0.0f;
    bool kept = false;
    std::string drop_reason = "uninitialized";  // why the proposal was dropped, if it was
};
// A cluster of proposals considered to be the same person.
struct ProposalGroupDebug
{
    std::array<float, 3> center = {0.0f, 0.0f, 0.0f};
    std::vector<std::array<float, 4>> pose_3d;
    std::vector<int> proposal_indices;  // members of the group
};
// Debug output of the proposal-grouping stage.
struct GroupingDebug
{
    std::vector<ProposalGroupDebug> initial_groups;
    std::vector<int> duplicate_pair_drops;
    std::vector<ProposalGroupDebug> groups;  // groups after duplicate removal
};
// A proposal expanded from a kept core proposal.
struct FullProposalDebug
{
    int source_core_proposal_index = -1;
    PairCandidate pair;
    std::vector<std::array<float, 4>> pose_3d;
};
// Debug output of the merge stage.
struct MergeDebug
{
    std::vector<std::vector<std::array<float, 4>>> merged_poses;
    std::vector<std::vector<int>> group_proposal_indices;
};
// Outcome of associating a current pose with the previous frame's poses.
enum class AssociationStatus
{
    Matched,    // uniquely matched to one previous pose
    New,        // no previous match
    Ambiguous,  // multiple previous candidates
};
// Per-frame association summary; entries are indexed by current pose unless
// named otherwise.
struct AssociationReport
{
    std::vector<int> pose_previous_indices;
    std::vector<int64_t> pose_previous_track_ids;
    std::vector<AssociationStatus> pose_status;
    std::vector<std::vector<int>> pose_candidate_previous_indices;
    std::vector<std::vector<int64_t>> pose_candidate_previous_track_ids;
    // Previous poses that found no match in the current frame.
    std::vector<int> unmatched_previous_indices;
    std::vector<int64_t> unmatched_previous_track_ids;
    std::vector<int> new_pose_indices;
    std::vector<int> ambiguous_pose_indices;
};
// Per-final-pose association detail for debugging.
struct FinalPoseAssociationDebug
{
    int final_pose_index = -1;
    std::vector<int> source_core_proposal_indices;
    std::vector<int> source_pair_indices;
    std::vector<int> candidate_previous_indices;
    std::vector<int64_t> candidate_previous_track_ids;
    int resolved_previous_index = -1;
    int64_t resolved_previous_track_id = -1;
    AssociationStatus status = AssociationStatus::New;
};
// Full trace of one triangulation run, stage by stage (see triangulate_debug).
struct TriangulationTrace
{
    std::vector<PairCandidate> pairs;
    PreviousPoseFilterDebug previous_filter;
    std::vector<CoreProposalDebug> core_proposals;
    GroupingDebug grouping;
    std::vector<FullProposalDebug> full_proposals;
    MergeDebug merge;
    AssociationReport association;
    std::vector<FinalPoseAssociationDebug> final_pose_associations;
    PoseBatch3D final_poses;
};
// Triangulated poses plus the association report for the same frame.
struct TriangulationResult
{
    PoseBatch3D poses;
    AssociationReport association;
};
// =================================================================================================
// Tunable thresholds for triangulation.
struct TriangulationOptions
{
    float min_match_score = 0.95f;  // minimum score for a pair match
    size_t min_group_size = 1;      // minimum proposals per kept group
};
// Static per-setup configuration shared across triangulation calls.
struct TriangulationConfig
{
    std::vector<Camera> cameras;
    // Two 3D corner points describing the room bounds — presumably
    // (min, max) corners; confirm with the producer of this config.
    std::array<std::array<float, 3>, 2> roomparams {{{0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f}}};
    std::vector<std::string> joint_names;
    TriangulationOptions options;  // defaults; overridable per call
};
// =================================================================================================
std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d);
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
const PoseBatch2DView &poses_2d,
const TriangulationConfig &config,
const TrackedPoseBatch3DView &previous_poses_3d,
const TriangulationOptions *options_override = nullptr);
TriangulationTrace triangulate_debug(
const PoseBatch2DView &poses_2d,
const TriangulationConfig &config,
const TrackedPoseBatch3DView *previous_poses_3d = nullptr,
const TriangulationOptions *options_override = nullptr);
// =================================================================================================
/**
* Calculate a triangulation using a padded pose tensor.
*
* @param poses_2d Padded poses of shape [views, max_persons, joints, 3].
* @param config Triangulation configuration (cameras, room parameters, joint names, options).
* @param options_override Optional per-call options override. Defaults to config.options.
*
* @return Pose tensor of shape [persons, joints, 4].
*/
PoseBatch3D triangulate_poses(
const PoseBatch2DView &poses_2d,
const TriangulationConfig &config,
const TriangulationOptions *options_override = nullptr);
PoseBatch3D merge_rgbd_views(
const PoseBatch3DByViewView &poses_3d,
const TriangulationConfig &config,
float max_distance = 0.5f);
TriangulationResult triangulate_with_report(
const PoseBatch2DView &poses_2d,
const TriangulationConfig &config,
const TrackedPoseBatch3DView &previous_poses_3d,
const TriangulationOptions *options_override = nullptr);
// -------------------------------------------------------------------------
// Convenience overloads: accept owning batches and forward cheap non-owning
// views to the core implementations.
// -------------------------------------------------------------------------
inline PoseBatch3D triangulate_poses(
    const PoseBatch2D &poses_2d,
    const TriangulationConfig &config,
    const TriangulationOptions *options_override = nullptr)
{
    return triangulate_poses(poses_2d.view(), config, options_override);
}
inline PoseBatch3D merge_rgbd_views(
    const PoseBatch3DByView &poses_3d,
    const TriangulationConfig &config,
    float max_distance = 0.5f)
{
    return merge_rgbd_views(poses_3d.view(), config, max_distance);
}
// Debug variant without previous-pose tracking (previous_poses_3d = nullptr).
inline TriangulationTrace triangulate_debug(
    const PoseBatch2D &poses_2d,
    const TriangulationConfig &config,
    const TriangulationOptions *options_override = nullptr)
{
    return triangulate_debug(poses_2d.view(), config, nullptr, options_override);
}
inline TriangulationResult triangulate_with_report(
    const PoseBatch2D &poses_2d,
    const TriangulationConfig &config,
    const TrackedPoseBatch3DView &previous_poses_3d,
    const TriangulationOptions *options_override = nullptr)
{
    return triangulate_with_report(poses_2d.view(), config, previous_poses_3d, options_override);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff