Refactor triangulation stages and camera model

This commit is contained in:
2026-03-11 23:42:07 +08:00
parent 24f74c87f1
commit 103aeb5887
10 changed files with 1268 additions and 232 deletions
+292 -6
View File
@@ -1,6 +1,8 @@
#include <algorithm> #include <algorithm>
#include <array>
#include <cstdint> #include <cstdint>
#include <stdexcept> #include <stdexcept>
#include <vector>
#include <nanobind/nanobind.h> #include <nanobind/nanobind.h>
#include <nanobind/ndarray.h> #include <nanobind/ndarray.h>
@@ -19,7 +21,10 @@ using PoseArray2D =
nb::ndarray<nb::numpy, const float, nb::shape<-1, -1, -1, 3>, nb::c_contig>; nb::ndarray<nb::numpy, const float, nb::shape<-1, -1, -1, 3>, nb::c_contig>;
using CountArray = nb::ndarray<nb::numpy, const uint32_t, nb::shape<-1>, nb::c_contig>; using CountArray = nb::ndarray<nb::numpy, const uint32_t, nb::shape<-1>, nb::c_contig>;
using RoomArray = nb::ndarray<nb::numpy, const float, nb::shape<2, 3>, nb::c_contig>; using RoomArray = nb::ndarray<nb::numpy, const float, nb::shape<2, 3>, nb::c_contig>;
using PoseArray3DConst =
nb::ndarray<nb::numpy, const float, nb::shape<-1, -1, 4>, nb::c_contig>;
using PoseArray3D = nb::ndarray<nb::numpy, float, nb::shape<-1, -1, 4>, nb::c_contig>; using PoseArray3D = nb::ndarray<nb::numpy, float, nb::shape<-1, -1, 4>, nb::c_contig>;
using PoseArray2DOut = nb::ndarray<nb::numpy, float, nb::shape<-1, 4>, nb::c_contig>;
PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts) PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
{ {
@@ -45,6 +50,15 @@ PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const Co
}; };
} }
PoseBatch3DView pose_batch3d_view_from_numpy(const PoseArray3DConst &poses_3d)
{
return PoseBatch3DView {
poses_3d.data(),
static_cast<size_t>(poses_3d.shape(0)),
static_cast<size_t>(poses_3d.shape(1)),
};
}
std::array<std::array<float, 3>, 2> roomparams_from_numpy(const RoomArray &roomparams) std::array<std::array<float, 3>, 2> roomparams_from_numpy(const RoomArray &roomparams)
{ {
std::array<std::array<float, 3>, 2> result {}; std::array<std::array<float, 3>, 2> result {};
@@ -69,10 +83,75 @@ PoseArray3D pose_batch_to_numpy(PoseBatch3D batch)
const size_t shape[3] = {batch.num_persons, batch.num_joints, 4}; const size_t shape[3] = {batch.num_persons, batch.num_joints, 4};
return PoseArray3D(storage->data(), 3, shape, owner); return PoseArray3D(storage->data(), 3, shape, owner);
} }
PoseArray3D pose_batch_to_numpy_copy(const PoseBatch3D &batch)
{
PoseBatch3D copy = batch;
return pose_batch_to_numpy(std::move(copy));
}
PoseArray2DOut pose_rows_to_numpy_copy(const std::vector<std::array<float, 4>> &rows)
{
auto *storage = new std::vector<float>(rows.size() * 4, 0.0f);
for (size_t row = 0; row < rows.size(); ++row)
{
for (size_t coord = 0; coord < 4; ++coord)
{
(*storage)[row * 4 + coord] = rows[row][coord];
}
}
nb::capsule owner(storage, [](void *value) noexcept
{
delete static_cast<std::vector<float> *>(value);
});
const size_t shape[2] = {rows.size(), 4};
return PoseArray2DOut(storage->data(), 2, shape, owner);
}
PoseArray3D merged_poses_to_numpy_copy(const std::vector<std::vector<std::array<float, 4>>> &poses)
{
size_t num_poses = poses.size();
size_t num_joints = 0;
if (!poses.empty())
{
num_joints = poses[0].size();
}
auto *storage = new std::vector<float>(num_poses * num_joints * 4, 0.0f);
for (size_t pose = 0; pose < num_poses; ++pose)
{
if (poses[pose].size() != num_joints)
{
delete storage;
throw std::invalid_argument("Merged poses must use a consistent joint count.");
}
for (size_t joint = 0; joint < num_joints; ++joint)
{
for (size_t coord = 0; coord < 4; ++coord)
{
(*storage)[((pose * num_joints) + joint) * 4 + coord] = poses[pose][joint][coord];
}
}
}
nb::capsule owner(storage, [](void *value) noexcept
{
delete static_cast<std::vector<float> *>(value);
});
const size_t shape[3] = {num_poses, num_joints, 4};
return PoseArray3D(storage->data(), 3, shape, owner);
}
} // namespace } // namespace
NB_MODULE(_core, m) NB_MODULE(_core, m)
{ {
nb::enum_<CameraModel>(m, "CameraModel")
.value("PINHOLE", CameraModel::Pinhole)
.value("FISHEYE", CameraModel::Fisheye);
nb::class_<Camera>(m, "Camera") nb::class_<Camera>(m, "Camera")
.def(nb::init<>()) .def(nb::init<>())
.def_rw("name", &Camera::name) .def_rw("name", &Camera::name)
@@ -82,12 +161,184 @@ NB_MODULE(_core, m)
.def_rw("T", &Camera::T) .def_rw("T", &Camera::T)
.def_rw("width", &Camera::width) .def_rw("width", &Camera::width)
.def_rw("height", &Camera::height) .def_rw("height", &Camera::height)
.def_rw("type", &Camera::type) .def_rw("model", &Camera::model)
.def("__repr__", [](const Camera &camera) .def("__repr__", [](const Camera &camera)
{ {
return camera.to_string(); return camera.to_string();
}); });
nb::class_<PairCandidate>(m, "PairCandidate")
.def(nb::init<>())
.def_rw("view1", &PairCandidate::view1)
.def_rw("view2", &PairCandidate::view2)
.def_rw("person1", &PairCandidate::person1)
.def_rw("person2", &PairCandidate::person2)
.def_rw("global_person1", &PairCandidate::global_person1)
.def_rw("global_person2", &PairCandidate::global_person2);
nb::class_<PreviousPoseMatch>(m, "PreviousPoseMatch")
.def(nb::init<>())
.def_rw("previous_pose_index", &PreviousPoseMatch::previous_pose_index)
.def_rw("score_view1", &PreviousPoseMatch::score_view1)
.def_rw("score_view2", &PreviousPoseMatch::score_view2)
.def_rw("matched_view1", &PreviousPoseMatch::matched_view1)
.def_rw("matched_view2", &PreviousPoseMatch::matched_view2)
.def_rw("kept", &PreviousPoseMatch::kept)
.def_rw("decision", &PreviousPoseMatch::decision);
nb::class_<PreviousPoseFilterDebug>(m, "PreviousPoseFilterDebug")
.def(nb::init<>())
.def_rw("used_previous_poses", &PreviousPoseFilterDebug::used_previous_poses)
.def_rw("matches", &PreviousPoseFilterDebug::matches)
.def_rw("kept_pair_indices", &PreviousPoseFilterDebug::kept_pair_indices)
.def_rw("kept_pairs", &PreviousPoseFilterDebug::kept_pairs);
nb::class_<CoreProposalDebug>(m, "CoreProposalDebug")
.def(nb::init<>())
.def_rw("pair_index", &CoreProposalDebug::pair_index)
.def_rw("pair", &CoreProposalDebug::pair)
.def_rw("score", &CoreProposalDebug::score)
.def_rw("kept", &CoreProposalDebug::kept)
.def_rw("drop_reason", &CoreProposalDebug::drop_reason)
.def_prop_ro("pose_3d", [](const CoreProposalDebug &proposal)
{
return pose_rows_to_numpy_copy(proposal.pose_3d);
}, nb::rv_policy::move);
nb::class_<ProposalGroupDebug>(m, "ProposalGroupDebug")
.def(nb::init<>())
.def_rw("center", &ProposalGroupDebug::center)
.def_rw("proposal_indices", &ProposalGroupDebug::proposal_indices)
.def_prop_ro("pose_3d", [](const ProposalGroupDebug &group)
{
return pose_rows_to_numpy_copy(group.pose_3d);
}, nb::rv_policy::move);
nb::class_<GroupingDebug>(m, "GroupingDebug")
.def(nb::init<>())
.def_rw("initial_groups", &GroupingDebug::initial_groups)
.def_rw("duplicate_pair_drops", &GroupingDebug::duplicate_pair_drops)
.def_rw("groups", &GroupingDebug::groups);
nb::class_<FullProposalDebug>(m, "FullProposalDebug")
.def(nb::init<>())
.def_rw("source_core_proposal_index", &FullProposalDebug::source_core_proposal_index)
.def_rw("pair", &FullProposalDebug::pair)
.def_prop_ro("pose_3d", [](const FullProposalDebug &proposal)
{
return pose_rows_to_numpy_copy(proposal.pose_3d);
}, nb::rv_policy::move);
nb::class_<MergeDebug>(m, "MergeDebug")
.def(nb::init<>())
.def_rw("group_proposal_indices", &MergeDebug::group_proposal_indices)
.def_prop_ro("merged_poses", [](const MergeDebug &merge)
{
return merged_poses_to_numpy_copy(merge.merged_poses);
}, nb::rv_policy::move);
nb::class_<TriangulationTrace>(m, "TriangulationTrace")
.def(nb::init<>())
.def_rw("pairs", &TriangulationTrace::pairs)
.def_rw("previous_filter", &TriangulationTrace::previous_filter)
.def_rw("core_proposals", &TriangulationTrace::core_proposals)
.def_rw("grouping", &TriangulationTrace::grouping)
.def_rw("full_proposals", &TriangulationTrace::full_proposals)
.def_rw("merge", &TriangulationTrace::merge)
.def_prop_ro("final_poses", [](const TriangulationTrace &trace)
{
return pose_batch_to_numpy_copy(trace.final_poses);
}, nb::rv_policy::move);
m.def(
"build_pair_candidates",
[](const PoseArray2D &poses_2d, const CountArray &person_counts)
{
return build_pair_candidates(pose_batch_view_from_numpy(poses_2d, person_counts));
},
"poses_2d"_a,
"person_counts"_a);
m.def(
"filter_pairs_with_previous_poses",
[](const PoseArray2D &poses_2d,
const CountArray &person_counts,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names,
const PoseArray3DConst &previous_poses_3d,
float min_match_score)
{
return filter_pairs_with_previous_poses(
pose_batch_view_from_numpy(poses_2d, person_counts),
cameras,
joint_names,
pose_batch3d_view_from_numpy(previous_poses_3d),
min_match_score);
},
"poses_2d"_a,
"person_counts"_a,
"cameras"_a,
"joint_names"_a,
"previous_poses_3d"_a,
"min_match_score"_a = 0.95f);
m.def(
"triangulate_debug",
[](const PoseArray2D &poses_2d,
const CountArray &person_counts,
const std::vector<Camera> &cameras,
const RoomArray &roomparams,
const std::vector<std::string> &joint_names,
float min_match_score,
size_t min_group_size)
{
return triangulate_debug(
pose_batch_view_from_numpy(poses_2d, person_counts),
cameras,
roomparams_from_numpy(roomparams),
joint_names,
nullptr,
min_match_score,
min_group_size);
},
"poses_2d"_a,
"person_counts"_a,
"cameras"_a,
"roomparams"_a,
"joint_names"_a,
"min_match_score"_a = 0.95f,
"min_group_size"_a = 1);
m.def(
"triangulate_debug",
[](const PoseArray2D &poses_2d,
const CountArray &person_counts,
const std::vector<Camera> &cameras,
const RoomArray &roomparams,
const std::vector<std::string> &joint_names,
const PoseArray3DConst &previous_poses_3d,
float min_match_score,
size_t min_group_size)
{
const PoseBatch3DView previous_view = pose_batch3d_view_from_numpy(previous_poses_3d);
return triangulate_debug(
pose_batch_view_from_numpy(poses_2d, person_counts),
cameras,
roomparams_from_numpy(roomparams),
joint_names,
&previous_view,
min_match_score,
min_group_size);
},
"poses_2d"_a,
"person_counts"_a,
"cameras"_a,
"roomparams"_a,
"joint_names"_a,
"previous_poses_3d"_a,
"min_match_score"_a = 0.95f,
"min_group_size"_a = 1);
m.def( m.def(
"triangulate_poses", "triangulate_poses",
[](const PoseArray2D &poses_2d, [](const PoseArray2D &poses_2d,
@@ -98,11 +349,15 @@ NB_MODULE(_core, m)
float min_match_score, float min_match_score,
size_t min_group_size) size_t min_group_size)
{ {
const PoseBatch2DView pose_batch = pose_batch_view_from_numpy(poses_2d, person_counts); const PoseBatch3D poses_3d = triangulate_poses(
const auto room = roomparams_from_numpy(roomparams); pose_batch_view_from_numpy(poses_2d, person_counts),
PoseBatch3D poses_3d = triangulate_poses( cameras,
pose_batch, cameras, room, joint_names, min_match_score, min_group_size); roomparams_from_numpy(roomparams),
return pose_batch_to_numpy(std::move(poses_3d)); joint_names,
nullptr,
min_match_score,
min_group_size);
return pose_batch_to_numpy(poses_3d);
}, },
"poses_2d"_a, "poses_2d"_a,
"person_counts"_a, "person_counts"_a,
@@ -111,4 +366,35 @@ NB_MODULE(_core, m)
"joint_names"_a, "joint_names"_a,
"min_match_score"_a = 0.95f, "min_match_score"_a = 0.95f,
"min_group_size"_a = 1); "min_group_size"_a = 1);
m.def(
"triangulate_poses",
[](const PoseArray2D &poses_2d,
const CountArray &person_counts,
const std::vector<Camera> &cameras,
const RoomArray &roomparams,
const std::vector<std::string> &joint_names,
const PoseArray3DConst &previous_poses_3d,
float min_match_score,
size_t min_group_size)
{
const PoseBatch3DView previous_view = pose_batch3d_view_from_numpy(previous_poses_3d);
const PoseBatch3D poses_3d = triangulate_poses(
pose_batch_view_from_numpy(poses_2d, person_counts),
cameras,
roomparams_from_numpy(roomparams),
joint_names,
&previous_view,
min_match_score,
min_group_size);
return pose_batch_to_numpy(poses_3d);
},
"poses_2d"_a,
"person_counts"_a,
"cameras"_a,
"roomparams"_a,
"joint_names"_a,
"previous_poses_3d"_a,
"min_match_score"_a = 0.95f,
"min_group_size"_a = 1);
} }
+19
View File
@@ -0,0 +1,19 @@
#pragma once
#include <array>
#include "camera.hpp"
struct CachedCamera
{
const Camera cam;
const std::array<std::array<float, 3>, 3> invR;
const std::array<float, 3> center;
const std::array<std::array<float, 3>, 3> newK;
const std::array<std::array<float, 3>, 3> invK;
};
CachedCamera cache_camera(const Camera &camera);
void undistort_point_pinhole(std::array<float, 3> &point, const std::array<float, 5> &distortion);
void undistort_point_fisheye(std::array<float, 3> &point, const std::array<float, 5> &distortion);
+97 -78
View File
@@ -1,11 +1,60 @@
#include <array> #include <array>
#include <cmath> #include <cmath>
#include <iomanip> #include <iomanip>
#include <stdexcept>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include <cmath> #include <cmath>
#include "camera.hpp" #include "cached_camera.hpp"
namespace
{
std::array<std::array<float, 3>, 3> transpose3x3(const std::array<std::array<float, 3>, 3> &M)
{
return {{{M[0][0], M[1][0], M[2][0]},
{M[0][1], M[1][1], M[2][1]},
{M[0][2], M[1][2], M[2][2]}}};
}
std::array<std::array<float, 3>, 3> invert3x3(const std::array<std::array<float, 3>, 3> &M)
{
std::array<std::array<float, 3>, 3> adj = {
{{
M[1][1] * M[2][2] - M[1][2] * M[2][1],
M[0][2] * M[2][1] - M[0][1] * M[2][2],
M[0][1] * M[1][2] - M[0][2] * M[1][1],
},
{
M[1][2] * M[2][0] - M[1][0] * M[2][2],
M[0][0] * M[2][2] - M[0][2] * M[2][0],
M[0][2] * M[1][0] - M[0][0] * M[1][2],
},
{
M[1][0] * M[2][1] - M[1][1] * M[2][0],
M[0][1] * M[2][0] - M[0][0] * M[2][1],
M[0][0] * M[1][1] - M[0][1] * M[1][0],
}}};
float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
if (std::fabs(det) < 1e-6f)
{
return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}};
}
float idet = 1.0f / det;
return {{
{{adj[0][0] * idet, adj[0][1] * idet, adj[0][2] * idet}},
{{adj[1][0] * idet, adj[1][1] * idet, adj[1][2] * idet}},
{{adj[2][0] * idet, adj[2][1] * idet, adj[2][2] * idet}},
}};
}
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
const Camera &cam, float balance, std::pair<int, int> new_size);
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
const Camera &cam, float alpha, std::pair<int, int> new_size);
} // namespace
// ================================================================================================= // =================================================================================================
// ================================================================================================= // =================================================================================================
@@ -63,7 +112,7 @@ std::string Camera::to_string() const
out << "'width': " << width << ", "; out << "'width': " << width << ", ";
out << "'height': " << height << ", "; out << "'height': " << height << ", ";
out << "'type': " << type; out << "'model': '" << camera_model_name(model) << "'";
out << "}"; out << "}";
return out.str(); return out.str();
@@ -71,6 +120,33 @@ std::string Camera::to_string() const
// ================================================================================================= // =================================================================================================
const char *camera_model_name(CameraModel model)
{
switch (model)
{
case CameraModel::Pinhole:
return "pinhole";
case CameraModel::Fisheye:
return "fisheye";
}
return "unknown";
}
CameraModel parse_camera_model(const std::string &value)
{
if (value == "pinhole")
{
return CameraModel::Pinhole;
}
if (value == "fisheye")
{
return CameraModel::Fisheye;
}
throw std::invalid_argument("Unsupported camera model: " + value);
}
// =================================================================================================
std::ostream &operator<<(std::ostream &out, const Camera &cam) std::ostream &operator<<(std::ostream &out, const Camera &cam)
{ {
out << cam.to_string(); out << cam.to_string();
@@ -80,93 +156,33 @@ std::ostream &operator<<(std::ostream &out, const Camera &cam)
// ================================================================================================= // =================================================================================================
// ================================================================================================= // =================================================================================================
CameraInternal::CameraInternal(const Camera &cam) CachedCamera cache_camera(const Camera &cam)
{ {
this->cam = cam; const std::array<std::array<float, 3>, 3> invR = transpose3x3(cam.R);
this->invR = transpose3x3(cam.R);
// Camera center: // Camera center:
// C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]}; const std::array<float, 3> center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
// Undistort camera matrix // Undistort camera matrix
// As with the undistortion, the own implementation avoids some overhead compared to OpenCV // As with the undistortion, the own implementation avoids some overhead compared to OpenCV
if (cam.type == "fisheye") std::array<std::array<float, 3>, 3> newK;
if (cam.model == CameraModel::Fisheye)
{ {
newK = calc_optimal_camera_matrix_fisheye(1.0, {cam.width, cam.height}); newK = calc_optimal_camera_matrix_fisheye(cam, 1.0f, {cam.width, cam.height});
} }
else else
{ {
newK = calc_optimal_camera_matrix_pinhole(1.0, {cam.width, cam.height}); newK = calc_optimal_camera_matrix_pinhole(cam, 1.0f, {cam.width, cam.height});
} }
this->invK = invert3x3(newK); const std::array<std::array<float, 3>, 3> invK = invert3x3(newK);
return CachedCamera {cam, invR, center, newK, invK};
} }
// ================================================================================================= // =================================================================================================
std::array<std::array<float, 3>, 3> CameraInternal::transpose3x3( void undistort_point_pinhole(std::array<float, 3> &p, const std::array<float, 5> &k)
const std::array<std::array<float, 3>, 3> &M)
{
return {{{M[0][0], M[1][0], M[2][0]},
{M[0][1], M[1][1], M[2][1]},
{M[0][2], M[1][2], M[2][2]}}};
}
// =================================================================================================
std::array<std::array<float, 3>, 3> CameraInternal::invert3x3(
const std::array<std::array<float, 3>, 3> &M)
{
// Compute the inverse using the adjugate method
// See: https://scicomp.stackexchange.com/a/29206
std::array<std::array<float, 3>, 3> adj = {
{{
M[1][1] * M[2][2] - M[1][2] * M[2][1],
M[0][2] * M[2][1] - M[0][1] * M[2][2],
M[0][1] * M[1][2] - M[0][2] * M[1][1],
},
{
M[1][2] * M[2][0] - M[1][0] * M[2][2],
M[0][0] * M[2][2] - M[0][2] * M[2][0],
M[0][2] * M[1][0] - M[0][0] * M[1][2],
},
{
M[1][0] * M[2][1] - M[1][1] * M[2][0],
M[0][1] * M[2][0] - M[0][0] * M[2][1],
M[0][0] * M[1][1] - M[0][1] * M[1][0],
}}};
float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
if (std::fabs(det) < 1e-6f)
{
return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}};
}
float idet = 1.0f / det;
std::array<std::array<float, 3>, 3> inv = {
{{
adj[0][0] * idet,
adj[0][1] * idet,
adj[0][2] * idet,
},
{
adj[1][0] * idet,
adj[1][1] * idet,
adj[1][2] * idet,
},
{
adj[2][0] * idet,
adj[2][1] * idet,
adj[2][2] * idet,
}}};
return inv;
}
// =================================================================================================
void CameraInternal::undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k)
{ {
// Following: cv::cvUndistortPointsInternal // Following: cv::cvUndistortPointsInternal
// Uses only the distortion coefficients: [k1, k2, p1, p2, k3] // Uses only the distortion coefficients: [k1, k2, p1, p2, k3]
@@ -202,7 +218,7 @@ void CameraInternal::undistort_point_pinhole(std::array<float, 3> &p, const std:
// ================================================================================================= // =================================================================================================
void CameraInternal::undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k) void undistort_point_fisheye(std::array<float, 3> &p, const std::array<float, 5> &k)
{ {
// Following: cv::fisheye::undistortPoints // Following: cv::fisheye::undistortPoints
// Uses only the distortion coefficients: [k1, k2, k3, k4] // Uses only the distortion coefficients: [k1, k2, k3, k4]
@@ -250,8 +266,10 @@ void CameraInternal::undistort_point_fisheye(std::array<float, 3> &p, const std:
// ================================================================================================= // =================================================================================================
std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_fisheye( namespace
float balance, std::pair<int, int> new_size) {
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
const Camera &cam, float balance, std::pair<int, int> new_size)
{ {
// Following: cv::fisheye::estimateNewCameraMatrixForUndistortRectify // Following: cv::fisheye::estimateNewCameraMatrixForUndistortRectify
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L630 // https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L630
@@ -355,8 +373,8 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_f
// ================================================================================================= // =================================================================================================
std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_pinhole( std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
float alpha, std::pair<int, int> new_size) const Camera &cam, float alpha, std::pair<int, int> new_size)
{ {
// Following: cv::getOptimalNewCameraMatrix // Following: cv::getOptimalNewCameraMatrix
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/calibration_base.cpp#L1565 // https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/calibration_base.cpp#L1565
@@ -479,3 +497,4 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
{0.0, 0.0, 1.0}}}; {0.0, 0.0, 1.0}}};
return newK; return newK;
} }
} // namespace
+13 -31
View File
@@ -7,46 +7,28 @@
// ================================================================================================= // =================================================================================================
enum class CameraModel
{
Pinhole,
Fisheye,
};
const char *camera_model_name(CameraModel model);
CameraModel parse_camera_model(const std::string &value);
// =================================================================================================
struct Camera struct Camera
{ {
std::string name; std::string name;
std::array<std::array<float, 3>, 3> K; std::array<std::array<float, 3>, 3> K;
std::vector<float> DC; std::array<float, 5> DC = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
std::array<std::array<float, 3>, 3> R; std::array<std::array<float, 3>, 3> R;
std::array<std::array<float, 1>, 3> T; std::array<std::array<float, 1>, 3> T;
int width; int width;
int height; int height;
std::string type; CameraModel model = CameraModel::Pinhole;
friend std::ostream &operator<<(std::ostream &out, Camera const &camera); friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
std::string to_string() const; std::string to_string() const;
}; };
// =================================================================================================
class CameraInternal
{
public:
CameraInternal(const Camera &cam);
Camera cam;
std::array<std::array<float, 3>, 3> invR;
std::array<float, 3> center;
std::array<std::array<float, 3>, 3> newK;
std::array<std::array<float, 3>, 3> invK;
static std::array<std::array<float, 3>, 3> transpose3x3(
const std::array<std::array<float, 3>, 3> &M);
static std::array<std::array<float, 3>, 3> invert3x3(
const std::array<std::array<float, 3>, 3> &M);
static void undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k);
static void undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k);
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
float balance, std::pair<int, int> new_size);
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
float alpha, std::pair<int, int> new_size);
};
+10
View File
@@ -38,6 +38,11 @@ const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)]; return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
} }
const float &PoseBatch3DView::at(size_t person, size_t joint, size_t coord) const
{
return data[pose3d_offset(person, joint, coord, num_joints)];
}
const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
{ {
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)]; return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
@@ -109,6 +114,11 @@ const float &PoseBatch3D::at(size_t person, size_t joint, size_t coord) const
return data[pose3d_offset(person, joint, coord, num_joints)]; return data[pose3d_offset(person, joint, coord, num_joints)];
} }
PoseBatch3DView PoseBatch3D::view() const
{
return PoseBatch3DView {data.data(), num_persons, num_joints};
}
NestedPoses3D PoseBatch3D::to_nested() const NestedPoses3D PoseBatch3D::to_nested() const
{ {
NestedPoses3D poses_3d(num_persons); NestedPoses3D poses_3d(num_persons);
+151 -2
View File
@@ -25,6 +25,15 @@ struct PoseBatch2DView
const float &at(size_t view, size_t person, size_t joint, size_t coord) const; const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
}; };
struct PoseBatch3DView
{
const float *data = nullptr;
size_t num_persons = 0;
size_t num_joints = 0;
const float &at(size_t person, size_t joint, size_t coord) const;
};
struct PoseBatch2D struct PoseBatch2D
{ {
std::vector<float> data; std::vector<float> data;
@@ -48,6 +57,7 @@ struct PoseBatch3D
float &at(size_t person, size_t joint, size_t coord); float &at(size_t person, size_t joint, size_t coord);
const float &at(size_t person, size_t joint, size_t coord) const; const float &at(size_t person, size_t joint, size_t coord) const;
PoseBatch3DView view() const;
NestedPoses3D to_nested() const; NestedPoses3D to_nested() const;
static PoseBatch3D from_nested(const NestedPoses3D &poses_3d); static PoseBatch3D from_nested(const NestedPoses3D &poses_3d);
@@ -55,6 +65,136 @@ struct PoseBatch3D
// ================================================================================================= // =================================================================================================
struct PairCandidate
{
int view1 = -1;
int view2 = -1;
int person1 = -1;
int person2 = -1;
int global_person1 = -1;
int global_person2 = -1;
};
struct PreviousPoseMatch
{
int previous_pose_index = -1;
float score_view1 = 0.0f;
float score_view2 = 0.0f;
bool matched_view1 = false;
bool matched_view2 = false;
bool kept = true;
std::string decision = "unfiltered";
};
struct PreviousPoseFilterDebug
{
bool used_previous_poses = false;
std::vector<PreviousPoseMatch> matches;
std::vector<int> kept_pair_indices;
std::vector<PairCandidate> kept_pairs;
};
struct CoreProposalDebug
{
int pair_index = -1;
PairCandidate pair;
std::vector<std::array<float, 4>> pose_3d;
float score = 0.0f;
bool kept = false;
std::string drop_reason = "uninitialized";
};
struct ProposalGroupDebug
{
std::array<float, 3> center = {0.0f, 0.0f, 0.0f};
std::vector<std::array<float, 4>> pose_3d;
std::vector<int> proposal_indices;
};
struct GroupingDebug
{
std::vector<ProposalGroupDebug> initial_groups;
std::vector<int> duplicate_pair_drops;
std::vector<ProposalGroupDebug> groups;
};
struct FullProposalDebug
{
int source_core_proposal_index = -1;
PairCandidate pair;
std::vector<std::array<float, 4>> pose_3d;
};
struct MergeDebug
{
std::vector<std::vector<std::array<float, 4>>> merged_poses;
std::vector<std::vector<int>> group_proposal_indices;
};
struct TriangulationTrace
{
std::vector<PairCandidate> pairs;
PreviousPoseFilterDebug previous_filter;
std::vector<CoreProposalDebug> core_proposals;
GroupingDebug grouping;
std::vector<FullProposalDebug> full_proposals;
MergeDebug merge;
PoseBatch3D final_poses;
};
// =================================================================================================
std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d);
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names,
const PoseBatch3DView &previous_poses_3d,
float min_match_score = 0.95f);
inline PreviousPoseFilterDebug filter_pairs_with_previous_poses(
const PoseBatch2D &poses_2d,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names,
const PoseBatch3D &previous_poses_3d,
float min_match_score = 0.95f)
{
return filter_pairs_with_previous_poses(
poses_2d.view(), cameras, joint_names, previous_poses_3d.view(), min_match_score);
}
TriangulationTrace triangulate_debug(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names,
const PoseBatch3DView *previous_poses_3d = nullptr,
float min_match_score = 0.95f,
size_t min_group_size = 1);
inline TriangulationTrace triangulate_debug(
const PoseBatch2D &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names,
const PoseBatch3D *previous_poses_3d = nullptr,
float min_match_score = 0.95f,
size_t min_group_size = 1)
{
PoseBatch3DView previous_view_storage;
const PoseBatch3DView *previous_view = nullptr;
if (previous_poses_3d != nullptr)
{
previous_view_storage = previous_poses_3d->view();
previous_view = &previous_view_storage;
}
return triangulate_debug(
poses_2d.view(), cameras, roomparams, joint_names, previous_view, min_match_score, min_group_size);
}
// =================================================================================================
/** /**
* Calculate a triangulation using a padded pose tensor. * Calculate a triangulation using a padded pose tensor.
* *
@@ -72,6 +212,7 @@ PoseBatch3D triangulate_poses(
const std::vector<Camera> &cameras, const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams, const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names, const std::vector<std::string> &joint_names,
const PoseBatch3DView *previous_poses_3d = nullptr,
float min_match_score = 0.95f, float min_match_score = 0.95f,
size_t min_group_size = 1); size_t min_group_size = 1);
@@ -80,9 +221,17 @@ inline PoseBatch3D triangulate_poses(
const std::vector<Camera> &cameras, const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams, const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names, const std::vector<std::string> &joint_names,
const PoseBatch3D *previous_poses_3d = nullptr,
float min_match_score = 0.95f, float min_match_score = 0.95f,
size_t min_group_size = 1) size_t min_group_size = 1)
{ {
return triangulate_poses( PoseBatch3DView previous_view_storage;
poses_2d.view(), cameras, roomparams, joint_names, min_match_score, min_group_size); const PoseBatch3DView *previous_view = nullptr;
if (previous_poses_3d != nullptr)
{
previous_view_storage = previous_poses_3d->view();
previous_view = &previous_view_storage;
}
return triangulate_poses(
poses_2d.view(), cameras, roomparams, joint_names, previous_view, min_match_score, min_group_size);
} }
+545 -100
View File
@@ -10,6 +10,7 @@
#include <unordered_map> #include <unordered_map>
#include "interface.hpp" #include "interface.hpp"
#include "cached_camera.hpp"
namespace namespace
{ {
@@ -83,7 +84,7 @@ size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_jo
} }
std::array<float, 3> undistort_point( std::array<float, 3> undistort_point(
const std::array<float, 3> &raw_point, const CameraInternal &icam) const std::array<float, 3> &raw_point, const CachedCamera &icam)
{ {
std::array<float, 3> point = raw_point; std::array<float, 3> point = raw_point;
const float ifx_old = 1.0f / icam.cam.K[0][0]; const float ifx_old = 1.0f / icam.cam.K[0][0];
@@ -98,13 +99,13 @@ std::array<float, 3> undistort_point(
point[0] = (point[0] - cx_old) * ifx_old; point[0] = (point[0] - cx_old) * ifx_old;
point[1] = (point[1] - cy_old) * ify_old; point[1] = (point[1] - cy_old) * ify_old;
if (icam.cam.type == "fisheye") if (icam.cam.model == CameraModel::Fisheye)
{ {
CameraInternal::undistort_point_fisheye(point, icam.cam.DC); undistort_point_fisheye(point, icam.cam.DC);
} }
else else
{ {
CameraInternal::undistort_point_pinhole(point, icam.cam.DC); undistort_point_pinhole(point, icam.cam.DC);
} }
point[0] = (point[0] * fx_new) + cx_new; point[0] = (point[0] * fx_new) + cx_new;
@@ -125,7 +126,7 @@ class PackedPoseStore2D
public: public:
PackedPoseStore2D( PackedPoseStore2D(
const PoseBatch2DView &source, const PoseBatch2DView &source,
const std::vector<CameraInternal> &internal_cameras) const std::vector<CachedCamera> &internal_cameras)
: person_counts(source.num_views), : person_counts(source.num_views),
view_offsets(source.num_views), view_offsets(source.num_views),
num_views(source.num_views), num_views(source.num_views),
@@ -229,16 +230,75 @@ public:
{ {
} }
PoseBatch3D triangulate_poses( TriangulationTrace triangulate_debug(
const PoseBatch2DView &poses_2d, const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras, const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams, const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names); const std::vector<std::string> &joint_names,
const PoseBatch3DView *previous_poses_3d);
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names,
const PoseBatch3DView &previous_poses_3d);
private: private:
struct PreparedInputs
{
std::vector<CachedCamera> internal_cameras;
std::vector<size_t> core_joint_idx;
std::vector<std::array<size_t, 2>> core_limbs_idx;
PackedPoseStore2D packed_poses;
PreparedInputs(
std::vector<CachedCamera> cameras_in,
std::vector<size_t> core_joint_idx_in,
std::vector<std::array<size_t, 2>> core_limbs_idx_in,
PackedPoseStore2D packed_poses_in)
: internal_cameras(std::move(cameras_in)),
core_joint_idx(std::move(core_joint_idx_in)),
core_limbs_idx(std::move(core_limbs_idx_in)),
packed_poses(std::move(packed_poses_in))
{
}
};
struct PreviousProjectionCache
{
std::vector<std::vector<Pose2D>> projected_poses;
std::vector<std::vector<std::vector<float>>> projected_dists;
std::vector<Pose3D> core_poses;
};
PreparedInputs prepare_inputs(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names);
std::vector<PairCandidate> build_pair_candidates(const PackedPoseStore2D &packed_poses) const;
PreviousProjectionCache project_previous_poses(
const PoseBatch3DView &previous_poses_3d,
const std::vector<CachedCamera> &internal_cameras,
const std::vector<size_t> &core_joint_idx);
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
const PackedPoseStore2D &packed_poses,
const std::vector<CachedCamera> &internal_cameras,
const std::vector<size_t> &core_joint_idx,
const std::vector<PairCandidate> &pairs,
const PoseBatch3DView &previous_poses_3d);
float calc_pose_score(
const Pose2D &pose,
const Pose2D &reference_pose,
const std::vector<float> &dists,
const CachedCamera &icam);
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses( std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses(
const std::vector<Pose3D> &poses_3d, const std::vector<Pose3D> &poses_3d,
const CameraInternal &icam, const CachedCamera &icam,
bool calc_dists); bool calc_dists);
std::vector<float> score_projection( std::vector<float> score_projection(
@@ -251,8 +311,8 @@ private:
std::pair<Pose3D, float> triangulate_and_score( std::pair<Pose3D, float> triangulate_and_score(
const Pose2D &pose1, const Pose2D &pose1,
const Pose2D &pose2, const Pose2D &pose2,
const CameraInternal &cam1, const CachedCamera &cam1,
const CameraInternal &cam2, const CachedCamera &cam2,
const std::array<std::array<float, 3>, 2> &roomparams, const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::array<size_t, 2>> &core_limbs_idx); const std::vector<std::array<size_t, 2>> &core_limbs_idx);
@@ -313,17 +373,9 @@ private:
}; };
}; };
PoseBatch3D empty_pose_batch3d(size_t num_joints) TriangulatorInternal::PreparedInputs TriangulatorInternal::prepare_inputs(
{
PoseBatch3D poses_3d;
poses_3d.num_joints = num_joints;
return poses_3d;
}
PoseBatch3D TriangulatorInternal::triangulate_poses(
const PoseBatch2DView &poses_2d, const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras, const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names) const std::vector<std::string> &joint_names)
{ {
if (poses_2d.num_views == 0) if (poses_2d.num_views == 0)
@@ -342,6 +394,10 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
{ {
throw std::invalid_argument("Number of joint names and 2D poses must be the same."); throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
} }
if (poses_2d.num_joints == 0)
{
throw std::invalid_argument("At least one joint is required.");
}
if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr) if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr)
{ {
throw std::invalid_argument("poses_2d data must not be null."); throw std::invalid_argument("poses_2d data must not be null.");
@@ -356,11 +412,11 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
num_cams = cameras.size(); num_cams = cameras.size();
std::vector<CameraInternal> internal_cameras; std::vector<CachedCamera> internal_cameras;
internal_cameras.reserve(cameras.size()); internal_cameras.reserve(cameras.size());
for (const auto &camera : cameras) for (const auto &camera : cameras)
{ {
internal_cameras.emplace_back(camera); internal_cameras.push_back(cache_camera(camera));
} }
std::vector<size_t> core_joint_idx; std::vector<size_t> core_joint_idx;
@@ -383,64 +439,310 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
} }
PackedPoseStore2D packed_poses(poses_2d, internal_cameras); PackedPoseStore2D packed_poses(poses_2d, internal_cameras);
return PreparedInputs(
std::move(internal_cameras),
std::move(core_joint_idx),
std::move(core_limbs_idx),
std::move(packed_poses));
}
std::vector<PosePair> all_pairs; std::vector<PairCandidate> TriangulatorInternal::build_pair_candidates(const PackedPoseStore2D &packed_poses) const
for (size_t view1 = 0; view1 < cameras.size(); ++view1)
{ {
for (size_t view2 = view1 + 1; view2 < cameras.size(); ++view2) std::vector<PairCandidate> pairs;
for (size_t view1 = 0; view1 < packed_poses.num_views; ++view1)
{
for (size_t view2 = view1 + 1; view2 < packed_poses.num_views; ++view2)
{ {
for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1) for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1)
{ {
for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2) for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2)
{ {
all_pairs.emplace_back( pairs.push_back(PairCandidate {
std::make_tuple(
static_cast<int>(view1), static_cast<int>(view1),
static_cast<int>(view2), static_cast<int>(view2),
static_cast<int>(person1), static_cast<int>(person1),
static_cast<int>(person2)), static_cast<int>(person2),
std::make_pair(
static_cast<int>(packed_poses.pose_index(view1, person1)), static_cast<int>(packed_poses.pose_index(view1, person1)),
static_cast<int>(packed_poses.pose_index(view2, person2)))); static_cast<int>(packed_poses.pose_index(view2, person2)),
});
} }
} }
} }
} }
return pairs;
}
TriangulatorInternal::PreviousProjectionCache TriangulatorInternal::project_previous_poses(
const PoseBatch3DView &previous_poses_3d,
const std::vector<CachedCamera> &internal_cameras,
const std::vector<size_t> &core_joint_idx)
{
PreviousProjectionCache cache;
cache.core_poses.resize(previous_poses_3d.num_persons);
for (size_t person = 0; person < previous_poses_3d.num_persons; ++person)
{
Pose3D pose(core_joint_idx.size(), {0.0f, 0.0f, 0.0f, 0.0f});
for (size_t joint = 0; joint < core_joint_idx.size(); ++joint)
{
const size_t source_joint = core_joint_idx[joint];
for (size_t coord = 0; coord < 4; ++coord)
{
pose[joint][coord] = previous_poses_3d.at(person, source_joint, coord);
}
}
cache.core_poses[person] = std::move(pose);
}
cache.projected_poses.resize(internal_cameras.size());
cache.projected_dists.resize(internal_cameras.size());
for (size_t view = 0; view < internal_cameras.size(); ++view)
{
auto [projected_poses, projected_dists] = project_poses(cache.core_poses, internal_cameras[view], true);
cache.projected_poses[view] = std::move(projected_poses);
cache.projected_dists[view] = std::move(projected_dists);
}
return cache;
}
float TriangulatorInternal::calc_pose_score(
const Pose2D &pose,
const Pose2D &reference_pose,
const std::vector<float> &dists,
const CachedCamera &icam)
{
const float min_score = 0.1f;
std::vector<bool> mask(pose.size(), false);
size_t valid_count = 0;
for (size_t joint = 0; joint < pose.size(); ++joint)
{
mask[joint] = pose[joint][2] > min_score && reference_pose[joint][2] > min_score;
if (mask[joint])
{
++valid_count;
}
}
if (valid_count < 3)
{
return 0.0f;
}
const float iscale = (icam.cam.width + icam.cam.height) * 0.5f;
std::vector<float> scores = score_projection(pose, reference_pose, dists, mask, iscale);
const size_t drop_k = static_cast<size_t>(pose.size() * 0.2f);
if (drop_k > 0)
{
std::partial_sort(scores.begin(), scores.begin() + drop_k, scores.end());
}
float total = 0.0f;
for (size_t i = drop_k; i < scores.size(); ++i)
{
total += scores[i];
}
return total / static_cast<float>(scores.size() - drop_k);
}
PreviousPoseFilterDebug TriangulatorInternal::filter_pairs_with_previous_poses(
const PackedPoseStore2D &packed_poses,
const std::vector<CachedCamera> &internal_cameras,
const std::vector<size_t> &core_joint_idx,
const std::vector<PairCandidate> &pairs,
const PoseBatch3DView &previous_poses_3d)
{
PreviousPoseFilterDebug debug;
debug.used_previous_poses = true;
debug.matches.resize(pairs.size());
if (previous_poses_3d.num_persons == 0)
{
debug.kept_pair_indices.resize(pairs.size());
std::iota(debug.kept_pair_indices.begin(), debug.kept_pair_indices.end(), 0);
debug.kept_pairs = pairs;
for (auto &match : debug.matches)
{
match.decision = "no_previous_poses";
}
return debug;
}
const PreviousProjectionCache projected_previous =
project_previous_poses(previous_poses_3d, internal_cameras, core_joint_idx);
const float threshold = min_match_score;
for (size_t pair_index = 0; pair_index < pairs.size(); ++pair_index)
{
const PairCandidate &pair = pairs[pair_index];
const Pose2D pose1_core = packed_poses.pose(
static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), core_joint_idx);
const Pose2D pose2_core = packed_poses.pose(
static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), core_joint_idx);
PreviousPoseMatch best_match;
bool found_decision = false;
for (size_t previous_index = 0; previous_index < previous_poses_3d.num_persons; ++previous_index)
{
const float score_view1 = calc_pose_score(
pose1_core,
projected_previous.projected_poses[static_cast<size_t>(pair.view1)][previous_index],
projected_previous.projected_dists[static_cast<size_t>(pair.view1)][previous_index],
internal_cameras[static_cast<size_t>(pair.view1)]);
const float score_view2 = calc_pose_score(
pose2_core,
projected_previous.projected_poses[static_cast<size_t>(pair.view2)][previous_index],
projected_previous.projected_dists[static_cast<size_t>(pair.view2)][previous_index],
internal_cameras[static_cast<size_t>(pair.view2)]);
const bool matched_view1 = score_view1 > threshold;
const bool matched_view2 = score_view2 > threshold;
if (matched_view1 && matched_view2)
{
best_match = PreviousPoseMatch {
static_cast<int>(previous_index),
score_view1,
score_view2,
true,
true,
true,
"matched_previous_pose",
};
found_decision = true;
break;
}
if (matched_view1 || matched_view2)
{
best_match = PreviousPoseMatch {
static_cast<int>(previous_index),
score_view1,
score_view2,
matched_view1,
matched_view2,
false,
"dropped_single_view_match",
};
found_decision = true;
break;
}
}
if (all_pairs.empty()) if (!found_decision)
{ {
return empty_pose_batch3d(joint_names.size()); best_match.decision = "new_person_candidate";
best_match.kept = true;
} }
std::vector<std::pair<Pose3D, float>> all_scored_poses(all_pairs.size()); debug.matches[pair_index] = best_match;
for (size_t i = 0; i < all_pairs.size(); ++i) if (best_match.kept)
{ {
const auto &pids = all_pairs[i].first; debug.kept_pair_indices.push_back(static_cast<int>(pair_index));
const auto &cam1 = internal_cameras[std::get<0>(pids)]; debug.kept_pairs.push_back(pair);
const auto &cam2 = internal_cameras[std::get<1>(pids)]; }
Pose2D pose1_core = packed_poses.pose(std::get<0>(pids), std::get<2>(pids), core_joint_idx); }
Pose2D pose2_core = packed_poses.pose(std::get<1>(pids), std::get<3>(pids), core_joint_idx);
return debug;
}
TriangulationTrace TriangulatorInternal::triangulate_debug(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names,
const PoseBatch3DView *previous_poses_3d)
{
TriangulationTrace trace;
trace.final_poses.num_joints = joint_names.size();
if (previous_poses_3d != nullptr && previous_poses_3d->num_persons > 0 &&
previous_poses_3d->num_joints != joint_names.size())
{
throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
}
PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
trace.pairs = build_pair_candidates(prepared.packed_poses);
if (trace.pairs.empty())
{
return trace;
}
std::vector<int> active_pair_indices(trace.pairs.size());
std::iota(active_pair_indices.begin(), active_pair_indices.end(), 0);
std::vector<PairCandidate> active_pairs = trace.pairs;
if (previous_poses_3d != nullptr)
{
trace.previous_filter = filter_pairs_with_previous_poses(
prepared.packed_poses,
prepared.internal_cameras,
prepared.core_joint_idx,
trace.pairs,
*previous_poses_3d);
active_pair_indices = trace.previous_filter.kept_pair_indices;
active_pairs = trace.previous_filter.kept_pairs;
if (active_pairs.empty())
{
return trace;
}
}
trace.core_proposals.reserve(active_pairs.size());
std::vector<PosePair> kept_pose_pairs;
std::vector<std::pair<Pose3D, float>> kept_scored_poses;
std::vector<int> kept_core_indices;
for (size_t i = 0; i < active_pairs.size(); ++i)
{
const PairCandidate &pair = active_pairs[i];
const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
const Pose2D pose1_core = prepared.packed_poses.pose(
static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), prepared.core_joint_idx);
const Pose2D pose2_core = prepared.packed_poses.pose(
static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), prepared.core_joint_idx);
auto [pose3d, score] = triangulate_and_score( auto [pose3d, score] = triangulate_and_score(
pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx); pose1_core, pose2_core, cam1, cam2, roomparams, prepared.core_limbs_idx);
all_scored_poses[i] = std::make_pair(std::move(pose3d), score);
CoreProposalDebug proposal;
proposal.pair_index = active_pair_indices[i];
proposal.pair = pair;
proposal.pose_3d = pose3d;
proposal.score = score;
proposal.kept = score >= min_match_score;
proposal.drop_reason = proposal.kept ? "kept" : "below_min_match_score";
trace.core_proposals.push_back(proposal);
if (!proposal.kept)
{
continue;
} }
for (size_t i = all_scored_poses.size(); i > 0; --i) kept_core_indices.push_back(static_cast<int>(trace.core_proposals.size() - 1));
{ kept_scored_poses.emplace_back(std::move(pose3d), score);
if (all_scored_poses[i - 1].second < min_match_score) kept_pose_pairs.emplace_back(
{ std::make_tuple(pair.view1, pair.view2, pair.person1, pair.person2),
all_scored_poses.erase(all_scored_poses.begin() + static_cast<std::ptrdiff_t>(i - 1)); std::make_pair(pair.global_person1, pair.global_person2));
all_pairs.erase(all_pairs.begin() + static_cast<std::ptrdiff_t>(i - 1));
}
} }
if (all_pairs.empty()) if (kept_pose_pairs.empty())
{ {
return empty_pose_batch3d(joint_names.size()); return trace;
}
auto groups = calc_grouping(kept_pose_pairs, kept_scored_poses, min_match_score);
trace.grouping.initial_groups.reserve(groups.size());
for (const auto &group : groups)
{
ProposalGroupDebug debug_group;
debug_group.center = std::get<0>(group);
debug_group.pose_3d = std::get<1>(group);
for (const int index : std::get<2>(group))
{
debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
}
trace.grouping.initial_groups.push_back(std::move(debug_group));
} }
std::vector<GroupedPose> groups = calc_grouping(all_pairs, all_scored_poses, min_match_score);
for (size_t i = groups.size(); i > 0; --i) for (size_t i = groups.size(); i > 0; --i)
{ {
if (std::get<2>(groups[i - 1]).size() < min_group_size) if (std::get<2>(groups[i - 1]).size() < min_group_size)
@@ -448,16 +750,15 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1)); groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
} }
} }
if (groups.empty()) if (groups.empty())
{ {
return empty_pose_batch3d(joint_names.size()); return trace;
} }
std::map<std::tuple<int, int, int>, std::vector<size_t>> pairs_map; std::map<std::tuple<int, int, int>, std::vector<size_t>> pairs_map;
for (size_t i = 0; i < all_pairs.size(); ++i) for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
{ {
const auto &pair = all_pairs[i]; const auto &pair = kept_pose_pairs[i];
const auto &mid1 = std::make_tuple( const auto &mid1 = std::make_tuple(
std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second)); std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second));
const auto &mid2 = std::make_tuple( const auto &mid2 = std::make_tuple(
@@ -466,19 +767,19 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
pairs_map[mid2].push_back(i); pairs_map[mid2].push_back(i);
} }
std::vector<size_t> group_map(all_pairs.size()); std::vector<size_t> group_map(kept_pose_pairs.size());
for (size_t i = 0; i < groups.size(); ++i) for (size_t i = 0; i < groups.size(); ++i)
{ {
for (const auto &idx : std::get<2>(groups[i])) for (const int index : std::get<2>(groups[i]))
{ {
group_map[idx] = i; group_map[static_cast<size_t>(index)] = i;
} }
} }
std::set<size_t> drop_indices; std::set<size_t> drop_indices;
for (auto &pair : pairs_map) for (auto &pair_entry : pairs_map)
{ {
auto &indices = pair.second; auto &indices = pair_entry.second;
if (indices.size() <= 1) if (indices.size() <= 1)
{ {
continue; continue;
@@ -488,10 +789,10 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
std::vector<float> pair_scores; std::vector<float> pair_scores;
group_sizes.reserve(indices.size()); group_sizes.reserve(indices.size());
pair_scores.reserve(indices.size()); pair_scores.reserve(indices.size());
for (auto idx : indices) for (const size_t index : indices)
{ {
group_sizes.push_back(std::get<2>(groups[group_map[idx]]).size()); group_sizes.push_back(std::get<2>(groups[group_map[index]]).size());
pair_scores.push_back(all_scored_poses[idx].second); pair_scores.push_back(kept_scored_poses[index].second);
} }
std::vector<size_t> indices_sorted(indices.size()); std::vector<size_t> indices_sorted(indices.size());
@@ -514,10 +815,12 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
std::vector<size_t> drop_list(drop_indices.begin(), drop_indices.end()); std::vector<size_t> drop_list(drop_indices.begin(), drop_indices.end());
std::sort(drop_list.begin(), drop_list.end(), std::greater<size_t>()); std::sort(drop_list.begin(), drop_list.end(), std::greater<size_t>());
for (size_t drop_idx : drop_list) for (const size_t drop_idx : drop_list)
{ {
all_scored_poses.erase(all_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx)); trace.grouping.duplicate_pair_drops.push_back(kept_core_indices[drop_idx]);
all_pairs.erase(all_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx)); kept_scored_poses.erase(kept_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx));
kept_pose_pairs.erase(kept_pose_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx));
kept_core_indices.erase(kept_core_indices.begin() + static_cast<std::ptrdiff_t>(drop_idx));
for (auto &group : groups) for (auto &group : groups)
{ {
@@ -527,11 +830,11 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
{ {
indices.erase(it); indices.erase(it);
} }
for (auto &index : indices) for (int &index : indices)
{ {
if (static_cast<size_t>(index) > drop_idx) if (static_cast<size_t>(index) > drop_idx)
{ {
index -= 1; --index;
} }
} }
} }
@@ -544,24 +847,45 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1)); groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
} }
} }
if (groups.empty()) if (groups.empty())
{ {
return empty_pose_batch3d(joint_names.size()); return trace;
} }
std::vector<Pose3D> all_full_poses(all_pairs.size()); trace.grouping.groups.reserve(groups.size());
for (size_t i = 0; i < all_pairs.size(); ++i) for (const auto &group : groups)
{ {
const auto &pids = all_pairs[i].first; ProposalGroupDebug debug_group;
const auto &cam1 = internal_cameras[std::get<0>(pids)]; debug_group.center = std::get<0>(group);
const auto &cam2 = internal_cameras[std::get<1>(pids)]; debug_group.pose_3d = std::get<1>(group);
Pose2D pose1 = packed_poses.pose(std::get<0>(pids), std::get<2>(pids)); for (const int index : std::get<2>(group))
Pose2D pose2 = packed_poses.pose(std::get<1>(pids), std::get<3>(pids)); {
debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
}
trace.grouping.groups.push_back(std::move(debug_group));
}
auto [pose3d, score] = triangulate_and_score( std::vector<Pose3D> full_pose_buffer(kept_pose_pairs.size());
pose1, pose2, cam1, cam2, roomparams, {}); std::vector<int> full_proposal_index_by_core(trace.core_proposals.size(), -1);
trace.full_proposals.reserve(kept_pose_pairs.size());
for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
{
const auto &pair_ids = kept_pose_pairs[i].first;
const PairCandidate pair = {
std::get<0>(pair_ids),
std::get<1>(pair_ids),
std::get<2>(pair_ids),
std::get<3>(pair_ids),
kept_pose_pairs[i].second.first,
kept_pose_pairs[i].second.second,
};
const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
Pose2D pose1 = prepared.packed_poses.pose(static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1));
Pose2D pose2 = prepared.packed_poses.pose(static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2));
auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams, {});
(void)score;
for (size_t joint = 0; joint < pose3d.size(); ++joint) for (size_t joint = 0; joint < pose3d.size(); ++joint)
{ {
const float score1 = pose1[joint][2]; const float score1 = pose1[joint][2];
@@ -576,32 +900,43 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
} }
} }
all_full_poses[i] = std::move(pose3d); full_pose_buffer[i] = pose3d;
full_proposal_index_by_core[kept_core_indices[i]] = static_cast<int>(trace.full_proposals.size());
trace.full_proposals.push_back(FullProposalDebug {
kept_core_indices[i],
pair,
std::move(pose3d),
});
} }
std::vector<Pose3D> final_poses_3d(groups.size()); std::vector<Pose3D> final_poses_3d(groups.size());
trace.merge.group_proposal_indices.reserve(groups.size());
trace.merge.merged_poses.reserve(groups.size());
for (size_t i = 0; i < groups.size(); ++i) for (size_t i = 0; i < groups.size(); ++i)
{ {
std::vector<Pose3D> poses; std::vector<Pose3D> poses;
std::vector<int> source_core_indices;
poses.reserve(std::get<2>(groups[i]).size()); poses.reserve(std::get<2>(groups[i]).size());
for (const auto &idx : std::get<2>(groups[i])) source_core_indices.reserve(std::get<2>(groups[i]).size());
for (const int index : std::get<2>(groups[i]))
{ {
poses.push_back(all_full_poses[idx]); poses.push_back(full_pose_buffer[static_cast<size_t>(index)]);
source_core_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
} }
final_poses_3d[i] = merge_group(poses, min_match_score); final_poses_3d[i] = merge_group(poses, min_match_score);
trace.merge.group_proposal_indices.push_back(std::move(source_core_indices));
trace.merge.merged_poses.push_back(final_poses_3d[i]);
} }
add_extra_joints(final_poses_3d, joint_names); add_extra_joints(final_poses_3d, joint_names);
filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx); filter_poses(final_poses_3d, roomparams, prepared.core_joint_idx, prepared.core_limbs_idx);
add_missing_joints(final_poses_3d, joint_names); add_missing_joints(final_poses_3d, joint_names);
replace_far_joints(final_poses_3d, joint_names); replace_far_joints(final_poses_3d, joint_names);
PoseBatch3D poses_3d; size_t valid_persons = 0;
poses_3d.num_joints = joint_names.size();
for (const auto &pose : final_poses_3d) for (const auto &pose : final_poses_3d)
{ {
const auto is_valid = std::any_of( const bool is_valid = std::any_of(
pose.begin(), pose.begin(),
pose.end(), pose.end(),
[this](const std::array<float, 4> &joint) [this](const std::array<float, 4> &joint)
@@ -610,15 +945,17 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
}); });
if (is_valid) if (is_valid)
{ {
++poses_3d.num_persons; ++valid_persons;
} }
} }
poses_3d.data.resize(poses_3d.num_persons * poses_3d.num_joints * 4); trace.final_poses.num_persons = valid_persons;
trace.final_poses.data.resize(valid_persons * trace.final_poses.num_joints * 4);
size_t person_index = 0; size_t person_index = 0;
for (const auto &pose : final_poses_3d) for (const auto &pose : final_poses_3d)
{ {
const auto is_valid = std::any_of( const bool is_valid = std::any_of(
pose.begin(), pose.begin(),
pose.end(), pose.end(),
[this](const std::array<float, 4> &joint) [this](const std::array<float, 4> &joint)
@@ -630,24 +967,57 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
continue; continue;
} }
for (size_t joint = 0; joint < poses_3d.num_joints; ++joint) for (size_t joint = 0; joint < trace.final_poses.num_joints; ++joint)
{ {
for (size_t coord = 0; coord < 4; ++coord) for (size_t coord = 0; coord < 4; ++coord)
{ {
poses_3d.at(person_index, joint, coord) = pose[joint][coord]; trace.final_poses.at(person_index, joint, coord) = pose[joint][coord];
} }
} }
++person_index; ++person_index;
} }
return poses_3d; return trace;
}
PreviousPoseFilterDebug TriangulatorInternal::filter_pairs_with_previous_poses(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names,
const PoseBatch3DView &previous_poses_3d)
{
if (previous_poses_3d.num_persons > 0 && previous_poses_3d.num_joints != joint_names.size())
{
throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
}
PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
const std::vector<PairCandidate> pairs = build_pair_candidates(prepared.packed_poses);
if (pairs.empty())
{
PreviousPoseFilterDebug empty;
empty.used_previous_poses = true;
return empty;
}
return filter_pairs_with_previous_poses(
prepared.packed_poses,
prepared.internal_cameras,
prepared.core_joint_idx,
pairs,
previous_poses_3d);
} }
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> TriangulatorInternal::project_poses( std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> TriangulatorInternal::project_poses(
const std::vector<Pose3D> &poses_3d, const std::vector<Pose3D> &poses_3d,
const CameraInternal &icam, const CachedCamera &icam,
bool calc_dists) bool calc_dists)
{ {
if (poses_3d.empty())
{
return std::make_tuple(std::vector<Pose2D> {}, std::vector<std::vector<float>> {});
}
const size_t num_persons = poses_3d.size(); const size_t num_persons = poses_3d.size();
const size_t num_joints = poses_3d[0].size(); const size_t num_joints = poses_3d[0].size();
@@ -813,7 +1183,7 @@ std::array<float, 3> mat_mul_vec(
return res; return res;
} }
std::array<float, 3> calc_ray_dir(const CameraInternal &icam, const std::array<float, 2> &pt) std::array<float, 3> calc_ray_dir(const CachedCamera &icam, const std::array<float, 2> &pt)
{ {
// Compute normalized ray direction from the point // Compute normalized ray direction from the point
std::array<float, 3> uv1 = {pt[0], pt[1], 1.0}; std::array<float, 3> uv1 = {pt[0], pt[1], 1.0};
@@ -824,8 +1194,8 @@ std::array<float, 3> calc_ray_dir(const CameraInternal &icam, const std::array<f
} }
std::array<float, 4> triangulate_midpoint( std::array<float, 4> triangulate_midpoint(
const CameraInternal &icam1, const CachedCamera &icam1,
const CameraInternal &icam2, const CachedCamera &icam2,
const std::array<float, 2> &pt1, const std::array<float, 2> &pt1,
const std::array<float, 2> &pt2) const std::array<float, 2> &pt2)
{ {
@@ -864,8 +1234,8 @@ std::array<float, 4> triangulate_midpoint(
std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score( std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
const std::vector<std::array<float, 3>> &pose1, const std::vector<std::array<float, 3>> &pose1,
const std::vector<std::array<float, 3>> &pose2, const std::vector<std::array<float, 3>> &pose2,
const CameraInternal &cam1, const CachedCamera &cam1,
const CameraInternal &cam2, const CachedCamera &cam2,
const std::array<std::array<float, 3>, 2> &roomparams, const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::array<size_t, 2>> &core_limbs_idx) const std::vector<std::array<size_t, 2>> &core_limbs_idx)
{ {
@@ -2026,14 +2396,89 @@ void TriangulatorInternal::replace_far_joints(
} // namespace } // namespace
std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d)
{
if (poses_2d.person_counts == nullptr)
{
throw std::invalid_argument("person_counts must not be null.");
}
std::vector<PairCandidate> pairs;
std::vector<int> offsets(poses_2d.num_views, 0);
int total = 0;
for (size_t view = 0; view < poses_2d.num_views; ++view)
{
if (poses_2d.person_counts[view] > poses_2d.max_persons)
{
throw std::invalid_argument("person_counts entries must not exceed the padded person dimension.");
}
offsets[view] = total;
total += static_cast<int>(poses_2d.person_counts[view]);
}
for (size_t view1 = 0; view1 < poses_2d.num_views; ++view1)
{
for (size_t view2 = view1 + 1; view2 < poses_2d.num_views; ++view2)
{
for (size_t person1 = 0; person1 < poses_2d.person_counts[view1]; ++person1)
{
for (size_t person2 = 0; person2 < poses_2d.person_counts[view2]; ++person2)
{
pairs.push_back(PairCandidate {
static_cast<int>(view1),
static_cast<int>(view2),
static_cast<int>(person1),
static_cast<int>(person2),
offsets[view1] + static_cast<int>(person1),
offsets[view2] + static_cast<int>(person2),
});
}
}
}
}
return pairs;
}
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::vector<std::string> &joint_names,
const PoseBatch3DView &previous_poses_3d,
float min_match_score)
{
TriangulatorInternal triangulator(min_match_score, 1);
return triangulator.filter_pairs_with_previous_poses(poses_2d, cameras, joint_names, previous_poses_3d);
}
TriangulationTrace triangulate_debug(
const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names,
const PoseBatch3DView *previous_poses_3d,
float min_match_score,
size_t min_group_size)
{
TriangulatorInternal triangulator(min_match_score, min_group_size);
return triangulator.triangulate_debug(poses_2d, cameras, roomparams, joint_names, previous_poses_3d);
}
PoseBatch3D triangulate_poses( PoseBatch3D triangulate_poses(
const PoseBatch2DView &poses_2d, const PoseBatch2DView &poses_2d,
const std::vector<Camera> &cameras, const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams, const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names, const std::vector<std::string> &joint_names,
const PoseBatch3DView *previous_poses_3d,
float min_match_score, float min_match_score,
size_t min_group_size) size_t min_group_size)
{ {
TriangulatorInternal triangulator(min_match_score, min_group_size); return triangulate_debug(
return triangulator.triangulate_poses(poses_2d, cameras, roomparams, joint_names); poses_2d,
cameras,
roomparams,
joint_names,
previous_poses_3d,
min_match_score,
min_group_size)
.final_poses;
} }
+30 -1
View File
@@ -3,7 +3,23 @@ from __future__ import annotations
from collections.abc import Sequence from collections.abc import Sequence
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from ._core import Camera, triangulate_poses from ._core import (
Camera,
CameraModel,
CoreProposalDebug,
FullProposalDebug,
GroupingDebug,
MergeDebug,
PairCandidate,
PreviousPoseFilterDebug,
PreviousPoseMatch,
ProposalGroupDebug,
TriangulationTrace,
build_pair_candidates,
filter_pairs_with_previous_poses,
triangulate_debug,
triangulate_poses,
)
if TYPE_CHECKING: if TYPE_CHECKING:
import numpy as np import numpy as np
@@ -28,7 +44,20 @@ def pack_poses_2d(
__all__ = [ __all__ = [
"Camera", "Camera",
"CameraModel",
"CoreProposalDebug",
"FullProposalDebug",
"GroupingDebug",
"MergeDebug",
"PairCandidate",
"PreviousPoseFilterDebug",
"PreviousPoseMatch",
"ProposalGroupDebug",
"TriangulationTrace",
"build_pair_candidates",
"convert_cameras", "convert_cameras",
"filter_pairs_with_previous_poses",
"pack_poses_2d", "pack_poses_2d",
"triangulate_debug",
"triangulate_poses", "triangulate_poses",
] ]
+33 -5
View File
@@ -1,12 +1,12 @@
from __future__ import annotations from __future__ import annotations
from collections.abc import Sequence from collections.abc import Sequence
from typing import TypeAlias, TypedDict from typing import Literal, TypeAlias, TypedDict
import numpy as np import numpy as np
import numpy.typing as npt import numpy.typing as npt
from ._core import Camera from ._core import Camera, CameraModel
Matrix3x3Like: TypeAlias = Sequence[Sequence[float]] Matrix3x3Like: TypeAlias = Sequence[Sequence[float]]
VectorLike: TypeAlias = Sequence[float] VectorLike: TypeAlias = Sequence[float]
@@ -21,12 +21,39 @@ class CameraDict(TypedDict, total=False):
T: Sequence[Sequence[float]] T: Sequence[Sequence[float]]
width: int width: int
height: int height: int
type: str type: Literal["pinhole", "fisheye"]
model: Literal["pinhole", "fisheye"] | CameraModel
CameraModelLike: TypeAlias = CameraModel | Literal["pinhole", "fisheye"]
CameraLike = Camera | CameraDict CameraLike = Camera | CameraDict
def _coerce_camera_model(model: CameraModelLike) -> CameraModel:
if isinstance(model, CameraModel):
return model
if model == "pinhole":
return CameraModel.PINHOLE
if model == "fisheye":
return CameraModel.FISHEYE
raise ValueError(f"Unsupported camera model: {model}")
def _coerce_distortion(distortion: VectorLike, camera_model: CameraModel) -> tuple[float, float, float, float, float]:
values = tuple(float(value) for value in distortion)
expected = 4 if camera_model is CameraModel.FISHEYE else 5
if len(values) not in {expected, 5}:
raise ValueError(
f"{camera_model.name.lower()} cameras require {expected} distortion coefficients"
+ (" (or 5 with a trailing zero)." if camera_model is CameraModel.FISHEYE else ".")
)
if camera_model is CameraModel.FISHEYE and len(values) == 4:
values = values + (0.0,)
if len(values) != 5:
raise ValueError("Distortion coefficients must normalize to exactly 5 values.")
return values
def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]: def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
"""Normalize mappings or existing Camera objects into bound Camera instances.""" """Normalize mappings or existing Camera objects into bound Camera instances."""
@@ -39,12 +66,13 @@ def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
camera = Camera() camera = Camera()
camera.name = str(cam["name"]) camera.name = str(cam["name"])
camera.K = cam["K"] camera.K = cam["K"]
camera.DC = cam["DC"] camera_model = _coerce_camera_model(cam.get("model", cam.get("type", "pinhole")))
camera.DC = _coerce_distortion(cam["DC"], camera_model)
camera.R = cam["R"] camera.R = cam["R"]
camera.T = cam["T"] camera.T = cam["T"]
camera.width = int(cam["width"]) camera.width = int(cam["width"])
camera.height = int(cam["height"]) camera.height = int(cam["height"])
camera.type = str(cam.get("type", "pinhole")) camera.model = camera_model
converted.append(camera) converted.append(camera)
return converted return converted
+70 -1
View File
@@ -52,7 +52,7 @@ def test_camera_structure_repr():
camera.T = [[1], [2], [3]] camera.T = [[1], [2], [3]]
camera.width = 640 camera.width = 640
camera.height = 480 camera.height = 480
camera.type = "pinhole" camera.model = rpt.CameraModel.PINHOLE
rendered = repr(camera) rendered = repr(camera)
assert "Camera 1" in rendered assert "Camera 1" in rendered
@@ -100,6 +100,75 @@ def test_triangulate_repeatability():
np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5)
def test_build_pair_candidates_exposes_cartesian_view_pairs():
poses_2d, person_counts = rpt.pack_poses_2d(
[
np.zeros((2, 2, 3), dtype=np.float32),
np.zeros((1, 2, 3), dtype=np.float32),
np.zeros((3, 2, 3), dtype=np.float32),
],
joint_count=2,
)
pairs = rpt.build_pair_candidates(poses_2d, person_counts)
assert len(pairs) == (2 * 1) + (2 * 3) + (1 * 3)
assert (pairs[0].view1, pairs[0].view2, pairs[0].person1, pairs[0].person2) == (0, 1, 0, 0)
assert pairs[-1].global_person2 == 5
def test_triangulate_accepts_empty_previous_poses():
poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json")
roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32)
empty_previous = np.zeros((0, len(JOINT_NAMES), 4), dtype=np.float32)
baseline = rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
with_previous = rpt.triangulate_poses(
poses_2d,
person_counts,
cameras,
roomparams,
JOINT_NAMES,
empty_previous,
)
np.testing.assert_allclose(with_previous, baseline, rtol=1e-5, atol=1e-5)
def test_triangulate_debug_matches_final_output():
poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)
final_poses = rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
trace = rpt.triangulate_debug(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
np.testing.assert_allclose(trace.final_poses, final_poses, rtol=1e-5, atol=1e-5)
assert len(trace.pairs) >= len(trace.core_proposals)
for group in trace.grouping.groups:
assert all(0 <= index < len(trace.core_proposals) for index in group.proposal_indices)
for merge_indices in trace.merge.group_proposal_indices:
assert all(0 <= index < len(trace.core_proposals) for index in merge_indices)
def test_filter_pairs_with_previous_poses_returns_debug_matches():
poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json")
roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32)
previous_poses = rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
debug = rpt.filter_pairs_with_previous_poses(
poses_2d,
person_counts,
cameras,
JOINT_NAMES,
previous_poses,
)
assert debug.used_previous_poses is True
assert len(debug.matches) == len(rpt.build_pair_candidates(poses_2d, person_counts))
assert len(debug.kept_pairs) == len(debug.kept_pair_indices)
assert any(match.matched_view1 or match.matched_view2 for match in debug.matches)
def test_triangulate_does_not_mutate_inputs(): def test_triangulate_does_not_mutate_inputs():
poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json") poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32) roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)