Refactor triangulation stages and camera model
This commit is contained in:
+292
-6
@@ -1,6 +1,8 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <nanobind/nanobind.h>
|
#include <nanobind/nanobind.h>
|
||||||
#include <nanobind/ndarray.h>
|
#include <nanobind/ndarray.h>
|
||||||
@@ -19,7 +21,10 @@ using PoseArray2D =
|
|||||||
nb::ndarray<nb::numpy, const float, nb::shape<-1, -1, -1, 3>, nb::c_contig>;
|
nb::ndarray<nb::numpy, const float, nb::shape<-1, -1, -1, 3>, nb::c_contig>;
|
||||||
using CountArray = nb::ndarray<nb::numpy, const uint32_t, nb::shape<-1>, nb::c_contig>;
|
using CountArray = nb::ndarray<nb::numpy, const uint32_t, nb::shape<-1>, nb::c_contig>;
|
||||||
using RoomArray = nb::ndarray<nb::numpy, const float, nb::shape<2, 3>, nb::c_contig>;
|
using RoomArray = nb::ndarray<nb::numpy, const float, nb::shape<2, 3>, nb::c_contig>;
|
||||||
|
using PoseArray3DConst =
|
||||||
|
nb::ndarray<nb::numpy, const float, nb::shape<-1, -1, 4>, nb::c_contig>;
|
||||||
using PoseArray3D = nb::ndarray<nb::numpy, float, nb::shape<-1, -1, 4>, nb::c_contig>;
|
using PoseArray3D = nb::ndarray<nb::numpy, float, nb::shape<-1, -1, 4>, nb::c_contig>;
|
||||||
|
using PoseArray2DOut = nb::ndarray<nb::numpy, float, nb::shape<-1, 4>, nb::c_contig>;
|
||||||
|
|
||||||
PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
|
PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const CountArray &person_counts)
|
||||||
{
|
{
|
||||||
@@ -45,6 +50,15 @@ PoseBatch2DView pose_batch_view_from_numpy(const PoseArray2D &poses_2d, const Co
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PoseBatch3DView pose_batch3d_view_from_numpy(const PoseArray3DConst &poses_3d)
|
||||||
|
{
|
||||||
|
return PoseBatch3DView {
|
||||||
|
poses_3d.data(),
|
||||||
|
static_cast<size_t>(poses_3d.shape(0)),
|
||||||
|
static_cast<size_t>(poses_3d.shape(1)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 2> roomparams_from_numpy(const RoomArray &roomparams)
|
std::array<std::array<float, 3>, 2> roomparams_from_numpy(const RoomArray &roomparams)
|
||||||
{
|
{
|
||||||
std::array<std::array<float, 3>, 2> result {};
|
std::array<std::array<float, 3>, 2> result {};
|
||||||
@@ -69,10 +83,75 @@ PoseArray3D pose_batch_to_numpy(PoseBatch3D batch)
|
|||||||
const size_t shape[3] = {batch.num_persons, batch.num_joints, 4};
|
const size_t shape[3] = {batch.num_persons, batch.num_joints, 4};
|
||||||
return PoseArray3D(storage->data(), 3, shape, owner);
|
return PoseArray3D(storage->data(), 3, shape, owner);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PoseArray3D pose_batch_to_numpy_copy(const PoseBatch3D &batch)
|
||||||
|
{
|
||||||
|
PoseBatch3D copy = batch;
|
||||||
|
return pose_batch_to_numpy(std::move(copy));
|
||||||
|
}
|
||||||
|
|
||||||
|
PoseArray2DOut pose_rows_to_numpy_copy(const std::vector<std::array<float, 4>> &rows)
|
||||||
|
{
|
||||||
|
auto *storage = new std::vector<float>(rows.size() * 4, 0.0f);
|
||||||
|
for (size_t row = 0; row < rows.size(); ++row)
|
||||||
|
{
|
||||||
|
for (size_t coord = 0; coord < 4; ++coord)
|
||||||
|
{
|
||||||
|
(*storage)[row * 4 + coord] = rows[row][coord];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nb::capsule owner(storage, [](void *value) noexcept
|
||||||
|
{
|
||||||
|
delete static_cast<std::vector<float> *>(value);
|
||||||
|
});
|
||||||
|
|
||||||
|
const size_t shape[2] = {rows.size(), 4};
|
||||||
|
return PoseArray2DOut(storage->data(), 2, shape, owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
PoseArray3D merged_poses_to_numpy_copy(const std::vector<std::vector<std::array<float, 4>>> &poses)
|
||||||
|
{
|
||||||
|
size_t num_poses = poses.size();
|
||||||
|
size_t num_joints = 0;
|
||||||
|
if (!poses.empty())
|
||||||
|
{
|
||||||
|
num_joints = poses[0].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto *storage = new std::vector<float>(num_poses * num_joints * 4, 0.0f);
|
||||||
|
for (size_t pose = 0; pose < num_poses; ++pose)
|
||||||
|
{
|
||||||
|
if (poses[pose].size() != num_joints)
|
||||||
|
{
|
||||||
|
delete storage;
|
||||||
|
throw std::invalid_argument("Merged poses must use a consistent joint count.");
|
||||||
|
}
|
||||||
|
for (size_t joint = 0; joint < num_joints; ++joint)
|
||||||
|
{
|
||||||
|
for (size_t coord = 0; coord < 4; ++coord)
|
||||||
|
{
|
||||||
|
(*storage)[((pose * num_joints) + joint) * 4 + coord] = poses[pose][joint][coord];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nb::capsule owner(storage, [](void *value) noexcept
|
||||||
|
{
|
||||||
|
delete static_cast<std::vector<float> *>(value);
|
||||||
|
});
|
||||||
|
|
||||||
|
const size_t shape[3] = {num_poses, num_joints, 4};
|
||||||
|
return PoseArray3D(storage->data(), 3, shape, owner);
|
||||||
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
NB_MODULE(_core, m)
|
NB_MODULE(_core, m)
|
||||||
{
|
{
|
||||||
|
nb::enum_<CameraModel>(m, "CameraModel")
|
||||||
|
.value("PINHOLE", CameraModel::Pinhole)
|
||||||
|
.value("FISHEYE", CameraModel::Fisheye);
|
||||||
|
|
||||||
nb::class_<Camera>(m, "Camera")
|
nb::class_<Camera>(m, "Camera")
|
||||||
.def(nb::init<>())
|
.def(nb::init<>())
|
||||||
.def_rw("name", &Camera::name)
|
.def_rw("name", &Camera::name)
|
||||||
@@ -82,12 +161,184 @@ NB_MODULE(_core, m)
|
|||||||
.def_rw("T", &Camera::T)
|
.def_rw("T", &Camera::T)
|
||||||
.def_rw("width", &Camera::width)
|
.def_rw("width", &Camera::width)
|
||||||
.def_rw("height", &Camera::height)
|
.def_rw("height", &Camera::height)
|
||||||
.def_rw("type", &Camera::type)
|
.def_rw("model", &Camera::model)
|
||||||
.def("__repr__", [](const Camera &camera)
|
.def("__repr__", [](const Camera &camera)
|
||||||
{
|
{
|
||||||
return camera.to_string();
|
return camera.to_string();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
nb::class_<PairCandidate>(m, "PairCandidate")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("view1", &PairCandidate::view1)
|
||||||
|
.def_rw("view2", &PairCandidate::view2)
|
||||||
|
.def_rw("person1", &PairCandidate::person1)
|
||||||
|
.def_rw("person2", &PairCandidate::person2)
|
||||||
|
.def_rw("global_person1", &PairCandidate::global_person1)
|
||||||
|
.def_rw("global_person2", &PairCandidate::global_person2);
|
||||||
|
|
||||||
|
nb::class_<PreviousPoseMatch>(m, "PreviousPoseMatch")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("previous_pose_index", &PreviousPoseMatch::previous_pose_index)
|
||||||
|
.def_rw("score_view1", &PreviousPoseMatch::score_view1)
|
||||||
|
.def_rw("score_view2", &PreviousPoseMatch::score_view2)
|
||||||
|
.def_rw("matched_view1", &PreviousPoseMatch::matched_view1)
|
||||||
|
.def_rw("matched_view2", &PreviousPoseMatch::matched_view2)
|
||||||
|
.def_rw("kept", &PreviousPoseMatch::kept)
|
||||||
|
.def_rw("decision", &PreviousPoseMatch::decision);
|
||||||
|
|
||||||
|
nb::class_<PreviousPoseFilterDebug>(m, "PreviousPoseFilterDebug")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("used_previous_poses", &PreviousPoseFilterDebug::used_previous_poses)
|
||||||
|
.def_rw("matches", &PreviousPoseFilterDebug::matches)
|
||||||
|
.def_rw("kept_pair_indices", &PreviousPoseFilterDebug::kept_pair_indices)
|
||||||
|
.def_rw("kept_pairs", &PreviousPoseFilterDebug::kept_pairs);
|
||||||
|
|
||||||
|
nb::class_<CoreProposalDebug>(m, "CoreProposalDebug")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("pair_index", &CoreProposalDebug::pair_index)
|
||||||
|
.def_rw("pair", &CoreProposalDebug::pair)
|
||||||
|
.def_rw("score", &CoreProposalDebug::score)
|
||||||
|
.def_rw("kept", &CoreProposalDebug::kept)
|
||||||
|
.def_rw("drop_reason", &CoreProposalDebug::drop_reason)
|
||||||
|
.def_prop_ro("pose_3d", [](const CoreProposalDebug &proposal)
|
||||||
|
{
|
||||||
|
return pose_rows_to_numpy_copy(proposal.pose_3d);
|
||||||
|
}, nb::rv_policy::move);
|
||||||
|
|
||||||
|
nb::class_<ProposalGroupDebug>(m, "ProposalGroupDebug")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("center", &ProposalGroupDebug::center)
|
||||||
|
.def_rw("proposal_indices", &ProposalGroupDebug::proposal_indices)
|
||||||
|
.def_prop_ro("pose_3d", [](const ProposalGroupDebug &group)
|
||||||
|
{
|
||||||
|
return pose_rows_to_numpy_copy(group.pose_3d);
|
||||||
|
}, nb::rv_policy::move);
|
||||||
|
|
||||||
|
nb::class_<GroupingDebug>(m, "GroupingDebug")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("initial_groups", &GroupingDebug::initial_groups)
|
||||||
|
.def_rw("duplicate_pair_drops", &GroupingDebug::duplicate_pair_drops)
|
||||||
|
.def_rw("groups", &GroupingDebug::groups);
|
||||||
|
|
||||||
|
nb::class_<FullProposalDebug>(m, "FullProposalDebug")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("source_core_proposal_index", &FullProposalDebug::source_core_proposal_index)
|
||||||
|
.def_rw("pair", &FullProposalDebug::pair)
|
||||||
|
.def_prop_ro("pose_3d", [](const FullProposalDebug &proposal)
|
||||||
|
{
|
||||||
|
return pose_rows_to_numpy_copy(proposal.pose_3d);
|
||||||
|
}, nb::rv_policy::move);
|
||||||
|
|
||||||
|
nb::class_<MergeDebug>(m, "MergeDebug")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("group_proposal_indices", &MergeDebug::group_proposal_indices)
|
||||||
|
.def_prop_ro("merged_poses", [](const MergeDebug &merge)
|
||||||
|
{
|
||||||
|
return merged_poses_to_numpy_copy(merge.merged_poses);
|
||||||
|
}, nb::rv_policy::move);
|
||||||
|
|
||||||
|
nb::class_<TriangulationTrace>(m, "TriangulationTrace")
|
||||||
|
.def(nb::init<>())
|
||||||
|
.def_rw("pairs", &TriangulationTrace::pairs)
|
||||||
|
.def_rw("previous_filter", &TriangulationTrace::previous_filter)
|
||||||
|
.def_rw("core_proposals", &TriangulationTrace::core_proposals)
|
||||||
|
.def_rw("grouping", &TriangulationTrace::grouping)
|
||||||
|
.def_rw("full_proposals", &TriangulationTrace::full_proposals)
|
||||||
|
.def_rw("merge", &TriangulationTrace::merge)
|
||||||
|
.def_prop_ro("final_poses", [](const TriangulationTrace &trace)
|
||||||
|
{
|
||||||
|
return pose_batch_to_numpy_copy(trace.final_poses);
|
||||||
|
}, nb::rv_policy::move);
|
||||||
|
|
||||||
|
m.def(
|
||||||
|
"build_pair_candidates",
|
||||||
|
[](const PoseArray2D &poses_2d, const CountArray &person_counts)
|
||||||
|
{
|
||||||
|
return build_pair_candidates(pose_batch_view_from_numpy(poses_2d, person_counts));
|
||||||
|
},
|
||||||
|
"poses_2d"_a,
|
||||||
|
"person_counts"_a);
|
||||||
|
|
||||||
|
m.def(
|
||||||
|
"filter_pairs_with_previous_poses",
|
||||||
|
[](const PoseArray2D &poses_2d,
|
||||||
|
const CountArray &person_counts,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseArray3DConst &previous_poses_3d,
|
||||||
|
float min_match_score)
|
||||||
|
{
|
||||||
|
return filter_pairs_with_previous_poses(
|
||||||
|
pose_batch_view_from_numpy(poses_2d, person_counts),
|
||||||
|
cameras,
|
||||||
|
joint_names,
|
||||||
|
pose_batch3d_view_from_numpy(previous_poses_3d),
|
||||||
|
min_match_score);
|
||||||
|
},
|
||||||
|
"poses_2d"_a,
|
||||||
|
"person_counts"_a,
|
||||||
|
"cameras"_a,
|
||||||
|
"joint_names"_a,
|
||||||
|
"previous_poses_3d"_a,
|
||||||
|
"min_match_score"_a = 0.95f);
|
||||||
|
|
||||||
|
m.def(
|
||||||
|
"triangulate_debug",
|
||||||
|
[](const PoseArray2D &poses_2d,
|
||||||
|
const CountArray &person_counts,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const RoomArray &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
float min_match_score,
|
||||||
|
size_t min_group_size)
|
||||||
|
{
|
||||||
|
return triangulate_debug(
|
||||||
|
pose_batch_view_from_numpy(poses_2d, person_counts),
|
||||||
|
cameras,
|
||||||
|
roomparams_from_numpy(roomparams),
|
||||||
|
joint_names,
|
||||||
|
nullptr,
|
||||||
|
min_match_score,
|
||||||
|
min_group_size);
|
||||||
|
},
|
||||||
|
"poses_2d"_a,
|
||||||
|
"person_counts"_a,
|
||||||
|
"cameras"_a,
|
||||||
|
"roomparams"_a,
|
||||||
|
"joint_names"_a,
|
||||||
|
"min_match_score"_a = 0.95f,
|
||||||
|
"min_group_size"_a = 1);
|
||||||
|
|
||||||
|
m.def(
|
||||||
|
"triangulate_debug",
|
||||||
|
[](const PoseArray2D &poses_2d,
|
||||||
|
const CountArray &person_counts,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const RoomArray &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseArray3DConst &previous_poses_3d,
|
||||||
|
float min_match_score,
|
||||||
|
size_t min_group_size)
|
||||||
|
{
|
||||||
|
const PoseBatch3DView previous_view = pose_batch3d_view_from_numpy(previous_poses_3d);
|
||||||
|
return triangulate_debug(
|
||||||
|
pose_batch_view_from_numpy(poses_2d, person_counts),
|
||||||
|
cameras,
|
||||||
|
roomparams_from_numpy(roomparams),
|
||||||
|
joint_names,
|
||||||
|
&previous_view,
|
||||||
|
min_match_score,
|
||||||
|
min_group_size);
|
||||||
|
},
|
||||||
|
"poses_2d"_a,
|
||||||
|
"person_counts"_a,
|
||||||
|
"cameras"_a,
|
||||||
|
"roomparams"_a,
|
||||||
|
"joint_names"_a,
|
||||||
|
"previous_poses_3d"_a,
|
||||||
|
"min_match_score"_a = 0.95f,
|
||||||
|
"min_group_size"_a = 1);
|
||||||
|
|
||||||
m.def(
|
m.def(
|
||||||
"triangulate_poses",
|
"triangulate_poses",
|
||||||
[](const PoseArray2D &poses_2d,
|
[](const PoseArray2D &poses_2d,
|
||||||
@@ -98,11 +349,15 @@ NB_MODULE(_core, m)
|
|||||||
float min_match_score,
|
float min_match_score,
|
||||||
size_t min_group_size)
|
size_t min_group_size)
|
||||||
{
|
{
|
||||||
const PoseBatch2DView pose_batch = pose_batch_view_from_numpy(poses_2d, person_counts);
|
const PoseBatch3D poses_3d = triangulate_poses(
|
||||||
const auto room = roomparams_from_numpy(roomparams);
|
pose_batch_view_from_numpy(poses_2d, person_counts),
|
||||||
PoseBatch3D poses_3d = triangulate_poses(
|
cameras,
|
||||||
pose_batch, cameras, room, joint_names, min_match_score, min_group_size);
|
roomparams_from_numpy(roomparams),
|
||||||
return pose_batch_to_numpy(std::move(poses_3d));
|
joint_names,
|
||||||
|
nullptr,
|
||||||
|
min_match_score,
|
||||||
|
min_group_size);
|
||||||
|
return pose_batch_to_numpy(poses_3d);
|
||||||
},
|
},
|
||||||
"poses_2d"_a,
|
"poses_2d"_a,
|
||||||
"person_counts"_a,
|
"person_counts"_a,
|
||||||
@@ -111,4 +366,35 @@ NB_MODULE(_core, m)
|
|||||||
"joint_names"_a,
|
"joint_names"_a,
|
||||||
"min_match_score"_a = 0.95f,
|
"min_match_score"_a = 0.95f,
|
||||||
"min_group_size"_a = 1);
|
"min_group_size"_a = 1);
|
||||||
|
|
||||||
|
m.def(
|
||||||
|
"triangulate_poses",
|
||||||
|
[](const PoseArray2D &poses_2d,
|
||||||
|
const CountArray &person_counts,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const RoomArray &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseArray3DConst &previous_poses_3d,
|
||||||
|
float min_match_score,
|
||||||
|
size_t min_group_size)
|
||||||
|
{
|
||||||
|
const PoseBatch3DView previous_view = pose_batch3d_view_from_numpy(previous_poses_3d);
|
||||||
|
const PoseBatch3D poses_3d = triangulate_poses(
|
||||||
|
pose_batch_view_from_numpy(poses_2d, person_counts),
|
||||||
|
cameras,
|
||||||
|
roomparams_from_numpy(roomparams),
|
||||||
|
joint_names,
|
||||||
|
&previous_view,
|
||||||
|
min_match_score,
|
||||||
|
min_group_size);
|
||||||
|
return pose_batch_to_numpy(poses_3d);
|
||||||
|
},
|
||||||
|
"poses_2d"_a,
|
||||||
|
"person_counts"_a,
|
||||||
|
"cameras"_a,
|
||||||
|
"roomparams"_a,
|
||||||
|
"joint_names"_a,
|
||||||
|
"previous_poses_3d"_a,
|
||||||
|
"min_match_score"_a = 0.95f,
|
||||||
|
"min_group_size"_a = 1);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,19 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
#include "camera.hpp"
|
||||||
|
|
||||||
|
struct CachedCamera
|
||||||
|
{
|
||||||
|
const Camera cam;
|
||||||
|
const std::array<std::array<float, 3>, 3> invR;
|
||||||
|
const std::array<float, 3> center;
|
||||||
|
const std::array<std::array<float, 3>, 3> newK;
|
||||||
|
const std::array<std::array<float, 3>, 3> invK;
|
||||||
|
};
|
||||||
|
|
||||||
|
CachedCamera cache_camera(const Camera &camera);
|
||||||
|
|
||||||
|
void undistort_point_pinhole(std::array<float, 3> &point, const std::array<float, 5> &distortion);
|
||||||
|
void undistort_point_fisheye(std::array<float, 3> &point, const std::array<float, 5> &distortion);
|
||||||
+97
-78
@@ -1,11 +1,60 @@
|
|||||||
#include <array>
|
#include <array>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
#include <stdexcept>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "camera.hpp"
|
#include "cached_camera.hpp"
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
std::array<std::array<float, 3>, 3> transpose3x3(const std::array<std::array<float, 3>, 3> &M)
|
||||||
|
{
|
||||||
|
return {{{M[0][0], M[1][0], M[2][0]},
|
||||||
|
{M[0][1], M[1][1], M[2][1]},
|
||||||
|
{M[0][2], M[1][2], M[2][2]}}};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<std::array<float, 3>, 3> invert3x3(const std::array<std::array<float, 3>, 3> &M)
|
||||||
|
{
|
||||||
|
std::array<std::array<float, 3>, 3> adj = {
|
||||||
|
{{
|
||||||
|
M[1][1] * M[2][2] - M[1][2] * M[2][1],
|
||||||
|
M[0][2] * M[2][1] - M[0][1] * M[2][2],
|
||||||
|
M[0][1] * M[1][2] - M[0][2] * M[1][1],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
M[1][2] * M[2][0] - M[1][0] * M[2][2],
|
||||||
|
M[0][0] * M[2][2] - M[0][2] * M[2][0],
|
||||||
|
M[0][2] * M[1][0] - M[0][0] * M[1][2],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
M[1][0] * M[2][1] - M[1][1] * M[2][0],
|
||||||
|
M[0][1] * M[2][0] - M[0][0] * M[2][1],
|
||||||
|
M[0][0] * M[1][1] - M[0][1] * M[1][0],
|
||||||
|
}}};
|
||||||
|
|
||||||
|
float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
|
||||||
|
if (std::fabs(det) < 1e-6f)
|
||||||
|
{
|
||||||
|
return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}};
|
||||||
|
}
|
||||||
|
|
||||||
|
float idet = 1.0f / det;
|
||||||
|
return {{
|
||||||
|
{{adj[0][0] * idet, adj[0][1] * idet, adj[0][2] * idet}},
|
||||||
|
{{adj[1][0] * idet, adj[1][1] * idet, adj[1][2] * idet}},
|
||||||
|
{{adj[2][0] * idet, adj[2][1] * idet, adj[2][2] * idet}},
|
||||||
|
}};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
|
||||||
|
const Camera &cam, float balance, std::pair<int, int> new_size);
|
||||||
|
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
|
||||||
|
const Camera &cam, float alpha, std::pair<int, int> new_size);
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
@@ -63,7 +112,7 @@ std::string Camera::to_string() const
|
|||||||
|
|
||||||
out << "'width': " << width << ", ";
|
out << "'width': " << width << ", ";
|
||||||
out << "'height': " << height << ", ";
|
out << "'height': " << height << ", ";
|
||||||
out << "'type': " << type;
|
out << "'model': '" << camera_model_name(model) << "'";
|
||||||
|
|
||||||
out << "}";
|
out << "}";
|
||||||
return out.str();
|
return out.str();
|
||||||
@@ -71,6 +120,33 @@ std::string Camera::to_string() const
|
|||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
const char *camera_model_name(CameraModel model)
|
||||||
|
{
|
||||||
|
switch (model)
|
||||||
|
{
|
||||||
|
case CameraModel::Pinhole:
|
||||||
|
return "pinhole";
|
||||||
|
case CameraModel::Fisheye:
|
||||||
|
return "fisheye";
|
||||||
|
}
|
||||||
|
return "unknown";
|
||||||
|
}
|
||||||
|
|
||||||
|
CameraModel parse_camera_model(const std::string &value)
|
||||||
|
{
|
||||||
|
if (value == "pinhole")
|
||||||
|
{
|
||||||
|
return CameraModel::Pinhole;
|
||||||
|
}
|
||||||
|
if (value == "fisheye")
|
||||||
|
{
|
||||||
|
return CameraModel::Fisheye;
|
||||||
|
}
|
||||||
|
throw std::invalid_argument("Unsupported camera model: " + value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
std::ostream &operator<<(std::ostream &out, const Camera &cam)
|
std::ostream &operator<<(std::ostream &out, const Camera &cam)
|
||||||
{
|
{
|
||||||
out << cam.to_string();
|
out << cam.to_string();
|
||||||
@@ -80,93 +156,33 @@ std::ostream &operator<<(std::ostream &out, const Camera &cam)
|
|||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
CameraInternal::CameraInternal(const Camera &cam)
|
CachedCamera cache_camera(const Camera &cam)
|
||||||
{
|
{
|
||||||
this->cam = cam;
|
const std::array<std::array<float, 3>, 3> invR = transpose3x3(cam.R);
|
||||||
this->invR = transpose3x3(cam.R);
|
|
||||||
|
|
||||||
// Camera center:
|
// Camera center:
|
||||||
// C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
|
// C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
|
||||||
this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
|
const std::array<float, 3> center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
|
||||||
|
|
||||||
// Undistort camera matrix
|
// Undistort camera matrix
|
||||||
// As with the undistortion, the own implementation avoids some overhead compared to OpenCV
|
// As with the undistortion, the own implementation avoids some overhead compared to OpenCV
|
||||||
if (cam.type == "fisheye")
|
std::array<std::array<float, 3>, 3> newK;
|
||||||
|
if (cam.model == CameraModel::Fisheye)
|
||||||
{
|
{
|
||||||
newK = calc_optimal_camera_matrix_fisheye(1.0, {cam.width, cam.height});
|
newK = calc_optimal_camera_matrix_fisheye(cam, 1.0f, {cam.width, cam.height});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
newK = calc_optimal_camera_matrix_pinhole(1.0, {cam.width, cam.height});
|
newK = calc_optimal_camera_matrix_pinhole(cam, 1.0f, {cam.width, cam.height});
|
||||||
}
|
}
|
||||||
this->invK = invert3x3(newK);
|
const std::array<std::array<float, 3>, 3> invK = invert3x3(newK);
|
||||||
|
|
||||||
|
return CachedCamera {cam, invR, center, newK, invK};
|
||||||
}
|
}
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> CameraInternal::transpose3x3(
|
void undistort_point_pinhole(std::array<float, 3> &p, const std::array<float, 5> &k)
|
||||||
const std::array<std::array<float, 3>, 3> &M)
|
|
||||||
{
|
|
||||||
return {{{M[0][0], M[1][0], M[2][0]},
|
|
||||||
{M[0][1], M[1][1], M[2][1]},
|
|
||||||
{M[0][2], M[1][2], M[2][2]}}};
|
|
||||||
}
|
|
||||||
|
|
||||||
// =================================================================================================
|
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> CameraInternal::invert3x3(
|
|
||||||
const std::array<std::array<float, 3>, 3> &M)
|
|
||||||
{
|
|
||||||
// Compute the inverse using the adjugate method
|
|
||||||
// See: https://scicomp.stackexchange.com/a/29206
|
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> adj = {
|
|
||||||
{{
|
|
||||||
M[1][1] * M[2][2] - M[1][2] * M[2][1],
|
|
||||||
M[0][2] * M[2][1] - M[0][1] * M[2][2],
|
|
||||||
M[0][1] * M[1][2] - M[0][2] * M[1][1],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
M[1][2] * M[2][0] - M[1][0] * M[2][2],
|
|
||||||
M[0][0] * M[2][2] - M[0][2] * M[2][0],
|
|
||||||
M[0][2] * M[1][0] - M[0][0] * M[1][2],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
M[1][0] * M[2][1] - M[1][1] * M[2][0],
|
|
||||||
M[0][1] * M[2][0] - M[0][0] * M[2][1],
|
|
||||||
M[0][0] * M[1][1] - M[0][1] * M[1][0],
|
|
||||||
}}};
|
|
||||||
|
|
||||||
float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0];
|
|
||||||
if (std::fabs(det) < 1e-6f)
|
|
||||||
{
|
|
||||||
return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}};
|
|
||||||
}
|
|
||||||
|
|
||||||
float idet = 1.0f / det;
|
|
||||||
std::array<std::array<float, 3>, 3> inv = {
|
|
||||||
{{
|
|
||||||
adj[0][0] * idet,
|
|
||||||
adj[0][1] * idet,
|
|
||||||
adj[0][2] * idet,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
adj[1][0] * idet,
|
|
||||||
adj[1][1] * idet,
|
|
||||||
adj[1][2] * idet,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
adj[2][0] * idet,
|
|
||||||
adj[2][1] * idet,
|
|
||||||
adj[2][2] * idet,
|
|
||||||
}}};
|
|
||||||
|
|
||||||
return inv;
|
|
||||||
}
|
|
||||||
|
|
||||||
// =================================================================================================
|
|
||||||
|
|
||||||
void CameraInternal::undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k)
|
|
||||||
{
|
{
|
||||||
// Following: cv::cvUndistortPointsInternal
|
// Following: cv::cvUndistortPointsInternal
|
||||||
// Uses only the distortion coefficients: [k1, k2, p1, p2, k3]
|
// Uses only the distortion coefficients: [k1, k2, p1, p2, k3]
|
||||||
@@ -202,7 +218,7 @@ void CameraInternal::undistort_point_pinhole(std::array<float, 3> &p, const std:
|
|||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
void CameraInternal::undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k)
|
void undistort_point_fisheye(std::array<float, 3> &p, const std::array<float, 5> &k)
|
||||||
{
|
{
|
||||||
// Following: cv::fisheye::undistortPoints
|
// Following: cv::fisheye::undistortPoints
|
||||||
// Uses only the distortion coefficients: [k1, k2, k3, k4]
|
// Uses only the distortion coefficients: [k1, k2, k3, k4]
|
||||||
@@ -250,8 +266,10 @@ void CameraInternal::undistort_point_fisheye(std::array<float, 3> &p, const std:
|
|||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_fisheye(
|
namespace
|
||||||
float balance, std::pair<int, int> new_size)
|
{
|
||||||
|
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
|
||||||
|
const Camera &cam, float balance, std::pair<int, int> new_size)
|
||||||
{
|
{
|
||||||
// Following: cv::fisheye::estimateNewCameraMatrixForUndistortRectify
|
// Following: cv::fisheye::estimateNewCameraMatrixForUndistortRectify
|
||||||
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L630
|
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/fisheye.cpp#L630
|
||||||
@@ -355,8 +373,8 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_f
|
|||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_pinhole(
|
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
|
||||||
float alpha, std::pair<int, int> new_size)
|
const Camera &cam, float alpha, std::pair<int, int> new_size)
|
||||||
{
|
{
|
||||||
// Following: cv::getOptimalNewCameraMatrix
|
// Following: cv::getOptimalNewCameraMatrix
|
||||||
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/calibration_base.cpp#L1565
|
// https://github.com/opencv/opencv/blob/4.x/modules/calib3d/src/calibration_base.cpp#L1565
|
||||||
@@ -479,3 +497,4 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
|
|||||||
{0.0, 0.0, 1.0}}};
|
{0.0, 0.0, 1.0}}};
|
||||||
return newK;
|
return newK;
|
||||||
}
|
}
|
||||||
|
} // namespace
|
||||||
|
|||||||
+13
-31
@@ -7,46 +7,28 @@
|
|||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
enum class CameraModel
|
||||||
|
{
|
||||||
|
Pinhole,
|
||||||
|
Fisheye,
|
||||||
|
};
|
||||||
|
|
||||||
|
const char *camera_model_name(CameraModel model);
|
||||||
|
CameraModel parse_camera_model(const std::string &value);
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
struct Camera
|
struct Camera
|
||||||
{
|
{
|
||||||
std::string name;
|
std::string name;
|
||||||
std::array<std::array<float, 3>, 3> K;
|
std::array<std::array<float, 3>, 3> K;
|
||||||
std::vector<float> DC;
|
std::array<float, 5> DC = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
std::array<std::array<float, 3>, 3> R;
|
std::array<std::array<float, 3>, 3> R;
|
||||||
std::array<std::array<float, 1>, 3> T;
|
std::array<std::array<float, 1>, 3> T;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
std::string type;
|
CameraModel model = CameraModel::Pinhole;
|
||||||
|
|
||||||
friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
|
friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
|
||||||
std::string to_string() const;
|
std::string to_string() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
// =================================================================================================
|
|
||||||
|
|
||||||
class CameraInternal
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
CameraInternal(const Camera &cam);
|
|
||||||
|
|
||||||
Camera cam;
|
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> invR;
|
|
||||||
std::array<float, 3> center;
|
|
||||||
std::array<std::array<float, 3>, 3> newK;
|
|
||||||
std::array<std::array<float, 3>, 3> invK;
|
|
||||||
|
|
||||||
static std::array<std::array<float, 3>, 3> transpose3x3(
|
|
||||||
const std::array<std::array<float, 3>, 3> &M);
|
|
||||||
|
|
||||||
static std::array<std::array<float, 3>, 3> invert3x3(
|
|
||||||
const std::array<std::array<float, 3>, 3> &M);
|
|
||||||
|
|
||||||
static void undistort_point_pinhole(std::array<float, 3> &p, const std::vector<float> &k);
|
|
||||||
static void undistort_point_fisheye(std::array<float, 3> &p, const std::vector<float> &k);
|
|
||||||
|
|
||||||
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_fisheye(
|
|
||||||
float balance, std::pair<int, int> new_size);
|
|
||||||
std::array<std::array<float, 3>, 3> calc_optimal_camera_matrix_pinhole(
|
|
||||||
float alpha, std::pair<int, int> new_size);
|
|
||||||
};
|
|
||||||
|
|||||||
@@ -38,6 +38,11 @@ const float &PoseBatch2DView::at(size_t view, size_t person, size_t joint, size_
|
|||||||
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
|
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const float &PoseBatch3DView::at(size_t person, size_t joint, size_t coord) const
|
||||||
|
{
|
||||||
|
return data[pose3d_offset(person, joint, coord, num_joints)];
|
||||||
|
}
|
||||||
|
|
||||||
const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
|
const float &PoseBatch2D::at(size_t view, size_t person, size_t joint, size_t coord) const
|
||||||
{
|
{
|
||||||
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
|
return data[pose2d_offset(view, person, joint, coord, max_persons, num_joints)];
|
||||||
@@ -109,6 +114,11 @@ const float &PoseBatch3D::at(size_t person, size_t joint, size_t coord) const
|
|||||||
return data[pose3d_offset(person, joint, coord, num_joints)];
|
return data[pose3d_offset(person, joint, coord, num_joints)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PoseBatch3DView PoseBatch3D::view() const
|
||||||
|
{
|
||||||
|
return PoseBatch3DView {data.data(), num_persons, num_joints};
|
||||||
|
}
|
||||||
|
|
||||||
NestedPoses3D PoseBatch3D::to_nested() const
|
NestedPoses3D PoseBatch3D::to_nested() const
|
||||||
{
|
{
|
||||||
NestedPoses3D poses_3d(num_persons);
|
NestedPoses3D poses_3d(num_persons);
|
||||||
|
|||||||
+150
-1
@@ -25,6 +25,15 @@ struct PoseBatch2DView
|
|||||||
const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
|
const float &at(size_t view, size_t person, size_t joint, size_t coord) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PoseBatch3DView
|
||||||
|
{
|
||||||
|
const float *data = nullptr;
|
||||||
|
size_t num_persons = 0;
|
||||||
|
size_t num_joints = 0;
|
||||||
|
|
||||||
|
const float &at(size_t person, size_t joint, size_t coord) const;
|
||||||
|
};
|
||||||
|
|
||||||
struct PoseBatch2D
|
struct PoseBatch2D
|
||||||
{
|
{
|
||||||
std::vector<float> data;
|
std::vector<float> data;
|
||||||
@@ -48,6 +57,7 @@ struct PoseBatch3D
|
|||||||
|
|
||||||
float &at(size_t person, size_t joint, size_t coord);
|
float &at(size_t person, size_t joint, size_t coord);
|
||||||
const float &at(size_t person, size_t joint, size_t coord) const;
|
const float &at(size_t person, size_t joint, size_t coord) const;
|
||||||
|
PoseBatch3DView view() const;
|
||||||
|
|
||||||
NestedPoses3D to_nested() const;
|
NestedPoses3D to_nested() const;
|
||||||
static PoseBatch3D from_nested(const NestedPoses3D &poses_3d);
|
static PoseBatch3D from_nested(const NestedPoses3D &poses_3d);
|
||||||
@@ -55,6 +65,136 @@ struct PoseBatch3D
|
|||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
struct PairCandidate
|
||||||
|
{
|
||||||
|
int view1 = -1;
|
||||||
|
int view2 = -1;
|
||||||
|
int person1 = -1;
|
||||||
|
int person2 = -1;
|
||||||
|
int global_person1 = -1;
|
||||||
|
int global_person2 = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PreviousPoseMatch
|
||||||
|
{
|
||||||
|
int previous_pose_index = -1;
|
||||||
|
float score_view1 = 0.0f;
|
||||||
|
float score_view2 = 0.0f;
|
||||||
|
bool matched_view1 = false;
|
||||||
|
bool matched_view2 = false;
|
||||||
|
bool kept = true;
|
||||||
|
std::string decision = "unfiltered";
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PreviousPoseFilterDebug
|
||||||
|
{
|
||||||
|
bool used_previous_poses = false;
|
||||||
|
std::vector<PreviousPoseMatch> matches;
|
||||||
|
std::vector<int> kept_pair_indices;
|
||||||
|
std::vector<PairCandidate> kept_pairs;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CoreProposalDebug
|
||||||
|
{
|
||||||
|
int pair_index = -1;
|
||||||
|
PairCandidate pair;
|
||||||
|
std::vector<std::array<float, 4>> pose_3d;
|
||||||
|
float score = 0.0f;
|
||||||
|
bool kept = false;
|
||||||
|
std::string drop_reason = "uninitialized";
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ProposalGroupDebug
|
||||||
|
{
|
||||||
|
std::array<float, 3> center = {0.0f, 0.0f, 0.0f};
|
||||||
|
std::vector<std::array<float, 4>> pose_3d;
|
||||||
|
std::vector<int> proposal_indices;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GroupingDebug
|
||||||
|
{
|
||||||
|
std::vector<ProposalGroupDebug> initial_groups;
|
||||||
|
std::vector<int> duplicate_pair_drops;
|
||||||
|
std::vector<ProposalGroupDebug> groups;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FullProposalDebug
|
||||||
|
{
|
||||||
|
int source_core_proposal_index = -1;
|
||||||
|
PairCandidate pair;
|
||||||
|
std::vector<std::array<float, 4>> pose_3d;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MergeDebug
|
||||||
|
{
|
||||||
|
std::vector<std::vector<std::array<float, 4>>> merged_poses;
|
||||||
|
std::vector<std::vector<int>> group_proposal_indices;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TriangulationTrace
|
||||||
|
{
|
||||||
|
std::vector<PairCandidate> pairs;
|
||||||
|
PreviousPoseFilterDebug previous_filter;
|
||||||
|
std::vector<CoreProposalDebug> core_proposals;
|
||||||
|
GroupingDebug grouping;
|
||||||
|
std::vector<FullProposalDebug> full_proposals;
|
||||||
|
MergeDebug merge;
|
||||||
|
PoseBatch3D final_poses;
|
||||||
|
};
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d);
|
||||||
|
|
||||||
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView &previous_poses_3d,
|
||||||
|
float min_match_score = 0.95f);
|
||||||
|
|
||||||
|
inline PreviousPoseFilterDebug filter_pairs_with_previous_poses(
|
||||||
|
const PoseBatch2D &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3D &previous_poses_3d,
|
||||||
|
float min_match_score = 0.95f)
|
||||||
|
{
|
||||||
|
return filter_pairs_with_previous_poses(
|
||||||
|
poses_2d.view(), cameras, joint_names, previous_poses_3d.view(), min_match_score);
|
||||||
|
}
|
||||||
|
|
||||||
|
TriangulationTrace triangulate_debug(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView *previous_poses_3d = nullptr,
|
||||||
|
float min_match_score = 0.95f,
|
||||||
|
size_t min_group_size = 1);
|
||||||
|
|
||||||
|
inline TriangulationTrace triangulate_debug(
|
||||||
|
const PoseBatch2D &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3D *previous_poses_3d = nullptr,
|
||||||
|
float min_match_score = 0.95f,
|
||||||
|
size_t min_group_size = 1)
|
||||||
|
{
|
||||||
|
PoseBatch3DView previous_view_storage;
|
||||||
|
const PoseBatch3DView *previous_view = nullptr;
|
||||||
|
if (previous_poses_3d != nullptr)
|
||||||
|
{
|
||||||
|
previous_view_storage = previous_poses_3d->view();
|
||||||
|
previous_view = &previous_view_storage;
|
||||||
|
}
|
||||||
|
return triangulate_debug(
|
||||||
|
poses_2d.view(), cameras, roomparams, joint_names, previous_view, min_match_score, min_group_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate a triangulation using a padded pose tensor.
|
* Calculate a triangulation using a padded pose tensor.
|
||||||
*
|
*
|
||||||
@@ -72,6 +212,7 @@ PoseBatch3D triangulate_poses(
|
|||||||
const std::vector<Camera> &cameras,
|
const std::vector<Camera> &cameras,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
const std::vector<std::string> &joint_names,
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView *previous_poses_3d = nullptr,
|
||||||
float min_match_score = 0.95f,
|
float min_match_score = 0.95f,
|
||||||
size_t min_group_size = 1);
|
size_t min_group_size = 1);
|
||||||
|
|
||||||
@@ -80,9 +221,17 @@ inline PoseBatch3D triangulate_poses(
|
|||||||
const std::vector<Camera> &cameras,
|
const std::vector<Camera> &cameras,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
const std::vector<std::string> &joint_names,
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3D *previous_poses_3d = nullptr,
|
||||||
float min_match_score = 0.95f,
|
float min_match_score = 0.95f,
|
||||||
size_t min_group_size = 1)
|
size_t min_group_size = 1)
|
||||||
{
|
{
|
||||||
|
PoseBatch3DView previous_view_storage;
|
||||||
|
const PoseBatch3DView *previous_view = nullptr;
|
||||||
|
if (previous_poses_3d != nullptr)
|
||||||
|
{
|
||||||
|
previous_view_storage = previous_poses_3d->view();
|
||||||
|
previous_view = &previous_view_storage;
|
||||||
|
}
|
||||||
return triangulate_poses(
|
return triangulate_poses(
|
||||||
poses_2d.view(), cameras, roomparams, joint_names, min_match_score, min_group_size);
|
poses_2d.view(), cameras, roomparams, joint_names, previous_view, min_match_score, min_group_size);
|
||||||
}
|
}
|
||||||
|
|||||||
+545
-100
@@ -10,6 +10,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "interface.hpp"
|
#include "interface.hpp"
|
||||||
|
#include "cached_camera.hpp"
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@@ -83,7 +84,7 @@ size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_jo
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::array<float, 3> undistort_point(
|
std::array<float, 3> undistort_point(
|
||||||
const std::array<float, 3> &raw_point, const CameraInternal &icam)
|
const std::array<float, 3> &raw_point, const CachedCamera &icam)
|
||||||
{
|
{
|
||||||
std::array<float, 3> point = raw_point;
|
std::array<float, 3> point = raw_point;
|
||||||
const float ifx_old = 1.0f / icam.cam.K[0][0];
|
const float ifx_old = 1.0f / icam.cam.K[0][0];
|
||||||
@@ -98,13 +99,13 @@ std::array<float, 3> undistort_point(
|
|||||||
point[0] = (point[0] - cx_old) * ifx_old;
|
point[0] = (point[0] - cx_old) * ifx_old;
|
||||||
point[1] = (point[1] - cy_old) * ify_old;
|
point[1] = (point[1] - cy_old) * ify_old;
|
||||||
|
|
||||||
if (icam.cam.type == "fisheye")
|
if (icam.cam.model == CameraModel::Fisheye)
|
||||||
{
|
{
|
||||||
CameraInternal::undistort_point_fisheye(point, icam.cam.DC);
|
undistort_point_fisheye(point, icam.cam.DC);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
CameraInternal::undistort_point_pinhole(point, icam.cam.DC);
|
undistort_point_pinhole(point, icam.cam.DC);
|
||||||
}
|
}
|
||||||
|
|
||||||
point[0] = (point[0] * fx_new) + cx_new;
|
point[0] = (point[0] * fx_new) + cx_new;
|
||||||
@@ -125,7 +126,7 @@ class PackedPoseStore2D
|
|||||||
public:
|
public:
|
||||||
PackedPoseStore2D(
|
PackedPoseStore2D(
|
||||||
const PoseBatch2DView &source,
|
const PoseBatch2DView &source,
|
||||||
const std::vector<CameraInternal> &internal_cameras)
|
const std::vector<CachedCamera> &internal_cameras)
|
||||||
: person_counts(source.num_views),
|
: person_counts(source.num_views),
|
||||||
view_offsets(source.num_views),
|
view_offsets(source.num_views),
|
||||||
num_views(source.num_views),
|
num_views(source.num_views),
|
||||||
@@ -229,16 +230,75 @@ public:
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
PoseBatch3D triangulate_poses(
|
TriangulationTrace triangulate_debug(
|
||||||
const PoseBatch2DView &poses_2d,
|
const PoseBatch2DView &poses_2d,
|
||||||
const std::vector<Camera> &cameras,
|
const std::vector<Camera> &cameras,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
const std::vector<std::string> &joint_names);
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView *previous_poses_3d);
|
||||||
|
|
||||||
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView &previous_poses_3d);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
struct PreparedInputs
|
||||||
|
{
|
||||||
|
std::vector<CachedCamera> internal_cameras;
|
||||||
|
std::vector<size_t> core_joint_idx;
|
||||||
|
std::vector<std::array<size_t, 2>> core_limbs_idx;
|
||||||
|
PackedPoseStore2D packed_poses;
|
||||||
|
|
||||||
|
PreparedInputs(
|
||||||
|
std::vector<CachedCamera> cameras_in,
|
||||||
|
std::vector<size_t> core_joint_idx_in,
|
||||||
|
std::vector<std::array<size_t, 2>> core_limbs_idx_in,
|
||||||
|
PackedPoseStore2D packed_poses_in)
|
||||||
|
: internal_cameras(std::move(cameras_in)),
|
||||||
|
core_joint_idx(std::move(core_joint_idx_in)),
|
||||||
|
core_limbs_idx(std::move(core_limbs_idx_in)),
|
||||||
|
packed_poses(std::move(packed_poses_in))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PreviousProjectionCache
|
||||||
|
{
|
||||||
|
std::vector<std::vector<Pose2D>> projected_poses;
|
||||||
|
std::vector<std::vector<std::vector<float>>> projected_dists;
|
||||||
|
std::vector<Pose3D> core_poses;
|
||||||
|
};
|
||||||
|
|
||||||
|
PreparedInputs prepare_inputs(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names);
|
||||||
|
|
||||||
|
std::vector<PairCandidate> build_pair_candidates(const PackedPoseStore2D &packed_poses) const;
|
||||||
|
|
||||||
|
PreviousProjectionCache project_previous_poses(
|
||||||
|
const PoseBatch3DView &previous_poses_3d,
|
||||||
|
const std::vector<CachedCamera> &internal_cameras,
|
||||||
|
const std::vector<size_t> &core_joint_idx);
|
||||||
|
|
||||||
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
|
||||||
|
const PackedPoseStore2D &packed_poses,
|
||||||
|
const std::vector<CachedCamera> &internal_cameras,
|
||||||
|
const std::vector<size_t> &core_joint_idx,
|
||||||
|
const std::vector<PairCandidate> &pairs,
|
||||||
|
const PoseBatch3DView &previous_poses_3d);
|
||||||
|
|
||||||
|
float calc_pose_score(
|
||||||
|
const Pose2D &pose,
|
||||||
|
const Pose2D &reference_pose,
|
||||||
|
const std::vector<float> &dists,
|
||||||
|
const CachedCamera &icam);
|
||||||
|
|
||||||
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses(
|
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses(
|
||||||
const std::vector<Pose3D> &poses_3d,
|
const std::vector<Pose3D> &poses_3d,
|
||||||
const CameraInternal &icam,
|
const CachedCamera &icam,
|
||||||
bool calc_dists);
|
bool calc_dists);
|
||||||
|
|
||||||
std::vector<float> score_projection(
|
std::vector<float> score_projection(
|
||||||
@@ -251,8 +311,8 @@ private:
|
|||||||
std::pair<Pose3D, float> triangulate_and_score(
|
std::pair<Pose3D, float> triangulate_and_score(
|
||||||
const Pose2D &pose1,
|
const Pose2D &pose1,
|
||||||
const Pose2D &pose2,
|
const Pose2D &pose2,
|
||||||
const CameraInternal &cam1,
|
const CachedCamera &cam1,
|
||||||
const CameraInternal &cam2,
|
const CachedCamera &cam2,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
|
||||||
|
|
||||||
@@ -313,17 +373,9 @@ private:
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
PoseBatch3D empty_pose_batch3d(size_t num_joints)
|
TriangulatorInternal::PreparedInputs TriangulatorInternal::prepare_inputs(
|
||||||
{
|
|
||||||
PoseBatch3D poses_3d;
|
|
||||||
poses_3d.num_joints = num_joints;
|
|
||||||
return poses_3d;
|
|
||||||
}
|
|
||||||
|
|
||||||
PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|
||||||
const PoseBatch2DView &poses_2d,
|
const PoseBatch2DView &poses_2d,
|
||||||
const std::vector<Camera> &cameras,
|
const std::vector<Camera> &cameras,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
||||||
const std::vector<std::string> &joint_names)
|
const std::vector<std::string> &joint_names)
|
||||||
{
|
{
|
||||||
if (poses_2d.num_views == 0)
|
if (poses_2d.num_views == 0)
|
||||||
@@ -342,6 +394,10 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
{
|
{
|
||||||
throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
|
throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
|
||||||
}
|
}
|
||||||
|
if (poses_2d.num_joints == 0)
|
||||||
|
{
|
||||||
|
throw std::invalid_argument("At least one joint is required.");
|
||||||
|
}
|
||||||
if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr)
|
if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr)
|
||||||
{
|
{
|
||||||
throw std::invalid_argument("poses_2d data must not be null.");
|
throw std::invalid_argument("poses_2d data must not be null.");
|
||||||
@@ -356,11 +412,11 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
|
|
||||||
num_cams = cameras.size();
|
num_cams = cameras.size();
|
||||||
|
|
||||||
std::vector<CameraInternal> internal_cameras;
|
std::vector<CachedCamera> internal_cameras;
|
||||||
internal_cameras.reserve(cameras.size());
|
internal_cameras.reserve(cameras.size());
|
||||||
for (const auto &camera : cameras)
|
for (const auto &camera : cameras)
|
||||||
{
|
{
|
||||||
internal_cameras.emplace_back(camera);
|
internal_cameras.push_back(cache_camera(camera));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<size_t> core_joint_idx;
|
std::vector<size_t> core_joint_idx;
|
||||||
@@ -383,64 +439,310 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
}
|
}
|
||||||
|
|
||||||
PackedPoseStore2D packed_poses(poses_2d, internal_cameras);
|
PackedPoseStore2D packed_poses(poses_2d, internal_cameras);
|
||||||
|
return PreparedInputs(
|
||||||
|
std::move(internal_cameras),
|
||||||
|
std::move(core_joint_idx),
|
||||||
|
std::move(core_limbs_idx),
|
||||||
|
std::move(packed_poses));
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<PosePair> all_pairs;
|
std::vector<PairCandidate> TriangulatorInternal::build_pair_candidates(const PackedPoseStore2D &packed_poses) const
|
||||||
for (size_t view1 = 0; view1 < cameras.size(); ++view1)
|
{
|
||||||
|
std::vector<PairCandidate> pairs;
|
||||||
|
for (size_t view1 = 0; view1 < packed_poses.num_views; ++view1)
|
||||||
{
|
{
|
||||||
for (size_t view2 = view1 + 1; view2 < cameras.size(); ++view2)
|
for (size_t view2 = view1 + 1; view2 < packed_poses.num_views; ++view2)
|
||||||
{
|
{
|
||||||
for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1)
|
for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1)
|
||||||
{
|
{
|
||||||
for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2)
|
for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2)
|
||||||
{
|
{
|
||||||
all_pairs.emplace_back(
|
pairs.push_back(PairCandidate {
|
||||||
std::make_tuple(
|
|
||||||
static_cast<int>(view1),
|
static_cast<int>(view1),
|
||||||
static_cast<int>(view2),
|
static_cast<int>(view2),
|
||||||
static_cast<int>(person1),
|
static_cast<int>(person1),
|
||||||
static_cast<int>(person2)),
|
static_cast<int>(person2),
|
||||||
std::make_pair(
|
|
||||||
static_cast<int>(packed_poses.pose_index(view1, person1)),
|
static_cast<int>(packed_poses.pose_index(view1, person1)),
|
||||||
static_cast<int>(packed_poses.pose_index(view2, person2))));
|
static_cast<int>(packed_poses.pose_index(view2, person2)),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return pairs;
|
||||||
|
}
|
||||||
|
|
||||||
|
TriangulatorInternal::PreviousProjectionCache TriangulatorInternal::project_previous_poses(
|
||||||
|
const PoseBatch3DView &previous_poses_3d,
|
||||||
|
const std::vector<CachedCamera> &internal_cameras,
|
||||||
|
const std::vector<size_t> &core_joint_idx)
|
||||||
|
{
|
||||||
|
PreviousProjectionCache cache;
|
||||||
|
cache.core_poses.resize(previous_poses_3d.num_persons);
|
||||||
|
for (size_t person = 0; person < previous_poses_3d.num_persons; ++person)
|
||||||
|
{
|
||||||
|
Pose3D pose(core_joint_idx.size(), {0.0f, 0.0f, 0.0f, 0.0f});
|
||||||
|
for (size_t joint = 0; joint < core_joint_idx.size(); ++joint)
|
||||||
|
{
|
||||||
|
const size_t source_joint = core_joint_idx[joint];
|
||||||
|
for (size_t coord = 0; coord < 4; ++coord)
|
||||||
|
{
|
||||||
|
pose[joint][coord] = previous_poses_3d.at(person, source_joint, coord);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cache.core_poses[person] = std::move(pose);
|
||||||
|
}
|
||||||
|
|
||||||
|
cache.projected_poses.resize(internal_cameras.size());
|
||||||
|
cache.projected_dists.resize(internal_cameras.size());
|
||||||
|
for (size_t view = 0; view < internal_cameras.size(); ++view)
|
||||||
|
{
|
||||||
|
auto [projected_poses, projected_dists] = project_poses(cache.core_poses, internal_cameras[view], true);
|
||||||
|
cache.projected_poses[view] = std::move(projected_poses);
|
||||||
|
cache.projected_dists[view] = std::move(projected_dists);
|
||||||
|
}
|
||||||
|
|
||||||
|
return cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
float TriangulatorInternal::calc_pose_score(
|
||||||
|
const Pose2D &pose,
|
||||||
|
const Pose2D &reference_pose,
|
||||||
|
const std::vector<float> &dists,
|
||||||
|
const CachedCamera &icam)
|
||||||
|
{
|
||||||
|
const float min_score = 0.1f;
|
||||||
|
std::vector<bool> mask(pose.size(), false);
|
||||||
|
size_t valid_count = 0;
|
||||||
|
for (size_t joint = 0; joint < pose.size(); ++joint)
|
||||||
|
{
|
||||||
|
mask[joint] = pose[joint][2] > min_score && reference_pose[joint][2] > min_score;
|
||||||
|
if (mask[joint])
|
||||||
|
{
|
||||||
|
++valid_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (valid_count < 3)
|
||||||
|
{
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
const float iscale = (icam.cam.width + icam.cam.height) * 0.5f;
|
||||||
|
std::vector<float> scores = score_projection(pose, reference_pose, dists, mask, iscale);
|
||||||
|
const size_t drop_k = static_cast<size_t>(pose.size() * 0.2f);
|
||||||
|
if (drop_k > 0)
|
||||||
|
{
|
||||||
|
std::partial_sort(scores.begin(), scores.begin() + drop_k, scores.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
float total = 0.0f;
|
||||||
|
for (size_t i = drop_k; i < scores.size(); ++i)
|
||||||
|
{
|
||||||
|
total += scores[i];
|
||||||
|
}
|
||||||
|
return total / static_cast<float>(scores.size() - drop_k);
|
||||||
|
}
|
||||||
|
|
||||||
|
PreviousPoseFilterDebug TriangulatorInternal::filter_pairs_with_previous_poses(
|
||||||
|
const PackedPoseStore2D &packed_poses,
|
||||||
|
const std::vector<CachedCamera> &internal_cameras,
|
||||||
|
const std::vector<size_t> &core_joint_idx,
|
||||||
|
const std::vector<PairCandidate> &pairs,
|
||||||
|
const PoseBatch3DView &previous_poses_3d)
|
||||||
|
{
|
||||||
|
PreviousPoseFilterDebug debug;
|
||||||
|
debug.used_previous_poses = true;
|
||||||
|
debug.matches.resize(pairs.size());
|
||||||
|
|
||||||
|
if (previous_poses_3d.num_persons == 0)
|
||||||
|
{
|
||||||
|
debug.kept_pair_indices.resize(pairs.size());
|
||||||
|
std::iota(debug.kept_pair_indices.begin(), debug.kept_pair_indices.end(), 0);
|
||||||
|
debug.kept_pairs = pairs;
|
||||||
|
for (auto &match : debug.matches)
|
||||||
|
{
|
||||||
|
match.decision = "no_previous_poses";
|
||||||
|
}
|
||||||
|
return debug;
|
||||||
|
}
|
||||||
|
|
||||||
|
const PreviousProjectionCache projected_previous =
|
||||||
|
project_previous_poses(previous_poses_3d, internal_cameras, core_joint_idx);
|
||||||
|
const float threshold = min_match_score;
|
||||||
|
|
||||||
|
for (size_t pair_index = 0; pair_index < pairs.size(); ++pair_index)
|
||||||
|
{
|
||||||
|
const PairCandidate &pair = pairs[pair_index];
|
||||||
|
const Pose2D pose1_core = packed_poses.pose(
|
||||||
|
static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), core_joint_idx);
|
||||||
|
const Pose2D pose2_core = packed_poses.pose(
|
||||||
|
static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), core_joint_idx);
|
||||||
|
|
||||||
|
PreviousPoseMatch best_match;
|
||||||
|
bool found_decision = false;
|
||||||
|
for (size_t previous_index = 0; previous_index < previous_poses_3d.num_persons; ++previous_index)
|
||||||
|
{
|
||||||
|
const float score_view1 = calc_pose_score(
|
||||||
|
pose1_core,
|
||||||
|
projected_previous.projected_poses[static_cast<size_t>(pair.view1)][previous_index],
|
||||||
|
projected_previous.projected_dists[static_cast<size_t>(pair.view1)][previous_index],
|
||||||
|
internal_cameras[static_cast<size_t>(pair.view1)]);
|
||||||
|
const float score_view2 = calc_pose_score(
|
||||||
|
pose2_core,
|
||||||
|
projected_previous.projected_poses[static_cast<size_t>(pair.view2)][previous_index],
|
||||||
|
projected_previous.projected_dists[static_cast<size_t>(pair.view2)][previous_index],
|
||||||
|
internal_cameras[static_cast<size_t>(pair.view2)]);
|
||||||
|
const bool matched_view1 = score_view1 > threshold;
|
||||||
|
const bool matched_view2 = score_view2 > threshold;
|
||||||
|
|
||||||
|
if (matched_view1 && matched_view2)
|
||||||
|
{
|
||||||
|
best_match = PreviousPoseMatch {
|
||||||
|
static_cast<int>(previous_index),
|
||||||
|
score_view1,
|
||||||
|
score_view2,
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
"matched_previous_pose",
|
||||||
|
};
|
||||||
|
found_decision = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matched_view1 || matched_view2)
|
||||||
|
{
|
||||||
|
best_match = PreviousPoseMatch {
|
||||||
|
static_cast<int>(previous_index),
|
||||||
|
score_view1,
|
||||||
|
score_view2,
|
||||||
|
matched_view1,
|
||||||
|
matched_view2,
|
||||||
|
false,
|
||||||
|
"dropped_single_view_match",
|
||||||
|
};
|
||||||
|
found_decision = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (all_pairs.empty())
|
if (!found_decision)
|
||||||
{
|
{
|
||||||
return empty_pose_batch3d(joint_names.size());
|
best_match.decision = "new_person_candidate";
|
||||||
|
best_match.kept = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<Pose3D, float>> all_scored_poses(all_pairs.size());
|
debug.matches[pair_index] = best_match;
|
||||||
for (size_t i = 0; i < all_pairs.size(); ++i)
|
if (best_match.kept)
|
||||||
{
|
{
|
||||||
const auto &pids = all_pairs[i].first;
|
debug.kept_pair_indices.push_back(static_cast<int>(pair_index));
|
||||||
const auto &cam1 = internal_cameras[std::get<0>(pids)];
|
debug.kept_pairs.push_back(pair);
|
||||||
const auto &cam2 = internal_cameras[std::get<1>(pids)];
|
}
|
||||||
Pose2D pose1_core = packed_poses.pose(std::get<0>(pids), std::get<2>(pids), core_joint_idx);
|
}
|
||||||
Pose2D pose2_core = packed_poses.pose(std::get<1>(pids), std::get<3>(pids), core_joint_idx);
|
|
||||||
|
return debug;
|
||||||
|
}
|
||||||
|
|
||||||
|
TriangulationTrace TriangulatorInternal::triangulate_debug(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView *previous_poses_3d)
|
||||||
|
{
|
||||||
|
TriangulationTrace trace;
|
||||||
|
trace.final_poses.num_joints = joint_names.size();
|
||||||
|
|
||||||
|
if (previous_poses_3d != nullptr && previous_poses_3d->num_persons > 0 &&
|
||||||
|
previous_poses_3d->num_joints != joint_names.size())
|
||||||
|
{
|
||||||
|
throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
|
||||||
|
}
|
||||||
|
|
||||||
|
PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
|
||||||
|
trace.pairs = build_pair_candidates(prepared.packed_poses);
|
||||||
|
if (trace.pairs.empty())
|
||||||
|
{
|
||||||
|
return trace;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> active_pair_indices(trace.pairs.size());
|
||||||
|
std::iota(active_pair_indices.begin(), active_pair_indices.end(), 0);
|
||||||
|
std::vector<PairCandidate> active_pairs = trace.pairs;
|
||||||
|
|
||||||
|
if (previous_poses_3d != nullptr)
|
||||||
|
{
|
||||||
|
trace.previous_filter = filter_pairs_with_previous_poses(
|
||||||
|
prepared.packed_poses,
|
||||||
|
prepared.internal_cameras,
|
||||||
|
prepared.core_joint_idx,
|
||||||
|
trace.pairs,
|
||||||
|
*previous_poses_3d);
|
||||||
|
active_pair_indices = trace.previous_filter.kept_pair_indices;
|
||||||
|
active_pairs = trace.previous_filter.kept_pairs;
|
||||||
|
if (active_pairs.empty())
|
||||||
|
{
|
||||||
|
return trace;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trace.core_proposals.reserve(active_pairs.size());
|
||||||
|
std::vector<PosePair> kept_pose_pairs;
|
||||||
|
std::vector<std::pair<Pose3D, float>> kept_scored_poses;
|
||||||
|
std::vector<int> kept_core_indices;
|
||||||
|
for (size_t i = 0; i < active_pairs.size(); ++i)
|
||||||
|
{
|
||||||
|
const PairCandidate &pair = active_pairs[i];
|
||||||
|
const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
|
||||||
|
const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
|
||||||
|
const Pose2D pose1_core = prepared.packed_poses.pose(
|
||||||
|
static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), prepared.core_joint_idx);
|
||||||
|
const Pose2D pose2_core = prepared.packed_poses.pose(
|
||||||
|
static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), prepared.core_joint_idx);
|
||||||
|
|
||||||
auto [pose3d, score] = triangulate_and_score(
|
auto [pose3d, score] = triangulate_and_score(
|
||||||
pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx);
|
pose1_core, pose2_core, cam1, cam2, roomparams, prepared.core_limbs_idx);
|
||||||
all_scored_poses[i] = std::make_pair(std::move(pose3d), score);
|
|
||||||
|
CoreProposalDebug proposal;
|
||||||
|
proposal.pair_index = active_pair_indices[i];
|
||||||
|
proposal.pair = pair;
|
||||||
|
proposal.pose_3d = pose3d;
|
||||||
|
proposal.score = score;
|
||||||
|
proposal.kept = score >= min_match_score;
|
||||||
|
proposal.drop_reason = proposal.kept ? "kept" : "below_min_match_score";
|
||||||
|
trace.core_proposals.push_back(proposal);
|
||||||
|
|
||||||
|
if (!proposal.kept)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = all_scored_poses.size(); i > 0; --i)
|
kept_core_indices.push_back(static_cast<int>(trace.core_proposals.size() - 1));
|
||||||
{
|
kept_scored_poses.emplace_back(std::move(pose3d), score);
|
||||||
if (all_scored_poses[i - 1].second < min_match_score)
|
kept_pose_pairs.emplace_back(
|
||||||
{
|
std::make_tuple(pair.view1, pair.view2, pair.person1, pair.person2),
|
||||||
all_scored_poses.erase(all_scored_poses.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
std::make_pair(pair.global_person1, pair.global_person2));
|
||||||
all_pairs.erase(all_pairs.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (all_pairs.empty())
|
if (kept_pose_pairs.empty())
|
||||||
{
|
{
|
||||||
return empty_pose_batch3d(joint_names.size());
|
return trace;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto groups = calc_grouping(kept_pose_pairs, kept_scored_poses, min_match_score);
|
||||||
|
trace.grouping.initial_groups.reserve(groups.size());
|
||||||
|
for (const auto &group : groups)
|
||||||
|
{
|
||||||
|
ProposalGroupDebug debug_group;
|
||||||
|
debug_group.center = std::get<0>(group);
|
||||||
|
debug_group.pose_3d = std::get<1>(group);
|
||||||
|
for (const int index : std::get<2>(group))
|
||||||
|
{
|
||||||
|
debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
|
||||||
|
}
|
||||||
|
trace.grouping.initial_groups.push_back(std::move(debug_group));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<GroupedPose> groups = calc_grouping(all_pairs, all_scored_poses, min_match_score);
|
|
||||||
for (size_t i = groups.size(); i > 0; --i)
|
for (size_t i = groups.size(); i > 0; --i)
|
||||||
{
|
{
|
||||||
if (std::get<2>(groups[i - 1]).size() < min_group_size)
|
if (std::get<2>(groups[i - 1]).size() < min_group_size)
|
||||||
@@ -448,16 +750,15 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (groups.empty())
|
if (groups.empty())
|
||||||
{
|
{
|
||||||
return empty_pose_batch3d(joint_names.size());
|
return trace;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::tuple<int, int, int>, std::vector<size_t>> pairs_map;
|
std::map<std::tuple<int, int, int>, std::vector<size_t>> pairs_map;
|
||||||
for (size_t i = 0; i < all_pairs.size(); ++i)
|
for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
|
||||||
{
|
{
|
||||||
const auto &pair = all_pairs[i];
|
const auto &pair = kept_pose_pairs[i];
|
||||||
const auto &mid1 = std::make_tuple(
|
const auto &mid1 = std::make_tuple(
|
||||||
std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second));
|
std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second));
|
||||||
const auto &mid2 = std::make_tuple(
|
const auto &mid2 = std::make_tuple(
|
||||||
@@ -466,19 +767,19 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
pairs_map[mid2].push_back(i);
|
pairs_map[mid2].push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<size_t> group_map(all_pairs.size());
|
std::vector<size_t> group_map(kept_pose_pairs.size());
|
||||||
for (size_t i = 0; i < groups.size(); ++i)
|
for (size_t i = 0; i < groups.size(); ++i)
|
||||||
{
|
{
|
||||||
for (const auto &idx : std::get<2>(groups[i]))
|
for (const int index : std::get<2>(groups[i]))
|
||||||
{
|
{
|
||||||
group_map[idx] = i;
|
group_map[static_cast<size_t>(index)] = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::set<size_t> drop_indices;
|
std::set<size_t> drop_indices;
|
||||||
for (auto &pair : pairs_map)
|
for (auto &pair_entry : pairs_map)
|
||||||
{
|
{
|
||||||
auto &indices = pair.second;
|
auto &indices = pair_entry.second;
|
||||||
if (indices.size() <= 1)
|
if (indices.size() <= 1)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
@@ -488,10 +789,10 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
std::vector<float> pair_scores;
|
std::vector<float> pair_scores;
|
||||||
group_sizes.reserve(indices.size());
|
group_sizes.reserve(indices.size());
|
||||||
pair_scores.reserve(indices.size());
|
pair_scores.reserve(indices.size());
|
||||||
for (auto idx : indices)
|
for (const size_t index : indices)
|
||||||
{
|
{
|
||||||
group_sizes.push_back(std::get<2>(groups[group_map[idx]]).size());
|
group_sizes.push_back(std::get<2>(groups[group_map[index]]).size());
|
||||||
pair_scores.push_back(all_scored_poses[idx].second);
|
pair_scores.push_back(kept_scored_poses[index].second);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<size_t> indices_sorted(indices.size());
|
std::vector<size_t> indices_sorted(indices.size());
|
||||||
@@ -514,10 +815,12 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
|
|
||||||
std::vector<size_t> drop_list(drop_indices.begin(), drop_indices.end());
|
std::vector<size_t> drop_list(drop_indices.begin(), drop_indices.end());
|
||||||
std::sort(drop_list.begin(), drop_list.end(), std::greater<size_t>());
|
std::sort(drop_list.begin(), drop_list.end(), std::greater<size_t>());
|
||||||
for (size_t drop_idx : drop_list)
|
for (const size_t drop_idx : drop_list)
|
||||||
{
|
{
|
||||||
all_scored_poses.erase(all_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
trace.grouping.duplicate_pair_drops.push_back(kept_core_indices[drop_idx]);
|
||||||
all_pairs.erase(all_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
kept_scored_poses.erase(kept_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
||||||
|
kept_pose_pairs.erase(kept_pose_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
||||||
|
kept_core_indices.erase(kept_core_indices.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
||||||
|
|
||||||
for (auto &group : groups)
|
for (auto &group : groups)
|
||||||
{
|
{
|
||||||
@@ -527,11 +830,11 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
{
|
{
|
||||||
indices.erase(it);
|
indices.erase(it);
|
||||||
}
|
}
|
||||||
for (auto &index : indices)
|
for (int &index : indices)
|
||||||
{
|
{
|
||||||
if (static_cast<size_t>(index) > drop_idx)
|
if (static_cast<size_t>(index) > drop_idx)
|
||||||
{
|
{
|
||||||
index -= 1;
|
--index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -544,24 +847,45 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (groups.empty())
|
if (groups.empty())
|
||||||
{
|
{
|
||||||
return empty_pose_batch3d(joint_names.size());
|
return trace;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Pose3D> all_full_poses(all_pairs.size());
|
trace.grouping.groups.reserve(groups.size());
|
||||||
for (size_t i = 0; i < all_pairs.size(); ++i)
|
for (const auto &group : groups)
|
||||||
{
|
{
|
||||||
const auto &pids = all_pairs[i].first;
|
ProposalGroupDebug debug_group;
|
||||||
const auto &cam1 = internal_cameras[std::get<0>(pids)];
|
debug_group.center = std::get<0>(group);
|
||||||
const auto &cam2 = internal_cameras[std::get<1>(pids)];
|
debug_group.pose_3d = std::get<1>(group);
|
||||||
Pose2D pose1 = packed_poses.pose(std::get<0>(pids), std::get<2>(pids));
|
for (const int index : std::get<2>(group))
|
||||||
Pose2D pose2 = packed_poses.pose(std::get<1>(pids), std::get<3>(pids));
|
{
|
||||||
|
debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
|
||||||
|
}
|
||||||
|
trace.grouping.groups.push_back(std::move(debug_group));
|
||||||
|
}
|
||||||
|
|
||||||
auto [pose3d, score] = triangulate_and_score(
|
std::vector<Pose3D> full_pose_buffer(kept_pose_pairs.size());
|
||||||
pose1, pose2, cam1, cam2, roomparams, {});
|
std::vector<int> full_proposal_index_by_core(trace.core_proposals.size(), -1);
|
||||||
|
trace.full_proposals.reserve(kept_pose_pairs.size());
|
||||||
|
for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
|
||||||
|
{
|
||||||
|
const auto &pair_ids = kept_pose_pairs[i].first;
|
||||||
|
const PairCandidate pair = {
|
||||||
|
std::get<0>(pair_ids),
|
||||||
|
std::get<1>(pair_ids),
|
||||||
|
std::get<2>(pair_ids),
|
||||||
|
std::get<3>(pair_ids),
|
||||||
|
kept_pose_pairs[i].second.first,
|
||||||
|
kept_pose_pairs[i].second.second,
|
||||||
|
};
|
||||||
|
const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
|
||||||
|
const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
|
||||||
|
Pose2D pose1 = prepared.packed_poses.pose(static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1));
|
||||||
|
Pose2D pose2 = prepared.packed_poses.pose(static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2));
|
||||||
|
|
||||||
|
auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams, {});
|
||||||
|
(void)score;
|
||||||
for (size_t joint = 0; joint < pose3d.size(); ++joint)
|
for (size_t joint = 0; joint < pose3d.size(); ++joint)
|
||||||
{
|
{
|
||||||
const float score1 = pose1[joint][2];
|
const float score1 = pose1[joint][2];
|
||||||
@@ -576,32 +900,43 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
all_full_poses[i] = std::move(pose3d);
|
full_pose_buffer[i] = pose3d;
|
||||||
|
full_proposal_index_by_core[kept_core_indices[i]] = static_cast<int>(trace.full_proposals.size());
|
||||||
|
trace.full_proposals.push_back(FullProposalDebug {
|
||||||
|
kept_core_indices[i],
|
||||||
|
pair,
|
||||||
|
std::move(pose3d),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Pose3D> final_poses_3d(groups.size());
|
std::vector<Pose3D> final_poses_3d(groups.size());
|
||||||
|
trace.merge.group_proposal_indices.reserve(groups.size());
|
||||||
|
trace.merge.merged_poses.reserve(groups.size());
|
||||||
for (size_t i = 0; i < groups.size(); ++i)
|
for (size_t i = 0; i < groups.size(); ++i)
|
||||||
{
|
{
|
||||||
std::vector<Pose3D> poses;
|
std::vector<Pose3D> poses;
|
||||||
|
std::vector<int> source_core_indices;
|
||||||
poses.reserve(std::get<2>(groups[i]).size());
|
poses.reserve(std::get<2>(groups[i]).size());
|
||||||
for (const auto &idx : std::get<2>(groups[i]))
|
source_core_indices.reserve(std::get<2>(groups[i]).size());
|
||||||
|
for (const int index : std::get<2>(groups[i]))
|
||||||
{
|
{
|
||||||
poses.push_back(all_full_poses[idx]);
|
poses.push_back(full_pose_buffer[static_cast<size_t>(index)]);
|
||||||
|
source_core_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
|
||||||
}
|
}
|
||||||
final_poses_3d[i] = merge_group(poses, min_match_score);
|
final_poses_3d[i] = merge_group(poses, min_match_score);
|
||||||
|
trace.merge.group_proposal_indices.push_back(std::move(source_core_indices));
|
||||||
|
trace.merge.merged_poses.push_back(final_poses_3d[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
add_extra_joints(final_poses_3d, joint_names);
|
add_extra_joints(final_poses_3d, joint_names);
|
||||||
filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx);
|
filter_poses(final_poses_3d, roomparams, prepared.core_joint_idx, prepared.core_limbs_idx);
|
||||||
add_missing_joints(final_poses_3d, joint_names);
|
add_missing_joints(final_poses_3d, joint_names);
|
||||||
replace_far_joints(final_poses_3d, joint_names);
|
replace_far_joints(final_poses_3d, joint_names);
|
||||||
|
|
||||||
PoseBatch3D poses_3d;
|
size_t valid_persons = 0;
|
||||||
poses_3d.num_joints = joint_names.size();
|
|
||||||
|
|
||||||
for (const auto &pose : final_poses_3d)
|
for (const auto &pose : final_poses_3d)
|
||||||
{
|
{
|
||||||
const auto is_valid = std::any_of(
|
const bool is_valid = std::any_of(
|
||||||
pose.begin(),
|
pose.begin(),
|
||||||
pose.end(),
|
pose.end(),
|
||||||
[this](const std::array<float, 4> &joint)
|
[this](const std::array<float, 4> &joint)
|
||||||
@@ -610,15 +945,17 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
});
|
});
|
||||||
if (is_valid)
|
if (is_valid)
|
||||||
{
|
{
|
||||||
++poses_3d.num_persons;
|
++valid_persons;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
poses_3d.data.resize(poses_3d.num_persons * poses_3d.num_joints * 4);
|
trace.final_poses.num_persons = valid_persons;
|
||||||
|
trace.final_poses.data.resize(valid_persons * trace.final_poses.num_joints * 4);
|
||||||
|
|
||||||
size_t person_index = 0;
|
size_t person_index = 0;
|
||||||
for (const auto &pose : final_poses_3d)
|
for (const auto &pose : final_poses_3d)
|
||||||
{
|
{
|
||||||
const auto is_valid = std::any_of(
|
const bool is_valid = std::any_of(
|
||||||
pose.begin(),
|
pose.begin(),
|
||||||
pose.end(),
|
pose.end(),
|
||||||
[this](const std::array<float, 4> &joint)
|
[this](const std::array<float, 4> &joint)
|
||||||
@@ -630,24 +967,57 @@ PoseBatch3D TriangulatorInternal::triangulate_poses(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t joint = 0; joint < poses_3d.num_joints; ++joint)
|
for (size_t joint = 0; joint < trace.final_poses.num_joints; ++joint)
|
||||||
{
|
{
|
||||||
for (size_t coord = 0; coord < 4; ++coord)
|
for (size_t coord = 0; coord < 4; ++coord)
|
||||||
{
|
{
|
||||||
poses_3d.at(person_index, joint, coord) = pose[joint][coord];
|
trace.final_poses.at(person_index, joint, coord) = pose[joint][coord];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
++person_index;
|
++person_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
return poses_3d;
|
return trace;
|
||||||
|
}
|
||||||
|
|
||||||
|
PreviousPoseFilterDebug TriangulatorInternal::filter_pairs_with_previous_poses(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView &previous_poses_3d)
|
||||||
|
{
|
||||||
|
if (previous_poses_3d.num_persons > 0 && previous_poses_3d.num_joints != joint_names.size())
|
||||||
|
{
|
||||||
|
throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
|
||||||
|
}
|
||||||
|
|
||||||
|
PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
|
||||||
|
const std::vector<PairCandidate> pairs = build_pair_candidates(prepared.packed_poses);
|
||||||
|
if (pairs.empty())
|
||||||
|
{
|
||||||
|
PreviousPoseFilterDebug empty;
|
||||||
|
empty.used_previous_poses = true;
|
||||||
|
return empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
return filter_pairs_with_previous_poses(
|
||||||
|
prepared.packed_poses,
|
||||||
|
prepared.internal_cameras,
|
||||||
|
prepared.core_joint_idx,
|
||||||
|
pairs,
|
||||||
|
previous_poses_3d);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> TriangulatorInternal::project_poses(
|
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> TriangulatorInternal::project_poses(
|
||||||
const std::vector<Pose3D> &poses_3d,
|
const std::vector<Pose3D> &poses_3d,
|
||||||
const CameraInternal &icam,
|
const CachedCamera &icam,
|
||||||
bool calc_dists)
|
bool calc_dists)
|
||||||
{
|
{
|
||||||
|
if (poses_3d.empty())
|
||||||
|
{
|
||||||
|
return std::make_tuple(std::vector<Pose2D> {}, std::vector<std::vector<float>> {});
|
||||||
|
}
|
||||||
|
|
||||||
const size_t num_persons = poses_3d.size();
|
const size_t num_persons = poses_3d.size();
|
||||||
const size_t num_joints = poses_3d[0].size();
|
const size_t num_joints = poses_3d[0].size();
|
||||||
|
|
||||||
@@ -813,7 +1183,7 @@ std::array<float, 3> mat_mul_vec(
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<float, 3> calc_ray_dir(const CameraInternal &icam, const std::array<float, 2> &pt)
|
std::array<float, 3> calc_ray_dir(const CachedCamera &icam, const std::array<float, 2> &pt)
|
||||||
{
|
{
|
||||||
// Compute normalized ray direction from the point
|
// Compute normalized ray direction from the point
|
||||||
std::array<float, 3> uv1 = {pt[0], pt[1], 1.0};
|
std::array<float, 3> uv1 = {pt[0], pt[1], 1.0};
|
||||||
@@ -824,8 +1194,8 @@ std::array<float, 3> calc_ray_dir(const CameraInternal &icam, const std::array<f
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::array<float, 4> triangulate_midpoint(
|
std::array<float, 4> triangulate_midpoint(
|
||||||
const CameraInternal &icam1,
|
const CachedCamera &icam1,
|
||||||
const CameraInternal &icam2,
|
const CachedCamera &icam2,
|
||||||
const std::array<float, 2> &pt1,
|
const std::array<float, 2> &pt1,
|
||||||
const std::array<float, 2> &pt2)
|
const std::array<float, 2> &pt2)
|
||||||
{
|
{
|
||||||
@@ -864,8 +1234,8 @@ std::array<float, 4> triangulate_midpoint(
|
|||||||
std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
|
std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
|
||||||
const std::vector<std::array<float, 3>> &pose1,
|
const std::vector<std::array<float, 3>> &pose1,
|
||||||
const std::vector<std::array<float, 3>> &pose2,
|
const std::vector<std::array<float, 3>> &pose2,
|
||||||
const CameraInternal &cam1,
|
const CachedCamera &cam1,
|
||||||
const CameraInternal &cam2,
|
const CachedCamera &cam2,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
|
||||||
{
|
{
|
||||||
@@ -2026,14 +2396,89 @@ void TriangulatorInternal::replace_far_joints(
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d)
|
||||||
|
{
|
||||||
|
if (poses_2d.person_counts == nullptr)
|
||||||
|
{
|
||||||
|
throw std::invalid_argument("person_counts must not be null.");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<PairCandidate> pairs;
|
||||||
|
std::vector<int> offsets(poses_2d.num_views, 0);
|
||||||
|
int total = 0;
|
||||||
|
for (size_t view = 0; view < poses_2d.num_views; ++view)
|
||||||
|
{
|
||||||
|
if (poses_2d.person_counts[view] > poses_2d.max_persons)
|
||||||
|
{
|
||||||
|
throw std::invalid_argument("person_counts entries must not exceed the padded person dimension.");
|
||||||
|
}
|
||||||
|
offsets[view] = total;
|
||||||
|
total += static_cast<int>(poses_2d.person_counts[view]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t view1 = 0; view1 < poses_2d.num_views; ++view1)
|
||||||
|
{
|
||||||
|
for (size_t view2 = view1 + 1; view2 < poses_2d.num_views; ++view2)
|
||||||
|
{
|
||||||
|
for (size_t person1 = 0; person1 < poses_2d.person_counts[view1]; ++person1)
|
||||||
|
{
|
||||||
|
for (size_t person2 = 0; person2 < poses_2d.person_counts[view2]; ++person2)
|
||||||
|
{
|
||||||
|
pairs.push_back(PairCandidate {
|
||||||
|
static_cast<int>(view1),
|
||||||
|
static_cast<int>(view2),
|
||||||
|
static_cast<int>(person1),
|
||||||
|
static_cast<int>(person2),
|
||||||
|
offsets[view1] + static_cast<int>(person1),
|
||||||
|
offsets[view2] + static_cast<int>(person2),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pairs;
|
||||||
|
}
|
||||||
|
|
||||||
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView &previous_poses_3d,
|
||||||
|
float min_match_score)
|
||||||
|
{
|
||||||
|
TriangulatorInternal triangulator(min_match_score, 1);
|
||||||
|
return triangulator.filter_pairs_with_previous_poses(poses_2d, cameras, joint_names, previous_poses_3d);
|
||||||
|
}
|
||||||
|
|
||||||
|
TriangulationTrace triangulate_debug(
|
||||||
|
const PoseBatch2DView &poses_2d,
|
||||||
|
const std::vector<Camera> &cameras,
|
||||||
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView *previous_poses_3d,
|
||||||
|
float min_match_score,
|
||||||
|
size_t min_group_size)
|
||||||
|
{
|
||||||
|
TriangulatorInternal triangulator(min_match_score, min_group_size);
|
||||||
|
return triangulator.triangulate_debug(poses_2d, cameras, roomparams, joint_names, previous_poses_3d);
|
||||||
|
}
|
||||||
|
|
||||||
PoseBatch3D triangulate_poses(
|
PoseBatch3D triangulate_poses(
|
||||||
const PoseBatch2DView &poses_2d,
|
const PoseBatch2DView &poses_2d,
|
||||||
const std::vector<Camera> &cameras,
|
const std::vector<Camera> &cameras,
|
||||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||||
const std::vector<std::string> &joint_names,
|
const std::vector<std::string> &joint_names,
|
||||||
|
const PoseBatch3DView *previous_poses_3d,
|
||||||
float min_match_score,
|
float min_match_score,
|
||||||
size_t min_group_size)
|
size_t min_group_size)
|
||||||
{
|
{
|
||||||
TriangulatorInternal triangulator(min_match_score, min_group_size);
|
return triangulate_debug(
|
||||||
return triangulator.triangulate_poses(poses_2d, cameras, roomparams, joint_names);
|
poses_2d,
|
||||||
|
cameras,
|
||||||
|
roomparams,
|
||||||
|
joint_names,
|
||||||
|
previous_poses_3d,
|
||||||
|
min_match_score,
|
||||||
|
min_group_size)
|
||||||
|
.final_poses;
|
||||||
}
|
}
|
||||||
|
|||||||
+30
-1
@@ -3,7 +3,23 @@ from __future__ import annotations
|
|||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from ._core import Camera, triangulate_poses
|
from ._core import (
|
||||||
|
Camera,
|
||||||
|
CameraModel,
|
||||||
|
CoreProposalDebug,
|
||||||
|
FullProposalDebug,
|
||||||
|
GroupingDebug,
|
||||||
|
MergeDebug,
|
||||||
|
PairCandidate,
|
||||||
|
PreviousPoseFilterDebug,
|
||||||
|
PreviousPoseMatch,
|
||||||
|
ProposalGroupDebug,
|
||||||
|
TriangulationTrace,
|
||||||
|
build_pair_candidates,
|
||||||
|
filter_pairs_with_previous_poses,
|
||||||
|
triangulate_debug,
|
||||||
|
triangulate_poses,
|
||||||
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -28,7 +44,20 @@ def pack_poses_2d(
|
|||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Camera",
|
"Camera",
|
||||||
|
"CameraModel",
|
||||||
|
"CoreProposalDebug",
|
||||||
|
"FullProposalDebug",
|
||||||
|
"GroupingDebug",
|
||||||
|
"MergeDebug",
|
||||||
|
"PairCandidate",
|
||||||
|
"PreviousPoseFilterDebug",
|
||||||
|
"PreviousPoseMatch",
|
||||||
|
"ProposalGroupDebug",
|
||||||
|
"TriangulationTrace",
|
||||||
|
"build_pair_candidates",
|
||||||
"convert_cameras",
|
"convert_cameras",
|
||||||
|
"filter_pairs_with_previous_poses",
|
||||||
"pack_poses_2d",
|
"pack_poses_2d",
|
||||||
|
"triangulate_debug",
|
||||||
"triangulate_poses",
|
"triangulate_poses",
|
||||||
]
|
]
|
||||||
|
|||||||
+33
-5
@@ -1,12 +1,12 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from typing import TypeAlias, TypedDict
|
from typing import Literal, TypeAlias, TypedDict
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import numpy.typing as npt
|
import numpy.typing as npt
|
||||||
|
|
||||||
from ._core import Camera
|
from ._core import Camera, CameraModel
|
||||||
|
|
||||||
Matrix3x3Like: TypeAlias = Sequence[Sequence[float]]
|
Matrix3x3Like: TypeAlias = Sequence[Sequence[float]]
|
||||||
VectorLike: TypeAlias = Sequence[float]
|
VectorLike: TypeAlias = Sequence[float]
|
||||||
@@ -21,12 +21,39 @@ class CameraDict(TypedDict, total=False):
|
|||||||
T: Sequence[Sequence[float]]
|
T: Sequence[Sequence[float]]
|
||||||
width: int
|
width: int
|
||||||
height: int
|
height: int
|
||||||
type: str
|
type: Literal["pinhole", "fisheye"]
|
||||||
|
model: Literal["pinhole", "fisheye"] | CameraModel
|
||||||
|
|
||||||
|
|
||||||
|
CameraModelLike: TypeAlias = CameraModel | Literal["pinhole", "fisheye"]
|
||||||
CameraLike = Camera | CameraDict
|
CameraLike = Camera | CameraDict
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_camera_model(model: CameraModelLike) -> CameraModel:
|
||||||
|
if isinstance(model, CameraModel):
|
||||||
|
return model
|
||||||
|
if model == "pinhole":
|
||||||
|
return CameraModel.PINHOLE
|
||||||
|
if model == "fisheye":
|
||||||
|
return CameraModel.FISHEYE
|
||||||
|
raise ValueError(f"Unsupported camera model: {model}")
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_distortion(distortion: VectorLike, camera_model: CameraModel) -> tuple[float, float, float, float, float]:
|
||||||
|
values = tuple(float(value) for value in distortion)
|
||||||
|
expected = 4 if camera_model is CameraModel.FISHEYE else 5
|
||||||
|
if len(values) not in {expected, 5}:
|
||||||
|
raise ValueError(
|
||||||
|
f"{camera_model.name.lower()} cameras require {expected} distortion coefficients"
|
||||||
|
+ (" (or 5 with a trailing zero)." if camera_model is CameraModel.FISHEYE else ".")
|
||||||
|
)
|
||||||
|
if camera_model is CameraModel.FISHEYE and len(values) == 4:
|
||||||
|
values = values + (0.0,)
|
||||||
|
if len(values) != 5:
|
||||||
|
raise ValueError("Distortion coefficients must normalize to exactly 5 values.")
|
||||||
|
return values
|
||||||
|
|
||||||
|
|
||||||
def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
|
def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
|
||||||
"""Normalize mappings or existing Camera objects into bound Camera instances."""
|
"""Normalize mappings or existing Camera objects into bound Camera instances."""
|
||||||
|
|
||||||
@@ -39,12 +66,13 @@ def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
|
|||||||
camera = Camera()
|
camera = Camera()
|
||||||
camera.name = str(cam["name"])
|
camera.name = str(cam["name"])
|
||||||
camera.K = cam["K"]
|
camera.K = cam["K"]
|
||||||
camera.DC = cam["DC"]
|
camera_model = _coerce_camera_model(cam.get("model", cam.get("type", "pinhole")))
|
||||||
|
camera.DC = _coerce_distortion(cam["DC"], camera_model)
|
||||||
camera.R = cam["R"]
|
camera.R = cam["R"]
|
||||||
camera.T = cam["T"]
|
camera.T = cam["T"]
|
||||||
camera.width = int(cam["width"])
|
camera.width = int(cam["width"])
|
||||||
camera.height = int(cam["height"])
|
camera.height = int(cam["height"])
|
||||||
camera.type = str(cam.get("type", "pinhole"))
|
camera.model = camera_model
|
||||||
converted.append(camera)
|
converted.append(camera)
|
||||||
return converted
|
return converted
|
||||||
|
|
||||||
|
|||||||
+70
-1
@@ -52,7 +52,7 @@ def test_camera_structure_repr():
|
|||||||
camera.T = [[1], [2], [3]]
|
camera.T = [[1], [2], [3]]
|
||||||
camera.width = 640
|
camera.width = 640
|
||||||
camera.height = 480
|
camera.height = 480
|
||||||
camera.type = "pinhole"
|
camera.model = rpt.CameraModel.PINHOLE
|
||||||
|
|
||||||
rendered = repr(camera)
|
rendered = repr(camera)
|
||||||
assert "Camera 1" in rendered
|
assert "Camera 1" in rendered
|
||||||
@@ -100,6 +100,75 @@ def test_triangulate_repeatability():
|
|||||||
np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5)
|
np.testing.assert_allclose(first, second, rtol=1e-5, atol=1e-5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_pair_candidates_exposes_cartesian_view_pairs():
|
||||||
|
poses_2d, person_counts = rpt.pack_poses_2d(
|
||||||
|
[
|
||||||
|
np.zeros((2, 2, 3), dtype=np.float32),
|
||||||
|
np.zeros((1, 2, 3), dtype=np.float32),
|
||||||
|
np.zeros((3, 2, 3), dtype=np.float32),
|
||||||
|
],
|
||||||
|
joint_count=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
pairs = rpt.build_pair_candidates(poses_2d, person_counts)
|
||||||
|
|
||||||
|
assert len(pairs) == (2 * 1) + (2 * 3) + (1 * 3)
|
||||||
|
assert (pairs[0].view1, pairs[0].view2, pairs[0].person1, pairs[0].person2) == (0, 1, 0, 0)
|
||||||
|
assert pairs[-1].global_person2 == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_triangulate_accepts_empty_previous_poses():
|
||||||
|
poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json")
|
||||||
|
roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32)
|
||||||
|
empty_previous = np.zeros((0, len(JOINT_NAMES), 4), dtype=np.float32)
|
||||||
|
|
||||||
|
baseline = rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
|
||||||
|
with_previous = rpt.triangulate_poses(
|
||||||
|
poses_2d,
|
||||||
|
person_counts,
|
||||||
|
cameras,
|
||||||
|
roomparams,
|
||||||
|
JOINT_NAMES,
|
||||||
|
empty_previous,
|
||||||
|
)
|
||||||
|
|
||||||
|
np.testing.assert_allclose(with_previous, baseline, rtol=1e-5, atol=1e-5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_triangulate_debug_matches_final_output():
|
||||||
|
poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
|
||||||
|
roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)
|
||||||
|
|
||||||
|
final_poses = rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
|
||||||
|
trace = rpt.triangulate_debug(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
|
||||||
|
|
||||||
|
np.testing.assert_allclose(trace.final_poses, final_poses, rtol=1e-5, atol=1e-5)
|
||||||
|
assert len(trace.pairs) >= len(trace.core_proposals)
|
||||||
|
for group in trace.grouping.groups:
|
||||||
|
assert all(0 <= index < len(trace.core_proposals) for index in group.proposal_indices)
|
||||||
|
for merge_indices in trace.merge.group_proposal_indices:
|
||||||
|
assert all(0 <= index < len(trace.core_proposals) for index in merge_indices)
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_pairs_with_previous_poses_returns_debug_matches():
|
||||||
|
poses_2d, person_counts, cameras = load_case("data/p1/sample.json", "tests/poses_p1.json")
|
||||||
|
roomparams = np.asarray([[5.6, 6.4, 2.4], [0.0, -0.5, 1.2]], dtype=np.float32)
|
||||||
|
previous_poses = rpt.triangulate_poses(poses_2d, person_counts, cameras, roomparams, JOINT_NAMES)
|
||||||
|
|
||||||
|
debug = rpt.filter_pairs_with_previous_poses(
|
||||||
|
poses_2d,
|
||||||
|
person_counts,
|
||||||
|
cameras,
|
||||||
|
JOINT_NAMES,
|
||||||
|
previous_poses,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert debug.used_previous_poses is True
|
||||||
|
assert len(debug.matches) == len(rpt.build_pair_candidates(poses_2d, person_counts))
|
||||||
|
assert len(debug.kept_pairs) == len(debug.kept_pair_indices)
|
||||||
|
assert any(match.matched_view1 or match.matched_view2 for match in debug.matches)
|
||||||
|
|
||||||
|
|
||||||
def test_triangulate_does_not_mutate_inputs():
|
def test_triangulate_does_not_mutate_inputs():
|
||||||
poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
|
poses_2d, person_counts, cameras = load_case("data/h1/sample.json", "tests/poses_h1.json")
|
||||||
roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)
|
roomparams = np.asarray([[4.8, 6.0, 2.0], [0.0, 0.0, 1.0]], dtype=np.float32)
|
||||||
|
|||||||
Reference in New Issue
Block a user