2455 lines
86 KiB
C++
2455 lines
86 KiB
C++
#include <algorithm>
|
|
#include <cmath>
|
|
#include <iomanip>
|
|
#include <iostream>
|
|
#include <limits>
|
|
#include <map>
|
|
#include <numeric>
|
|
#include <set>
|
|
#include <stdexcept>
|
|
#include <string_view>
|
|
#include <unordered_map>
|
|
|
|
#include "interface.hpp"
|
|
#include "cached_camera.hpp"
|
|
|
|
namespace
|
|
{
|
|
using Pose2D = std::vector<std::array<float, 3>>;
|
|
using Pose3D = std::vector<std::array<float, 4>>;
|
|
using PosePair = std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>;
|
|
using GroupedPose = std::tuple<std::array<float, 3>, Pose3D, std::vector<int>>;
|
|
|
|
size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_joints)
|
|
{
|
|
return (((pose * num_joints) + joint) * 3) + coord;
|
|
}
|
|
|
|
[[maybe_unused]] static void print_2d_poses(const Pose2D &poses)
|
|
{
|
|
std::cout << "Poses: (" << poses.size() << ", 3)[" << std::endl;
|
|
for (const auto &pose : poses)
|
|
{
|
|
std::cout << " [";
|
|
for (size_t i = 0; i < 3; ++i)
|
|
{
|
|
std::cout << std::fixed << std::setprecision(3) << pose[i];
|
|
if (i < 2)
|
|
{
|
|
std::cout << ", ";
|
|
}
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
|
|
[[maybe_unused]] static void print_3d_poses(const Pose3D &poses)
|
|
{
|
|
std::cout << "Poses: (" << poses.size() << ", 4)[" << std::endl;
|
|
for (const auto &pose : poses)
|
|
{
|
|
std::cout << " [";
|
|
for (size_t i = 0; i < 4; ++i)
|
|
{
|
|
std::cout << std::fixed << std::setprecision(3) << pose[i];
|
|
if (i < 3)
|
|
{
|
|
std::cout << ", ";
|
|
}
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
|
|
[[maybe_unused]] static void print_allpairs(const std::vector<PosePair> &all_pairs)
|
|
{
|
|
std::cout << "All pairs: [" << std::endl;
|
|
for (const auto &pair : all_pairs)
|
|
{
|
|
const auto &tuple_part = pair.first;
|
|
const auto &pair_part = pair.second;
|
|
|
|
std::cout << "("
|
|
<< std::get<0>(tuple_part) << ", "
|
|
<< std::get<1>(tuple_part) << ", "
|
|
<< std::get<2>(tuple_part) << ", "
|
|
<< std::get<3>(tuple_part) << ")";
|
|
std::cout << ", ("
|
|
<< pair_part.first << ", "
|
|
<< pair_part.second << ")"
|
|
<< std::endl;
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
|
|
std::array<float, 3> undistort_point(
|
|
const std::array<float, 3> &raw_point, const CachedCamera &icam)
|
|
{
|
|
std::array<float, 3> point = raw_point;
|
|
const float ifx_old = 1.0f / icam.cam.K[0][0];
|
|
const float ify_old = 1.0f / icam.cam.K[1][1];
|
|
const float cx_old = icam.cam.K[0][2];
|
|
const float cy_old = icam.cam.K[1][2];
|
|
const float fx_new = icam.newK[0][0];
|
|
const float fy_new = icam.newK[1][1];
|
|
const float cx_new = icam.newK[0][2];
|
|
const float cy_new = icam.newK[1][2];
|
|
|
|
point[0] = (point[0] - cx_old) * ifx_old;
|
|
point[1] = (point[1] - cy_old) * ify_old;
|
|
|
|
if (icam.cam.model == CameraModel::Fisheye)
|
|
{
|
|
undistort_point_fisheye(point, icam.cam.DC);
|
|
}
|
|
else
|
|
{
|
|
undistort_point_pinhole(point, icam.cam.DC);
|
|
}
|
|
|
|
point[0] = (point[0] * fx_new) + cx_new;
|
|
point[1] = (point[1] * fy_new) + cy_new;
|
|
|
|
const float mask_offset = (icam.cam.width + icam.cam.height) / 20.0f;
|
|
if (point[0] < -mask_offset || point[0] >= icam.cam.width + mask_offset ||
|
|
point[1] < -mask_offset || point[1] >= icam.cam.height + mask_offset)
|
|
{
|
|
point = {0.0f, 0.0f, 0.0f};
|
|
}
|
|
|
|
return point;
|
|
}
|
|
|
|
class PackedPoseStore2D
|
|
{
|
|
public:
|
|
PackedPoseStore2D(
|
|
const PoseBatch2DView &source,
|
|
const std::vector<CachedCamera> &internal_cameras)
|
|
: person_counts(source.num_views),
|
|
view_offsets(source.num_views),
|
|
num_views(source.num_views),
|
|
num_joints(source.num_joints)
|
|
{
|
|
for (size_t view = 0; view < num_views; ++view)
|
|
{
|
|
person_counts[view] = source.person_counts[view];
|
|
if (person_counts[view] > source.max_persons)
|
|
{
|
|
throw std::invalid_argument(
|
|
"person_counts entries must not exceed the padded person dimension.");
|
|
}
|
|
view_offsets[view] = total_persons;
|
|
total_persons += person_counts[view];
|
|
}
|
|
|
|
data.resize(total_persons * num_joints * 3);
|
|
for (size_t view = 0; view < num_views; ++view)
|
|
{
|
|
for (size_t person = 0; person < person_counts[view]; ++person)
|
|
{
|
|
const size_t packed_pose = pose_index(view, person);
|
|
for (size_t joint = 0; joint < num_joints; ++joint)
|
|
{
|
|
const std::array<float, 3> undistorted = undistort_point(
|
|
{
|
|
source.at(view, person, joint, 0),
|
|
source.at(view, person, joint, 1),
|
|
source.at(view, person, joint, 2),
|
|
},
|
|
internal_cameras[view]);
|
|
for (size_t coord = 0; coord < 3; ++coord)
|
|
{
|
|
at(packed_pose, joint, coord) = undistorted[coord];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
size_t pose_index(size_t view, size_t person) const
|
|
{
|
|
return view_offsets[view] + person;
|
|
}
|
|
|
|
float &at(size_t pose, size_t joint, size_t coord)
|
|
{
|
|
return data[packed_pose_offset(pose, joint, coord, num_joints)];
|
|
}
|
|
|
|
const float &at(size_t pose, size_t joint, size_t coord) const
|
|
{
|
|
return data[packed_pose_offset(pose, joint, coord, num_joints)];
|
|
}
|
|
|
|
Pose2D pose(size_t view, size_t person) const
|
|
{
|
|
const size_t packed_pose = pose_index(view, person);
|
|
Pose2D result(num_joints);
|
|
for (size_t joint = 0; joint < num_joints; ++joint)
|
|
{
|
|
for (size_t coord = 0; coord < 3; ++coord)
|
|
{
|
|
result[joint][coord] = at(packed_pose, joint, coord);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
Pose2D pose(size_t view, size_t person, const std::vector<size_t> &joint_indices) const
|
|
{
|
|
const size_t packed_pose = pose_index(view, person);
|
|
Pose2D result(joint_indices.size());
|
|
for (size_t joint = 0; joint < joint_indices.size(); ++joint)
|
|
{
|
|
const size_t source_joint = joint_indices[joint];
|
|
for (size_t coord = 0; coord < 3; ++coord)
|
|
{
|
|
result[joint][coord] = at(packed_pose, source_joint, coord);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
std::vector<uint32_t> person_counts;
|
|
std::vector<size_t> view_offsets;
|
|
size_t num_views = 0;
|
|
size_t num_joints = 0;
|
|
size_t total_persons = 0;
|
|
|
|
private:
|
|
std::vector<float> data;
|
|
};
|
|
|
|
struct PreparedInputs
|
|
{
|
|
std::vector<CachedCamera> internal_cameras;
|
|
std::vector<size_t> core_joint_idx;
|
|
std::vector<std::array<size_t, 2>> core_limbs_idx;
|
|
PackedPoseStore2D packed_poses;
|
|
|
|
PreparedInputs(
|
|
std::vector<CachedCamera> cameras_in,
|
|
std::vector<size_t> core_joint_idx_in,
|
|
std::vector<std::array<size_t, 2>> core_limbs_idx_in,
|
|
PackedPoseStore2D packed_poses_in)
|
|
: internal_cameras(std::move(cameras_in)),
|
|
core_joint_idx(std::move(core_joint_idx_in)),
|
|
core_limbs_idx(std::move(core_limbs_idx_in)),
|
|
packed_poses(std::move(packed_poses_in))
|
|
{
|
|
}
|
|
};
|
|
|
|
struct PreviousProjectionCache
|
|
{
|
|
std::vector<std::vector<Pose2D>> projected_poses;
|
|
std::vector<std::vector<std::vector<float>>> projected_dists;
|
|
std::vector<Pose3D> core_poses;
|
|
};
|
|
|
|
constexpr std::array<std::string_view, 12> kCoreJoints = {
|
|
"shoulder_left",
|
|
"shoulder_right",
|
|
"hip_left",
|
|
"hip_right",
|
|
"elbow_left",
|
|
"elbow_right",
|
|
"knee_left",
|
|
"knee_right",
|
|
"wrist_left",
|
|
"wrist_right",
|
|
"ankle_left",
|
|
"ankle_right",
|
|
};
|
|
|
|
constexpr std::array<std::pair<std::string_view, std::string_view>, 8> kCoreLimbs = {{
|
|
{"knee_left", "ankle_left"},
|
|
{"hip_left", "knee_left"},
|
|
{"hip_right", "knee_right"},
|
|
{"knee_right", "ankle_right"},
|
|
{"elbow_left", "wrist_left"},
|
|
{"elbow_right", "wrist_right"},
|
|
{"shoulder_left", "elbow_left"},
|
|
{"shoulder_right", "elbow_right"},
|
|
}};
|
|
|
|
std::vector<PairCandidate> build_pair_candidates_from_packed(const PackedPoseStore2D &packed_poses);
|
|
PreviousProjectionCache project_previous_poses(
|
|
const PoseBatch3DView &previous_poses_3d,
|
|
const std::vector<CachedCamera> &internal_cameras,
|
|
const std::vector<size_t> &core_joint_idx);
|
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl(
|
|
const PackedPoseStore2D &packed_poses,
|
|
const std::vector<CachedCamera> &internal_cameras,
|
|
const std::vector<size_t> &core_joint_idx,
|
|
const std::vector<PairCandidate> &pairs,
|
|
const TriangulationOptions &options,
|
|
const PoseBatch3DView &previous_poses_3d);
|
|
float calc_pose_score(
|
|
const Pose2D &pose,
|
|
const Pose2D &reference_pose,
|
|
const std::vector<float> &dists,
|
|
const CachedCamera &icam);
|
|
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses(
|
|
const std::vector<Pose3D> &poses_3d,
|
|
const CachedCamera &icam,
|
|
bool calc_dists);
|
|
std::vector<float> score_projection(
|
|
const Pose2D &pose,
|
|
const Pose2D &repro,
|
|
const std::vector<float> &dists,
|
|
const std::vector<bool> &mask,
|
|
float iscale);
|
|
std::pair<Pose3D, float> triangulate_and_score(
|
|
const Pose2D &pose1,
|
|
const Pose2D &pose2,
|
|
const CachedCamera &cam1,
|
|
const CachedCamera &cam2,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
|
|
std::vector<GroupedPose> calc_grouping(
|
|
const std::vector<PosePair> &all_pairs,
|
|
const std::vector<std::pair<Pose3D, float>> &all_scored_poses,
|
|
float min_score);
|
|
Pose3D merge_group(
|
|
const std::vector<Pose3D> &poses_3d,
|
|
float min_score,
|
|
size_t num_cams);
|
|
void add_extra_joints(
|
|
std::vector<Pose3D> &poses,
|
|
const std::vector<std::string> &joint_names);
|
|
void filter_poses(
|
|
std::vector<Pose3D> &poses,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<size_t> &core_joint_idx,
|
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
|
|
void add_missing_joints(
|
|
std::vector<Pose3D> &poses,
|
|
const std::vector<std::string> &joint_names,
|
|
float min_match_score);
|
|
void replace_far_joints(
|
|
std::vector<Pose3D> &poses,
|
|
const std::vector<std::string> &joint_names,
|
|
float min_match_score);
|
|
TriangulationTrace triangulate_debug_impl(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::string> &joint_names,
|
|
const PoseBatch3DView *previous_poses_3d,
|
|
const TriangulationOptions &options);
|
|
|
|
PreparedInputs prepare_inputs(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::vector<std::string> &joint_names)
|
|
{
|
|
if (poses_2d.num_views == 0)
|
|
{
|
|
throw std::invalid_argument("No 2D poses provided.");
|
|
}
|
|
if (poses_2d.person_counts == nullptr)
|
|
{
|
|
throw std::invalid_argument("person_counts must not be null.");
|
|
}
|
|
if (poses_2d.num_views != cameras.size())
|
|
{
|
|
throw std::invalid_argument("Number of cameras and 2D poses must be the same.");
|
|
}
|
|
if (joint_names.size() != poses_2d.num_joints)
|
|
{
|
|
throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
|
|
}
|
|
if (poses_2d.num_joints == 0)
|
|
{
|
|
throw std::invalid_argument("At least one joint is required.");
|
|
}
|
|
if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr)
|
|
{
|
|
throw std::invalid_argument("poses_2d data must not be null.");
|
|
}
|
|
for (const auto &joint : kCoreJoints)
|
|
{
|
|
if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end())
|
|
{
|
|
throw std::invalid_argument("Core joint '" + std::string(joint) + "' not found in joint names.");
|
|
}
|
|
}
|
|
|
|
std::vector<CachedCamera> internal_cameras;
|
|
internal_cameras.reserve(cameras.size());
|
|
for (const auto &camera : cameras)
|
|
{
|
|
internal_cameras.push_back(cache_camera(camera));
|
|
}
|
|
|
|
std::vector<size_t> core_joint_idx;
|
|
core_joint_idx.reserve(kCoreJoints.size());
|
|
for (const auto &joint : kCoreJoints)
|
|
{
|
|
auto it = std::find(joint_names.begin(), joint_names.end(), joint);
|
|
core_joint_idx.push_back(static_cast<size_t>(std::distance(joint_names.begin(), it)));
|
|
}
|
|
|
|
std::vector<std::array<size_t, 2>> core_limbs_idx;
|
|
core_limbs_idx.reserve(kCoreLimbs.size());
|
|
for (const auto &limb : kCoreLimbs)
|
|
{
|
|
auto it1 = std::find(kCoreJoints.begin(), kCoreJoints.end(), limb.first);
|
|
auto it2 = std::find(kCoreJoints.begin(), kCoreJoints.end(), limb.second);
|
|
core_limbs_idx.push_back(
|
|
{static_cast<size_t>(std::distance(kCoreJoints.begin(), it1)),
|
|
static_cast<size_t>(std::distance(kCoreJoints.begin(), it2))});
|
|
}
|
|
|
|
PackedPoseStore2D packed_poses(poses_2d, internal_cameras);
|
|
return PreparedInputs(
|
|
std::move(internal_cameras),
|
|
std::move(core_joint_idx),
|
|
std::move(core_limbs_idx),
|
|
std::move(packed_poses));
|
|
}
|
|
|
|
std::vector<PairCandidate> build_pair_candidates_from_packed(const PackedPoseStore2D &packed_poses)
|
|
{
|
|
std::vector<PairCandidate> pairs;
|
|
for (size_t view1 = 0; view1 < packed_poses.num_views; ++view1)
|
|
{
|
|
for (size_t view2 = view1 + 1; view2 < packed_poses.num_views; ++view2)
|
|
{
|
|
for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1)
|
|
{
|
|
for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2)
|
|
{
|
|
pairs.push_back(PairCandidate {
|
|
static_cast<int>(view1),
|
|
static_cast<int>(view2),
|
|
static_cast<int>(person1),
|
|
static_cast<int>(person2),
|
|
static_cast<int>(packed_poses.pose_index(view1, person1)),
|
|
static_cast<int>(packed_poses.pose_index(view2, person2)),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return pairs;
|
|
}
|
|
|
|
PreviousProjectionCache project_previous_poses(
|
|
const PoseBatch3DView &previous_poses_3d,
|
|
const std::vector<CachedCamera> &internal_cameras,
|
|
const std::vector<size_t> &core_joint_idx)
|
|
{
|
|
PreviousProjectionCache cache;
|
|
cache.core_poses.resize(previous_poses_3d.num_persons);
|
|
for (size_t person = 0; person < previous_poses_3d.num_persons; ++person)
|
|
{
|
|
Pose3D pose(core_joint_idx.size(), {0.0f, 0.0f, 0.0f, 0.0f});
|
|
for (size_t joint = 0; joint < core_joint_idx.size(); ++joint)
|
|
{
|
|
const size_t source_joint = core_joint_idx[joint];
|
|
for (size_t coord = 0; coord < 4; ++coord)
|
|
{
|
|
pose[joint][coord] = previous_poses_3d.at(person, source_joint, coord);
|
|
}
|
|
}
|
|
cache.core_poses[person] = std::move(pose);
|
|
}
|
|
|
|
cache.projected_poses.resize(internal_cameras.size());
|
|
cache.projected_dists.resize(internal_cameras.size());
|
|
for (size_t view = 0; view < internal_cameras.size(); ++view)
|
|
{
|
|
auto [projected_poses, projected_dists] = project_poses(cache.core_poses, internal_cameras[view], true);
|
|
cache.projected_poses[view] = std::move(projected_poses);
|
|
cache.projected_dists[view] = std::move(projected_dists);
|
|
}
|
|
|
|
return cache;
|
|
}
|
|
|
|
float calc_pose_score(
|
|
const Pose2D &pose,
|
|
const Pose2D &reference_pose,
|
|
const std::vector<float> &dists,
|
|
const CachedCamera &icam)
|
|
{
|
|
const float min_score = 0.1f;
|
|
std::vector<bool> mask(pose.size(), false);
|
|
size_t valid_count = 0;
|
|
for (size_t joint = 0; joint < pose.size(); ++joint)
|
|
{
|
|
mask[joint] = pose[joint][2] > min_score && reference_pose[joint][2] > min_score;
|
|
if (mask[joint])
|
|
{
|
|
++valid_count;
|
|
}
|
|
}
|
|
if (valid_count < 3)
|
|
{
|
|
return 0.0f;
|
|
}
|
|
|
|
const float iscale = (icam.cam.width + icam.cam.height) * 0.5f;
|
|
std::vector<float> scores = score_projection(pose, reference_pose, dists, mask, iscale);
|
|
const size_t drop_k = static_cast<size_t>(pose.size() * 0.2f);
|
|
if (drop_k > 0)
|
|
{
|
|
std::partial_sort(scores.begin(), scores.begin() + drop_k, scores.end());
|
|
}
|
|
|
|
float total = 0.0f;
|
|
for (size_t i = drop_k; i < scores.size(); ++i)
|
|
{
|
|
total += scores[i];
|
|
}
|
|
return total / static_cast<float>(scores.size() - drop_k);
|
|
}
|
|
|
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl(
|
|
const PackedPoseStore2D &packed_poses,
|
|
const std::vector<CachedCamera> &internal_cameras,
|
|
const std::vector<size_t> &core_joint_idx,
|
|
const std::vector<PairCandidate> &pairs,
|
|
const TriangulationOptions &options,
|
|
const PoseBatch3DView &previous_poses_3d)
|
|
{
|
|
PreviousPoseFilterDebug debug;
|
|
debug.used_previous_poses = true;
|
|
debug.matches.resize(pairs.size());
|
|
|
|
if (previous_poses_3d.num_persons == 0)
|
|
{
|
|
debug.kept_pair_indices.resize(pairs.size());
|
|
std::iota(debug.kept_pair_indices.begin(), debug.kept_pair_indices.end(), 0);
|
|
debug.kept_pairs = pairs;
|
|
for (auto &match : debug.matches)
|
|
{
|
|
match.decision = "no_previous_poses";
|
|
}
|
|
return debug;
|
|
}
|
|
|
|
const PreviousProjectionCache projected_previous =
|
|
project_previous_poses(previous_poses_3d, internal_cameras, core_joint_idx);
|
|
const float threshold = options.min_match_score;
|
|
|
|
for (size_t pair_index = 0; pair_index < pairs.size(); ++pair_index)
|
|
{
|
|
const PairCandidate &pair = pairs[pair_index];
|
|
const Pose2D pose1_core = packed_poses.pose(
|
|
static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), core_joint_idx);
|
|
const Pose2D pose2_core = packed_poses.pose(
|
|
static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), core_joint_idx);
|
|
|
|
PreviousPoseMatch best_match;
|
|
bool found_decision = false;
|
|
for (size_t previous_index = 0; previous_index < previous_poses_3d.num_persons; ++previous_index)
|
|
{
|
|
const float score_view1 = calc_pose_score(
|
|
pose1_core,
|
|
projected_previous.projected_poses[static_cast<size_t>(pair.view1)][previous_index],
|
|
projected_previous.projected_dists[static_cast<size_t>(pair.view1)][previous_index],
|
|
internal_cameras[static_cast<size_t>(pair.view1)]);
|
|
const float score_view2 = calc_pose_score(
|
|
pose2_core,
|
|
projected_previous.projected_poses[static_cast<size_t>(pair.view2)][previous_index],
|
|
projected_previous.projected_dists[static_cast<size_t>(pair.view2)][previous_index],
|
|
internal_cameras[static_cast<size_t>(pair.view2)]);
|
|
const bool matched_view1 = score_view1 > threshold;
|
|
const bool matched_view2 = score_view2 > threshold;
|
|
|
|
if (matched_view1 && matched_view2)
|
|
{
|
|
best_match = PreviousPoseMatch {
|
|
static_cast<int>(previous_index),
|
|
score_view1,
|
|
score_view2,
|
|
true,
|
|
true,
|
|
true,
|
|
"matched_previous_pose",
|
|
};
|
|
found_decision = true;
|
|
break;
|
|
}
|
|
|
|
if (matched_view1 || matched_view2)
|
|
{
|
|
best_match = PreviousPoseMatch {
|
|
static_cast<int>(previous_index),
|
|
score_view1,
|
|
score_view2,
|
|
matched_view1,
|
|
matched_view2,
|
|
false,
|
|
"dropped_single_view_match",
|
|
};
|
|
found_decision = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found_decision)
|
|
{
|
|
best_match.decision = "new_person_candidate";
|
|
best_match.kept = true;
|
|
}
|
|
|
|
debug.matches[pair_index] = best_match;
|
|
if (best_match.kept)
|
|
{
|
|
debug.kept_pair_indices.push_back(static_cast<int>(pair_index));
|
|
debug.kept_pairs.push_back(pair);
|
|
}
|
|
}
|
|
|
|
return debug;
|
|
}
|
|
|
|
TriangulationTrace triangulate_debug_impl(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::string> &joint_names,
|
|
const PoseBatch3DView *previous_poses_3d,
|
|
const TriangulationOptions &options)
|
|
{
|
|
TriangulationTrace trace;
|
|
trace.final_poses.num_joints = joint_names.size();
|
|
|
|
if (previous_poses_3d != nullptr && previous_poses_3d->num_persons > 0 &&
|
|
previous_poses_3d->num_joints != joint_names.size())
|
|
{
|
|
throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
|
|
}
|
|
|
|
PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
|
|
trace.pairs = build_pair_candidates_from_packed(prepared.packed_poses);
|
|
if (trace.pairs.empty())
|
|
{
|
|
return trace;
|
|
}
|
|
|
|
std::vector<int> active_pair_indices(trace.pairs.size());
|
|
std::iota(active_pair_indices.begin(), active_pair_indices.end(), 0);
|
|
std::vector<PairCandidate> active_pairs = trace.pairs;
|
|
|
|
if (previous_poses_3d != nullptr)
|
|
{
|
|
trace.previous_filter = filter_pairs_with_previous_poses_impl(
|
|
prepared.packed_poses,
|
|
prepared.internal_cameras,
|
|
prepared.core_joint_idx,
|
|
trace.pairs,
|
|
options,
|
|
*previous_poses_3d);
|
|
active_pair_indices = trace.previous_filter.kept_pair_indices;
|
|
active_pairs = trace.previous_filter.kept_pairs;
|
|
if (active_pairs.empty())
|
|
{
|
|
return trace;
|
|
}
|
|
}
|
|
|
|
trace.core_proposals.reserve(active_pairs.size());
|
|
std::vector<PosePair> kept_pose_pairs;
|
|
std::vector<std::pair<Pose3D, float>> kept_scored_poses;
|
|
std::vector<int> kept_core_indices;
|
|
for (size_t i = 0; i < active_pairs.size(); ++i)
|
|
{
|
|
const PairCandidate &pair = active_pairs[i];
|
|
const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
|
|
const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
|
|
const Pose2D pose1_core = prepared.packed_poses.pose(
|
|
static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1), prepared.core_joint_idx);
|
|
const Pose2D pose2_core = prepared.packed_poses.pose(
|
|
static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2), prepared.core_joint_idx);
|
|
|
|
auto [pose3d, score] = triangulate_and_score(
|
|
pose1_core, pose2_core, cam1, cam2, roomparams, prepared.core_limbs_idx);
|
|
|
|
CoreProposalDebug proposal;
|
|
proposal.pair_index = active_pair_indices[i];
|
|
proposal.pair = pair;
|
|
proposal.pose_3d = pose3d;
|
|
proposal.score = score;
|
|
proposal.kept = score >= options.min_match_score;
|
|
proposal.drop_reason = proposal.kept ? "kept" : "below_min_match_score";
|
|
trace.core_proposals.push_back(proposal);
|
|
|
|
if (!proposal.kept)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
kept_core_indices.push_back(static_cast<int>(trace.core_proposals.size() - 1));
|
|
kept_scored_poses.emplace_back(std::move(pose3d), score);
|
|
kept_pose_pairs.emplace_back(
|
|
std::make_tuple(pair.view1, pair.view2, pair.person1, pair.person2),
|
|
std::make_pair(pair.global_person1, pair.global_person2));
|
|
}
|
|
|
|
if (kept_pose_pairs.empty())
|
|
{
|
|
return trace;
|
|
}
|
|
|
|
auto groups = calc_grouping(kept_pose_pairs, kept_scored_poses, options.min_match_score);
|
|
trace.grouping.initial_groups.reserve(groups.size());
|
|
for (const auto &group : groups)
|
|
{
|
|
ProposalGroupDebug debug_group;
|
|
debug_group.center = std::get<0>(group);
|
|
debug_group.pose_3d = std::get<1>(group);
|
|
for (const int index : std::get<2>(group))
|
|
{
|
|
debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
|
|
}
|
|
trace.grouping.initial_groups.push_back(std::move(debug_group));
|
|
}
|
|
|
|
for (size_t i = groups.size(); i > 0; --i)
|
|
{
|
|
if (std::get<2>(groups[i - 1]).size() < options.min_group_size)
|
|
{
|
|
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
|
}
|
|
}
|
|
if (groups.empty())
|
|
{
|
|
return trace;
|
|
}
|
|
|
|
std::map<std::tuple<int, int, int>, std::vector<size_t>> pairs_map;
|
|
for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
|
|
{
|
|
const auto &pair = kept_pose_pairs[i];
|
|
const auto &mid1 = std::make_tuple(
|
|
std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second));
|
|
const auto &mid2 = std::make_tuple(
|
|
std::get<0>(pair.first), std::get<1>(pair.first), std::get<1>(pair.second));
|
|
pairs_map[mid1].push_back(i);
|
|
pairs_map[mid2].push_back(i);
|
|
}
|
|
|
|
std::vector<size_t> group_map(kept_pose_pairs.size());
|
|
for (size_t i = 0; i < groups.size(); ++i)
|
|
{
|
|
for (const int index : std::get<2>(groups[i]))
|
|
{
|
|
group_map[static_cast<size_t>(index)] = i;
|
|
}
|
|
}
|
|
|
|
std::set<size_t> drop_indices;
|
|
for (auto &pair_entry : pairs_map)
|
|
{
|
|
auto &indices = pair_entry.second;
|
|
if (indices.size() <= 1)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
std::vector<size_t> group_sizes;
|
|
std::vector<float> pair_scores;
|
|
group_sizes.reserve(indices.size());
|
|
pair_scores.reserve(indices.size());
|
|
for (const size_t index : indices)
|
|
{
|
|
group_sizes.push_back(std::get<2>(groups[group_map[index]]).size());
|
|
pair_scores.push_back(kept_scored_poses[index].second);
|
|
}
|
|
|
|
std::vector<size_t> indices_sorted(indices.size());
|
|
std::iota(indices_sorted.begin(), indices_sorted.end(), 0);
|
|
std::sort(indices_sorted.begin(), indices_sorted.end(),
|
|
[&group_sizes, &pair_scores](size_t a, size_t b)
|
|
{
|
|
if (group_sizes[a] != group_sizes[b])
|
|
{
|
|
return group_sizes[a] > group_sizes[b];
|
|
}
|
|
return pair_scores[a] > pair_scores[b];
|
|
});
|
|
|
|
for (size_t j = 1; j < indices_sorted.size(); ++j)
|
|
{
|
|
drop_indices.insert(indices[indices_sorted[j]]);
|
|
}
|
|
}
|
|
|
|
std::vector<size_t> drop_list(drop_indices.begin(), drop_indices.end());
|
|
std::sort(drop_list.begin(), drop_list.end(), std::greater<size_t>());
|
|
for (const size_t drop_idx : drop_list)
|
|
{
|
|
trace.grouping.duplicate_pair_drops.push_back(kept_core_indices[drop_idx]);
|
|
kept_scored_poses.erase(kept_scored_poses.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
|
kept_pose_pairs.erase(kept_pose_pairs.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
|
kept_core_indices.erase(kept_core_indices.begin() + static_cast<std::ptrdiff_t>(drop_idx));
|
|
|
|
for (auto &group : groups)
|
|
{
|
|
auto &indices = std::get<2>(group);
|
|
auto it = std::find(indices.begin(), indices.end(), static_cast<int>(drop_idx));
|
|
if (it != indices.end())
|
|
{
|
|
indices.erase(it);
|
|
}
|
|
for (int &index : indices)
|
|
{
|
|
if (static_cast<size_t>(index) > drop_idx)
|
|
{
|
|
--index;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (size_t i = groups.size(); i > 0; --i)
|
|
{
|
|
if (std::get<2>(groups[i - 1]).size() < options.min_group_size)
|
|
{
|
|
groups.erase(groups.begin() + static_cast<std::ptrdiff_t>(i - 1));
|
|
}
|
|
}
|
|
if (groups.empty())
|
|
{
|
|
return trace;
|
|
}
|
|
|
|
trace.grouping.groups.reserve(groups.size());
|
|
for (const auto &group : groups)
|
|
{
|
|
ProposalGroupDebug debug_group;
|
|
debug_group.center = std::get<0>(group);
|
|
debug_group.pose_3d = std::get<1>(group);
|
|
for (const int index : std::get<2>(group))
|
|
{
|
|
debug_group.proposal_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
|
|
}
|
|
trace.grouping.groups.push_back(std::move(debug_group));
|
|
}
|
|
|
|
std::vector<Pose3D> full_pose_buffer(kept_pose_pairs.size());
|
|
std::vector<int> full_proposal_index_by_core(trace.core_proposals.size(), -1);
|
|
trace.full_proposals.reserve(kept_pose_pairs.size());
|
|
for (size_t i = 0; i < kept_pose_pairs.size(); ++i)
|
|
{
|
|
const auto &pair_ids = kept_pose_pairs[i].first;
|
|
const PairCandidate pair = {
|
|
std::get<0>(pair_ids),
|
|
std::get<1>(pair_ids),
|
|
std::get<2>(pair_ids),
|
|
std::get<3>(pair_ids),
|
|
kept_pose_pairs[i].second.first,
|
|
kept_pose_pairs[i].second.second,
|
|
};
|
|
const CachedCamera &cam1 = prepared.internal_cameras[static_cast<size_t>(pair.view1)];
|
|
const CachedCamera &cam2 = prepared.internal_cameras[static_cast<size_t>(pair.view2)];
|
|
Pose2D pose1 = prepared.packed_poses.pose(static_cast<size_t>(pair.view1), static_cast<size_t>(pair.person1));
|
|
Pose2D pose2 = prepared.packed_poses.pose(static_cast<size_t>(pair.view2), static_cast<size_t>(pair.person2));
|
|
|
|
auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams, {});
|
|
(void)score;
|
|
for (size_t joint = 0; joint < pose3d.size(); ++joint)
|
|
{
|
|
const float score1 = pose1[joint][2];
|
|
const float score2 = pose2[joint][2];
|
|
const float min_score = 0.1f;
|
|
|
|
if (score1 > min_score && score2 > min_score)
|
|
{
|
|
const float score_projection = (score1 + score2) * 0.5f;
|
|
const float score_triangulation = pose3d[joint][3];
|
|
pose3d[joint][3] = 0.9f * score_triangulation + 0.1f * score_projection;
|
|
}
|
|
}
|
|
|
|
full_pose_buffer[i] = pose3d;
|
|
full_proposal_index_by_core[kept_core_indices[i]] = static_cast<int>(trace.full_proposals.size());
|
|
trace.full_proposals.push_back(FullProposalDebug {
|
|
kept_core_indices[i],
|
|
pair,
|
|
std::move(pose3d),
|
|
});
|
|
}
|
|
|
|
std::vector<Pose3D> final_poses_3d(groups.size());
|
|
trace.merge.group_proposal_indices.reserve(groups.size());
|
|
trace.merge.merged_poses.reserve(groups.size());
|
|
for (size_t i = 0; i < groups.size(); ++i)
|
|
{
|
|
std::vector<Pose3D> poses;
|
|
std::vector<int> source_core_indices;
|
|
poses.reserve(std::get<2>(groups[i]).size());
|
|
source_core_indices.reserve(std::get<2>(groups[i]).size());
|
|
for (const int index : std::get<2>(groups[i]))
|
|
{
|
|
poses.push_back(full_pose_buffer[static_cast<size_t>(index)]);
|
|
source_core_indices.push_back(kept_core_indices[static_cast<size_t>(index)]);
|
|
}
|
|
final_poses_3d[i] = merge_group(poses, options.min_match_score, prepared.internal_cameras.size());
|
|
trace.merge.group_proposal_indices.push_back(std::move(source_core_indices));
|
|
trace.merge.merged_poses.push_back(final_poses_3d[i]);
|
|
}
|
|
|
|
add_extra_joints(final_poses_3d, joint_names);
|
|
filter_poses(final_poses_3d, roomparams, prepared.core_joint_idx, prepared.core_limbs_idx);
|
|
add_missing_joints(final_poses_3d, joint_names, options.min_match_score);
|
|
replace_far_joints(final_poses_3d, joint_names, options.min_match_score);
|
|
|
|
size_t valid_persons = 0;
|
|
for (const auto &pose : final_poses_3d)
|
|
{
|
|
const bool is_valid = std::any_of(
|
|
pose.begin(),
|
|
pose.end(),
|
|
[&options](const std::array<float, 4> &joint)
|
|
{
|
|
return joint[3] > options.min_match_score;
|
|
});
|
|
if (is_valid)
|
|
{
|
|
++valid_persons;
|
|
}
|
|
}
|
|
|
|
trace.final_poses.num_persons = valid_persons;
|
|
trace.final_poses.data.resize(valid_persons * trace.final_poses.num_joints * 4);
|
|
|
|
size_t person_index = 0;
|
|
for (const auto &pose : final_poses_3d)
|
|
{
|
|
const bool is_valid = std::any_of(
|
|
pose.begin(),
|
|
pose.end(),
|
|
[&options](const std::array<float, 4> &joint)
|
|
{
|
|
return joint[3] > options.min_match_score;
|
|
});
|
|
if (!is_valid)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
for (size_t joint = 0; joint < trace.final_poses.num_joints; ++joint)
|
|
{
|
|
for (size_t coord = 0; coord < 4; ++coord)
|
|
{
|
|
trace.final_poses.at(person_index, joint, coord) = pose[joint][coord];
|
|
}
|
|
}
|
|
++person_index;
|
|
}
|
|
|
|
return trace;
|
|
}
|
|
|
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::vector<std::string> &joint_names,
|
|
const PoseBatch3DView &previous_poses_3d,
|
|
const TriangulationOptions &options)
|
|
{
|
|
if (previous_poses_3d.num_persons > 0 && previous_poses_3d.num_joints != joint_names.size())
|
|
{
|
|
throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names.");
|
|
}
|
|
|
|
PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names);
|
|
const std::vector<PairCandidate> pairs = build_pair_candidates_from_packed(prepared.packed_poses);
|
|
if (pairs.empty())
|
|
{
|
|
PreviousPoseFilterDebug empty;
|
|
empty.used_previous_poses = true;
|
|
return empty;
|
|
}
|
|
|
|
return filter_pairs_with_previous_poses_impl(
|
|
prepared.packed_poses,
|
|
prepared.internal_cameras,
|
|
prepared.core_joint_idx,
|
|
pairs,
|
|
options,
|
|
previous_poses_3d);
|
|
}
|
|
|
|
std::tuple<std::vector<Pose2D>, std::vector<std::vector<float>>> project_poses(
|
|
const std::vector<Pose3D> &poses_3d,
|
|
const CachedCamera &icam,
|
|
bool calc_dists)
|
|
{
|
|
if (poses_3d.empty())
|
|
{
|
|
return std::make_tuple(std::vector<Pose2D> {}, std::vector<std::vector<float>> {});
|
|
}
|
|
|
|
const size_t num_persons = poses_3d.size();
|
|
const size_t num_joints = poses_3d[0].size();
|
|
|
|
std::vector<Pose2D> poses_2d(num_persons);
|
|
std::vector<std::vector<float>> all_dists(num_persons);
|
|
|
|
const std::array<std::array<float, 3>, 3> &K = icam.newK;
|
|
const std::array<std::array<float, 3>, 3> &R = icam.invR;
|
|
const std::array<std::array<float, 1>, 3> &T = icam.cam.T;
|
|
|
|
for (size_t i = 0; i < num_persons; ++i)
|
|
{
|
|
const Pose3D &body3D = poses_3d[i];
|
|
Pose2D body2D(num_joints, std::array<float, 3>{0.0f, 0.0f, 0.0f});
|
|
std::vector<float> dists(num_joints, 0.0f);
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float x = body3D[j][0];
|
|
float y = body3D[j][1];
|
|
float z = body3D[j][2];
|
|
float score = body3D[j][3];
|
|
|
|
const float xt = x - T[0][0];
|
|
const float yt = y - T[1][0];
|
|
const float zt = z - T[2][0];
|
|
float x_cam = xt * R[0][0] + yt * R[0][1] + zt * R[0][2];
|
|
float y_cam = xt * R[1][0] + yt * R[1][1] + zt * R[1][2];
|
|
float z_cam = xt * R[2][0] + yt * R[2][1] + zt * R[2][2];
|
|
|
|
if (z_cam <= 0.0f)
|
|
{
|
|
x_cam = 0.0f;
|
|
y_cam = 0.0f;
|
|
z_cam = 0.0f;
|
|
}
|
|
|
|
dists[j] = calc_dists ? std::sqrt(x_cam * x_cam + y_cam * y_cam + z_cam * z_cam) : 0.0f;
|
|
|
|
float u = 0.0f;
|
|
float v = 0.0f;
|
|
if (z_cam > 0.0f)
|
|
{
|
|
u = (K[0][0] * x_cam + K[0][1] * y_cam + K[0][2] * z_cam) / z_cam;
|
|
v = (K[1][0] * x_cam + K[1][1] * y_cam + K[1][2] * z_cam) / z_cam;
|
|
}
|
|
|
|
if (u < 0.0f || u >= icam.cam.width || v < 0.0f || v >= icam.cam.height)
|
|
{
|
|
u = 0.0f;
|
|
v = 0.0f;
|
|
score = 0.0f;
|
|
}
|
|
|
|
body2D[j] = {u, v, score};
|
|
}
|
|
|
|
poses_2d[i] = std::move(body2D);
|
|
all_dists[i] = std::move(dists);
|
|
}
|
|
|
|
return std::make_tuple(poses_2d, all_dists);
|
|
}
|
|
|
|
std::vector<float> score_projection(
|
|
const Pose2D &pose,
|
|
const Pose2D &repro,
|
|
const std::vector<float> &dists,
|
|
const std::vector<bool> &mask,
|
|
float iscale)
|
|
{
|
|
const float min_score = 0.1f;
|
|
const size_t num_joints = pose.size();
|
|
|
|
std::vector<float> error(num_joints, 0.0f);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
const float dx = pose[i][0] - repro[i][0];
|
|
const float dy = pose[i][1] - repro[i][1];
|
|
float err = std::sqrt(dx * dx + dy * dy);
|
|
if (repro[i][2] < min_score)
|
|
{
|
|
err = iscale;
|
|
}
|
|
error[i] = err;
|
|
}
|
|
}
|
|
|
|
const float inv_iscale = 1.0f / iscale;
|
|
const float iscale_quarter = iscale / 4.0f;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
error[i] = std::max(0.0f, std::min(error[i], iscale_quarter)) * inv_iscale;
|
|
}
|
|
|
|
float mean_dist = 0.0f;
|
|
int count = 0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
mean_dist += dists[i];
|
|
++count;
|
|
}
|
|
}
|
|
if (count > 0)
|
|
{
|
|
mean_dist /= static_cast<float>(count);
|
|
}
|
|
|
|
const float dscale = std::sqrt(mean_dist / 3.5f);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
error[i] *= dscale;
|
|
}
|
|
|
|
std::vector<float> score(num_joints, 0.0f);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
score[i] = 1.0f / (1.0f + error[i] * 10.0f);
|
|
}
|
|
|
|
return score;
|
|
}
|
|
|
|
float dot(const std::array<float, 3> &a, const std::array<float, 3> &b)
|
|
{
|
|
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
|
|
}
|
|
std::array<float, 3> cross(const std::array<float, 3> &a, const std::array<float, 3> &b)
|
|
{
|
|
return {a[1] * b[2] - a[2] * b[1],
|
|
a[2] * b[0] - a[0] * b[2],
|
|
a[0] * b[1] - a[1] * b[0]};
|
|
}
|
|
std::array<float, 3> add(const std::array<float, 3> &a, const std::array<float, 3> &b)
|
|
{
|
|
return {a[0] + b[0], a[1] + b[1], a[2] + b[2]};
|
|
}
|
|
std::array<float, 3> subtract(const std::array<float, 3> &a, const std::array<float, 3> &b)
|
|
{
|
|
return {a[0] - b[0], a[1] - b[1], a[2] - b[2]};
|
|
}
|
|
std::array<float, 3> multiply(const std::array<float, 3> &a, float s)
|
|
{
|
|
return {a[0] * s, a[1] * s, a[2] * s};
|
|
}
|
|
std::array<float, 3> normalize(const std::array<float, 3> &v)
|
|
{
|
|
float norm = std::sqrt(dot(v, v));
|
|
if (norm < 1e-8f)
|
|
return v;
|
|
return multiply(v, 1.0f / norm);
|
|
}
|
|
std::array<float, 3> mat_mul_vec(
|
|
const std::array<std::array<float, 3>, 3> &M, const std::array<float, 3> &v)
|
|
{
|
|
std::array<float, 3> res = {M[0][0] * v[0] + M[0][1] * v[1] + M[0][2] * v[2],
|
|
M[1][0] * v[0] + M[1][1] * v[1] + M[1][2] * v[2],
|
|
M[2][0] * v[0] + M[2][1] * v[1] + M[2][2] * v[2]};
|
|
return res;
|
|
}
|
|
|
|
std::array<float, 3> calc_ray_dir(const CachedCamera &icam, const std::array<float, 2> &pt)
|
|
{
|
|
// Compute normalized ray direction from the point
|
|
std::array<float, 3> uv1 = {pt[0], pt[1], 1.0};
|
|
auto d = mat_mul_vec(icam.cam.R, mat_mul_vec(icam.invK, uv1));
|
|
auto ray_dir = normalize(d);
|
|
|
|
return ray_dir;
|
|
}
|
|
|
|
std::array<float, 4> triangulate_midpoint(
|
|
const CachedCamera &icam1,
|
|
const CachedCamera &icam2,
|
|
const std::array<float, 2> &pt1,
|
|
const std::array<float, 2> &pt2)
|
|
{
|
|
// Triangulate two points by computing their two rays and the midpoint of their closest approach
|
|
// See: https://en.wikipedia.org/wiki/Skew_lines#Nearest_points
|
|
|
|
// Obtain the camera centers and ray directions for both views
|
|
std::array<float, 3> p1 = icam1.center;
|
|
std::array<float, 3> p2 = icam2.center;
|
|
std::array<float, 3> d1 = calc_ray_dir(icam1, pt1);
|
|
std::array<float, 3> d2 = calc_ray_dir(icam2, pt2);
|
|
|
|
// Compute the perpendicular plane vectors
|
|
std::array<float, 3> n = cross(d1, d2);
|
|
std::array<float, 3> n1 = cross(d1, n);
|
|
std::array<float, 3> n2 = cross(d2, n);
|
|
|
|
// Calculate point on Line 1 nearest to Line 2
|
|
float t1 = dot(subtract(p2, p1), n2) / dot(d1, n2);
|
|
std::array<float, 3> c1 = add(p1, multiply(d1, t1));
|
|
|
|
// Calculate point on Line 2 nearest to Line 1
|
|
float t2 = dot(subtract(p1, p2), n1) / dot(d2, n1);
|
|
std::array<float, 3> c2 = add(p2, multiply(d2, t2));
|
|
|
|
// Compute midpoint between c1 and c2.
|
|
std::array<float, 3> midpoint = multiply(add(c1, c2), 0.5);
|
|
float dist = std::sqrt(dot(subtract(c1, c2), subtract(c1, c2)));
|
|
|
|
std::array<float, 4> result = {midpoint[0], midpoint[1], midpoint[2], dist};
|
|
return result;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::pair<std::vector<std::array<float, 4>>, float> triangulate_and_score(
|
|
const std::vector<std::array<float, 3>> &pose1,
|
|
const std::vector<std::array<float, 3>> &pose2,
|
|
const CachedCamera &cam1,
|
|
const CachedCamera &cam2,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
|
|
{
|
|
const float min_score = 0.1;
|
|
const size_t num_joints = pose1.size();
|
|
|
|
// Create mask for valid points
|
|
std::vector<bool> mask;
|
|
mask.resize(num_joints);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (pose1[i][2] <= min_score || pose2[i][2] <= min_score)
|
|
{
|
|
mask[i] = false;
|
|
}
|
|
else
|
|
{
|
|
mask[i] = true;
|
|
}
|
|
}
|
|
|
|
// If too few joints are visible, return a low score
|
|
int num_visible = 0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
num_visible++;
|
|
}
|
|
}
|
|
if (num_visible < 3)
|
|
{
|
|
std::vector<std::array<float, 4>> empty(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
float score = 0.0;
|
|
return std::make_pair(empty, score);
|
|
}
|
|
|
|
// Use midpoint triangulation instead of cv::triangulatePoints because it is much faster,
|
|
// while having almost the same accuracy.
|
|
std::vector<std::array<float, 4>> pose3d(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
auto &pt1 = pose1[i];
|
|
auto &pt2 = pose2[i];
|
|
std::array<float, 4> pt3d = triangulate_midpoint(
|
|
cam1, cam2, {pt1[0], pt1[1]}, {pt2[0], pt2[1]});
|
|
|
|
float score = std::max(0.0, 1.0 - pt3d[3]);
|
|
pose3d[i] = {pt3d[0], pt3d[1], pt3d[2], score};
|
|
}
|
|
}
|
|
|
|
// Check if mean of the points is inside the room bounds
|
|
std::array<float, 3> mean = {0.0, 0.0, 0.0};
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
mean[0] += pose3d[i][0];
|
|
mean[1] += pose3d[i][1];
|
|
mean[2] += pose3d[i][2];
|
|
}
|
|
}
|
|
float inv_num_vis = 1.0 / num_visible;
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
mean[j] *= inv_num_vis;
|
|
}
|
|
const std::array<float, 3> &room_size = roomparams[0];
|
|
const std::array<float, 3> &room_center = roomparams[1];
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
if (mean[j] > room_center[j] + room_size[j] / 2.0 ||
|
|
mean[j] < room_center[j] - room_size[j] / 2.0)
|
|
{
|
|
// Very low score if outside room
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
pose3d[i][3] = 0.001;
|
|
}
|
|
}
|
|
return std::make_pair(pose3d, 0.001);
|
|
}
|
|
}
|
|
|
|
// Calculate reprojections
|
|
std::vector<std::vector<std::array<float, 4>>> poses_3d = {pose3d};
|
|
auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true);
|
|
auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true);
|
|
auto repro1 = repro1s[0];
|
|
auto dists1 = dists1s[0];
|
|
auto repro2 = repro2s[0];
|
|
auto dists2 = dists2s[0];
|
|
|
|
// Calculate scores for each view
|
|
float iscale = (cam1.cam.width + cam1.cam.height) / 2.0;
|
|
std::vector<float> score1 = score_projection(pose1, repro1, dists1, mask, iscale);
|
|
std::vector<float> score2 = score_projection(pose2, repro2, dists2, mask, iscale);
|
|
|
|
// Update scores of 3D pose
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
// Using mainly the reprojection score leads to slightly better average results,
|
|
// but the triangulation score is used to penalize some bad outliers.
|
|
float scoreT = 0.5 * (score1[i] + score2[i]);
|
|
pose3d[i][3] = 0.9 * scoreT + 0.1 * pose3d[i][3];
|
|
}
|
|
}
|
|
|
|
// Set scores outside the room to a low value
|
|
const float wdist = 0.1;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
if (pose3d[i][j] > room_center[j] + room_size[j] / 2.0 + wdist ||
|
|
pose3d[i][j] < room_center[j] - room_size[j] / 2.0 - wdist)
|
|
{
|
|
pose3d[i][3] = 0.001;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Filter clearly wrong limbs
|
|
if (!core_limbs_idx.empty())
|
|
{
|
|
const float max_length_sq = 0.9 * 0.9;
|
|
for (size_t i = 0; i < core_limbs_idx.size(); ++i)
|
|
{
|
|
size_t limb1 = core_limbs_idx[i][0];
|
|
size_t limb2 = core_limbs_idx[i][1];
|
|
|
|
if (pose3d[limb1][3] > min_score && pose3d[limb2][3] > min_score)
|
|
{
|
|
float dx = pose3d[limb1][0] - pose3d[limb2][0];
|
|
float dy = pose3d[limb1][1] - pose3d[limb2][1];
|
|
float dz = pose3d[limb1][2] - pose3d[limb2][2];
|
|
float length_sq = dx * dx + dy * dy + dz * dz;
|
|
|
|
if (length_sq > max_length_sq)
|
|
{
|
|
pose3d[limb2][3] = 0.001;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Drop lowest scores
|
|
std::vector<float> valid_scores;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (pose3d[i][3] > min_score)
|
|
{
|
|
valid_scores.push_back(pose3d[i][3]);
|
|
}
|
|
}
|
|
size_t scores_size = valid_scores.size();
|
|
size_t drop_k = static_cast<size_t>(scores_size * 0.2);
|
|
if (drop_k > 0)
|
|
{
|
|
std::partial_sort(valid_scores.begin(), valid_scores.begin() + drop_k, valid_scores.end());
|
|
valid_scores.erase(valid_scores.begin(), valid_scores.begin() + drop_k);
|
|
}
|
|
|
|
// Calculate final score
|
|
float final_score = 0.0;
|
|
if (!valid_scores.empty())
|
|
{
|
|
float sum_scores = std::accumulate(valid_scores.begin(), valid_scores.end(), 0.0);
|
|
final_score = sum_scores / static_cast<float>(valid_scores.size());
|
|
}
|
|
|
|
return std::make_pair(pose3d, final_score);
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::tuple<std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
|
|
calc_grouping(
|
|
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
|
|
const std::vector<std::pair<std::vector<std::array<float, 4>>, float>> &all_scored_poses,
|
|
float min_score)
|
|
{
|
|
float max_center_dist_sq = 0.75 * 0.75;
|
|
float max_joint_avg_dist = 0.25;
|
|
size_t num_pairs = all_pairs.size();
|
|
|
|
// Calculate pose centers
|
|
std::vector<std::array<float, 3>> centers;
|
|
centers.resize(num_pairs);
|
|
for (size_t i = 0; i < num_pairs; ++i)
|
|
{
|
|
const auto &pose_3d = all_scored_poses[i].first;
|
|
const size_t num_joints = pose_3d.size();
|
|
|
|
std::array<float, 3> center = {0.0, 0.0, 0.0};
|
|
size_t num_valid = 0;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float score = pose_3d[j][3];
|
|
if (score > min_score)
|
|
{
|
|
center[0] += pose_3d[j][0];
|
|
center[1] += pose_3d[j][1];
|
|
center[2] += pose_3d[j][2];
|
|
++num_valid;
|
|
}
|
|
}
|
|
if (num_valid > 0)
|
|
{
|
|
float inv = 1.0 / num_valid;
|
|
center[0] *= inv;
|
|
center[1] *= inv;
|
|
center[2] *= inv;
|
|
}
|
|
centers[i] = center;
|
|
}
|
|
|
|
// Calculate Groups
|
|
// defined as a tuple of center, pose, and all-pairs-indices of members
|
|
std::vector<std::tuple<
|
|
std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
|
|
groups;
|
|
std::vector<std::vector<size_t>> per_group_visible_counts;
|
|
for (size_t i = 0; i < num_pairs; ++i)
|
|
{
|
|
const auto &pose_3d = all_scored_poses[i].first;
|
|
size_t num_joints = pose_3d.size();
|
|
|
|
const std::array<float, 3> ¢er = centers[i];
|
|
float best_dist = std::numeric_limits<float>::infinity();
|
|
int best_group = -1;
|
|
|
|
for (size_t j = 0; j < groups.size(); ++j)
|
|
{
|
|
auto &group = groups[j];
|
|
std::array<float, 3> &group_center = std::get<0>(group);
|
|
|
|
// Check if the center is close enough
|
|
float dx = group_center[0] - center[0];
|
|
float dy = group_center[1] - center[1];
|
|
float dz = group_center[2] - center[2];
|
|
float center_dist_sq = dx * dx + dy * dy + dz * dz;
|
|
|
|
if (center_dist_sq < max_center_dist_sq)
|
|
{
|
|
const auto &group_pose = std::get<1>(group);
|
|
|
|
// Calculate average joint distance
|
|
std::vector<float> dists;
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
float score1 = pose_3d[row][3];
|
|
float score2 = group_pose[row][3];
|
|
if (score1 > min_score && score2 > min_score)
|
|
{
|
|
float dx = pose_3d[row][0] - group_pose[row][0];
|
|
float dy = pose_3d[row][1] - group_pose[row][1];
|
|
float dz = pose_3d[row][2] - group_pose[row][2];
|
|
float dist = std::sqrt(dx * dx + dy * dy + dz * dz);
|
|
dists.push_back(dist);
|
|
}
|
|
}
|
|
if (dists.size() >= 5)
|
|
{
|
|
// Drop highest value to reduce influence of outliers
|
|
auto max_it = std::max_element(dists.begin(), dists.end());
|
|
dists.erase(max_it);
|
|
}
|
|
if (dists.size() > 0)
|
|
{
|
|
// Check if the average joint distance is close enough
|
|
float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0);
|
|
avg_dist /= static_cast<float>(dists.size());
|
|
if (avg_dist < max_joint_avg_dist && avg_dist < best_dist)
|
|
{
|
|
best_dist = avg_dist;
|
|
best_group = static_cast<int>(j);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_group == -1)
|
|
{
|
|
// Create a new group
|
|
std::vector<int> new_indices{static_cast<int>(i)};
|
|
std::vector<std::array<float, 4>> group_pose = pose_3d;
|
|
groups.emplace_back(center, group_pose, new_indices);
|
|
|
|
// Update per joint counts
|
|
std::vector<size_t> new_valid_joint_counts(num_joints, 0);
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
if (pose_3d[row][3] > min_score)
|
|
{
|
|
new_valid_joint_counts[row] = 1;
|
|
}
|
|
}
|
|
per_group_visible_counts.push_back(std::move(new_valid_joint_counts));
|
|
}
|
|
else
|
|
{
|
|
// Update existing group
|
|
auto &group = groups[best_group];
|
|
std::array<float, 3> &group_center = std::get<0>(group);
|
|
std::vector<std::array<float, 4>> &group_pose = std::get<1>(group);
|
|
std::vector<int> &group_indices = std::get<2>(group);
|
|
|
|
float n_elems = static_cast<float>(group_indices.size());
|
|
float inv_n = 1.0 / (n_elems + 1.0);
|
|
|
|
// Update group center
|
|
group_center[0] = (group_center[0] * n_elems + center[0]) * inv_n;
|
|
group_center[1] = (group_center[1] * n_elems + center[1]) * inv_n;
|
|
group_center[2] = (group_center[2] * n_elems + center[2]) * inv_n;
|
|
|
|
// Update group pose
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
if (pose_3d[row][3] > min_score)
|
|
{
|
|
float j_elems = static_cast<float>(per_group_visible_counts[best_group][row]);
|
|
float inv_j = 1.0 / (j_elems + 1.0);
|
|
|
|
group_pose[row][0] = (group_pose[row][0] * j_elems + pose_3d[row][0]) * inv_j;
|
|
group_pose[row][1] = (group_pose[row][1] * j_elems + pose_3d[row][1]) * inv_j;
|
|
group_pose[row][2] = (group_pose[row][2] * j_elems + pose_3d[row][2]) * inv_j;
|
|
group_pose[row][3] = (group_pose[row][3] * j_elems + pose_3d[row][3]) * inv_j;
|
|
per_group_visible_counts[best_group][row]++;
|
|
}
|
|
}
|
|
group_indices.push_back(static_cast<int>(i));
|
|
}
|
|
}
|
|
|
|
// Merge close groups
|
|
// Depending on the inital group creation, one or more groups can be created that in the end
|
|
// share the same persons, even if they had a larger distance at the beginning
|
|
// So merge them similar to the group assignment before
|
|
std::vector<std::tuple<
|
|
std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
|
|
merged_groups;
|
|
for (size_t i = 0; i < groups.size(); ++i)
|
|
{
|
|
size_t num_joints = std::get<1>(groups[i]).size();
|
|
auto &group = groups[i];
|
|
auto &group_visible_counts = per_group_visible_counts[i];
|
|
|
|
float best_dist = std::numeric_limits<float>::infinity();
|
|
int best_group = -1;
|
|
|
|
for (size_t j = 0; j < merged_groups.size(); ++j)
|
|
{
|
|
const auto &group_pose = std::get<1>(group);
|
|
const auto &merged_pose = std::get<1>(merged_groups[j]);
|
|
|
|
// Calculate average joint distance
|
|
std::vector<float> dists;
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
float score1 = group_pose[row][3];
|
|
float score2 = merged_pose[row][3];
|
|
if (score1 > min_score && score2 > min_score)
|
|
{
|
|
float dx = group_pose[row][0] - merged_pose[row][0];
|
|
float dy = group_pose[row][1] - merged_pose[row][1];
|
|
float dz = group_pose[row][2] - merged_pose[row][2];
|
|
float dist = std::sqrt(dx * dx + dy * dy + dz * dz);
|
|
dists.push_back(dist);
|
|
}
|
|
}
|
|
if (dists.size() >= 5)
|
|
{
|
|
// Drop highest value to reduce influence of outliers
|
|
auto max_it = std::max_element(dists.begin(), dists.end());
|
|
dists.erase(max_it);
|
|
}
|
|
if (dists.size() > 0)
|
|
{
|
|
// Check if the average joint distance is close enough
|
|
float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0);
|
|
avg_dist /= static_cast<float>(dists.size());
|
|
if (avg_dist < max_joint_avg_dist && avg_dist < best_dist)
|
|
{
|
|
best_dist = avg_dist;
|
|
best_group = static_cast<int>(j);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_group == -1)
|
|
{
|
|
// Create a new group
|
|
merged_groups.push_back(group);
|
|
}
|
|
else
|
|
{
|
|
// Update existing group
|
|
auto &merged_group = merged_groups[best_group];
|
|
std::array<float, 3> &merged_center = std::get<0>(merged_group);
|
|
std::array<float, 3> &group_center = std::get<0>(group);
|
|
std::vector<int> &merged_group_indices = std::get<2>(merged_group);
|
|
|
|
float n_elems1 = static_cast<float>(merged_group_indices.size());
|
|
float n_elems2 = static_cast<float>(std::get<2>(group).size());
|
|
float inv1 = n_elems1 / (n_elems1 + n_elems2);
|
|
float inv2 = n_elems2 / (n_elems1 + n_elems2);
|
|
|
|
// Update group center
|
|
merged_center[0] = merged_center[0] * inv1 + group_center[0] * inv2;
|
|
merged_center[1] = merged_center[1] * inv1 + group_center[1] * inv2;
|
|
merged_center[2] = merged_center[2] * inv1 + group_center[2] * inv2;
|
|
|
|
// Update group pose
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
auto &group_pose = std::get<1>(group);
|
|
auto &merged_pose = std::get<1>(merged_group);
|
|
|
|
if (group_pose[row][3] > min_score)
|
|
{
|
|
float j_elems1 = static_cast<float>(group_visible_counts[row]);
|
|
float j_elems2 = static_cast<float>(per_group_visible_counts[best_group][row]);
|
|
float inv1 = j_elems1 / (j_elems1 + j_elems2);
|
|
float inv2 = j_elems2 / (j_elems1 + j_elems2);
|
|
|
|
merged_pose[row][0] = merged_pose[row][0] * inv1 + group_pose[row][0] * inv2;
|
|
merged_pose[row][1] = merged_pose[row][1] * inv1 + group_pose[row][1] * inv2;
|
|
merged_pose[row][2] = merged_pose[row][2] * inv1 + group_pose[row][2] * inv2;
|
|
merged_pose[row][3] = merged_pose[row][3] * inv1 + group_pose[row][3] * inv2;
|
|
group_visible_counts[row]++;
|
|
}
|
|
}
|
|
|
|
// Merge indices
|
|
merged_group_indices.insert(
|
|
merged_group_indices.end(), std::get<2>(group).begin(), std::get<2>(group).end());
|
|
}
|
|
}
|
|
|
|
return merged_groups;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::array<float, 4>> merge_group(
|
|
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
|
|
float min_score,
|
|
size_t num_cams)
|
|
{
|
|
size_t num_poses = poses_3d.size();
|
|
size_t num_joints = poses_3d[0].size();
|
|
|
|
// Merge poses to create initial pose
|
|
// Use only those triangulations with a high score
|
|
std::vector<std::array<float, 4>> sum_poses(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
std::vector<float> sum_mask1(num_joints, 0);
|
|
float offset1 = (1.0 - min_score) / 2.0;
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
const auto &pose = poses_3d[i];
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float score = pose[j][3];
|
|
if (score > min_score - offset1)
|
|
{
|
|
float weight = std::pow(score, 2);
|
|
sum_poses[j][0] += pose[j][0] * weight;
|
|
sum_poses[j][1] += pose[j][1] * weight;
|
|
sum_poses[j][2] += pose[j][2] * weight;
|
|
sum_poses[j][3] += score * weight;
|
|
sum_mask1[j] += weight;
|
|
}
|
|
}
|
|
}
|
|
std::vector<std::array<float, 4>> initial_pose_3d(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (sum_mask1[j] > 0)
|
|
{
|
|
float inv = 1.0 / sum_mask1[j];
|
|
initial_pose_3d[j][0] = sum_poses[j][0] * inv;
|
|
initial_pose_3d[j][1] = sum_poses[j][1] * inv;
|
|
initial_pose_3d[j][2] = sum_poses[j][2] * inv;
|
|
initial_pose_3d[j][3] = sum_poses[j][3] * inv;
|
|
}
|
|
}
|
|
|
|
// Use center as default if the initial pose is empty
|
|
std::vector<bool> jmask(num_joints, false);
|
|
std::array<float, 3> center = {0.0, 0.0, 0.0};
|
|
int valid_joints = 0;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float score = initial_pose_3d[j][3];
|
|
if (score > min_score - offset1)
|
|
{
|
|
jmask[j] = true;
|
|
center[0] += initial_pose_3d[j][0];
|
|
center[1] += initial_pose_3d[j][1];
|
|
center[2] += initial_pose_3d[j][2];
|
|
valid_joints++;
|
|
}
|
|
}
|
|
if (valid_joints > 0)
|
|
{
|
|
float inv_valid = 1.0 / valid_joints;
|
|
center[0] *= inv_valid;
|
|
center[1] *= inv_valid;
|
|
center[2] *= inv_valid;
|
|
}
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (!jmask[j])
|
|
{
|
|
initial_pose_3d[j][0] = center[0];
|
|
initial_pose_3d[j][1] = center[1];
|
|
initial_pose_3d[j][2] = center[2];
|
|
initial_pose_3d[j][3] = 0.0;
|
|
}
|
|
}
|
|
|
|
// Drop joints with low scores and filter outlying joints using distance threshold
|
|
float offset2 = (1.0 - min_score) * 2.0;
|
|
float max_dist_sq = 1.2 * 1.2;
|
|
std::vector<std::vector<bool>> mask(num_poses, std::vector<bool>(num_joints, false));
|
|
std::vector<std::vector<float>> distances(num_poses, std::vector<float>(num_joints, 0.0));
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
const auto &pose = poses_3d[i];
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float dx = pose[j][0] - initial_pose_3d[j][0];
|
|
float dy = pose[j][1] - initial_pose_3d[j][1];
|
|
float dz = pose[j][2] - initial_pose_3d[j][2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
distances[i][j] = dist_sq;
|
|
|
|
float score = pose[j][3];
|
|
if (dist_sq <= max_dist_sq && score > (min_score - offset2))
|
|
{
|
|
mask[i][j] = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Select the best-k proposals for each joint that are closest to the initial pose
|
|
int keep_best = std::max((int)std::ceil(num_cams / 2.0), (int)std::ceil(num_poses / 3.0));
|
|
std::vector<std::vector<bool>> best_k_mask(num_poses, std::vector<bool>(num_joints, false));
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
std::vector<std::pair<float, int>> valid_indices;
|
|
valid_indices.reserve(num_poses);
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
if (mask[i][j])
|
|
{
|
|
valid_indices.emplace_back(distances[i][j], i);
|
|
}
|
|
}
|
|
int num_valid = std::min(keep_best, static_cast<int>(valid_indices.size()));
|
|
if (num_valid > 0)
|
|
{
|
|
std::partial_sort(valid_indices.begin(), valid_indices.begin() + num_valid,
|
|
valid_indices.end());
|
|
|
|
for (int k = 0; k < num_valid; ++k)
|
|
{
|
|
int idx = valid_indices[k].second;
|
|
best_k_mask[idx][j] = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Combine masks
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
mask[i][j] = mask[i][j] && best_k_mask[i][j];
|
|
}
|
|
}
|
|
|
|
// Compute the final pose
|
|
std::vector<std::array<float, 4>> sum_poses2(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
std::vector<float> sum_mask2(num_joints, 0);
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
const auto &pose = poses_3d[i];
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (mask[i][j])
|
|
{
|
|
// Use an exponential weighting to give more importance to higher scores
|
|
float weight = std::pow(pose[j][3], 4);
|
|
sum_poses2[j][0] += pose[j][0] * weight;
|
|
sum_poses2[j][1] += pose[j][1] * weight;
|
|
sum_poses2[j][2] += pose[j][2] * weight;
|
|
sum_poses2[j][3] += pose[j][3] * weight;
|
|
sum_mask2[j] += weight;
|
|
}
|
|
}
|
|
}
|
|
std::vector<std::array<float, 4>> final_pose_3d(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (sum_mask2[j] > 0)
|
|
{
|
|
float inv = 1.0 / sum_mask2[j];
|
|
final_pose_3d[j][0] = sum_poses2[j][0] * inv;
|
|
final_pose_3d[j][1] = sum_poses2[j][1] * inv;
|
|
final_pose_3d[j][2] = sum_poses2[j][2] * inv;
|
|
final_pose_3d[j][3] = sum_poses2[j][3] * inv;
|
|
}
|
|
}
|
|
|
|
return final_pose_3d;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void add_extra_joints(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::vector<std::string> &joint_names)
|
|
{
|
|
// Find indices of "head", "ear_left", and "ear_right"
|
|
auto it_head = std::find(joint_names.begin(), joint_names.end(), "head");
|
|
auto it_ear_left = std::find(joint_names.begin(), joint_names.end(), "ear_left");
|
|
auto it_ear_right = std::find(joint_names.begin(), joint_names.end(), "ear_right");
|
|
int idx_h = std::distance(joint_names.begin(), it_head);
|
|
int idx_el = std::distance(joint_names.begin(), it_ear_left);
|
|
int idx_er = std::distance(joint_names.begin(), it_ear_right);
|
|
|
|
// If the confidence score of "head" is zero compute it as the average of the ears
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
if (pose[idx_h][3] == 0.0)
|
|
{
|
|
if (pose[idx_el][3] > 0.0 && pose[idx_er][3] > 0.0)
|
|
{
|
|
pose[idx_h][0] = (pose[idx_el][0] + pose[idx_er][0]) * 0.5;
|
|
pose[idx_h][1] = (pose[idx_el][1] + pose[idx_er][1]) * 0.5;
|
|
pose[idx_h][2] = (pose[idx_el][2] + pose[idx_er][2]) * 0.5;
|
|
pose[idx_h][3] = std::min(pose[idx_el][3], pose[idx_er][3]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void filter_poses(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<size_t> &core_joint_idx,
|
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
|
|
{
|
|
const float min_score = 0.1;
|
|
std::vector<int> drop_indices;
|
|
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
size_t num_core_joints = core_joint_idx.size();
|
|
size_t num_full_joints = pose.size();
|
|
|
|
// Collect valid joint indices
|
|
std::vector<size_t> valid_joint_idx;
|
|
for (size_t j = 0; j < num_core_joints; ++j)
|
|
{
|
|
size_t idx = core_joint_idx[j];
|
|
if (pose[idx][3] > min_score)
|
|
{
|
|
valid_joint_idx.push_back(idx);
|
|
}
|
|
}
|
|
|
|
// Drop poses with too few joints
|
|
if (valid_joint_idx.size() < 5)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
|
|
// Compute min, max and mean coordinates
|
|
std::array<float, 3> mean = {0.0, 0.0, 0.0};
|
|
std::array<float, 3> mins = {
|
|
std::numeric_limits<float>::max(),
|
|
std::numeric_limits<float>::max(),
|
|
std::numeric_limits<float>::max()};
|
|
std::array<float, 3> maxs = {
|
|
std::numeric_limits<float>::lowest(),
|
|
std::numeric_limits<float>::lowest(),
|
|
std::numeric_limits<float>::lowest()};
|
|
for (size_t j = 0; j < valid_joint_idx.size(); ++j)
|
|
{
|
|
size_t idx = valid_joint_idx[j];
|
|
for (size_t k = 0; k < 3; ++k)
|
|
{
|
|
mins[k] = std::min(mins[k], pose[idx][k]);
|
|
maxs[k] = std::max(maxs[k], pose[idx][k]);
|
|
mean[k] += pose[idx][k];
|
|
}
|
|
}
|
|
for (size_t k = 0; k < 3; ++k)
|
|
{
|
|
mean[k] /= static_cast<float>(valid_joint_idx.size());
|
|
}
|
|
|
|
// Drop poses that are too large or too small or too flat
|
|
float min_flatness = 0.2;
|
|
float max_size = 2.3;
|
|
float min_size = 0.3;
|
|
std::array<float, 3> diff;
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
diff[j] = maxs[j] - mins[j];
|
|
}
|
|
if (diff[0] > max_size || diff[1] > max_size || diff[2] > max_size)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
if (diff[0] < min_size && diff[1] < min_size && diff[2] < min_size)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
if ((diff[0] < min_flatness && diff[1] < min_flatness) ||
|
|
(diff[1] < min_flatness && diff[2] < min_flatness) ||
|
|
(diff[2] < min_flatness && diff[0] < min_flatness))
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
|
|
// Drop persons outside room
|
|
const float wdist = 0.1;
|
|
const auto &room_size = roomparams[0];
|
|
const auto &room_center = roomparams[1];
|
|
const std::array<float, 3> room_half_size = {
|
|
room_size[0] / 2.0f,
|
|
room_size[1] / 2.0f,
|
|
room_size[2] / 2.0f};
|
|
bool outside = false;
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
// Check if the mean position is outside the room boundaries
|
|
if (mean[j] > room_half_size[j] + room_center[j] ||
|
|
mean[j] < -room_half_size[j] + room_center[j])
|
|
{
|
|
outside = true;
|
|
break;
|
|
}
|
|
}
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
// Check if any limb is too far outside the room
|
|
if (maxs[j] > room_half_size[j] + room_center[j] + wdist ||
|
|
mins[j] < -room_half_size[j] + room_center[j] - wdist)
|
|
{
|
|
outside = true;
|
|
break;
|
|
}
|
|
}
|
|
if (outside)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
|
|
// Set joint scores outside the room to a low value
|
|
for (size_t j = 0; j < num_full_joints; ++j)
|
|
{
|
|
if (pose[j][3] > min_score)
|
|
{
|
|
for (int k = 0; k < 3; ++k)
|
|
{
|
|
if (pose[j][k] > room_half_size[k] + room_center[k] + wdist ||
|
|
pose[j][k] < -room_half_size[k] + room_center[k] - wdist)
|
|
{
|
|
pose[j][3] = 0.001;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculate total limb length and average limb length
|
|
const float max_avg_length = 0.5;
|
|
const float min_avg_length = 0.1;
|
|
float total_length = 0.0;
|
|
int total_limbs = 0;
|
|
for (size_t j = 0; j < core_limbs_idx.size(); ++j)
|
|
{
|
|
size_t start_idx = core_joint_idx[core_limbs_idx[j][0]];
|
|
size_t end_idx = core_joint_idx[core_limbs_idx[j][1]];
|
|
|
|
if (pose[start_idx][3] < min_score || pose[end_idx][3] < min_score)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
float dx = pose[end_idx][0] - pose[start_idx][0];
|
|
float dy = pose[end_idx][1] - pose[start_idx][1];
|
|
float dz = pose[end_idx][2] - pose[start_idx][2];
|
|
float length = std::sqrt(dx * dx + dy * dy + dz * dz);
|
|
|
|
total_length += length;
|
|
total_limbs++;
|
|
}
|
|
if (total_limbs == 0)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
float average_length = total_length / static_cast<float>(total_limbs);
|
|
if (average_length < min_avg_length)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
if (total_limbs >= 5 && average_length > max_avg_length)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Set confidences of invalid poses to a low value
|
|
for (size_t i = 0; i < drop_indices.size(); ++i)
|
|
{
|
|
auto &pose = poses[drop_indices[i]];
|
|
for (size_t j = 0; j < pose.size(); ++j)
|
|
{
|
|
pose[j][3] = 0.001;
|
|
}
|
|
}
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void add_missing_joints(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::vector<std::string> &joint_names,
|
|
float min_match_score)
|
|
{
|
|
// Map joint names to their indices for quick lookup
|
|
std::unordered_map<std::string, size_t> joint_name_to_idx;
|
|
for (size_t idx = 0; idx < joint_names.size(); ++idx)
|
|
{
|
|
joint_name_to_idx[joint_names[idx]] = idx;
|
|
}
|
|
|
|
// Adjacent mapping
|
|
std::unordered_map<std::string, std::vector<std::string>> adjacents = {
|
|
{"hip_right", {"hip_middle", "hip_left"}},
|
|
{"hip_left", {"hip_middle", "hip_right"}},
|
|
{"knee_right", {"hip_right", "knee_left"}},
|
|
{"knee_left", {"hip_left", "knee_right"}},
|
|
{"ankle_right", {"knee_right", "ankle_left"}},
|
|
{"ankle_left", {"knee_left", "ankle_right"}},
|
|
{"shoulder_right", {"shoulder_middle", "shoulder_left"}},
|
|
{"shoulder_left", {"shoulder_middle", "shoulder_right"}},
|
|
{"elbow_right", {"shoulder_right", "hip_right"}},
|
|
{"elbow_left", {"shoulder_left", "hip_left"}},
|
|
{"wrist_right", {"elbow_right"}},
|
|
{"wrist_left", {"elbow_left"}},
|
|
{"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
|
|
{"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}}};
|
|
|
|
// Collect adjacent joint mappings
|
|
std::vector<std::vector<size_t>> adjacent_indices;
|
|
adjacent_indices.resize(joint_names.size());
|
|
for (size_t j = 0; j < joint_names.size(); ++j)
|
|
{
|
|
std::string adname = "";
|
|
const std::string &jname = joint_names[j];
|
|
|
|
// Determine adjacent joint based on name patterns
|
|
if (adjacents.find(jname) != adjacents.end())
|
|
{
|
|
adname = jname;
|
|
}
|
|
|
|
if (!adname.empty())
|
|
{
|
|
auto it = adjacents.find(adname);
|
|
if (it != adjacents.end())
|
|
{
|
|
for (const std::string &adj_name : it->second)
|
|
{
|
|
auto jt = joint_name_to_idx.find(adj_name);
|
|
if (jt != joint_name_to_idx.end())
|
|
{
|
|
adjacent_indices[j].push_back(jt->second);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
size_t num_joints = pose.size();
|
|
|
|
// Compute body center as the mean of valid joints
|
|
std::array<float, 3> body_center = {0.0, 0.0, 0.0};
|
|
size_t valid_count = 0;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (pose[j][3] > min_match_score)
|
|
{
|
|
body_center[0] += pose[j][0];
|
|
body_center[1] += pose[j][1];
|
|
body_center[2] += pose[j][2];
|
|
valid_count++;
|
|
}
|
|
}
|
|
if (valid_count == 0)
|
|
{
|
|
continue;
|
|
}
|
|
float inv_c = 1.0 / static_cast<float>(valid_count);
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
body_center[j] *= inv_c;
|
|
}
|
|
|
|
// Iterate over each joint
|
|
for (size_t j = 0; j < joint_names.size(); ++j)
|
|
{
|
|
if (pose[j][3] == 0.0)
|
|
{
|
|
// Joint is missing; attempt to estimate its position based on adjacent joints
|
|
const std::vector<size_t> &adj_indices = adjacent_indices[j];
|
|
std::array<float, 3> adjacent_position = {0.0, 0.0, 0.0};
|
|
size_t adjacent_count = 0;
|
|
|
|
for (size_t adj_idx : adj_indices)
|
|
{
|
|
if (adj_idx < pose.size() && pose[adj_idx][3] > 0.1)
|
|
{
|
|
adjacent_position[0] += pose[adj_idx][0];
|
|
adjacent_position[1] += pose[adj_idx][1];
|
|
adjacent_position[2] += pose[adj_idx][2];
|
|
++adjacent_count;
|
|
}
|
|
}
|
|
|
|
if (adjacent_count > 0)
|
|
{
|
|
float inv_c = 1.0 / static_cast<float>(adjacent_count);
|
|
for (size_t k = 0; k < 3; ++k)
|
|
{
|
|
adjacent_position[k] *= inv_c;
|
|
}
|
|
|
|
// Update the missing joint's position
|
|
pose[j][0] = adjacent_position[0];
|
|
pose[j][1] = adjacent_position[1];
|
|
pose[j][2] = adjacent_position[2];
|
|
}
|
|
else
|
|
{
|
|
// No valid adjacent joints, use body center
|
|
pose[j][0] = body_center[0];
|
|
pose[j][1] = body_center[1];
|
|
pose[j][2] = body_center[2];
|
|
}
|
|
|
|
// Set a low confidence score
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void replace_far_joints(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::vector<std::string> &joint_names,
|
|
float min_match_score)
|
|
{
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
size_t num_full_joints = pose.size();
|
|
|
|
// Calculate the average position of pose parts
|
|
std::array<float, 4> center_head = {0.0, 0.0, 0.0, 0};
|
|
std::array<float, 4> center_foot_left = {0.0, 0.0, 0.0, 0};
|
|
std::array<float, 4> center_foot_right = {0.0, 0.0, 0.0, 0};
|
|
std::array<float, 4> center_hand_left = {0.0, 0.0, 0.0, 0};
|
|
std::array<float, 4> center_hand_right = {0.0, 0.0, 0.0, 0};
|
|
for (size_t j = 0; j < num_full_joints; ++j)
|
|
{
|
|
const std::string &jname = joint_names[j];
|
|
float offset2 = (1.0 - min_match_score) * 2;
|
|
float min_score = min_match_score - offset2;
|
|
|
|
if (pose[j][3] > min_score)
|
|
{
|
|
if (jname.compare(0, 4, "face_") == 0 || jname == "nose" || jname == "eye_left" ||
|
|
jname == "eye_right" || jname == "ear_left" || jname == "ear_right")
|
|
{
|
|
center_head[0] += pose[j][0];
|
|
center_head[1] += pose[j][1];
|
|
center_head[2] += pose[j][2];
|
|
center_head[3] += 1.0;
|
|
}
|
|
else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0)
|
|
{
|
|
if (jname.find("_left") != std::string::npos)
|
|
{
|
|
center_foot_left[0] += pose[j][0];
|
|
center_foot_left[1] += pose[j][1];
|
|
center_foot_left[2] += pose[j][2];
|
|
center_foot_left[3] += 1.0;
|
|
}
|
|
else if (jname.find("_right") != std::string::npos)
|
|
{
|
|
center_foot_right[0] += pose[j][0];
|
|
center_foot_right[1] += pose[j][1];
|
|
center_foot_right[2] += pose[j][2];
|
|
center_foot_right[3] += 1.0;
|
|
}
|
|
}
|
|
else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0)
|
|
{
|
|
if (jname.find("_left") != std::string::npos)
|
|
{
|
|
center_hand_left[0] += pose[j][0];
|
|
center_hand_left[1] += pose[j][1];
|
|
center_hand_left[2] += pose[j][2];
|
|
center_hand_left[3] += 1.0;
|
|
}
|
|
else if (jname.find("_right") != std::string::npos)
|
|
{
|
|
center_hand_right[0] += pose[j][0];
|
|
center_hand_right[1] += pose[j][1];
|
|
center_hand_right[2] += pose[j][2];
|
|
center_hand_right[3] += 1.0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for (size_t k = 0; k < 3; k++)
|
|
{
|
|
center_head[k] /= center_head[3];
|
|
center_foot_left[k] /= center_foot_left[3];
|
|
center_foot_right[k] /= center_foot_right[3];
|
|
center_hand_left[k] /= center_hand_left[3];
|
|
center_hand_right[k] /= center_hand_right[3];
|
|
}
|
|
|
|
// Drop joints that are too far from the part center
|
|
const float max_dist_head_sq = 0.20 * 0.20;
|
|
const float max_dist_foot_sq = 0.25 * 0.25;
|
|
const float max_dist_hand_sq = 0.20 * 0.20;
|
|
for (size_t j = 0; j < num_full_joints; ++j)
|
|
{
|
|
const std::string &jname = joint_names[j];
|
|
|
|
if (jname.compare(0, 4, "face_") == 0 || jname == "nose" || jname == "eye_left" ||
|
|
jname == "eye_right" || jname == "ear_left" || jname == "ear_right")
|
|
{
|
|
float dx = pose[j][0] - center_head[0];
|
|
float dy = pose[j][1] - center_head[1];
|
|
float dz = pose[j][2] - center_head[2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
if ((pose[j][3] > 0.0 && dist_sq > max_dist_head_sq) || pose[j][3] == 0.0)
|
|
{
|
|
pose[j][0] = center_head[0];
|
|
pose[j][1] = center_head[1];
|
|
pose[j][2] = center_head[2];
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0)
|
|
{
|
|
if (jname.find("_left") != std::string::npos)
|
|
{
|
|
float dx = pose[j][0] - center_foot_left[0];
|
|
float dy = pose[j][1] - center_foot_left[1];
|
|
float dz = pose[j][2] - center_foot_left[2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0)
|
|
{
|
|
pose[j][0] = center_foot_left[0];
|
|
pose[j][1] = center_foot_left[1];
|
|
pose[j][2] = center_foot_left[2];
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
else if (jname.find("_right") != std::string::npos)
|
|
{
|
|
float dx = pose[j][0] - center_foot_right[0];
|
|
float dy = pose[j][1] - center_foot_right[1];
|
|
float dz = pose[j][2] - center_foot_right[2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0)
|
|
{
|
|
pose[j][0] = center_foot_right[0];
|
|
pose[j][1] = center_foot_right[1];
|
|
pose[j][2] = center_foot_right[2];
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
}
|
|
else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0)
|
|
{
|
|
if (jname.find("_left") != std::string::npos)
|
|
{
|
|
float dx = pose[j][0] - center_hand_left[0];
|
|
float dy = pose[j][1] - center_hand_left[1];
|
|
float dz = pose[j][2] - center_hand_left[2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0)
|
|
{
|
|
pose[j][0] = center_hand_left[0];
|
|
pose[j][1] = center_hand_left[1];
|
|
pose[j][2] = center_hand_left[2];
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
else if (jname.find("_right") != std::string::npos)
|
|
{
|
|
float dx = pose[j][0] - center_hand_right[0];
|
|
float dy = pose[j][1] - center_hand_right[1];
|
|
float dz = pose[j][2] - center_hand_right[2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0)
|
|
{
|
|
pose[j][0] = center_hand_right[0];
|
|
pose[j][1] = center_hand_right[1];
|
|
pose[j][2] = center_hand_right[2];
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
std::vector<PairCandidate> build_pair_candidates(const PoseBatch2DView &poses_2d)
|
|
{
|
|
if (poses_2d.person_counts == nullptr)
|
|
{
|
|
throw std::invalid_argument("person_counts must not be null.");
|
|
}
|
|
|
|
std::vector<PairCandidate> pairs;
|
|
std::vector<int> offsets(poses_2d.num_views, 0);
|
|
int total = 0;
|
|
for (size_t view = 0; view < poses_2d.num_views; ++view)
|
|
{
|
|
if (poses_2d.person_counts[view] > poses_2d.max_persons)
|
|
{
|
|
throw std::invalid_argument("person_counts entries must not exceed the padded person dimension.");
|
|
}
|
|
offsets[view] = total;
|
|
total += static_cast<int>(poses_2d.person_counts[view]);
|
|
}
|
|
|
|
for (size_t view1 = 0; view1 < poses_2d.num_views; ++view1)
|
|
{
|
|
for (size_t view2 = view1 + 1; view2 < poses_2d.num_views; ++view2)
|
|
{
|
|
for (size_t person1 = 0; person1 < poses_2d.person_counts[view1]; ++person1)
|
|
{
|
|
for (size_t person2 = 0; person2 < poses_2d.person_counts[view2]; ++person2)
|
|
{
|
|
pairs.push_back(PairCandidate {
|
|
static_cast<int>(view1),
|
|
static_cast<int>(view2),
|
|
static_cast<int>(person1),
|
|
static_cast<int>(person2),
|
|
offsets[view1] + static_cast<int>(person1),
|
|
offsets[view2] + static_cast<int>(person2),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return pairs;
|
|
}
|
|
|
|
PreviousPoseFilterDebug filter_pairs_with_previous_poses(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::vector<std::string> &joint_names,
|
|
const PoseBatch3DView &previous_poses_3d,
|
|
const TriangulationOptions &options)
|
|
{
|
|
return filter_pairs_with_previous_poses_impl(poses_2d, cameras, joint_names, previous_poses_3d, options);
|
|
}
|
|
|
|
TriangulationTrace triangulate_debug(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::string> &joint_names,
|
|
const PoseBatch3DView *previous_poses_3d,
|
|
const TriangulationOptions &options)
|
|
{
|
|
return triangulate_debug_impl(poses_2d, cameras, roomparams, joint_names, previous_poses_3d, options);
|
|
}
|
|
|
|
PoseBatch3D triangulate_poses(
|
|
const PoseBatch2DView &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::string> &joint_names,
|
|
const PoseBatch3DView *previous_poses_3d,
|
|
const TriangulationOptions &options)
|
|
{
|
|
return triangulate_debug(
|
|
poses_2d,
|
|
cameras,
|
|
roomparams,
|
|
joint_names,
|
|
previous_poses_3d,
|
|
options)
|
|
.final_poses;
|
|
}
|