#include #include #include #include #include #include #include #include #include #include #include #include "interface.hpp" #include "cached_camera.hpp" namespace { using Pose2D = std::vector>; using Pose3D = std::vector>; using PosePair = std::pair, std::pair>; using GroupedPose = std::tuple, Pose3D, std::vector>; size_t packed_pose_offset(size_t pose, size_t joint, size_t coord, size_t num_joints) { return (((pose * num_joints) + joint) * 3) + coord; } [[maybe_unused]] static void print_2d_poses(const Pose2D &poses) { std::cout << "Poses: (" << poses.size() << ", 3)[" << std::endl; for (const auto &pose : poses) { std::cout << " ["; for (size_t i = 0; i < 3; ++i) { std::cout << std::fixed << std::setprecision(3) << pose[i]; if (i < 2) { std::cout << ", "; } } std::cout << "]" << std::endl; } std::cout << "]" << std::endl; } [[maybe_unused]] static void print_3d_poses(const Pose3D &poses) { std::cout << "Poses: (" << poses.size() << ", 4)[" << std::endl; for (const auto &pose : poses) { std::cout << " ["; for (size_t i = 0; i < 4; ++i) { std::cout << std::fixed << std::setprecision(3) << pose[i]; if (i < 3) { std::cout << ", "; } } std::cout << "]" << std::endl; } std::cout << "]" << std::endl; } [[maybe_unused]] static void print_allpairs(const std::vector &all_pairs) { std::cout << "All pairs: [" << std::endl; for (const auto &pair : all_pairs) { const auto &tuple_part = pair.first; const auto &pair_part = pair.second; std::cout << "(" << std::get<0>(tuple_part) << ", " << std::get<1>(tuple_part) << ", " << std::get<2>(tuple_part) << ", " << std::get<3>(tuple_part) << ")"; std::cout << ", (" << pair_part.first << ", " << pair_part.second << ")" << std::endl; } std::cout << "]" << std::endl; } std::array undistort_point( const std::array &raw_point, const CachedCamera &icam) { std::array point = raw_point; const float ifx_old = 1.0f / icam.cam.K[0][0]; const float ify_old = 1.0f / icam.cam.K[1][1]; const float cx_old = icam.cam.K[0][2]; const float cy_old = icam.cam.K[1][2]; const float fx_new = icam.newK[0][0]; const float fy_new = icam.newK[1][1]; const float cx_new = icam.newK[0][2]; const float cy_new = icam.newK[1][2]; point[0] = (point[0] - cx_old) * ifx_old; point[1] = (point[1] - cy_old) * ify_old; if (icam.cam.model == CameraModel::Fisheye) { undistort_point_fisheye(point, icam.cam.DC); } else { undistort_point_pinhole(point, icam.cam.DC); } point[0] = (point[0] * fx_new) + cx_new; point[1] = (point[1] * fy_new) + cy_new; const float mask_offset = (icam.cam.width + icam.cam.height) / 20.0f; if (point[0] < -mask_offset || point[0] >= icam.cam.width + mask_offset || point[1] < -mask_offset || point[1] >= icam.cam.height + mask_offset) { point = {0.0f, 0.0f, 0.0f}; } return point; } class PackedPoseStore2D { public: PackedPoseStore2D( const PoseBatch2DView &source, const std::vector &internal_cameras) : person_counts(source.num_views), view_offsets(source.num_views), num_views(source.num_views), num_joints(source.num_joints) { for (size_t view = 0; view < num_views; ++view) { person_counts[view] = source.person_counts[view]; if (person_counts[view] > source.max_persons) { throw std::invalid_argument( "person_counts entries must not exceed the padded person dimension."); } view_offsets[view] = total_persons; total_persons += person_counts[view]; } data.resize(total_persons * num_joints * 3); for (size_t view = 0; view < num_views; ++view) { for (size_t person = 0; person < person_counts[view]; ++person) { const size_t packed_pose = pose_index(view, person); for (size_t joint = 0; joint < num_joints; ++joint) { const std::array undistorted = undistort_point( { source.at(view, person, joint, 0), source.at(view, person, joint, 1), source.at(view, person, joint, 2), }, internal_cameras[view]); for (size_t coord = 0; coord < 3; ++coord) { at(packed_pose, joint, coord) = undistorted[coord]; } } } } } size_t pose_index(size_t view, size_t person) const { return view_offsets[view] + person; } float &at(size_t pose, size_t joint, size_t coord) { return data[packed_pose_offset(pose, joint, coord, num_joints)]; } const float &at(size_t pose, size_t joint, size_t coord) const { return data[packed_pose_offset(pose, joint, coord, num_joints)]; } Pose2D pose(size_t view, size_t person) const { const size_t packed_pose = pose_index(view, person); Pose2D result(num_joints); for (size_t joint = 0; joint < num_joints; ++joint) { for (size_t coord = 0; coord < 3; ++coord) { result[joint][coord] = at(packed_pose, joint, coord); } } return result; } Pose2D pose(size_t view, size_t person, const std::vector &joint_indices) const { const size_t packed_pose = pose_index(view, person); Pose2D result(joint_indices.size()); for (size_t joint = 0; joint < joint_indices.size(); ++joint) { const size_t source_joint = joint_indices[joint]; for (size_t coord = 0; coord < 3; ++coord) { result[joint][coord] = at(packed_pose, source_joint, coord); } } return result; } std::vector person_counts; std::vector view_offsets; size_t num_views = 0; size_t num_joints = 0; size_t total_persons = 0; private: std::vector data; }; struct PreparedInputs { std::vector internal_cameras; std::vector core_joint_idx; std::vector> core_limbs_idx; PackedPoseStore2D packed_poses; PreparedInputs( std::vector cameras_in, std::vector core_joint_idx_in, std::vector> core_limbs_idx_in, PackedPoseStore2D packed_poses_in) : internal_cameras(std::move(cameras_in)), core_joint_idx(std::move(core_joint_idx_in)), core_limbs_idx(std::move(core_limbs_idx_in)), packed_poses(std::move(packed_poses_in)) { } }; struct PreviousProjectionCache { std::vector> projected_poses; std::vector>> projected_dists; std::vector core_poses; }; constexpr std::array kCoreJoints = { "shoulder_left", "shoulder_right", "hip_left", "hip_right", "elbow_left", "elbow_right", "knee_left", "knee_right", "wrist_left", "wrist_right", "ankle_left", "ankle_right", }; constexpr std::array, 8> kCoreLimbs = {{ {"knee_left", "ankle_left"}, {"hip_left", "knee_left"}, {"hip_right", "knee_right"}, {"knee_right", "ankle_right"}, {"elbow_left", "wrist_left"}, {"elbow_right", "wrist_right"}, {"shoulder_left", "elbow_left"}, {"shoulder_right", "elbow_right"}, }}; std::vector build_pair_candidates_from_packed(const PackedPoseStore2D &packed_poses); PreviousProjectionCache project_previous_poses( const PoseBatch3DView &previous_poses_3d, const std::vector &internal_cameras, const std::vector &core_joint_idx); PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( const PackedPoseStore2D &packed_poses, const std::vector &internal_cameras, const std::vector &core_joint_idx, const std::vector &pairs, const TriangulationOptions &options, const PoseBatch3DView &previous_poses_3d); float calc_pose_score( const Pose2D &pose, const Pose2D &reference_pose, const std::vector &dists, const CachedCamera &icam); std::tuple, std::vector>> project_poses( const std::vector &poses_3d, const CachedCamera &icam, bool calc_dists); std::vector score_projection( const Pose2D &pose, const Pose2D &repro, const std::vector &dists, const std::vector &mask, float iscale); std::pair triangulate_and_score( const Pose2D &pose1, const Pose2D &pose2, const CachedCamera &cam1, const CachedCamera &cam2, const std::array, 2> &roomparams, const std::vector> &core_limbs_idx); std::vector calc_grouping( const std::vector &all_pairs, const std::vector> &all_scored_poses, float min_score); Pose3D merge_group( const std::vector &poses_3d, float min_score, size_t num_cams); void add_extra_joints( std::vector &poses, const std::vector &joint_names); void filter_poses( std::vector &poses, const std::array, 2> &roomparams, const std::vector &core_joint_idx, const std::vector> &core_limbs_idx); void add_missing_joints( std::vector &poses, const std::vector &joint_names, float min_match_score); void replace_far_joints( std::vector &poses, const std::vector &joint_names, float min_match_score); TriangulationTrace triangulate_debug_impl( const PoseBatch2DView &poses_2d, const std::vector &cameras, const std::array, 2> &roomparams, const std::vector &joint_names, const PoseBatch3DView *previous_poses_3d, const TriangulationOptions &options); PreparedInputs prepare_inputs( const PoseBatch2DView &poses_2d, const std::vector &cameras, const std::vector &joint_names) { if (poses_2d.num_views == 0) { throw std::invalid_argument("No 2D poses provided."); } if (poses_2d.person_counts == nullptr) { throw std::invalid_argument("person_counts must not be null."); } if (poses_2d.num_views != cameras.size()) { throw std::invalid_argument("Number of cameras and 2D poses must be the same."); } if (joint_names.size() != poses_2d.num_joints) { throw std::invalid_argument("Number of joint names and 2D poses must be the same."); } if (poses_2d.num_joints == 0) { throw std::invalid_argument("At least one joint is required."); } if (poses_2d.max_persons > 0 && poses_2d.num_joints > 0 && poses_2d.data == nullptr) { throw std::invalid_argument("poses_2d data must not be null."); } for (const auto &joint : kCoreJoints) { if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end()) { throw std::invalid_argument("Core joint '" + std::string(joint) + "' not found in joint names."); } } std::vector internal_cameras; internal_cameras.reserve(cameras.size()); for (const auto &camera : cameras) { internal_cameras.push_back(cache_camera(camera)); } std::vector core_joint_idx; core_joint_idx.reserve(kCoreJoints.size()); for (const auto &joint : kCoreJoints) { auto it = std::find(joint_names.begin(), joint_names.end(), joint); core_joint_idx.push_back(static_cast(std::distance(joint_names.begin(), it))); } std::vector> core_limbs_idx; core_limbs_idx.reserve(kCoreLimbs.size()); for (const auto &limb : kCoreLimbs) { auto it1 = std::find(kCoreJoints.begin(), kCoreJoints.end(), limb.first); auto it2 = std::find(kCoreJoints.begin(), kCoreJoints.end(), limb.second); core_limbs_idx.push_back( {static_cast(std::distance(kCoreJoints.begin(), it1)), static_cast(std::distance(kCoreJoints.begin(), it2))}); } PackedPoseStore2D packed_poses(poses_2d, internal_cameras); return PreparedInputs( std::move(internal_cameras), std::move(core_joint_idx), std::move(core_limbs_idx), std::move(packed_poses)); } std::vector build_pair_candidates_from_packed(const PackedPoseStore2D &packed_poses) { std::vector pairs; for (size_t view1 = 0; view1 < packed_poses.num_views; ++view1) { for (size_t view2 = view1 + 1; view2 < packed_poses.num_views; ++view2) { for (size_t person1 = 0; person1 < packed_poses.person_counts[view1]; ++person1) { for (size_t person2 = 0; person2 < packed_poses.person_counts[view2]; ++person2) { pairs.push_back(PairCandidate { static_cast(view1), static_cast(view2), static_cast(person1), static_cast(person2), static_cast(packed_poses.pose_index(view1, person1)), static_cast(packed_poses.pose_index(view2, person2)), }); } } } } return pairs; } PreviousProjectionCache project_previous_poses( const PoseBatch3DView &previous_poses_3d, const std::vector &internal_cameras, const std::vector &core_joint_idx) { PreviousProjectionCache cache; cache.core_poses.resize(previous_poses_3d.num_persons); for (size_t person = 0; person < previous_poses_3d.num_persons; ++person) { Pose3D pose(core_joint_idx.size(), {0.0f, 0.0f, 0.0f, 0.0f}); for (size_t joint = 0; joint < core_joint_idx.size(); ++joint) { const size_t source_joint = core_joint_idx[joint]; for (size_t coord = 0; coord < 4; ++coord) { pose[joint][coord] = previous_poses_3d.at(person, source_joint, coord); } } cache.core_poses[person] = std::move(pose); } cache.projected_poses.resize(internal_cameras.size()); cache.projected_dists.resize(internal_cameras.size()); for (size_t view = 0; view < internal_cameras.size(); ++view) { auto [projected_poses, projected_dists] = project_poses(cache.core_poses, internal_cameras[view], true); cache.projected_poses[view] = std::move(projected_poses); cache.projected_dists[view] = std::move(projected_dists); } return cache; } float calc_pose_score( const Pose2D &pose, const Pose2D &reference_pose, const std::vector &dists, const CachedCamera &icam) { const float min_score = 0.1f; std::vector mask(pose.size(), false); size_t valid_count = 0; for (size_t joint = 0; joint < pose.size(); ++joint) { mask[joint] = pose[joint][2] > min_score && reference_pose[joint][2] > min_score; if (mask[joint]) { ++valid_count; } } if (valid_count < 3) { return 0.0f; } const float iscale = (icam.cam.width + icam.cam.height) * 0.5f; std::vector scores = score_projection(pose, reference_pose, dists, mask, iscale); const size_t drop_k = static_cast(pose.size() * 0.2f); if (drop_k > 0) { std::partial_sort(scores.begin(), scores.begin() + drop_k, scores.end()); } float total = 0.0f; for (size_t i = drop_k; i < scores.size(); ++i) { total += scores[i]; } return total / static_cast(scores.size() - drop_k); } PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( const PackedPoseStore2D &packed_poses, const std::vector &internal_cameras, const std::vector &core_joint_idx, const std::vector &pairs, const TriangulationOptions &options, const PoseBatch3DView &previous_poses_3d) { PreviousPoseFilterDebug debug; debug.used_previous_poses = true; debug.matches.resize(pairs.size()); if (previous_poses_3d.num_persons == 0) { debug.kept_pair_indices.resize(pairs.size()); std::iota(debug.kept_pair_indices.begin(), debug.kept_pair_indices.end(), 0); debug.kept_pairs = pairs; for (auto &match : debug.matches) { match.decision = "no_previous_poses"; } return debug; } const PreviousProjectionCache projected_previous = project_previous_poses(previous_poses_3d, internal_cameras, core_joint_idx); const float threshold = options.min_match_score; for (size_t pair_index = 0; pair_index < pairs.size(); ++pair_index) { const PairCandidate &pair = pairs[pair_index]; const Pose2D pose1_core = packed_poses.pose( static_cast(pair.view1), static_cast(pair.person1), core_joint_idx); const Pose2D pose2_core = packed_poses.pose( static_cast(pair.view2), static_cast(pair.person2), core_joint_idx); PreviousPoseMatch best_match; bool found_decision = false; for (size_t previous_index = 0; previous_index < previous_poses_3d.num_persons; ++previous_index) { const float score_view1 = calc_pose_score( pose1_core, projected_previous.projected_poses[static_cast(pair.view1)][previous_index], projected_previous.projected_dists[static_cast(pair.view1)][previous_index], internal_cameras[static_cast(pair.view1)]); const float score_view2 = calc_pose_score( pose2_core, projected_previous.projected_poses[static_cast(pair.view2)][previous_index], projected_previous.projected_dists[static_cast(pair.view2)][previous_index], internal_cameras[static_cast(pair.view2)]); const bool matched_view1 = score_view1 > threshold; const bool matched_view2 = score_view2 > threshold; if (matched_view1 && matched_view2) { best_match = PreviousPoseMatch { static_cast(previous_index), score_view1, score_view2, true, true, true, "matched_previous_pose", }; found_decision = true; break; } if (matched_view1 || matched_view2) { best_match = PreviousPoseMatch { static_cast(previous_index), score_view1, score_view2, matched_view1, matched_view2, false, "dropped_single_view_match", }; found_decision = true; break; } } if (!found_decision) { best_match.decision = "new_person_candidate"; best_match.kept = true; } debug.matches[pair_index] = best_match; if (best_match.kept) { debug.kept_pair_indices.push_back(static_cast(pair_index)); debug.kept_pairs.push_back(pair); } } return debug; } TriangulationTrace triangulate_debug_impl( const PoseBatch2DView &poses_2d, const std::vector &cameras, const std::array, 2> &roomparams, const std::vector &joint_names, const PoseBatch3DView *previous_poses_3d, const TriangulationOptions &options) { TriangulationTrace trace; trace.final_poses.num_joints = joint_names.size(); if (previous_poses_3d != nullptr && previous_poses_3d->num_persons > 0 && previous_poses_3d->num_joints != joint_names.size()) { throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names."); } PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names); trace.pairs = build_pair_candidates_from_packed(prepared.packed_poses); if (trace.pairs.empty()) { return trace; } std::vector active_pair_indices(trace.pairs.size()); std::iota(active_pair_indices.begin(), active_pair_indices.end(), 0); std::vector active_pairs = trace.pairs; if (previous_poses_3d != nullptr) { trace.previous_filter = filter_pairs_with_previous_poses_impl( prepared.packed_poses, prepared.internal_cameras, prepared.core_joint_idx, trace.pairs, options, *previous_poses_3d); active_pair_indices = trace.previous_filter.kept_pair_indices; active_pairs = trace.previous_filter.kept_pairs; if (active_pairs.empty()) { return trace; } } trace.core_proposals.reserve(active_pairs.size()); std::vector kept_pose_pairs; std::vector> kept_scored_poses; std::vector kept_core_indices; for (size_t i = 0; i < active_pairs.size(); ++i) { const PairCandidate &pair = active_pairs[i]; const CachedCamera &cam1 = prepared.internal_cameras[static_cast(pair.view1)]; const CachedCamera &cam2 = prepared.internal_cameras[static_cast(pair.view2)]; const Pose2D pose1_core = prepared.packed_poses.pose( static_cast(pair.view1), static_cast(pair.person1), prepared.core_joint_idx); const Pose2D pose2_core = prepared.packed_poses.pose( static_cast(pair.view2), static_cast(pair.person2), prepared.core_joint_idx); auto [pose3d, score] = triangulate_and_score( pose1_core, pose2_core, cam1, cam2, roomparams, prepared.core_limbs_idx); CoreProposalDebug proposal; proposal.pair_index = active_pair_indices[i]; proposal.pair = pair; proposal.pose_3d = pose3d; proposal.score = score; proposal.kept = score >= options.min_match_score; proposal.drop_reason = proposal.kept ? "kept" : "below_min_match_score"; trace.core_proposals.push_back(proposal); if (!proposal.kept) { continue; } kept_core_indices.push_back(static_cast(trace.core_proposals.size() - 1)); kept_scored_poses.emplace_back(std::move(pose3d), score); kept_pose_pairs.emplace_back( std::make_tuple(pair.view1, pair.view2, pair.person1, pair.person2), std::make_pair(pair.global_person1, pair.global_person2)); } if (kept_pose_pairs.empty()) { return trace; } auto groups = calc_grouping(kept_pose_pairs, kept_scored_poses, options.min_match_score); trace.grouping.initial_groups.reserve(groups.size()); for (const auto &group : groups) { ProposalGroupDebug debug_group; debug_group.center = std::get<0>(group); debug_group.pose_3d = std::get<1>(group); for (const int index : std::get<2>(group)) { debug_group.proposal_indices.push_back(kept_core_indices[static_cast(index)]); } trace.grouping.initial_groups.push_back(std::move(debug_group)); } for (size_t i = groups.size(); i > 0; --i) { if (std::get<2>(groups[i - 1]).size() < options.min_group_size) { groups.erase(groups.begin() + static_cast(i - 1)); } } if (groups.empty()) { return trace; } std::map, std::vector> pairs_map; for (size_t i = 0; i < kept_pose_pairs.size(); ++i) { const auto &pair = kept_pose_pairs[i]; const auto &mid1 = std::make_tuple( std::get<0>(pair.first), std::get<1>(pair.first), std::get<0>(pair.second)); const auto &mid2 = std::make_tuple( std::get<0>(pair.first), std::get<1>(pair.first), std::get<1>(pair.second)); pairs_map[mid1].push_back(i); pairs_map[mid2].push_back(i); } std::vector group_map(kept_pose_pairs.size()); for (size_t i = 0; i < groups.size(); ++i) { for (const int index : std::get<2>(groups[i])) { group_map[static_cast(index)] = i; } } std::set drop_indices; for (auto &pair_entry : pairs_map) { auto &indices = pair_entry.second; if (indices.size() <= 1) { continue; } std::vector group_sizes; std::vector pair_scores; group_sizes.reserve(indices.size()); pair_scores.reserve(indices.size()); for (const size_t index : indices) { group_sizes.push_back(std::get<2>(groups[group_map[index]]).size()); pair_scores.push_back(kept_scored_poses[index].second); } std::vector indices_sorted(indices.size()); std::iota(indices_sorted.begin(), indices_sorted.end(), 0); std::sort(indices_sorted.begin(), indices_sorted.end(), [&group_sizes, &pair_scores](size_t a, size_t b) { if (group_sizes[a] != group_sizes[b]) { return group_sizes[a] > group_sizes[b]; } return pair_scores[a] > pair_scores[b]; }); for (size_t j = 1; j < indices_sorted.size(); ++j) { drop_indices.insert(indices[indices_sorted[j]]); } } std::vector drop_list(drop_indices.begin(), drop_indices.end()); std::sort(drop_list.begin(), drop_list.end(), std::greater()); for (const size_t drop_idx : drop_list) { trace.grouping.duplicate_pair_drops.push_back(kept_core_indices[drop_idx]); kept_scored_poses.erase(kept_scored_poses.begin() + static_cast(drop_idx)); kept_pose_pairs.erase(kept_pose_pairs.begin() + static_cast(drop_idx)); kept_core_indices.erase(kept_core_indices.begin() + static_cast(drop_idx)); for (auto &group : groups) { auto &indices = std::get<2>(group); auto it = std::find(indices.begin(), indices.end(), static_cast(drop_idx)); if (it != indices.end()) { indices.erase(it); } for (int &index : indices) { if (static_cast(index) > drop_idx) { --index; } } } } for (size_t i = groups.size(); i > 0; --i) { if (std::get<2>(groups[i - 1]).size() < options.min_group_size) { groups.erase(groups.begin() + static_cast(i - 1)); } } if (groups.empty()) { return trace; } trace.grouping.groups.reserve(groups.size()); for (const auto &group : groups) { ProposalGroupDebug debug_group; debug_group.center = std::get<0>(group); debug_group.pose_3d = std::get<1>(group); for (const int index : std::get<2>(group)) { debug_group.proposal_indices.push_back(kept_core_indices[static_cast(index)]); } trace.grouping.groups.push_back(std::move(debug_group)); } std::vector full_pose_buffer(kept_pose_pairs.size()); std::vector full_proposal_index_by_core(trace.core_proposals.size(), -1); trace.full_proposals.reserve(kept_pose_pairs.size()); for (size_t i = 0; i < kept_pose_pairs.size(); ++i) { const auto &pair_ids = kept_pose_pairs[i].first; const PairCandidate pair = { std::get<0>(pair_ids), std::get<1>(pair_ids), std::get<2>(pair_ids), std::get<3>(pair_ids), kept_pose_pairs[i].second.first, kept_pose_pairs[i].second.second, }; const CachedCamera &cam1 = prepared.internal_cameras[static_cast(pair.view1)]; const CachedCamera &cam2 = prepared.internal_cameras[static_cast(pair.view2)]; Pose2D pose1 = prepared.packed_poses.pose(static_cast(pair.view1), static_cast(pair.person1)); Pose2D pose2 = prepared.packed_poses.pose(static_cast(pair.view2), static_cast(pair.person2)); auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams, {}); (void)score; for (size_t joint = 0; joint < pose3d.size(); ++joint) { const float score1 = pose1[joint][2]; const float score2 = pose2[joint][2]; const float min_score = 0.1f; if (score1 > min_score && score2 > min_score) { const float score_projection = (score1 + score2) * 0.5f; const float score_triangulation = pose3d[joint][3]; pose3d[joint][3] = 0.9f * score_triangulation + 0.1f * score_projection; } } full_pose_buffer[i] = pose3d; full_proposal_index_by_core[kept_core_indices[i]] = static_cast(trace.full_proposals.size()); trace.full_proposals.push_back(FullProposalDebug { kept_core_indices[i], pair, std::move(pose3d), }); } std::vector final_poses_3d(groups.size()); trace.merge.group_proposal_indices.reserve(groups.size()); trace.merge.merged_poses.reserve(groups.size()); for (size_t i = 0; i < groups.size(); ++i) { std::vector poses; std::vector source_core_indices; poses.reserve(std::get<2>(groups[i]).size()); source_core_indices.reserve(std::get<2>(groups[i]).size()); for (const int index : std::get<2>(groups[i])) { poses.push_back(full_pose_buffer[static_cast(index)]); source_core_indices.push_back(kept_core_indices[static_cast(index)]); } final_poses_3d[i] = merge_group(poses, options.min_match_score, prepared.internal_cameras.size()); trace.merge.group_proposal_indices.push_back(std::move(source_core_indices)); trace.merge.merged_poses.push_back(final_poses_3d[i]); } add_extra_joints(final_poses_3d, joint_names); filter_poses(final_poses_3d, roomparams, prepared.core_joint_idx, prepared.core_limbs_idx); add_missing_joints(final_poses_3d, joint_names, options.min_match_score); replace_far_joints(final_poses_3d, joint_names, options.min_match_score); size_t valid_persons = 0; for (const auto &pose : final_poses_3d) { const bool is_valid = std::any_of( pose.begin(), pose.end(), [&options](const std::array &joint) { return joint[3] > options.min_match_score; }); if (is_valid) { ++valid_persons; } } trace.final_poses.num_persons = valid_persons; trace.final_poses.data.resize(valid_persons * trace.final_poses.num_joints * 4); size_t person_index = 0; for (const auto &pose : final_poses_3d) { const bool is_valid = std::any_of( pose.begin(), pose.end(), [&options](const std::array &joint) { return joint[3] > options.min_match_score; }); if (!is_valid) { continue; } for (size_t joint = 0; joint < trace.final_poses.num_joints; ++joint) { for (size_t coord = 0; coord < 4; ++coord) { trace.final_poses.at(person_index, joint, coord) = pose[joint][coord]; } } ++person_index; } return trace; } PreviousPoseFilterDebug filter_pairs_with_previous_poses_impl( const PoseBatch2DView &poses_2d, const std::vector &cameras, const std::vector &joint_names, const PoseBatch3DView &previous_poses_3d, const TriangulationOptions &options) { if (previous_poses_3d.num_persons > 0 && previous_poses_3d.num_joints != joint_names.size()) { throw std::invalid_argument("previous_poses_3d must use the same joint count as joint_names."); } PreparedInputs prepared = prepare_inputs(poses_2d, cameras, joint_names); const std::vector pairs = build_pair_candidates_from_packed(prepared.packed_poses); if (pairs.empty()) { PreviousPoseFilterDebug empty; empty.used_previous_poses = true; return empty; } return filter_pairs_with_previous_poses_impl( prepared.packed_poses, prepared.internal_cameras, prepared.core_joint_idx, pairs, options, previous_poses_3d); } std::tuple, std::vector>> project_poses( const std::vector &poses_3d, const CachedCamera &icam, bool calc_dists) { if (poses_3d.empty()) { return std::make_tuple(std::vector {}, std::vector> {}); } const size_t num_persons = poses_3d.size(); const size_t num_joints = poses_3d[0].size(); std::vector poses_2d(num_persons); std::vector> all_dists(num_persons); const std::array, 3> &K = icam.newK; const std::array, 3> &R = icam.invR; const std::array, 3> &T = icam.cam.T; for (size_t i = 0; i < num_persons; ++i) { const Pose3D &body3D = poses_3d[i]; Pose2D body2D(num_joints, std::array{0.0f, 0.0f, 0.0f}); std::vector dists(num_joints, 0.0f); for (size_t j = 0; j < num_joints; ++j) { float x = body3D[j][0]; float y = body3D[j][1]; float z = body3D[j][2]; float score = body3D[j][3]; const float xt = x - T[0][0]; const float yt = y - T[1][0]; const float zt = z - T[2][0]; float x_cam = xt * R[0][0] + yt * R[0][1] + zt * R[0][2]; float y_cam = xt * R[1][0] + yt * R[1][1] + zt * R[1][2]; float z_cam = xt * R[2][0] + yt * R[2][1] + zt * R[2][2]; if (z_cam <= 0.0f) { x_cam = 0.0f; y_cam = 0.0f; z_cam = 0.0f; } dists[j] = calc_dists ? std::sqrt(x_cam * x_cam + y_cam * y_cam + z_cam * z_cam) : 0.0f; float u = 0.0f; float v = 0.0f; if (z_cam > 0.0f) { u = (K[0][0] * x_cam + K[0][1] * y_cam + K[0][2] * z_cam) / z_cam; v = (K[1][0] * x_cam + K[1][1] * y_cam + K[1][2] * z_cam) / z_cam; } if (u < 0.0f || u >= icam.cam.width || v < 0.0f || v >= icam.cam.height) { u = 0.0f; v = 0.0f; score = 0.0f; } body2D[j] = {u, v, score}; } poses_2d[i] = std::move(body2D); all_dists[i] = std::move(dists); } return std::make_tuple(poses_2d, all_dists); } std::vector score_projection( const Pose2D &pose, const Pose2D &repro, const std::vector &dists, const std::vector &mask, float iscale) { const float min_score = 0.1f; const size_t num_joints = pose.size(); std::vector error(num_joints, 0.0f); for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { const float dx = pose[i][0] - repro[i][0]; const float dy = pose[i][1] - repro[i][1]; float err = std::sqrt(dx * dx + dy * dy); if (repro[i][2] < min_score) { err = iscale; } error[i] = err; } } const float inv_iscale = 1.0f / iscale; const float iscale_quarter = iscale / 4.0f; for (size_t i = 0; i < num_joints; ++i) { error[i] = std::max(0.0f, std::min(error[i], iscale_quarter)) * inv_iscale; } float mean_dist = 0.0f; int count = 0; for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { mean_dist += dists[i]; ++count; } } if (count > 0) { mean_dist /= static_cast(count); } const float dscale = std::sqrt(mean_dist / 3.5f); for (size_t i = 0; i < num_joints; ++i) { error[i] *= dscale; } std::vector score(num_joints, 0.0f); for (size_t i = 0; i < num_joints; ++i) { score[i] = 1.0f / (1.0f + error[i] * 10.0f); } return score; } float dot(const std::array &a, const std::array &b) { return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; } std::array cross(const std::array &a, const std::array &b) { return {a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]}; } std::array add(const std::array &a, const std::array &b) { return {a[0] + b[0], a[1] + b[1], a[2] + b[2]}; } std::array subtract(const std::array &a, const std::array &b) { return {a[0] - b[0], a[1] - b[1], a[2] - b[2]}; } std::array multiply(const std::array &a, float s) { return {a[0] * s, a[1] * s, a[2] * s}; } std::array normalize(const std::array &v) { float norm = std::sqrt(dot(v, v)); if (norm < 1e-8f) return v; return multiply(v, 1.0f / norm); } std::array mat_mul_vec( const std::array, 3> &M, const std::array &v) { std::array res = {M[0][0] * v[0] + M[0][1] * v[1] + M[0][2] * v[2], M[1][0] * v[0] + M[1][1] * v[1] + M[1][2] * v[2], M[2][0] * v[0] + M[2][1] * v[1] + M[2][2] * v[2]}; return res; } std::array calc_ray_dir(const CachedCamera &icam, const std::array &pt) { // Compute normalized ray direction from the point std::array uv1 = {pt[0], pt[1], 1.0}; auto d = mat_mul_vec(icam.cam.R, mat_mul_vec(icam.invK, uv1)); auto ray_dir = normalize(d); return ray_dir; } std::array triangulate_midpoint( const CachedCamera &icam1, const CachedCamera &icam2, const std::array &pt1, const std::array &pt2) { // Triangulate two points by computing their two rays and the midpoint of their closest approach // See: https://en.wikipedia.org/wiki/Skew_lines#Nearest_points // Obtain the camera centers and ray directions for both views std::array p1 = icam1.center; std::array p2 = icam2.center; std::array d1 = calc_ray_dir(icam1, pt1); std::array d2 = calc_ray_dir(icam2, pt2); // Compute the perpendicular plane vectors std::array n = cross(d1, d2); std::array n1 = cross(d1, n); std::array n2 = cross(d2, n); // Calculate point on Line 1 nearest to Line 2 float t1 = dot(subtract(p2, p1), n2) / dot(d1, n2); std::array c1 = add(p1, multiply(d1, t1)); // Calculate point on Line 2 nearest to Line 1 float t2 = dot(subtract(p1, p2), n1) / dot(d2, n1); std::array c2 = add(p2, multiply(d2, t2)); // Compute midpoint between c1 and c2. std::array midpoint = multiply(add(c1, c2), 0.5); float dist = std::sqrt(dot(subtract(c1, c2), subtract(c1, c2))); std::array result = {midpoint[0], midpoint[1], midpoint[2], dist}; return result; } // ================================================================================================= std::pair>, float> triangulate_and_score( const std::vector> &pose1, const std::vector> &pose2, const CachedCamera &cam1, const CachedCamera &cam2, const std::array, 2> &roomparams, const std::vector> &core_limbs_idx) { const float min_score = 0.1; const size_t num_joints = pose1.size(); // Create mask for valid points std::vector mask; mask.resize(num_joints); for (size_t i = 0; i < num_joints; ++i) { if (pose1[i][2] <= min_score || pose2[i][2] <= min_score) { mask[i] = false; } else { mask[i] = true; } } // If too few joints are visible, return a low score int num_visible = 0; for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { num_visible++; } } if (num_visible < 3) { std::vector> empty(num_joints, {0.0, 0.0, 0.0, 0.0}); float score = 0.0; return std::make_pair(empty, score); } // Use midpoint triangulation instead of cv::triangulatePoints because it is much faster, // while having almost the same accuracy. std::vector> pose3d(num_joints, {0.0, 0.0, 0.0, 0.0}); for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { auto &pt1 = pose1[i]; auto &pt2 = pose2[i]; std::array pt3d = triangulate_midpoint( cam1, cam2, {pt1[0], pt1[1]}, {pt2[0], pt2[1]}); float score = std::max(0.0, 1.0 - pt3d[3]); pose3d[i] = {pt3d[0], pt3d[1], pt3d[2], score}; } } // Check if mean of the points is inside the room bounds std::array mean = {0.0, 0.0, 0.0}; for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { mean[0] += pose3d[i][0]; mean[1] += pose3d[i][1]; mean[2] += pose3d[i][2]; } } float inv_num_vis = 1.0 / num_visible; for (int j = 0; j < 3; ++j) { mean[j] *= inv_num_vis; } const std::array &room_size = roomparams[0]; const std::array &room_center = roomparams[1]; for (int j = 0; j < 3; ++j) { if (mean[j] > room_center[j] + room_size[j] / 2.0 || mean[j] < room_center[j] - room_size[j] / 2.0) { // Very low score if outside room for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { pose3d[i][3] = 0.001; } } return std::make_pair(pose3d, 0.001); } } // Calculate reprojections std::vector>> poses_3d = {pose3d}; auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true); auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true); auto repro1 = repro1s[0]; auto dists1 = dists1s[0]; auto repro2 = repro2s[0]; auto dists2 = dists2s[0]; // Calculate scores for each view float iscale = (cam1.cam.width + cam1.cam.height) / 2.0; std::vector score1 = score_projection(pose1, repro1, dists1, mask, iscale); std::vector score2 = score_projection(pose2, repro2, dists2, mask, iscale); // Update scores of 3D pose for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { // Using mainly the reprojection score leads to slightly better average results, // but the triangulation score is used to penalize some bad outliers. float scoreT = 0.5 * (score1[i] + score2[i]); pose3d[i][3] = 0.9 * scoreT + 0.1 * pose3d[i][3]; } } // Set scores outside the room to a low value const float wdist = 0.1; for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { for (int j = 0; j < 3; ++j) { if (pose3d[i][j] > room_center[j] + room_size[j] / 2.0 + wdist || pose3d[i][j] < room_center[j] - room_size[j] / 2.0 - wdist) { pose3d[i][3] = 0.001; break; } } } } // Filter clearly wrong limbs if (!core_limbs_idx.empty()) { const float max_length_sq = 0.9 * 0.9; for (size_t i = 0; i < core_limbs_idx.size(); ++i) { size_t limb1 = core_limbs_idx[i][0]; size_t limb2 = core_limbs_idx[i][1]; if (pose3d[limb1][3] > min_score && pose3d[limb2][3] > min_score) { float dx = pose3d[limb1][0] - pose3d[limb2][0]; float dy = pose3d[limb1][1] - pose3d[limb2][1]; float dz = pose3d[limb1][2] - pose3d[limb2][2]; float length_sq = dx * dx + dy * dy + dz * dz; if (length_sq > max_length_sq) { pose3d[limb2][3] = 0.001; } } } } // Drop lowest scores std::vector valid_scores; for (size_t i = 0; i < num_joints; ++i) { if (pose3d[i][3] > min_score) { valid_scores.push_back(pose3d[i][3]); } } size_t scores_size = valid_scores.size(); size_t drop_k = static_cast(scores_size * 0.2); if (drop_k > 0) { std::partial_sort(valid_scores.begin(), valid_scores.begin() + drop_k, valid_scores.end()); valid_scores.erase(valid_scores.begin(), valid_scores.begin() + drop_k); } // Calculate final score float final_score = 0.0; if (!valid_scores.empty()) { float sum_scores = std::accumulate(valid_scores.begin(), valid_scores.end(), 0.0); final_score = sum_scores / static_cast(valid_scores.size()); } return std::make_pair(pose3d, final_score); } // ================================================================================================= std::vector, std::vector>, std::vector>> calc_grouping( const std::vector, std::pair>> &all_pairs, const std::vector>, float>> &all_scored_poses, float min_score) { float max_center_dist_sq = 0.75 * 0.75; float max_joint_avg_dist = 0.25; size_t num_pairs = all_pairs.size(); // Calculate pose centers std::vector> centers; centers.resize(num_pairs); for (size_t i = 0; i < num_pairs; ++i) { const auto &pose_3d = all_scored_poses[i].first; const size_t num_joints = pose_3d.size(); std::array center = {0.0, 0.0, 0.0}; size_t num_valid = 0; for (size_t j = 0; j < num_joints; ++j) { float score = pose_3d[j][3]; if (score > min_score) { center[0] += pose_3d[j][0]; center[1] += pose_3d[j][1]; center[2] += pose_3d[j][2]; ++num_valid; } } if (num_valid > 0) { float inv = 1.0 / num_valid; center[0] *= inv; center[1] *= inv; center[2] *= inv; } centers[i] = center; } // Calculate Groups // defined as a tuple of center, pose, and all-pairs-indices of members std::vector, std::vector>, std::vector>> groups; std::vector> per_group_visible_counts; for (size_t i = 0; i < num_pairs; ++i) { const auto &pose_3d = all_scored_poses[i].first; size_t num_joints = pose_3d.size(); const std::array ¢er = centers[i]; float best_dist = std::numeric_limits::infinity(); int best_group = -1; for (size_t j = 0; j < groups.size(); ++j) { auto &group = groups[j]; std::array &group_center = std::get<0>(group); // Check if the center is close enough float dx = group_center[0] - center[0]; float dy = group_center[1] - center[1]; float dz = group_center[2] - center[2]; float center_dist_sq = dx * dx + dy * dy + dz * dz; if (center_dist_sq < max_center_dist_sq) { const auto &group_pose = std::get<1>(group); // Calculate average joint distance std::vector dists; for (size_t row = 0; row < num_joints; ++row) { float score1 = pose_3d[row][3]; float score2 = group_pose[row][3]; if (score1 > min_score && score2 > min_score) { float dx = pose_3d[row][0] - group_pose[row][0]; float dy = pose_3d[row][1] - group_pose[row][1]; float dz = pose_3d[row][2] - group_pose[row][2]; float dist = std::sqrt(dx * dx + dy * dy + dz * dz); dists.push_back(dist); } } if (dists.size() >= 5) { // Drop highest value to reduce influence of outliers auto max_it = std::max_element(dists.begin(), dists.end()); dists.erase(max_it); } if (dists.size() > 0) { // Check if the average joint distance is close enough float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0); avg_dist /= static_cast(dists.size()); if (avg_dist < max_joint_avg_dist && avg_dist < best_dist) { best_dist = avg_dist; best_group = static_cast(j); } } } } if (best_group == -1) { // Create a new group std::vector new_indices{static_cast(i)}; std::vector> group_pose = pose_3d; groups.emplace_back(center, group_pose, new_indices); // Update per joint counts std::vector new_valid_joint_counts(num_joints, 0); for (size_t row = 0; row < num_joints; ++row) { if (pose_3d[row][3] > min_score) { new_valid_joint_counts[row] = 1; } } per_group_visible_counts.push_back(std::move(new_valid_joint_counts)); } else { // Update existing group auto &group = groups[best_group]; std::array &group_center = std::get<0>(group); std::vector> &group_pose = std::get<1>(group); std::vector &group_indices = std::get<2>(group); float n_elems = static_cast(group_indices.size()); float inv_n = 1.0 / (n_elems + 1.0); // Update group center group_center[0] = (group_center[0] * n_elems + center[0]) * inv_n; group_center[1] = (group_center[1] * n_elems + center[1]) * inv_n; group_center[2] = (group_center[2] * n_elems + center[2]) * inv_n; // Update group pose for (size_t row = 0; row < num_joints; ++row) { if (pose_3d[row][3] > min_score) { float j_elems = static_cast(per_group_visible_counts[best_group][row]); float inv_j = 1.0 / (j_elems + 1.0); group_pose[row][0] = (group_pose[row][0] * j_elems + pose_3d[row][0]) * inv_j; group_pose[row][1] = (group_pose[row][1] * j_elems + pose_3d[row][1]) * inv_j; group_pose[row][2] = (group_pose[row][2] * j_elems + pose_3d[row][2]) * inv_j; group_pose[row][3] = (group_pose[row][3] * j_elems + pose_3d[row][3]) * inv_j; per_group_visible_counts[best_group][row]++; } } group_indices.push_back(static_cast(i)); } } // Merge close groups // Depending on the inital group creation, one or more groups can be created that in the end // share the same persons, even if they had a larger distance at the beginning // So merge them similar to the group assignment before std::vector, std::vector>, std::vector>> merged_groups; for (size_t i = 0; i < groups.size(); ++i) { size_t num_joints = std::get<1>(groups[i]).size(); auto &group = groups[i]; auto &group_visible_counts = per_group_visible_counts[i]; float best_dist = std::numeric_limits::infinity(); int best_group = -1; for (size_t j = 0; j < merged_groups.size(); ++j) { const auto &group_pose = std::get<1>(group); const auto &merged_pose = std::get<1>(merged_groups[j]); // Calculate average joint distance std::vector dists; for (size_t row = 0; row < num_joints; ++row) { float score1 = group_pose[row][3]; float score2 = merged_pose[row][3]; if (score1 > min_score && score2 > min_score) { float dx = group_pose[row][0] - merged_pose[row][0]; float dy = group_pose[row][1] - merged_pose[row][1]; float dz = group_pose[row][2] - merged_pose[row][2]; float dist = std::sqrt(dx * dx + dy * dy + dz * dz); dists.push_back(dist); } } if (dists.size() >= 5) { // Drop highest value to reduce influence of outliers auto max_it = std::max_element(dists.begin(), dists.end()); dists.erase(max_it); } if (dists.size() > 0) { // Check if the average joint distance is close enough float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0); avg_dist /= static_cast(dists.size()); if (avg_dist < max_joint_avg_dist && avg_dist < best_dist) { best_dist = avg_dist; best_group = static_cast(j); } } } if (best_group == -1) { // Create a new group merged_groups.push_back(group); } else { // Update existing group auto &merged_group = merged_groups[best_group]; std::array &merged_center = std::get<0>(merged_group); std::array &group_center = std::get<0>(group); std::vector &merged_group_indices = std::get<2>(merged_group); float n_elems1 = static_cast(merged_group_indices.size()); float n_elems2 = static_cast(std::get<2>(group).size()); float inv1 = n_elems1 / (n_elems1 + n_elems2); float inv2 = n_elems2 / (n_elems1 + n_elems2); // Update group center merged_center[0] = merged_center[0] * inv1 + group_center[0] * inv2; merged_center[1] = merged_center[1] * inv1 + group_center[1] * inv2; merged_center[2] = merged_center[2] * inv1 + group_center[2] * inv2; // Update group pose for (size_t row = 0; row < num_joints; ++row) { auto &group_pose = std::get<1>(group); auto &merged_pose = std::get<1>(merged_group); if (group_pose[row][3] > min_score) { float j_elems1 = static_cast(group_visible_counts[row]); float j_elems2 = static_cast(per_group_visible_counts[best_group][row]); float inv1 = j_elems1 / (j_elems1 + j_elems2); float inv2 = j_elems2 / (j_elems1 + j_elems2); merged_pose[row][0] = merged_pose[row][0] * inv1 + group_pose[row][0] * inv2; merged_pose[row][1] = merged_pose[row][1] * inv1 + group_pose[row][1] * inv2; merged_pose[row][2] = merged_pose[row][2] * inv1 + group_pose[row][2] * inv2; merged_pose[row][3] = merged_pose[row][3] * inv1 + group_pose[row][3] * inv2; group_visible_counts[row]++; } } // Merge indices merged_group_indices.insert( merged_group_indices.end(), std::get<2>(group).begin(), std::get<2>(group).end()); } } return merged_groups; } // ================================================================================================= std::vector> merge_group( const std::vector>> &poses_3d, float min_score, size_t num_cams) { size_t num_poses = poses_3d.size(); size_t num_joints = poses_3d[0].size(); // Merge poses to create initial pose // Use only those triangulations with a high score std::vector> sum_poses(num_joints, {0.0, 0.0, 0.0, 0.0}); std::vector sum_mask1(num_joints, 0); float offset1 = (1.0 - min_score) / 2.0; for (size_t i = 0; i < num_poses; ++i) { const auto &pose = poses_3d[i]; for (size_t j = 0; j < num_joints; ++j) { float score = pose[j][3]; if (score > min_score - offset1) { float weight = std::pow(score, 2); sum_poses[j][0] += pose[j][0] * weight; sum_poses[j][1] += pose[j][1] * weight; sum_poses[j][2] += pose[j][2] * weight; sum_poses[j][3] += score * weight; sum_mask1[j] += weight; } } } std::vector> initial_pose_3d(num_joints, {0.0, 0.0, 0.0, 0.0}); for (size_t j = 0; j < num_joints; ++j) { if (sum_mask1[j] > 0) { float inv = 1.0 / sum_mask1[j]; initial_pose_3d[j][0] = sum_poses[j][0] * inv; initial_pose_3d[j][1] = sum_poses[j][1] * inv; initial_pose_3d[j][2] = sum_poses[j][2] * inv; initial_pose_3d[j][3] = sum_poses[j][3] * inv; } } // Use center as default if the initial pose is empty std::vector jmask(num_joints, false); std::array center = {0.0, 0.0, 0.0}; int valid_joints = 0; for (size_t j = 0; j < num_joints; ++j) { float score = initial_pose_3d[j][3]; if (score > min_score - offset1) { jmask[j] = true; center[0] += initial_pose_3d[j][0]; center[1] += initial_pose_3d[j][1]; center[2] += initial_pose_3d[j][2]; valid_joints++; } } if (valid_joints > 0) { float inv_valid = 1.0 / valid_joints; center[0] *= inv_valid; center[1] *= inv_valid; center[2] *= inv_valid; } for (size_t j = 0; j < num_joints; ++j) { if (!jmask[j]) { initial_pose_3d[j][0] = center[0]; initial_pose_3d[j][1] = center[1]; initial_pose_3d[j][2] = center[2]; initial_pose_3d[j][3] = 0.0; } } // Drop joints with low scores and filter outlying joints using distance threshold float offset2 = (1.0 - min_score) * 2.0; float max_dist_sq = 1.2 * 1.2; std::vector> mask(num_poses, std::vector(num_joints, false)); std::vector> distances(num_poses, std::vector(num_joints, 0.0)); for (size_t i = 0; i < num_poses; ++i) { const auto &pose = poses_3d[i]; for (size_t j = 0; j < num_joints; ++j) { float dx = pose[j][0] - initial_pose_3d[j][0]; float dy = pose[j][1] - initial_pose_3d[j][1]; float dz = pose[j][2] - initial_pose_3d[j][2]; float dist_sq = dx * dx + dy * dy + dz * dz; distances[i][j] = dist_sq; float score = pose[j][3]; if (dist_sq <= max_dist_sq && score > (min_score - offset2)) { mask[i][j] = true; } } } // Select the best-k proposals for each joint that are closest to the initial pose int keep_best = std::max((int)std::ceil(num_cams / 2.0), (int)std::ceil(num_poses / 3.0)); std::vector> best_k_mask(num_poses, std::vector(num_joints, false)); for (size_t j = 0; j < num_joints; ++j) { std::vector> valid_indices; valid_indices.reserve(num_poses); for (size_t i = 0; i < num_poses; ++i) { if (mask[i][j]) { valid_indices.emplace_back(distances[i][j], i); } } int num_valid = std::min(keep_best, static_cast(valid_indices.size())); if (num_valid > 0) { std::partial_sort(valid_indices.begin(), valid_indices.begin() + num_valid, valid_indices.end()); for (int k = 0; k < num_valid; ++k) { int idx = valid_indices[k].second; best_k_mask[idx][j] = true; } } } // Combine masks for (size_t i = 0; i < num_poses; ++i) { for (size_t j = 0; j < num_joints; ++j) { mask[i][j] = mask[i][j] && best_k_mask[i][j]; } } // Compute the final pose std::vector> sum_poses2(num_joints, {0.0, 0.0, 0.0, 0.0}); std::vector sum_mask2(num_joints, 0); for (size_t i = 0; i < num_poses; ++i) { const auto &pose = poses_3d[i]; for (size_t j = 0; j < num_joints; ++j) { if (mask[i][j]) { // Use an exponential weighting to give more importance to higher scores float weight = std::pow(pose[j][3], 4); sum_poses2[j][0] += pose[j][0] * weight; sum_poses2[j][1] += pose[j][1] * weight; sum_poses2[j][2] += pose[j][2] * weight; sum_poses2[j][3] += pose[j][3] * weight; sum_mask2[j] += weight; } } } std::vector> final_pose_3d(num_joints, {0.0, 0.0, 0.0, 0.0}); for (size_t j = 0; j < num_joints; ++j) { if (sum_mask2[j] > 0) { float inv = 1.0 / sum_mask2[j]; final_pose_3d[j][0] = sum_poses2[j][0] * inv; final_pose_3d[j][1] = sum_poses2[j][1] * inv; final_pose_3d[j][2] = sum_poses2[j][2] * inv; final_pose_3d[j][3] = sum_poses2[j][3] * inv; } } return final_pose_3d; } // ================================================================================================= void add_extra_joints( std::vector>> &poses, const std::vector &joint_names) { // Find indices of "head", "ear_left", and "ear_right" auto it_head = std::find(joint_names.begin(), joint_names.end(), "head"); auto it_ear_left = std::find(joint_names.begin(), joint_names.end(), "ear_left"); auto it_ear_right = std::find(joint_names.begin(), joint_names.end(), "ear_right"); int idx_h = std::distance(joint_names.begin(), it_head); int idx_el = std::distance(joint_names.begin(), it_ear_left); int idx_er = std::distance(joint_names.begin(), it_ear_right); // If the confidence score of "head" is zero compute it as the average of the ears for (size_t i = 0; i < poses.size(); ++i) { auto &pose = poses[i]; if (pose[idx_h][3] == 0.0) { if (pose[idx_el][3] > 0.0 && pose[idx_er][3] > 0.0) { pose[idx_h][0] = (pose[idx_el][0] + pose[idx_er][0]) * 0.5; pose[idx_h][1] = (pose[idx_el][1] + pose[idx_er][1]) * 0.5; pose[idx_h][2] = (pose[idx_el][2] + pose[idx_er][2]) * 0.5; pose[idx_h][3] = std::min(pose[idx_el][3], pose[idx_er][3]); } } } } // ================================================================================================= void filter_poses( std::vector>> &poses, const std::array, 2> &roomparams, const std::vector &core_joint_idx, const std::vector> &core_limbs_idx) { const float min_score = 0.1; std::vector drop_indices; for (size_t i = 0; i < poses.size(); ++i) { auto &pose = poses[i]; size_t num_core_joints = core_joint_idx.size(); size_t num_full_joints = pose.size(); // Collect valid joint indices std::vector valid_joint_idx; for (size_t j = 0; j < num_core_joints; ++j) { size_t idx = core_joint_idx[j]; if (pose[idx][3] > min_score) { valid_joint_idx.push_back(idx); } } // Drop poses with too few joints if (valid_joint_idx.size() < 5) { drop_indices.push_back(i); continue; } // Compute min, max and mean coordinates std::array mean = {0.0, 0.0, 0.0}; std::array mins = { std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max()}; std::array maxs = { std::numeric_limits::lowest(), std::numeric_limits::lowest(), std::numeric_limits::lowest()}; for (size_t j = 0; j < valid_joint_idx.size(); ++j) { size_t idx = valid_joint_idx[j]; for (size_t k = 0; k < 3; ++k) { mins[k] = std::min(mins[k], pose[idx][k]); maxs[k] = std::max(maxs[k], pose[idx][k]); mean[k] += pose[idx][k]; } } for (size_t k = 0; k < 3; ++k) { mean[k] /= static_cast(valid_joint_idx.size()); } // Drop poses that are too large or too small or too flat float min_flatness = 0.2; float max_size = 2.3; float min_size = 0.3; std::array diff; for (int j = 0; j < 3; ++j) { diff[j] = maxs[j] - mins[j]; } if (diff[0] > max_size || diff[1] > max_size || diff[2] > max_size) { drop_indices.push_back(i); continue; } if (diff[0] < min_size && diff[1] < min_size && diff[2] < min_size) { drop_indices.push_back(i); continue; } if ((diff[0] < min_flatness && diff[1] < min_flatness) || (diff[1] < min_flatness && diff[2] < min_flatness) || (diff[2] < min_flatness && diff[0] < min_flatness)) { drop_indices.push_back(i); continue; } // Drop persons outside room const float wdist = 0.1; const auto &room_size = roomparams[0]; const auto &room_center = roomparams[1]; const std::array room_half_size = { room_size[0] / 2.0f, room_size[1] / 2.0f, room_size[2] / 2.0f}; bool outside = false; for (int j = 0; j < 3; ++j) { // Check if the mean position is outside the room boundaries if (mean[j] > room_half_size[j] + room_center[j] || mean[j] < -room_half_size[j] + room_center[j]) { outside = true; break; } } for (int j = 0; j < 3; ++j) { // Check if any limb is too far outside the room if (maxs[j] > room_half_size[j] + room_center[j] + wdist || mins[j] < -room_half_size[j] + room_center[j] - wdist) { outside = true; break; } } if (outside) { drop_indices.push_back(i); continue; } // Set joint scores outside the room to a low value for (size_t j = 0; j < num_full_joints; ++j) { if (pose[j][3] > min_score) { for (int k = 0; k < 3; ++k) { if (pose[j][k] > room_half_size[k] + room_center[k] + wdist || pose[j][k] < -room_half_size[k] + room_center[k] - wdist) { pose[j][3] = 0.001; break; } } } } // Calculate total limb length and average limb length const float max_avg_length = 0.5; const float min_avg_length = 0.1; float total_length = 0.0; int total_limbs = 0; for (size_t j = 0; j < core_limbs_idx.size(); ++j) { size_t start_idx = core_joint_idx[core_limbs_idx[j][0]]; size_t end_idx = core_joint_idx[core_limbs_idx[j][1]]; if (pose[start_idx][3] < min_score || pose[end_idx][3] < min_score) { continue; } float dx = pose[end_idx][0] - pose[start_idx][0]; float dy = pose[end_idx][1] - pose[start_idx][1]; float dz = pose[end_idx][2] - pose[start_idx][2]; float length = std::sqrt(dx * dx + dy * dy + dz * dz); total_length += length; total_limbs++; } if (total_limbs == 0) { drop_indices.push_back(i); continue; } float average_length = total_length / static_cast(total_limbs); if (average_length < min_avg_length) { drop_indices.push_back(i); continue; } if (total_limbs >= 5 && average_length > max_avg_length) { drop_indices.push_back(i); continue; } } // Set confidences of invalid poses to a low value for (size_t i = 0; i < drop_indices.size(); ++i) { auto &pose = poses[drop_indices[i]]; for (size_t j = 0; j < pose.size(); ++j) { pose[j][3] = 0.001; } } } // ================================================================================================= void add_missing_joints( std::vector>> &poses, const std::vector &joint_names, float min_match_score) { // Map joint names to their indices for quick lookup std::unordered_map joint_name_to_idx; for (size_t idx = 0; idx < joint_names.size(); ++idx) { joint_name_to_idx[joint_names[idx]] = idx; } // Adjacent mapping std::unordered_map> adjacents = { {"hip_right", {"hip_middle", "hip_left"}}, {"hip_left", {"hip_middle", "hip_right"}}, {"knee_right", {"hip_right", "knee_left"}}, {"knee_left", {"hip_left", "knee_right"}}, {"ankle_right", {"knee_right", "ankle_left"}}, {"ankle_left", {"knee_left", "ankle_right"}}, {"shoulder_right", {"shoulder_middle", "shoulder_left"}}, {"shoulder_left", {"shoulder_middle", "shoulder_right"}}, {"elbow_right", {"shoulder_right", "hip_right"}}, {"elbow_left", {"shoulder_left", "hip_left"}}, {"wrist_right", {"elbow_right"}}, {"wrist_left", {"elbow_left"}}, {"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}}, {"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}}}; // Collect adjacent joint mappings std::vector> adjacent_indices; adjacent_indices.resize(joint_names.size()); for (size_t j = 0; j < joint_names.size(); ++j) { std::string adname = ""; const std::string &jname = joint_names[j]; // Determine adjacent joint based on name patterns if (adjacents.find(jname) != adjacents.end()) { adname = jname; } if (!adname.empty()) { auto it = adjacents.find(adname); if (it != adjacents.end()) { for (const std::string &adj_name : it->second) { auto jt = joint_name_to_idx.find(adj_name); if (jt != joint_name_to_idx.end()) { adjacent_indices[j].push_back(jt->second); } } } } } for (size_t i = 0; i < poses.size(); ++i) { auto &pose = poses[i]; size_t num_joints = pose.size(); // Compute body center as the mean of valid joints std::array body_center = {0.0, 0.0, 0.0}; size_t valid_count = 0; for (size_t j = 0; j < num_joints; ++j) { if (pose[j][3] > min_match_score) { body_center[0] += pose[j][0]; body_center[1] += pose[j][1]; body_center[2] += pose[j][2]; valid_count++; } } if (valid_count == 0) { continue; } float inv_c = 1.0 / static_cast(valid_count); for (int j = 0; j < 3; ++j) { body_center[j] *= inv_c; } // Iterate over each joint for (size_t j = 0; j < joint_names.size(); ++j) { if (pose[j][3] == 0.0) { // Joint is missing; attempt to estimate its position based on adjacent joints const std::vector &adj_indices = adjacent_indices[j]; std::array adjacent_position = {0.0, 0.0, 0.0}; size_t adjacent_count = 0; for (size_t adj_idx : adj_indices) { if (adj_idx < pose.size() && pose[adj_idx][3] > 0.1) { adjacent_position[0] += pose[adj_idx][0]; adjacent_position[1] += pose[adj_idx][1]; adjacent_position[2] += pose[adj_idx][2]; ++adjacent_count; } } if (adjacent_count > 0) { float inv_c = 1.0 / static_cast(adjacent_count); for (size_t k = 0; k < 3; ++k) { adjacent_position[k] *= inv_c; } // Update the missing joint's position pose[j][0] = adjacent_position[0]; pose[j][1] = adjacent_position[1]; pose[j][2] = adjacent_position[2]; } else { // No valid adjacent joints, use body center pose[j][0] = body_center[0]; pose[j][1] = body_center[1]; pose[j][2] = body_center[2]; } // Set a low confidence score pose[j][3] = 0.1; } } } } // ================================================================================================= void replace_far_joints( std::vector>> &poses, const std::vector &joint_names, float min_match_score) { for (size_t i = 0; i < poses.size(); ++i) { auto &pose = poses[i]; size_t num_full_joints = pose.size(); // Calculate the average position of pose parts std::array center_head = {0.0, 0.0, 0.0, 0}; std::array center_foot_left = {0.0, 0.0, 0.0, 0}; std::array center_foot_right = {0.0, 0.0, 0.0, 0}; std::array center_hand_left = {0.0, 0.0, 0.0, 0}; std::array center_hand_right = {0.0, 0.0, 0.0, 0}; for (size_t j = 0; j < num_full_joints; ++j) { const std::string &jname = joint_names[j]; float offset2 = (1.0 - min_match_score) * 2; float min_score = min_match_score - offset2; if (pose[j][3] > min_score) { if (jname.compare(0, 4, "face_") == 0 || jname == "nose" || jname == "eye_left" || jname == "eye_right" || jname == "ear_left" || jname == "ear_right") { center_head[0] += pose[j][0]; center_head[1] += pose[j][1]; center_head[2] += pose[j][2]; center_head[3] += 1.0; } else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0) { if (jname.find("_left") != std::string::npos) { center_foot_left[0] += pose[j][0]; center_foot_left[1] += pose[j][1]; center_foot_left[2] += pose[j][2]; center_foot_left[3] += 1.0; } else if (jname.find("_right") != std::string::npos) { center_foot_right[0] += pose[j][0]; center_foot_right[1] += pose[j][1]; center_foot_right[2] += pose[j][2]; center_foot_right[3] += 1.0; } } else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0) { if (jname.find("_left") != std::string::npos) { center_hand_left[0] += pose[j][0]; center_hand_left[1] += pose[j][1]; center_hand_left[2] += pose[j][2]; center_hand_left[3] += 1.0; } else if (jname.find("_right") != std::string::npos) { center_hand_right[0] += pose[j][0]; center_hand_right[1] += pose[j][1]; center_hand_right[2] += pose[j][2]; center_hand_right[3] += 1.0; } } } } for (size_t k = 0; k < 3; k++) { center_head[k] /= center_head[3]; center_foot_left[k] /= center_foot_left[3]; center_foot_right[k] /= center_foot_right[3]; center_hand_left[k] /= center_hand_left[3]; center_hand_right[k] /= center_hand_right[3]; } // Drop joints that are too far from the part center const float max_dist_head_sq = 0.20 * 0.20; const float max_dist_foot_sq = 0.25 * 0.25; const float max_dist_hand_sq = 0.20 * 0.20; for (size_t j = 0; j < num_full_joints; ++j) { const std::string &jname = joint_names[j]; if (jname.compare(0, 4, "face_") == 0 || jname == "nose" || jname == "eye_left" || jname == "eye_right" || jname == "ear_left" || jname == "ear_right") { float dx = pose[j][0] - center_head[0]; float dy = pose[j][1] - center_head[1]; float dz = pose[j][2] - center_head[2]; float dist_sq = dx * dx + dy * dy + dz * dz; if ((pose[j][3] > 0.0 && dist_sq > max_dist_head_sq) || pose[j][3] == 0.0) { pose[j][0] = center_head[0]; pose[j][1] = center_head[1]; pose[j][2] = center_head[2]; pose[j][3] = 0.1; } } else if (jname.compare(0, 5, "foot_") == 0 || jname.compare(0, 6, "ankle_") == 0) { if (jname.find("_left") != std::string::npos) { float dx = pose[j][0] - center_foot_left[0]; float dy = pose[j][1] - center_foot_left[1]; float dz = pose[j][2] - center_foot_left[2]; float dist_sq = dx * dx + dy * dy + dz * dz; if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0) { pose[j][0] = center_foot_left[0]; pose[j][1] = center_foot_left[1]; pose[j][2] = center_foot_left[2]; pose[j][3] = 0.1; } } else if (jname.find("_right") != std::string::npos) { float dx = pose[j][0] - center_foot_right[0]; float dy = pose[j][1] - center_foot_right[1]; float dz = pose[j][2] - center_foot_right[2]; float dist_sq = dx * dx + dy * dy + dz * dz; if ((pose[j][3] > 0.0 && dist_sq > max_dist_foot_sq) || pose[j][3] == 0.0) { pose[j][0] = center_foot_right[0]; pose[j][1] = center_foot_right[1]; pose[j][2] = center_foot_right[2]; pose[j][3] = 0.1; } } } else if (jname.compare(0, 5, "hand_") == 0 || jname.compare(0, 6, "wrist_") == 0) { if (jname.find("_left") != std::string::npos) { float dx = pose[j][0] - center_hand_left[0]; float dy = pose[j][1] - center_hand_left[1]; float dz = pose[j][2] - center_hand_left[2]; float dist_sq = dx * dx + dy * dy + dz * dz; if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0) { pose[j][0] = center_hand_left[0]; pose[j][1] = center_hand_left[1]; pose[j][2] = center_hand_left[2]; pose[j][3] = 0.1; } } else if (jname.find("_right") != std::string::npos) { float dx = pose[j][0] - center_hand_right[0]; float dy = pose[j][1] - center_hand_right[1]; float dz = pose[j][2] - center_hand_right[2]; float dist_sq = dx * dx + dy * dy + dz * dz; if ((pose[j][3] > 0.0 && dist_sq > max_dist_hand_sq) || pose[j][3] == 0.0) { pose[j][0] = center_hand_right[0]; pose[j][1] = center_hand_right[1]; pose[j][2] = center_hand_right[2]; pose[j][3] = 0.1; } } } } } } } // namespace std::vector build_pair_candidates(const PoseBatch2DView &poses_2d) { if (poses_2d.person_counts == nullptr) { throw std::invalid_argument("person_counts must not be null."); } std::vector pairs; std::vector offsets(poses_2d.num_views, 0); int total = 0; for (size_t view = 0; view < poses_2d.num_views; ++view) { if (poses_2d.person_counts[view] > poses_2d.max_persons) { throw std::invalid_argument("person_counts entries must not exceed the padded person dimension."); } offsets[view] = total; total += static_cast(poses_2d.person_counts[view]); } for (size_t view1 = 0; view1 < poses_2d.num_views; ++view1) { for (size_t view2 = view1 + 1; view2 < poses_2d.num_views; ++view2) { for (size_t person1 = 0; person1 < poses_2d.person_counts[view1]; ++person1) { for (size_t person2 = 0; person2 < poses_2d.person_counts[view2]; ++person2) { pairs.push_back(PairCandidate { static_cast(view1), static_cast(view2), static_cast(person1), static_cast(person2), offsets[view1] + static_cast(person1), offsets[view2] + static_cast(person2), }); } } } } return pairs; } PreviousPoseFilterDebug filter_pairs_with_previous_poses( const PoseBatch2DView &poses_2d, const TriangulationConfig &config, const PoseBatch3DView &previous_poses_3d, const TriangulationOptions *options_override) { const TriangulationOptions &options = options_override != nullptr ? *options_override : config.options; return filter_pairs_with_previous_poses_impl( poses_2d, config.cameras, config.joint_names, previous_poses_3d, options); } TriangulationTrace triangulate_debug( const PoseBatch2DView &poses_2d, const TriangulationConfig &config, const PoseBatch3DView *previous_poses_3d, const TriangulationOptions *options_override) { const TriangulationOptions &options = options_override != nullptr ? *options_override : config.options; return triangulate_debug_impl( poses_2d, config.cameras, config.roomparams, config.joint_names, previous_poses_3d, options); } PoseBatch3D triangulate_poses( const PoseBatch2DView &poses_2d, const TriangulationConfig &config, const PoseBatch3DView *previous_poses_3d, const TriangulationOptions *options_override) { return triangulate_debug(poses_2d, config, previous_poses_3d, options_override).final_poses; }