1979 lines
67 KiB
C++
1979 lines
67 KiB
C++
#include <chrono>
|
|
#include <numeric>
|
|
|
|
#include <omp.h>
|
|
#include <opencv2/opencv.hpp>
|
|
|
|
#include "camera.hpp"
|
|
#include "triangulator.hpp"
|
|
|
|
// =================================================================================================
|
|
// =================================================================================================
|
|
|
|
[[maybe_unused]] static void print_2d_mat(const cv::Mat &mat)
|
|
{
|
|
// Ensure the matrix is 2D
|
|
if (mat.dims != 2)
|
|
{
|
|
std::cerr << "Error: The matrix is not 2D." << std::endl;
|
|
return;
|
|
}
|
|
|
|
// Retrieve matrix dimensions
|
|
int rows = mat.rows;
|
|
int cols = mat.cols;
|
|
|
|
// Print the matrix in a NumPy-like style
|
|
std::cout << "cv::Mat('shape': (" << rows << ", " << cols << ")";
|
|
std::cout << ", 'data': [" << std::endl;
|
|
|
|
for (int i = 0; i < rows; ++i)
|
|
{
|
|
std::cout << " [";
|
|
for (int j = 0; j < cols; ++j)
|
|
{
|
|
std::cout << std::fixed << std::setprecision(3) << mat.at<float>(i, j);
|
|
if (j < cols - 1)
|
|
{
|
|
std::cout << ", ";
|
|
}
|
|
}
|
|
std::cout << "]";
|
|
if (i < rows - 1)
|
|
{
|
|
std::cout << "," << std::endl;
|
|
}
|
|
}
|
|
std::cout << "])" << std::endl;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
[[maybe_unused]] static void print_2d_poses(const std::vector<std::array<float, 3>> &poses)
|
|
{
|
|
std::cout << "Poses: (" << poses.size() << ", 3)[" << std::endl;
|
|
for (const auto &pose : poses)
|
|
{
|
|
std::cout << " [";
|
|
for (size_t i = 0; i < 3; ++i)
|
|
{
|
|
std::cout << std::fixed << std::setprecision(3) << pose[i];
|
|
if (i < 2)
|
|
{
|
|
std::cout << ", ";
|
|
}
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
[[maybe_unused]] static void print_3d_poses(const std::vector<std::array<float, 4>> &poses)
|
|
{
|
|
std::cout << "Poses: (" << poses.size() << ", 4)[" << std::endl;
|
|
for (const auto &pose : poses)
|
|
{
|
|
std::cout << " [";
|
|
for (size_t i = 0; i < 4; ++i)
|
|
{
|
|
std::cout << std::fixed << std::setprecision(3) << pose[i];
|
|
if (i < 3)
|
|
{
|
|
std::cout << ", ";
|
|
}
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
[[maybe_unused]] static void print_allpairs(
|
|
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs)
|
|
{
|
|
std::cout << "All pairs: [" << std::endl;
|
|
for (const auto &pair : all_pairs)
|
|
{
|
|
const auto &tuple_part = pair.first;
|
|
const auto &pair_part = pair.second;
|
|
|
|
// Print the tuple part
|
|
std::cout << "("
|
|
<< std::get<0>(tuple_part) << ", "
|
|
<< std::get<1>(tuple_part) << ", "
|
|
<< std::get<2>(tuple_part) << ", "
|
|
<< std::get<3>(tuple_part) << ")";
|
|
|
|
// Print the pair part
|
|
std::cout << ", ("
|
|
<< pair_part.first << ", "
|
|
<< pair_part.second << ")"
|
|
<< std::endl;
|
|
}
|
|
std::cout << "]" << std::endl;
|
|
}
|
|
|
|
// =================================================================================================
|
|
// =================================================================================================
|
|
|
|
CameraInternal::CameraInternal(const Camera &cam)
|
|
{
|
|
this->cam = cam;
|
|
|
|
// Convert camera matrix to cv::Mat for OpenCV
|
|
K = cv::Mat(3, 3, CV_32FC1, const_cast<float *>(&cam.K[0][0])).clone();
|
|
DC = cv::Mat(cam.DC.size(), 1, CV_32FC1, const_cast<float *>(cam.DC.data())).clone();
|
|
R = cv::Mat(3, 3, CV_32FC1, const_cast<float *>(&cam.R[0][0])).clone();
|
|
T = cv::Mat(3, 1, CV_32FC1, const_cast<float *>(&cam.T[0][0])).clone();
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void CameraInternal::update_projection_matrix()
|
|
{
|
|
// Calculate opencv-style projection matrix
|
|
cv::Mat Tr, RT;
|
|
Tr = R * (T * -1);
|
|
cv::hconcat(R, Tr, RT);
|
|
P = K * RT;
|
|
}
|
|
|
|
// =================================================================================================
|
|
// =================================================================================================
|
|
|
|
TriangulatorInternal::TriangulatorInternal(float min_match_score, size_t min_group_size)
|
|
{
|
|
this->min_match_score = min_match_score;
|
|
this->min_group_size = min_group_size;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate_poses(
|
|
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
|
|
const std::vector<Camera> &cameras,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::string> &joint_names)
|
|
{
|
|
auto ttime = std::chrono::steady_clock::now();
|
|
auto stime = std::chrono::steady_clock::now();
|
|
std::chrono::duration<double> elapsed;
|
|
|
|
// Check inputs
|
|
if (poses_2d.empty())
|
|
{
|
|
throw std::invalid_argument("No 2D poses provided.");
|
|
}
|
|
if (poses_2d.size() != cameras.size())
|
|
{
|
|
throw std::invalid_argument("Number of cameras and 2D poses must be the same.");
|
|
}
|
|
if (joint_names.size() != poses_2d[0][0].size())
|
|
{
|
|
throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
|
|
}
|
|
for (const auto &joint : core_joints)
|
|
{
|
|
if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end())
|
|
{
|
|
throw std::invalid_argument("Core joint '" + joint + "' not found in joint names.");
|
|
}
|
|
}
|
|
|
|
// Convert inputs to internal formats
|
|
std::vector<std::vector<std::vector<std::array<float, 3>>>> i_poses_2d = poses_2d;
|
|
std::vector<CameraInternal> internal_cameras;
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
internal_cameras.emplace_back(cameras[i]);
|
|
}
|
|
std::vector<size_t> core_joint_idx;
|
|
for (const auto &joint : core_joints)
|
|
{
|
|
auto it = std::find(joint_names.begin(), joint_names.end(), joint);
|
|
core_joint_idx.push_back(std::distance(joint_names.begin(), it));
|
|
}
|
|
std::vector<std::array<size_t, 2>> core_limbs_idx;
|
|
for (const auto &limb : core_limbs)
|
|
{
|
|
auto it1 = std::find(core_joints.begin(), core_joints.end(), limb.first);
|
|
auto it2 = std::find(core_joints.begin(), core_joints.end(), limb.second);
|
|
core_limbs_idx.push_back({(size_t)std::distance(core_joints.begin(), it1),
|
|
(size_t)std::distance(core_joints.begin(), it2)});
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
init_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Undistort 2D poses
|
|
#pragma omp parallel for
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
undistort_poses(i_poses_2d[i], internal_cameras[i]);
|
|
internal_cameras[i].update_projection_matrix();
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
undistort_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Project last 3D poses to 2D
|
|
std::vector<std::tuple<std::vector<std::vector<std::array<float, 3>>>,
|
|
std::vector<std::vector<float>>>>
|
|
last_poses_2d;
|
|
if (!last_poses_3d.empty())
|
|
{
|
|
// Select core joints
|
|
std::vector<std::vector<std::array<float, 4>>> last_core_poses;
|
|
last_core_poses.resize(last_poses_3d.size());
|
|
for (size_t i = 0; i < last_poses_3d.size(); ++i)
|
|
{
|
|
last_core_poses[i].resize(core_joint_idx.size());
|
|
for (size_t j = 0; j < core_joint_idx.size(); ++j)
|
|
{
|
|
for (size_t k = 0; k < 4; ++k)
|
|
{
|
|
last_core_poses[i][j][k] = last_poses_3d[i][core_joint_idx[j]][k];
|
|
}
|
|
}
|
|
}
|
|
|
|
// Project
|
|
last_poses_2d.resize(cameras.size());
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
auto [poses2d, dists] = project_poses(last_core_poses, internal_cameras[i], true);
|
|
last_poses_2d[i] = std::make_tuple(poses2d, dists);
|
|
}
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
project_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Check matches to old poses
|
|
float threshold = min_match_score - 0.2;
|
|
std::map<size_t, std::map<size_t, std::vector<size_t>>> scored_pasts;
|
|
if (!last_poses_3d.empty())
|
|
{
|
|
// Calculate index pairs and initialize vectors
|
|
std::vector<std::array<size_t, 3>> indices_ijk;
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
size_t num_last_persons = std::get<0>(last_poses_2d[i]).size();
|
|
scored_pasts[i] = std::map<size_t, std::vector<size_t>>();
|
|
|
|
for (size_t j = 0; j < num_last_persons; ++j)
|
|
{
|
|
size_t num_new_persons = poses_2d[i].size();
|
|
scored_pasts[i][j] = std::vector<size_t>();
|
|
|
|
for (size_t k = 0; k < num_new_persons; ++k)
|
|
{
|
|
indices_ijk.push_back({i, j, k});
|
|
}
|
|
}
|
|
}
|
|
std::vector<std::array<size_t, 2>> indices_ik;
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
size_t num_new_persons = poses_2d[i].size();
|
|
for (size_t k = 0; k < num_new_persons; ++k)
|
|
{
|
|
indices_ik.push_back({i, k});
|
|
}
|
|
}
|
|
|
|
// Precalculate core poses
|
|
std::vector<std::vector<std::array<float, 3>>> poses_2d_core_list;
|
|
poses_2d_core_list.resize(indices_ik.size());
|
|
std::vector<std::vector<size_t>> mats_core_map;
|
|
mats_core_map.resize(cameras.size());
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
size_t num_new_persons = poses_2d[i].size();
|
|
for (size_t k = 0; k < num_new_persons; ++k)
|
|
{
|
|
mats_core_map[i].push_back(0);
|
|
}
|
|
}
|
|
for (size_t e = 0; e < indices_ik.size(); ++e)
|
|
{
|
|
const auto [i, k] = indices_ik[e];
|
|
const auto &pose = i_poses_2d[i][k];
|
|
std::vector<std::array<float, 3>> pose_core;
|
|
pose_core.resize(core_joint_idx.size());
|
|
for (size_t j = 0; j < core_joint_idx.size(); ++j)
|
|
{
|
|
size_t idx = core_joint_idx[j];
|
|
pose_core[j] = pose[idx];
|
|
}
|
|
poses_2d_core_list[e] = pose_core;
|
|
mats_core_map[i][k] = e;
|
|
}
|
|
|
|
// Calculate matching score
|
|
for (size_t e = 0; e < indices_ijk.size(); ++e)
|
|
{
|
|
const auto [i, j, k] = indices_ijk[e];
|
|
|
|
const auto &last_pose = std::get<0>(last_poses_2d[i])[j];
|
|
const auto &last_dist = std::get<1>(last_poses_2d[i])[j];
|
|
const auto &new_pose = poses_2d_core_list[mats_core_map[i][k]];
|
|
|
|
float score = calc_pose_score(new_pose, last_pose, last_dist, internal_cameras[i]);
|
|
if (score > threshold)
|
|
{
|
|
{
|
|
scored_pasts[i][j].push_back(k);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
match_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Create pairs of persons
|
|
// Checks if the person was already matched to the last frame and if so only creates pairs
|
|
// with those, else it creates all possible pairs
|
|
std::vector<int> num_persons_sum;
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
int nsum = poses_2d[i].size();
|
|
if (i > 0)
|
|
{
|
|
nsum += num_persons_sum[i - 1];
|
|
}
|
|
num_persons_sum.push_back(nsum);
|
|
}
|
|
std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> all_pairs;
|
|
std::vector<std::array<size_t, 4>> indices;
|
|
for (size_t i = 0; i < cameras.size(); ++i)
|
|
{
|
|
for (size_t j = i + 1; j < cameras.size(); ++j)
|
|
{
|
|
for (size_t k = 0; k < poses_2d[i].size(); ++k)
|
|
{
|
|
for (size_t l = 0; l < poses_2d[j].size(); ++l)
|
|
{
|
|
indices.push_back({i, j, k, l});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for (size_t e = 0; e < indices.size(); ++e)
|
|
{
|
|
const auto [i, j, k, l] = indices[e];
|
|
|
|
int pid1 = num_persons_sum[i] + k;
|
|
int pid2 = num_persons_sum[k] + l;
|
|
bool match = false;
|
|
|
|
if (!last_poses_3d.empty())
|
|
{
|
|
for (size_t m = 0; m < last_poses_3d.size(); ++m)
|
|
{
|
|
auto &smi = scored_pasts[i][m];
|
|
auto &smj = scored_pasts[j][m];
|
|
bool in_smi = std::find(smi.begin(), smi.end(), k) != smi.end();
|
|
bool in_smj = std::find(smj.begin(), smj.end(), l) != smj.end();
|
|
|
|
if (in_smi && in_smj)
|
|
{
|
|
match = true;
|
|
auto item = std::make_pair(
|
|
std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2));
|
|
all_pairs.push_back(item);
|
|
}
|
|
else if (in_smi || in_smj)
|
|
{
|
|
match = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!match)
|
|
{
|
|
auto item = std::make_pair(
|
|
std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2));
|
|
all_pairs.push_back(item);
|
|
}
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
pairs_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Calculate pair scores
|
|
std::vector<std::pair<std::vector<std::array<float, 4>>, float>> all_scored_poses;
|
|
all_scored_poses.resize(all_pairs.size());
|
|
#pragma omp parallel for
|
|
for (size_t i = 0; i < all_pairs.size(); ++i)
|
|
{
|
|
const auto &pids = all_pairs[i].first;
|
|
|
|
// Extract camera parameters
|
|
const auto &cam1 = internal_cameras[std::get<0>(pids)];
|
|
const auto &cam2 = internal_cameras[std::get<1>(pids)];
|
|
|
|
// Extract 2D poses
|
|
const auto &pose1 = i_poses_2d[std::get<0>(pids)][std::get<2>(pids)];
|
|
const auto &pose2 = i_poses_2d[std::get<1>(pids)][std::get<3>(pids)];
|
|
|
|
// Select core joints
|
|
std::vector<std::array<float, 3>> pose1_core, pose2_core;
|
|
pose1_core.resize(core_joint_idx.size());
|
|
pose2_core.resize(core_joint_idx.size());
|
|
for (size_t j = 0; j < core_joint_idx.size(); ++j)
|
|
{
|
|
size_t idx = core_joint_idx[j];
|
|
pose1_core[j] = pose1[idx];
|
|
pose2_core[j] = pose2[idx];
|
|
}
|
|
|
|
// Triangulate and score
|
|
auto [pose3d, score] = triangulate_and_score(
|
|
pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx);
|
|
all_scored_poses[i] = std::move(std::make_pair(pose3d, score));
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
pair_scoring_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Drop low scoring poses
|
|
size_t num_poses = all_scored_poses.size();
|
|
for (size_t i = num_poses; i > 0; --i)
|
|
{
|
|
if (all_scored_poses[i - 1].second < min_match_score)
|
|
{
|
|
all_scored_poses.erase(all_scored_poses.begin() + i - 1);
|
|
all_pairs.erase(all_pairs.begin() + i - 1);
|
|
}
|
|
}
|
|
|
|
// Group pairs that share a person
|
|
std::vector<std::tuple<
|
|
std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
|
|
groups = calc_grouping(all_pairs, all_scored_poses, min_match_score);
|
|
|
|
// Drop groups with too few matches
|
|
size_t num_groups = groups.size();
|
|
for (size_t i = num_groups; i > 0; --i)
|
|
{
|
|
if (std::get<2>(groups[i - 1]).size() < this->min_group_size)
|
|
{
|
|
groups.erase(groups.begin() + i - 1);
|
|
}
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
grouping_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Calculate full 3D poses
|
|
std::vector<std::vector<std::array<float, 4>>> all_full_poses;
|
|
all_full_poses.resize(all_pairs.size());
|
|
#pragma omp parallel for
|
|
for (size_t i = 0; i < all_pairs.size(); ++i)
|
|
{
|
|
const auto &pids = all_pairs[i].first;
|
|
const auto &cam1 = internal_cameras[std::get<0>(pids)];
|
|
const auto &cam2 = internal_cameras[std::get<1>(pids)];
|
|
const auto &pose1 = i_poses_2d[std::get<0>(pids)][std::get<2>(pids)];
|
|
const auto &pose2 = i_poses_2d[std::get<1>(pids)][std::get<3>(pids)];
|
|
|
|
auto [pose3d, score] = triangulate_and_score(
|
|
pose1, pose2, cam1, cam2, roomparams, {});
|
|
all_full_poses[i] = std::move(pose3d);
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
full_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Merge groups
|
|
std::vector<std::vector<std::array<float, 4>>> all_merged_poses;
|
|
all_merged_poses.resize(groups.size());
|
|
for (size_t i = 0; i < groups.size(); ++i)
|
|
{
|
|
const auto &group = groups[i];
|
|
std::vector<std::vector<std::array<float, 4>>> poses;
|
|
poses.reserve(std::get<2>(group).size());
|
|
|
|
for (const auto &idx : std::get<2>(group))
|
|
{
|
|
poses.push_back(all_full_poses[idx]);
|
|
}
|
|
|
|
auto merged_pose = merge_group(poses, min_match_score);
|
|
all_merged_poses[i] = std::move(merged_pose);
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
merge_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Run post-processing steps
|
|
std::vector<std::vector<std::array<float, 4>>> final_poses_3d = std::move(all_merged_poses);
|
|
add_extra_joints(final_poses_3d, joint_names);
|
|
filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx);
|
|
add_missing_joints(final_poses_3d, joint_names);
|
|
|
|
// Store final result for use in the next frame.
|
|
last_poses_3d = final_poses_3d;
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
post_time += elapsed.count();
|
|
stime = std::chrono::steady_clock::now();
|
|
|
|
// Convert to output format
|
|
std::vector<std::vector<std::array<float, 4>>> poses_3d;
|
|
poses_3d.reserve(final_poses_3d.size());
|
|
for (size_t i = 0; i < final_poses_3d.size(); ++i)
|
|
{
|
|
const size_t num_joints = final_poses_3d[i].size();
|
|
std::vector<std::array<float, 4>> pose;
|
|
pose.reserve(num_joints);
|
|
size_t num_valid = 0;
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
std::array<float, 4> point;
|
|
for (size_t k = 0; k < 4; ++k)
|
|
{
|
|
point[k] = final_poses_3d[i][j][k];
|
|
}
|
|
pose.push_back(point);
|
|
|
|
if (point[3] > min_match_score)
|
|
{
|
|
num_valid++;
|
|
}
|
|
}
|
|
|
|
if (num_valid > 0)
|
|
{
|
|
poses_3d.push_back(std::move(pose));
|
|
}
|
|
}
|
|
|
|
elapsed = std::chrono::steady_clock::now() - stime;
|
|
convert_time += elapsed.count();
|
|
|
|
elapsed = std::chrono::steady_clock::now() - ttime;
|
|
total_time += elapsed.count();
|
|
num_calls++;
|
|
|
|
return poses_3d;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void TriangulatorInternal::reset()
|
|
{
|
|
last_poses_3d.clear();
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void TriangulatorInternal::print_stats()
|
|
{
|
|
std::cout << "{" << std::endl;
|
|
std::cout << " \"triangulator_calls\": " << num_calls << "," << std::endl;
|
|
std::cout << " \"init_time\": " << init_time / num_calls << "," << std::endl;
|
|
std::cout << " \"undistort_time\": " << undistort_time / num_calls << "," << std::endl;
|
|
std::cout << " \"project_time\": " << project_time / num_calls << "," << std::endl;
|
|
std::cout << " \"match_time\": " << match_time / num_calls << "," << std::endl;
|
|
std::cout << " \"pairs_time\": " << pairs_time / num_calls << "," << std::endl;
|
|
std::cout << " \"pair_scoring_time\": " << pair_scoring_time / num_calls << "," << std::endl;
|
|
std::cout << " \"grouping_time\": " << grouping_time / num_calls << "," << std::endl;
|
|
std::cout << " \"full_time\": " << full_time / num_calls << "," << std::endl;
|
|
std::cout << " \"merge_time\": " << merge_time / num_calls << "," << std::endl;
|
|
std::cout << " \"post_time\": " << post_time / num_calls << "," << std::endl;
|
|
std::cout << " \"convert_time\": " << convert_time / num_calls << "," << std::endl;
|
|
std::cout << " \"total_time\": " << total_time / num_calls << std::endl;
|
|
std::cout << "}" << std::endl;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void TriangulatorInternal::undistort_poses(
|
|
std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam)
|
|
{
|
|
int width = icam.cam.width;
|
|
int height = icam.cam.height;
|
|
|
|
// Undistort camera matrix
|
|
cv::Mat newK;
|
|
if (icam.cam.type == "fisheye")
|
|
{
|
|
cv::fisheye::estimateNewCameraMatrixForUndistortRectify(
|
|
icam.K, icam.DC, cv::Size(width, height), cv::Matx33d::eye(),
|
|
newK, 1.0, cv::Size(width, height), 1.0);
|
|
}
|
|
else
|
|
{
|
|
newK = cv::getOptimalNewCameraMatrix(
|
|
icam.K, icam.DC, cv::Size(width, height), 1, cv::Size(width, height));
|
|
}
|
|
|
|
// Convert vectors to single mat
|
|
size_t num_persons = poses_2d.size();
|
|
size_t num_joints = poses_2d[0].size();
|
|
std::vector<int> dims = {(int)(num_persons * num_joints), 2};
|
|
cv::Mat points_mat = cv::Mat(dims, CV_32F);
|
|
float *mat_ptr = points_mat.ptr<float>(0);
|
|
for (size_t i = 0; i < num_persons; ++i)
|
|
{
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
for (size_t k = 0; k < 2; ++k)
|
|
{
|
|
mat_ptr[i * num_joints * 2 + j * 2 + k] = poses_2d[i][j][k];
|
|
}
|
|
}
|
|
}
|
|
|
|
// Undistort all the points
|
|
points_mat = points_mat.reshape(2, static_cast<int>(points_mat.rows));
|
|
if (icam.cam.type == "fisheye")
|
|
{
|
|
cv::fisheye::undistortPoints(points_mat, points_mat, icam.K, icam.DC, cv::noArray(), newK);
|
|
}
|
|
else
|
|
{
|
|
cv::undistortPoints(points_mat, points_mat, icam.K, icam.DC, cv::noArray(), newK);
|
|
}
|
|
points_mat = points_mat.reshape(1, static_cast<int>(points_mat.rows));
|
|
|
|
// Overwrite the old coordinates with the undistorted ones
|
|
for (size_t i = 0; i < num_persons; ++i)
|
|
{
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
for (size_t k = 0; k < 2; ++k)
|
|
{
|
|
poses_2d[i][j][k] = mat_ptr[i * num_joints * 2 + j * 2 + k];
|
|
}
|
|
}
|
|
}
|
|
|
|
// Mask out points that are far outside the image (points slightly outside are still valid)
|
|
float mask_offset = (width + height) / 20.0;
|
|
for (size_t i = 0; i < num_persons; ++i)
|
|
{
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (poses_2d[i][j][0] < -mask_offset || poses_2d[i][j][0] >= width + mask_offset ||
|
|
poses_2d[i][j][1] < -mask_offset || poses_2d[i][j][1] >= height + mask_offset)
|
|
{
|
|
poses_2d[i][j][0] = 0.0;
|
|
poses_2d[i][j][1] = 0.0;
|
|
poses_2d[i][j][2] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update the camera matrix
|
|
icam.K = newK.clone();
|
|
for (size_t i = 0; i < 3; ++i)
|
|
{
|
|
for (size_t j = 0; j < 3; ++j)
|
|
{
|
|
icam.cam.K[i][j] = newK.at<float>(i, j);
|
|
}
|
|
}
|
|
if (icam.cam.type == "fisheye")
|
|
{
|
|
icam.DC = cv::Mat::zeros(4, 1, CV_32F);
|
|
icam.cam.DC = {0.0, 0.0, 0.0, 0.0};
|
|
}
|
|
else
|
|
{
|
|
icam.DC = cv::Mat::zeros(5, 1, CV_32F);
|
|
icam.cam.DC = {0.0, 0.0, 0.0, 0.0, 0.0};
|
|
}
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::tuple<std::vector<std::vector<std::array<float, 3>>>, std::vector<std::vector<float>>>
|
|
TriangulatorInternal::project_poses(
|
|
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
|
|
const CameraInternal &icam,
|
|
bool calc_dists)
|
|
{
|
|
const size_t num_persons = poses_3d.size();
|
|
const size_t num_joints = poses_3d[0].size();
|
|
|
|
// Prepare output vectors
|
|
std::vector<std::vector<std::array<float, 3>>> poses_2d;
|
|
std::vector<std::vector<float>> all_dists;
|
|
poses_2d.resize(num_persons);
|
|
all_dists.resize(num_persons);
|
|
|
|
// Get camera parameters
|
|
const std::array<std::array<float, 3>, 3> &K = icam.cam.K;
|
|
const std::array<std::array<float, 3>, 3> &R = icam.cam.R;
|
|
const std::array<std::array<float, 1>, 3> &T = icam.cam.T;
|
|
|
|
for (size_t i = 0; i < num_persons; ++i)
|
|
{
|
|
const std::vector<std::array<float, 4>> &body3D = poses_3d[i];
|
|
std::vector<std::array<float, 3>> body2D(num_joints, std::array<float, 3>{0.0, 0.0, 0.0});
|
|
std::vector<float> dists(num_joints, 0.0);
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float x = body3D[j][0];
|
|
float y = body3D[j][1];
|
|
float z = body3D[j][2];
|
|
float score = body3D[j][3];
|
|
|
|
// Project from world to camera coordinate system
|
|
float xt = x - T[0][0];
|
|
float yt = y - T[1][0];
|
|
float zt = z - T[2][0];
|
|
float x_cam = xt * R[0][0] + yt * R[0][1] + zt * R[0][2];
|
|
float y_cam = xt * R[1][0] + yt * R[1][1] + zt * R[1][2];
|
|
float z_cam = xt * R[2][0] + yt * R[2][1] + zt * R[2][2];
|
|
|
|
// Set points behind the camera to zero
|
|
if (z_cam <= 0.0)
|
|
{
|
|
x_cam = y_cam = z_cam = 0.0;
|
|
}
|
|
|
|
// Calculate distance from camera center if required
|
|
if (calc_dists)
|
|
dists[j] = std::sqrt(x_cam * x_cam + y_cam * y_cam + z_cam * z_cam);
|
|
else
|
|
dists[j] = 0.0;
|
|
|
|
// Project points to image plane
|
|
// Since images are already undistorted, use the pinhole projection for all camera types
|
|
float u = 0.0, v = 0.0;
|
|
if (z_cam > 0.0)
|
|
{
|
|
u = (K[0][0] * x_cam + K[0][1] * y_cam + K[0][2] * z_cam) / z_cam;
|
|
v = (K[1][0] * x_cam + K[1][1] * y_cam + K[1][2] * z_cam) / z_cam;
|
|
}
|
|
|
|
// Filter invalid projections
|
|
if (u < 0.0 || u >= icam.cam.width || v < 0.0 || v >= icam.cam.height)
|
|
{
|
|
u = 0.0;
|
|
v = 0.0;
|
|
score = 0.0;
|
|
}
|
|
|
|
body2D[j] = {u, v, score};
|
|
}
|
|
|
|
// Store results
|
|
poses_2d[i] = std::move(body2D);
|
|
all_dists[i] = std::move(dists);
|
|
}
|
|
|
|
return std::make_tuple(poses_2d, all_dists);
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
float TriangulatorInternal::calc_pose_score(
|
|
const std::vector<std::array<float, 3>> &pose1,
|
|
const std::vector<std::array<float, 3>> &pose2,
|
|
const std::vector<float> &dist1,
|
|
const CameraInternal &icam)
|
|
{
|
|
const float min_score = 0.1;
|
|
const size_t num_joints = pose1.size();
|
|
|
|
// Create mask for valid points
|
|
std::vector<bool> mask;
|
|
mask.resize(num_joints);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (pose1[i][2] <= min_score || pose2[i][2] <= min_score)
|
|
{
|
|
mask[i] = false;
|
|
}
|
|
else
|
|
{
|
|
mask[i] = true;
|
|
}
|
|
}
|
|
|
|
// Drop if not enough valid points
|
|
int valid_count = 0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
valid_count++;
|
|
}
|
|
}
|
|
if (valid_count < 3)
|
|
{
|
|
return 0.0;
|
|
}
|
|
|
|
// Calculate scores
|
|
float iscale = (icam.cam.width + icam.cam.height) / 2;
|
|
std::vector<float> scores = score_projection(pose1, pose2, dist1, mask, iscale);
|
|
|
|
// Drop lowest scores
|
|
size_t drop_k = static_cast<size_t>(num_joints * 0.2);
|
|
const size_t min_k = 3;
|
|
std::vector<float> scores_vec;
|
|
scores_vec.reserve(valid_count);
|
|
for (size_t i = 0; i < scores.size(); ++i)
|
|
{
|
|
if (mask[i] > 0)
|
|
{
|
|
scores_vec.push_back(scores[i]);
|
|
}
|
|
}
|
|
size_t scores_size = scores_vec.size();
|
|
if (scores_size >= min_k)
|
|
{
|
|
drop_k = std::min(drop_k, scores_size - min_k);
|
|
std::partial_sort(scores_vec.begin(), scores_vec.begin() + drop_k, scores_vec.end());
|
|
scores_vec.erase(scores_vec.begin(), scores_vec.begin() + drop_k);
|
|
}
|
|
|
|
// Calculate final score
|
|
float score = 0.0;
|
|
size_t n_items = scores_vec.size();
|
|
if (n_items > 0)
|
|
{
|
|
float sum_scores = std::accumulate(scores_vec.begin(), scores_vec.end(), 0.0);
|
|
score = sum_scores / static_cast<float>(n_items);
|
|
}
|
|
|
|
return score;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<float> TriangulatorInternal::score_projection(
|
|
const std::vector<std::array<float, 3>> &pose,
|
|
const std::vector<std::array<float, 3>> &repro,
|
|
const std::vector<float> &dists,
|
|
const std::vector<bool> &mask,
|
|
float iscale)
|
|
{
|
|
const float min_score = 0.1;
|
|
const size_t num_joints = pose.size();
|
|
|
|
// Calculate error
|
|
std::vector<float> error(num_joints, 0.0);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
float dx = pose[i][0] - repro[i][0];
|
|
float dy = pose[i][1] - repro[i][1];
|
|
float err = std::sqrt(dx * dx + dy * dy);
|
|
|
|
// Set errors of invisible reprojections to a high value
|
|
if (repro[i][2] < min_score)
|
|
{
|
|
err = iscale;
|
|
}
|
|
error[i] = err;
|
|
}
|
|
}
|
|
|
|
// Scale error by image size
|
|
const float inv_iscale = 1.0 / iscale;
|
|
const float iscale_quarter = iscale / 4.0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
float err = error[i];
|
|
err = std::max(0.0f, std::min(err, iscale_quarter)) * inv_iscale;
|
|
error[i] = err;
|
|
}
|
|
|
|
// Scale error by distance to camera
|
|
float mean_dist = 0.0;
|
|
int count = 0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
mean_dist += dists[i];
|
|
++count;
|
|
}
|
|
}
|
|
if (count > 0)
|
|
{
|
|
mean_dist /= count;
|
|
}
|
|
const float dscale = std::sqrt(mean_dist / 3.5);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
error[i] *= dscale;
|
|
}
|
|
|
|
// Convert error to score
|
|
std::vector<float> score(num_joints, 0.0);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
score[i] = 1.0 / (1.0 + error[i] * 10.0);
|
|
}
|
|
|
|
return score;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
|
|
const std::vector<std::array<float, 3>> &pose1,
|
|
const std::vector<std::array<float, 3>> &pose2,
|
|
const CameraInternal &cam1,
|
|
const CameraInternal &cam2,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
|
|
{
|
|
const float min_score = 0.1;
|
|
const size_t num_joints = pose1.size();
|
|
|
|
// Create mask for valid points
|
|
std::vector<bool> mask;
|
|
mask.resize(num_joints);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (pose1[i][2] <= min_score || pose2[i][2] <= min_score)
|
|
{
|
|
mask[i] = false;
|
|
}
|
|
else
|
|
{
|
|
mask[i] = true;
|
|
}
|
|
}
|
|
|
|
// If too few joints are visible, return a low score
|
|
int num_visible = 0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
num_visible++;
|
|
}
|
|
}
|
|
if (num_visible < 3)
|
|
{
|
|
std::vector<std::array<float, 4>> empty(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
float score = 0.0;
|
|
return std::make_pair(empty, score);
|
|
}
|
|
|
|
// Extract coordinates of visible joints
|
|
std::vector<std::array<float, 2>> points1;
|
|
std::vector<std::array<float, 2>> points2;
|
|
points1.reserve(num_visible);
|
|
points2.reserve(num_visible);
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
points1.push_back({pose1[i][0], pose1[i][1]});
|
|
points2.push_back({pose2[i][0], pose2[i][1]});
|
|
}
|
|
}
|
|
|
|
// Convert vectors to mats
|
|
cv::Mat points1_mat(2, num_visible, CV_32F);
|
|
cv::Mat points2_mat(2, num_visible, CV_32F);
|
|
float *p1_ptr = points1_mat.ptr<float>(0);
|
|
float *p2_ptr = points2_mat.ptr<float>(0);
|
|
for (int i = 0; i < num_visible; ++i)
|
|
{
|
|
p1_ptr[i + 0 * num_visible] = points1[i][0];
|
|
p1_ptr[i + 1 * num_visible] = points1[i][1];
|
|
p2_ptr[i + 0 * num_visible] = points2[i][0];
|
|
p2_ptr[i + 1 * num_visible] = points2[i][1];
|
|
}
|
|
|
|
// Triangulate points
|
|
cv::Mat points4d_h;
|
|
cv::triangulatePoints(cam1.P, cam2.P, points1_mat, points2_mat, points4d_h);
|
|
|
|
// Convert homogeneous coordinates to 3D
|
|
std::vector<std::array<float, 3>> points_3d;
|
|
points_3d.reserve(num_visible);
|
|
const float *p4_ptr = points4d_h.ptr<float>(0);
|
|
for (int i = 0; i < points4d_h.cols; ++i)
|
|
{
|
|
float w = p4_ptr[i + 3 * num_visible];
|
|
std::array<float, 3> pt = {
|
|
p4_ptr[i + 0 * num_visible] / w,
|
|
p4_ptr[i + 1 * num_visible] / w,
|
|
p4_ptr[i + 2 * num_visible] / w};
|
|
points_3d.push_back(std::move(pt));
|
|
}
|
|
|
|
// Create the 3D pose
|
|
std::vector<std::array<float, 4>> pose3d(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
int idx = 0;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
pose3d[i][0] = points_3d[idx][0];
|
|
pose3d[i][1] = points_3d[idx][1];
|
|
pose3d[i][2] = points_3d[idx][2];
|
|
pose3d[i][3] = 1.0;
|
|
++idx;
|
|
}
|
|
}
|
|
|
|
// Check if mean of the points is inside the room bounds
|
|
std::array<float, 3> mean = {0.0, 0.0, 0.0};
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
mean[0] += pose3d[i][0];
|
|
mean[1] += pose3d[i][1];
|
|
mean[2] += pose3d[i][2];
|
|
}
|
|
}
|
|
float inv_num_vis = 1.0 / num_visible;
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
mean[j] *= inv_num_vis;
|
|
}
|
|
const std::array<float, 3> &room_size = roomparams[0];
|
|
const std::array<float, 3> &room_center = roomparams[1];
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
if (mean[j] > room_center[j] + room_size[j] / 2.0 ||
|
|
mean[j] < room_center[j] - room_size[j] / 2.0)
|
|
{
|
|
// Very low score if outside room
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
pose3d[i][3] = 0.001;
|
|
}
|
|
}
|
|
return std::make_pair(pose3d, 0.001);
|
|
}
|
|
}
|
|
|
|
// Calculate reprojections
|
|
std::vector<std::vector<std::array<float, 4>>> poses_3d = {pose3d};
|
|
auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true);
|
|
auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true);
|
|
auto repro1 = repro1s[0];
|
|
auto dists1 = dists1s[0];
|
|
auto repro2 = repro2s[0];
|
|
auto dists2 = dists2s[0];
|
|
|
|
// Calculate scores for each view
|
|
float iscale = (cam1.cam.width + cam1.cam.height) / 2.0;
|
|
std::vector<float> score1 = score_projection(pose1, repro1, dists1, mask, iscale);
|
|
std::vector<float> score2 = score_projection(pose2, repro2, dists2, mask, iscale);
|
|
|
|
// Add scores to 3D pose
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
float score = 0.5 * (score1[i] + score2[i]);
|
|
pose3d[i][3] = score;
|
|
}
|
|
}
|
|
|
|
// Set scores outside the room to a low value
|
|
const float wdist = 0.1;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (mask[i])
|
|
{
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
if (pose3d[i][j] > room_center[j] + room_size[j] / 2.0 + wdist ||
|
|
pose3d[i][j] < room_center[j] - room_size[j] / 2.0 - wdist)
|
|
{
|
|
pose3d[i][3] = 0.001;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Filter clearly wrong limbs
|
|
if (!core_limbs_idx.empty())
|
|
{
|
|
const float max_length_sq = 0.9 * 0.9;
|
|
for (size_t i = 0; i < core_limbs_idx.size(); ++i)
|
|
{
|
|
size_t limb1 = core_limbs_idx[i][0];
|
|
size_t limb2 = core_limbs_idx[i][1];
|
|
|
|
if (pose3d[limb1][3] > min_score && pose3d[limb2][3] > min_score)
|
|
{
|
|
float dx = pose3d[limb1][0] - pose3d[limb2][0];
|
|
float dy = pose3d[limb1][1] - pose3d[limb2][1];
|
|
float dz = pose3d[limb1][2] - pose3d[limb2][2];
|
|
float length_sq = dx * dx + dy * dy + dz * dz;
|
|
|
|
if (length_sq > max_length_sq)
|
|
{
|
|
pose3d[limb2][3] = 0.001;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Drop lowest scores
|
|
size_t drop_k = static_cast<size_t>(num_joints * 0.2);
|
|
const size_t min_k = 3;
|
|
std::vector<float> valid_scores;
|
|
for (size_t i = 0; i < num_joints; ++i)
|
|
{
|
|
if (pose3d[i][3] > min_score)
|
|
{
|
|
valid_scores.push_back(pose3d[i][3]);
|
|
}
|
|
}
|
|
size_t scores_size = valid_scores.size();
|
|
if (scores_size >= min_k)
|
|
{
|
|
drop_k = std::min(drop_k, scores_size - min_k);
|
|
std::partial_sort(valid_scores.begin(), valid_scores.begin() + drop_k, valid_scores.end());
|
|
valid_scores.erase(valid_scores.begin(), valid_scores.begin() + drop_k);
|
|
}
|
|
|
|
// Calculate final score
|
|
float final_score = 0.0;
|
|
if (!valid_scores.empty())
|
|
{
|
|
float sum_scores = std::accumulate(valid_scores.begin(), valid_scores.end(), 0.0);
|
|
final_score = sum_scores / static_cast<float>(valid_scores.size());
|
|
}
|
|
|
|
return std::make_pair(pose3d, final_score);
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::tuple<std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
|
|
TriangulatorInternal::calc_grouping(
|
|
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
|
|
const std::vector<std::pair<std::vector<std::array<float, 4>>, float>> &all_scored_poses,
|
|
float min_score)
|
|
{
|
|
float max_center_dist_sq = 0.6 * 0.6;
|
|
float max_joint_avg_dist = 0.3;
|
|
size_t num_pairs = all_pairs.size();
|
|
|
|
// Calculate pose centers
|
|
std::vector<std::array<float, 3>> centers;
|
|
centers.resize(num_pairs);
|
|
for (size_t i = 0; i < num_pairs; ++i)
|
|
{
|
|
const auto &pose_3d = all_scored_poses[i].first;
|
|
const size_t num_joints = pose_3d.size();
|
|
|
|
std::array<float, 3> center = {0.0, 0.0, 0.0};
|
|
size_t num_valid = 0;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float score = pose_3d[j][3];
|
|
if (score > min_score)
|
|
{
|
|
center[0] += pose_3d[j][0];
|
|
center[1] += pose_3d[j][1];
|
|
center[2] += pose_3d[j][2];
|
|
++num_valid;
|
|
}
|
|
}
|
|
if (num_valid > 0)
|
|
{
|
|
float inv = 1.0 / num_valid;
|
|
center[0] *= inv;
|
|
center[1] *= inv;
|
|
center[2] *= inv;
|
|
}
|
|
centers[i] = center;
|
|
}
|
|
|
|
// Calculate Groups
|
|
// defined as a tuple of center, pose, and all-pairs-indices of members
|
|
std::vector<std::tuple<std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>> groups;
|
|
std::vector<std::vector<size_t>> per_group_visible_counts;
|
|
for (size_t i = 0; i < num_pairs; ++i)
|
|
{
|
|
const auto &pose_3d = all_scored_poses[i].first;
|
|
size_t num_joints = pose_3d.size();
|
|
|
|
const std::array<float, 3> ¢er = centers[i];
|
|
float best_dist = std::numeric_limits<float>::infinity();
|
|
int best_group = -1;
|
|
|
|
for (size_t j = 0; j < groups.size(); ++j)
|
|
{
|
|
auto &group = groups[j];
|
|
std::array<float, 3> &group_center = std::get<0>(group);
|
|
|
|
// Check if the center is close enough
|
|
float dx = group_center[0] - center[0];
|
|
float dy = group_center[1] - center[1];
|
|
float dz = group_center[2] - center[2];
|
|
float center_dist_sq = dx * dx + dy * dy + dz * dz;
|
|
|
|
if (center_dist_sq < max_center_dist_sq)
|
|
{
|
|
const auto &group_pose = std::get<1>(group);
|
|
|
|
// Calculate average joint distance
|
|
std::vector<float> dists;
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
float score1 = pose_3d[row][3];
|
|
float score2 = group_pose[row][3];
|
|
if (score1 > min_score && score2 > min_score)
|
|
{
|
|
float dx = pose_3d[row][0] - group_pose[row][0];
|
|
float dy = pose_3d[row][1] - group_pose[row][1];
|
|
float dz = pose_3d[row][2] - group_pose[row][2];
|
|
float dist = std::sqrt(dx * dx + dy * dy + dz * dz);
|
|
dists.push_back(dist);
|
|
}
|
|
}
|
|
if (dists.size() >= 5)
|
|
{
|
|
// Drop highest value to reduce influence of outliers
|
|
auto max_it = std::max_element(dists.begin(), dists.end());
|
|
dists.erase(max_it);
|
|
}
|
|
if (dists.size() > 0)
|
|
{
|
|
// Check if the average joint distance is close enough
|
|
float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0);
|
|
avg_dist /= static_cast<float>(dists.size());
|
|
if (avg_dist < max_joint_avg_dist && avg_dist < best_dist)
|
|
{
|
|
best_dist = avg_dist;
|
|
best_group = static_cast<int>(j);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_group == -1)
|
|
{
|
|
// Create a new group
|
|
std::vector<int> new_indices{static_cast<int>(i)};
|
|
std::vector<std::array<float, 4>> group_pose = pose_3d;
|
|
groups.emplace_back(center, group_pose, new_indices);
|
|
|
|
// Update per joint counts
|
|
std::vector<size_t> new_valid_joint_counts(num_joints, 0);
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
if (pose_3d[row][3] > min_score)
|
|
{
|
|
new_valid_joint_counts[row] = 1;
|
|
}
|
|
}
|
|
per_group_visible_counts.push_back(std::move(new_valid_joint_counts));
|
|
}
|
|
else
|
|
{
|
|
// Update existing group
|
|
auto &group = groups[best_group];
|
|
std::array<float, 3> &group_center = std::get<0>(group);
|
|
std::vector<std::array<float, 4>> &group_pose = std::get<1>(group);
|
|
std::vector<int> &group_indices = std::get<2>(group);
|
|
|
|
float n_elems = static_cast<float>(group_indices.size());
|
|
float inv_n = 1.0 / (n_elems + 1.0);
|
|
|
|
// Update group center
|
|
group_center[0] = (group_center[0] * n_elems + center[0]) * inv_n;
|
|
group_center[1] = (group_center[1] * n_elems + center[1]) * inv_n;
|
|
group_center[2] = (group_center[2] * n_elems + center[2]) * inv_n;
|
|
|
|
// Update group pose
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
if (pose_3d[row][3] > min_score)
|
|
{
|
|
float j_elems = static_cast<float>(per_group_visible_counts[best_group][row]);
|
|
float inv_j = 1.0 / (j_elems + 1.0);
|
|
|
|
group_pose[row][0] = (group_pose[row][0] * j_elems + pose_3d[row][0]) * inv_j;
|
|
group_pose[row][1] = (group_pose[row][1] * j_elems + pose_3d[row][1]) * inv_j;
|
|
group_pose[row][2] = (group_pose[row][2] * j_elems + pose_3d[row][2]) * inv_j;
|
|
group_pose[row][3] = (group_pose[row][3] * j_elems + pose_3d[row][3]) * inv_j;
|
|
per_group_visible_counts[best_group][row]++;
|
|
}
|
|
}
|
|
group_indices.push_back(static_cast<int>(i));
|
|
}
|
|
}
|
|
|
|
// Merge close groups
|
|
// Depending on the inital group creation, one or more groups can be created that in the end
|
|
// share the same persons, even if they had a larger distance at the beginning
|
|
// So merge them similar to the group assignment before
|
|
std::vector<std::tuple<
|
|
std::array<float, 3>, std::vector<std::array<float, 4>>, std::vector<int>>>
|
|
merged_groups;
|
|
for (size_t i = 0; i < groups.size(); ++i)
|
|
{
|
|
size_t num_joints = std::get<1>(groups[i]).size();
|
|
auto &group = groups[i];
|
|
auto &group_visible_counts = per_group_visible_counts[i];
|
|
|
|
float best_dist = std::numeric_limits<float>::infinity();
|
|
int best_group = -1;
|
|
|
|
for (size_t j = 0; j < merged_groups.size(); ++j)
|
|
{
|
|
const auto &group_pose = std::get<1>(group);
|
|
const auto &merged_pose = std::get<1>(merged_groups[j]);
|
|
|
|
// Calculate average joint distance
|
|
std::vector<float> dists;
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
float score1 = group_pose[row][3];
|
|
float score2 = merged_pose[row][3];
|
|
if (score1 > min_score && score2 > min_score)
|
|
{
|
|
float dx = group_pose[row][0] - merged_pose[row][0];
|
|
float dy = group_pose[row][1] - merged_pose[row][1];
|
|
float dz = group_pose[row][2] - merged_pose[row][2];
|
|
float dist = std::sqrt(dx * dx + dy * dy + dz * dz);
|
|
dists.push_back(dist);
|
|
}
|
|
}
|
|
if (dists.size() >= 5)
|
|
{
|
|
// Drop highest value to reduce influence of outliers
|
|
auto max_it = std::max_element(dists.begin(), dists.end());
|
|
dists.erase(max_it);
|
|
}
|
|
if (dists.size() > 0)
|
|
{
|
|
// Check if the average joint distance is close enough
|
|
float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0);
|
|
avg_dist /= static_cast<float>(dists.size());
|
|
if (avg_dist < max_joint_avg_dist && avg_dist < best_dist)
|
|
{
|
|
best_dist = avg_dist;
|
|
best_group = static_cast<int>(j);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_group == -1)
|
|
{
|
|
// Create a new group
|
|
merged_groups.push_back(group);
|
|
}
|
|
else
|
|
{
|
|
// Update existing group
|
|
auto &merged_group = merged_groups[best_group];
|
|
std::array<float, 3> &merged_center = std::get<0>(merged_group);
|
|
std::array<float, 3> &group_center = std::get<0>(group);
|
|
std::vector<int> &merged_group_indices = std::get<2>(merged_group);
|
|
|
|
float n_elems1 = static_cast<float>(merged_group_indices.size());
|
|
float n_elems2 = static_cast<float>(std::get<2>(group).size());
|
|
float inv1 = n_elems1 / (n_elems1 + n_elems2);
|
|
float inv2 = n_elems2 / (n_elems1 + n_elems2);
|
|
|
|
// Update group center
|
|
merged_center[0] = merged_center[0] * inv1 + group_center[0] * inv2;
|
|
merged_center[1] = merged_center[1] * inv1 + group_center[1] * inv2;
|
|
merged_center[2] = merged_center[2] * inv1 + group_center[2] * inv2;
|
|
|
|
// Update group pose
|
|
for (size_t row = 0; row < num_joints; ++row)
|
|
{
|
|
auto &group_pose = std::get<1>(group);
|
|
auto &merged_pose = std::get<1>(merged_group);
|
|
|
|
if (group_pose[row][3] > min_score)
|
|
{
|
|
float j_elems1 = static_cast<float>(group_visible_counts[row]);
|
|
float j_elems2 = static_cast<float>(per_group_visible_counts[best_group][row]);
|
|
float inv1 = j_elems1 / (j_elems1 + j_elems2);
|
|
float inv2 = j_elems2 / (j_elems1 + j_elems2);
|
|
|
|
merged_pose[row][0] = merged_pose[row][0] * inv1 + group_pose[row][0] * inv2;
|
|
merged_pose[row][1] = merged_pose[row][1] * inv1 + group_pose[row][1] * inv2;
|
|
merged_pose[row][2] = merged_pose[row][2] * inv1 + group_pose[row][2] * inv2;
|
|
merged_pose[row][3] = merged_pose[row][3] * inv1 + group_pose[row][3] * inv2;
|
|
group_visible_counts[row]++;
|
|
}
|
|
}
|
|
|
|
// Merge indices
|
|
merged_group_indices.insert(
|
|
merged_group_indices.end(), std::get<2>(group).begin(), std::get<2>(group).end());
|
|
}
|
|
}
|
|
|
|
return merged_groups;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::array<float, 4>> TriangulatorInternal::merge_group(
|
|
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
|
|
float min_score)
|
|
{
|
|
size_t num_poses = poses_3d.size();
|
|
size_t num_joints = poses_3d[0].size();
|
|
|
|
// Merge poses to create initial pose
|
|
// Use only those triangulations with a high score
|
|
std::vector<std::array<float, 4>> sum_poses(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
std::vector<int> sum_mask1(num_joints, 0);
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
const auto &pose = poses_3d[i];
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float score = pose[j][3];
|
|
if (score > min_score)
|
|
{
|
|
sum_poses[j][0] += pose[j][0];
|
|
sum_poses[j][1] += pose[j][1];
|
|
sum_poses[j][2] += pose[j][2];
|
|
sum_poses[j][3] += score;
|
|
sum_mask1[j]++;
|
|
}
|
|
}
|
|
}
|
|
std::vector<std::array<float, 4>> initial_pose_3d(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (sum_mask1[j] > 0)
|
|
{
|
|
float inv = 1.0 / sum_mask1[j];
|
|
initial_pose_3d[j][0] = sum_poses[j][0] * inv;
|
|
initial_pose_3d[j][1] = sum_poses[j][1] * inv;
|
|
initial_pose_3d[j][2] = sum_poses[j][2] * inv;
|
|
initial_pose_3d[j][3] = sum_poses[j][3] * inv;
|
|
}
|
|
}
|
|
|
|
// Use center as default if the initial pose is empty
|
|
std::vector<bool> jmask(num_joints, false);
|
|
std::array<float, 3> center = {0.0, 0.0, 0.0};
|
|
int valid_joints = 0;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float score = initial_pose_3d[j][3];
|
|
if (score > min_score)
|
|
{
|
|
jmask[j] = true;
|
|
center[0] += initial_pose_3d[j][0];
|
|
center[1] += initial_pose_3d[j][1];
|
|
center[2] += initial_pose_3d[j][2];
|
|
valid_joints++;
|
|
}
|
|
}
|
|
if (valid_joints > 0)
|
|
{
|
|
float inv_valid = 1.0 / valid_joints;
|
|
center[0] *= inv_valid;
|
|
center[1] *= inv_valid;
|
|
center[2] *= inv_valid;
|
|
}
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (!jmask[j])
|
|
{
|
|
initial_pose_3d[j][0] = center[0];
|
|
initial_pose_3d[j][1] = center[1];
|
|
initial_pose_3d[j][2] = center[2];
|
|
initial_pose_3d[j][3] = 0.0;
|
|
}
|
|
}
|
|
|
|
// Drop joints with low scores and filter outlying joints using distance threshold
|
|
float offset = 0.1;
|
|
float max_dist_sq = 1.2 * 1.2;
|
|
std::vector<std::vector<bool>> mask(num_poses, std::vector<bool>(num_joints, false));
|
|
std::vector<std::vector<float>> distances(num_poses, std::vector<float>(num_joints, 0.0));
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
const auto &pose = poses_3d[i];
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
float dx = pose[j][0] - initial_pose_3d[j][0];
|
|
float dy = pose[j][1] - initial_pose_3d[j][1];
|
|
float dz = pose[j][2] - initial_pose_3d[j][2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
distances[i][j] = dist_sq;
|
|
|
|
float score = pose[j][3];
|
|
if (dist_sq <= max_dist_sq && score > (min_score - offset))
|
|
{
|
|
mask[i][j] = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Select the best-k proposals for each joint that are closest to the initial pose
|
|
int keep_best = std::max(3, (int)num_poses / 3);
|
|
std::vector<std::vector<bool>> best_k_mask(num_poses, std::vector<bool>(num_joints, false));
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
std::vector<std::pair<float, int>> valid_indices;
|
|
valid_indices.reserve(num_poses);
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
if (mask[i][j])
|
|
{
|
|
valid_indices.emplace_back(distances[i][j], i);
|
|
}
|
|
}
|
|
int num_valid = std::min(keep_best, static_cast<int>(valid_indices.size()));
|
|
if (num_valid > 0)
|
|
{
|
|
std::partial_sort(valid_indices.begin(), valid_indices.begin() + num_valid,
|
|
valid_indices.end());
|
|
|
|
for (int k = 0; k < num_valid; ++k)
|
|
{
|
|
int idx = valid_indices[k].second;
|
|
best_k_mask[idx][j] = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Combine masks
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
mask[i][j] = mask[i][j] && best_k_mask[i][j];
|
|
}
|
|
}
|
|
|
|
// Compute the final pose
|
|
std::vector<std::array<float, 4>> sum_poses2(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
std::vector<int> sum_mask2(num_joints, 0);
|
|
for (size_t i = 0; i < num_poses; ++i)
|
|
{
|
|
const auto &pose = poses_3d[i];
|
|
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (mask[i][j])
|
|
{
|
|
sum_poses2[j][0] += pose[j][0];
|
|
sum_poses2[j][1] += pose[j][1];
|
|
sum_poses2[j][2] += pose[j][2];
|
|
sum_poses2[j][3] += pose[j][3];
|
|
sum_mask2[j]++;
|
|
}
|
|
}
|
|
}
|
|
std::vector<std::array<float, 4>> final_pose_3d(num_joints, {0.0, 0.0, 0.0, 0.0});
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (sum_mask2[j] > 0)
|
|
{
|
|
float inv = 1.0 / sum_mask2[j];
|
|
final_pose_3d[j][0] = sum_poses2[j][0] * inv;
|
|
final_pose_3d[j][1] = sum_poses2[j][1] * inv;
|
|
final_pose_3d[j][2] = sum_poses2[j][2] * inv;
|
|
final_pose_3d[j][3] = sum_poses2[j][3] * inv;
|
|
}
|
|
}
|
|
|
|
return final_pose_3d;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void TriangulatorInternal::add_extra_joints(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::vector<std::string> &joint_names)
|
|
{
|
|
// Find indices of "head", "ear_left", and "ear_right"
|
|
auto it_head = std::find(joint_names.begin(), joint_names.end(), "head");
|
|
auto it_ear_left = std::find(joint_names.begin(), joint_names.end(), "ear_left");
|
|
auto it_ear_right = std::find(joint_names.begin(), joint_names.end(), "ear_right");
|
|
int idx_h = std::distance(joint_names.begin(), it_head);
|
|
int idx_el = std::distance(joint_names.begin(), it_ear_left);
|
|
int idx_er = std::distance(joint_names.begin(), it_ear_right);
|
|
|
|
// If the confidence score of "head" is zero compute it as the average of the ears
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
if (pose[idx_h][3] == 0.0)
|
|
{
|
|
if (pose[idx_el][3] > 0.0 && pose[idx_er][3] > 0.0)
|
|
{
|
|
pose[idx_h][0] = (pose[idx_el][0] + pose[idx_er][0]) * 0.5;
|
|
pose[idx_h][1] = (pose[idx_el][1] + pose[idx_er][1]) * 0.5;
|
|
pose[idx_h][2] = (pose[idx_el][2] + pose[idx_er][2]) * 0.5;
|
|
pose[idx_h][3] = std::min(pose[idx_el][3], pose[idx_er][3]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void TriangulatorInternal::filter_poses(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::array<std::array<float, 3>, 2> &roomparams,
|
|
const std::vector<size_t> &core_joint_idx,
|
|
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
|
|
{
|
|
const float min_score = 0.1;
|
|
std::vector<int> drop_indices;
|
|
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
size_t num_joints = pose.size();
|
|
|
|
// Collect valid joint indices
|
|
std::vector<size_t> valid_joint_idx;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (pose[j][3] > min_score)
|
|
{
|
|
valid_joint_idx.push_back(j);
|
|
}
|
|
}
|
|
|
|
// Drop poses with too few joints
|
|
if (valid_joint_idx.size() < 5)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
|
|
// Compute min, max and mean coordinates
|
|
std::array<float, 3> mean = {0.0, 0.0, 0.0};
|
|
std::array<float, 3> mins = {
|
|
std::numeric_limits<float>::max(),
|
|
std::numeric_limits<float>::max(),
|
|
std::numeric_limits<float>::max()};
|
|
std::array<float, 3> maxs = {
|
|
std::numeric_limits<float>::lowest(),
|
|
std::numeric_limits<float>::lowest(),
|
|
std::numeric_limits<float>::lowest()};
|
|
for (size_t j = 0; j < valid_joint_idx.size(); ++j)
|
|
{
|
|
size_t idx = valid_joint_idx[j];
|
|
for (size_t k = 0; k < 3; ++k)
|
|
{
|
|
mins[k] = std::min(mins[k], pose[idx][k]);
|
|
maxs[k] = std::max(maxs[k], pose[idx][k]);
|
|
mean[k] += pose[idx][k];
|
|
}
|
|
}
|
|
for (size_t k = 0; k < 3; ++k)
|
|
{
|
|
mean[k] /= static_cast<float>(valid_joint_idx.size());
|
|
}
|
|
|
|
// Drop poses that are too large or too small or too flat
|
|
float min_flatness = 0.2;
|
|
float max_size = 2.3;
|
|
float min_size = 0.3;
|
|
std::array<float, 3> diff;
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
diff[j] = maxs[j] - mins[j];
|
|
}
|
|
if (diff[0] > max_size || diff[1] > max_size || diff[2] > max_size)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
if (diff[0] < min_size && diff[1] < min_size && diff[2] < min_size)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
if ((diff[0] < min_flatness && diff[1] < min_flatness) ||
|
|
(diff[1] < min_flatness && diff[2] < min_flatness) ||
|
|
(diff[2] < min_flatness && diff[0] < min_flatness))
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
|
|
// Drop persons outside room
|
|
const float wdist = 0.1;
|
|
const auto &room_size = roomparams[0];
|
|
const auto &room_center = roomparams[1];
|
|
const std::array<float, 3> room_half_size = {
|
|
room_size[0] / 2.0f,
|
|
room_size[1] / 2.0f,
|
|
room_size[2] / 2.0f};
|
|
bool outside = false;
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
// Check if the mean position is outside the room boundaries
|
|
if (mean[j] > room_half_size[j] + room_center[j] ||
|
|
mean[j] < -room_half_size[j] + room_center[j])
|
|
{
|
|
outside = true;
|
|
break;
|
|
}
|
|
}
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
// Check if any limb is too far outside the room
|
|
if (maxs[j] > room_half_size[j] + room_center[j] + wdist ||
|
|
mins[j] < -room_half_size[j] + room_center[j] - wdist)
|
|
{
|
|
outside = true;
|
|
break;
|
|
}
|
|
}
|
|
if (outside)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
|
|
// Calculate total limb length and average limb length
|
|
const float max_avg_length = 0.5;
|
|
const float min_avg_length = 0.1;
|
|
float total_length = 0.0;
|
|
int total_limbs = 0;
|
|
for (size_t j = 0; j < core_limbs_idx.size(); ++j)
|
|
{
|
|
size_t start_idx = core_joint_idx[core_limbs_idx[j][0]];
|
|
size_t end_idx = core_joint_idx[core_limbs_idx[j][1]];
|
|
|
|
if (pose[start_idx][3] < min_score || pose[end_idx][3] < min_score)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
float dx = pose[end_idx][0] - pose[start_idx][0];
|
|
float dy = pose[end_idx][1] - pose[start_idx][1];
|
|
float dz = pose[end_idx][2] - pose[start_idx][2];
|
|
float length = std::sqrt(dx * dx + dy * dy + dz * dz);
|
|
|
|
total_length += length;
|
|
total_limbs++;
|
|
}
|
|
if (total_limbs == 0)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
float average_length = total_length / static_cast<float>(total_limbs);
|
|
if (average_length < min_avg_length)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
if (total_limbs >= 5 && average_length > max_avg_length)
|
|
{
|
|
drop_indices.push_back(i);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Set confidences of invalid poses to a low value
|
|
for (size_t i = 0; i < drop_indices.size(); ++i)
|
|
{
|
|
auto &pose = poses[drop_indices[i]];
|
|
for (size_t j = 0; j < pose.size(); ++j)
|
|
{
|
|
pose[j][3] = 0.001;
|
|
}
|
|
}
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void TriangulatorInternal::add_missing_joints(
|
|
std::vector<std::vector<std::array<float, 4>>> &poses,
|
|
const std::vector<std::string> &joint_names)
|
|
{
|
|
// Map joint names to their indices for quick lookup
|
|
std::unordered_map<std::string, size_t> joint_name_to_idx;
|
|
for (size_t idx = 0; idx < joint_names.size(); ++idx)
|
|
{
|
|
joint_name_to_idx[joint_names[idx]] = idx;
|
|
}
|
|
|
|
// Adjacent mapping
|
|
std::unordered_map<std::string, std::vector<std::string>> adjacents = {
|
|
{"hip_right", {"hip_middle", "hip_left"}},
|
|
{"hip_left", {"hip_middle", "hip_right"}},
|
|
{"knee_right", {"hip_right", "knee_left"}},
|
|
{"knee_left", {"hip_left", "knee_right"}},
|
|
{"ankle_right", {"knee_right", "ankle_left"}},
|
|
{"ankle_left", {"knee_left", "ankle_right"}},
|
|
{"shoulder_right", {"shoulder_middle", "shoulder_left"}},
|
|
{"shoulder_left", {"shoulder_middle", "shoulder_right"}},
|
|
{"elbow_right", {"shoulder_right", "hip_right"}},
|
|
{"elbow_left", {"shoulder_left", "hip_left"}},
|
|
{"wrist_right", {"elbow_right"}},
|
|
{"wrist_left", {"elbow_left"}},
|
|
{"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
|
|
{"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
|
|
{"foot_*_left_*", {"ankle_left"}},
|
|
{"foot_*_right_*", {"ankle_right"}},
|
|
{"face_*", {"nose"}},
|
|
{"hand_*_left_*", {"wrist_left"}},
|
|
{"hand_*_right_*", {"wrist_right"}}};
|
|
|
|
for (size_t i = 0; i < poses.size(); ++i)
|
|
{
|
|
auto &pose = poses[i];
|
|
size_t num_joints = pose.size();
|
|
|
|
// Collect valid joint indices
|
|
std::vector<size_t> valid_joint_idx;
|
|
for (size_t j = 0; j < num_joints; ++j)
|
|
{
|
|
if (pose[j][3] > min_match_score)
|
|
{
|
|
valid_joint_idx.push_back(j);
|
|
}
|
|
}
|
|
|
|
if (valid_joint_idx.empty())
|
|
{
|
|
continue;
|
|
}
|
|
|
|
// Compute body center as the mean of valid joints
|
|
std::array<float, 3> body_center = {0.0, 0.0, 0.0};
|
|
for (size_t idx : valid_joint_idx)
|
|
{
|
|
body_center[0] += pose[idx][0];
|
|
body_center[1] += pose[idx][1];
|
|
body_center[2] += pose[idx][2];
|
|
}
|
|
for (int j = 0; j < 3; ++j)
|
|
{
|
|
body_center[j] /= static_cast<float>(valid_joint_idx.size());
|
|
}
|
|
|
|
// Iterate over each joint
|
|
for (size_t j = 0; j < joint_names.size(); ++j)
|
|
{
|
|
std::string adname = "";
|
|
const std::string &jname = joint_names[j];
|
|
|
|
// Determine adjacent joint based on name patterns
|
|
if (jname.substr(0, 5) == "foot_" && jname.find("_left") != std::string::npos)
|
|
{
|
|
adname = "foot_*_left_*";
|
|
}
|
|
else if (jname.substr(0, 5) == "foot_" && jname.find("_right") != std::string::npos)
|
|
{
|
|
adname = "foot_*_right_*";
|
|
}
|
|
else if (jname.substr(0, 5) == "face_")
|
|
{
|
|
adname = "face_*";
|
|
}
|
|
else if (jname.substr(0, 5) == "hand_" && jname.find("_left") != std::string::npos)
|
|
{
|
|
adname = "hand_*_left_*";
|
|
}
|
|
else if (jname.substr(0, 5) == "hand_" && jname.find("_right") != std::string::npos)
|
|
{
|
|
adname = "hand_*_right_*";
|
|
}
|
|
else if (adjacents.find(jname) != adjacents.end())
|
|
{
|
|
adname = jname;
|
|
}
|
|
|
|
if (adname == "")
|
|
{
|
|
// No adjacent joints defined for this joint
|
|
continue;
|
|
}
|
|
|
|
if (pose[j][3] == 0.0)
|
|
{
|
|
if (adjacents.find(adname) != adjacents.end())
|
|
{
|
|
// Joint is missing; attempt to estimate its position based on adjacent joints
|
|
std::array<float, 3> adjacent_position = {0.0, 0.0, 0.0};
|
|
size_t adjacent_count = 0;
|
|
|
|
const std::vector<std::string> &adjacent_joint_names = adjacents[adname];
|
|
for (const std::string &adj_name : adjacent_joint_names)
|
|
{
|
|
auto it = joint_name_to_idx.find(adj_name);
|
|
if (it != joint_name_to_idx.end())
|
|
{
|
|
size_t adj_idx = it->second;
|
|
if (adj_idx < pose.size() && pose[adj_idx][3] > 0.1)
|
|
{
|
|
adjacent_position[0] += pose[adj_idx][0];
|
|
adjacent_position[1] += pose[adj_idx][1];
|
|
adjacent_position[2] += pose[adj_idx][2];
|
|
++adjacent_count;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (adjacent_count > 0)
|
|
{
|
|
for (size_t k = 0; k < 3; ++k)
|
|
{
|
|
adjacent_position[k] /= static_cast<float>(adjacent_count);
|
|
}
|
|
|
|
// Update the missing joint's position
|
|
pose[j][0] = adjacent_position[0];
|
|
pose[j][1] = adjacent_position[1];
|
|
pose[j][2] = adjacent_position[2];
|
|
}
|
|
else
|
|
{
|
|
// No valid adjacent joints, use body center
|
|
pose[j][0] = body_center[0];
|
|
pose[j][1] = body_center[1];
|
|
pose[j][2] = body_center[2];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Adjacent joints not defined, use body center
|
|
pose[j][0] = body_center[0];
|
|
pose[j][1] = body_center[1];
|
|
pose[j][2] = body_center[2];
|
|
}
|
|
|
|
// Set a low confidence score
|
|
pose[j][3] = 0.1;
|
|
}
|
|
}
|
|
}
|
|
}
|