Files
RapidPoseTriangulation/spt/triangulator.cpp
2024-09-16 17:57:46 +02:00

1252 lines
42 KiB
C++

#include <numeric>
#include <omp.h>
#include <opencv2/opencv.hpp>
#include "camera.hpp"
#include "triangulator.hpp"
// =================================================================================================
// =================================================================================================
[[maybe_unused]] static void print_2d_mat(const cv::Mat &mat)
{
// Ensure the matrix is 2D
if (mat.dims != 2)
{
std::cerr << "Error: The matrix is not 2D." << std::endl;
return;
}
// Retrieve matrix dimensions
int rows = mat.rows;
int cols = mat.cols;
// Print the matrix in a NumPy-like style
std::cout << "cv::Mat('shape': (" << rows << ", " << cols << ")";
std::cout << ", 'data': [" << std::endl;
for (int i = 0; i < rows; ++i)
{
std::cout << " [";
for (int j = 0; j < cols; ++j)
{
std::cout << std::fixed << std::setprecision(3) << mat.at<double>(i, j);
if (j < cols - 1)
{
std::cout << ", ";
}
}
std::cout << "]";
if (i < rows - 1)
{
std::cout << "," << std::endl;
}
}
std::cout << "])" << std::endl;
}
// =================================================================================================
[[maybe_unused]] static void print_allpairs(
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs)
{
std::cout << "All pairs: [" << std::endl;
for (const auto &pair : all_pairs)
{
const auto &tuple_part = pair.first;
const auto &pair_part = pair.second;
// Print the tuple part
std::cout << "("
<< std::get<0>(tuple_part) << ", "
<< std::get<1>(tuple_part) << ", "
<< std::get<2>(tuple_part) << ", "
<< std::get<3>(tuple_part) << ")";
// Print the pair part
std::cout << ", ("
<< pair_part.first << ", "
<< pair_part.second << ")"
<< std::endl;
}
std::cout << "]" << std::endl;
}
// =================================================================================================
// =================================================================================================
CameraInternal::CameraInternal(const Camera &cam)
{
this->cam = cam;
// Convert camera matrix to cv::Mat for OpenCV
K = cv::Mat(3, 3, CV_64FC1, const_cast<double *>(&cam.K[0][0])).clone();
DC = cv::Mat(cam.DC.size(), 1, CV_64FC1, const_cast<double *>(cam.DC.data())).clone();
R = cv::Mat(3, 3, CV_64FC1, const_cast<double *>(&cam.R[0][0])).clone();
T = cv::Mat(3, 1, CV_64FC1, const_cast<double *>(&cam.T[0][0])).clone();
}
// =================================================================================================
void CameraInternal::update_projection_matrix()
{
// Calculate opencv-style projection matrix
cv::Mat Tr, RT;
Tr = R * (T * -1);
cv::hconcat(R, Tr, RT);
P = K * RT;
}
// =================================================================================================
// =================================================================================================
TriangulatorInternal::TriangulatorInternal(double min_score)
{
this->min_score = min_score;
}
// =================================================================================================
std::vector<std::vector<std::array<double, 4>>> TriangulatorInternal::triangulate_poses(
const std::vector<std::vector<std::vector<std::array<double, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<double, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names)
{
// Check inputs
if (poses_2d.empty())
{
throw std::invalid_argument("No 2D poses provided.");
}
if (poses_2d.size() != cameras.size())
{
throw std::invalid_argument("Number of cameras and 2D poses must be the same.");
}
if (joint_names.size() != poses_2d[0][0].size())
{
throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
}
for (const auto &joint : core_joints)
{
if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end())
{
throw std::invalid_argument("Core joint '" + joint + "' not found in joint names.");
}
}
// Convert inputs to internal formats
std::vector<std::vector<cv::Mat>> poses_2d_mats;
poses_2d_mats.reserve(cameras.size());
for (size_t i = 0; i < cameras.size(); ++i)
{
size_t num_persons = poses_2d[i].size();
size_t num_joints = poses_2d[i][0].size();
std::vector<cv::Mat> camera_poses;
camera_poses.reserve(num_persons);
for (size_t j = 0; j < num_persons; ++j)
{
std::vector<int> dims = {(int)num_joints, 3};
cv::Mat pose_mat = cv::Mat(dims, CV_64F);
// Use pointer to copy data efficiently
double *mat_ptr = pose_mat.ptr<double>(0);
for (size_t k = 0; k < num_joints; ++k)
{
const auto &joint = poses_2d[i][j][k];
mat_ptr[k * 3 + 0] = joint[0];
mat_ptr[k * 3 + 1] = joint[1];
mat_ptr[k * 3 + 2] = joint[2];
}
camera_poses.push_back(std::move(pose_mat));
}
poses_2d_mats.push_back(std::move(camera_poses));
}
std::vector<CameraInternal> internal_cameras;
for (size_t i = 0; i < cameras.size(); ++i)
{
internal_cameras.emplace_back(cameras[i]);
}
std::vector<size_t> core_joint_idx;
for (const auto &joint : core_joints)
{
auto it = std::find(joint_names.begin(), joint_names.end(), joint);
core_joint_idx.push_back(std::distance(joint_names.begin(), it));
}
std::vector<std::array<size_t, 2>> core_limbs_idx;
for (const auto &limb : core_limbs)
{
auto it1 = std::find(core_joints.begin(), core_joints.end(), limb.first);
auto it2 = std::find(core_joints.begin(), core_joints.end(), limb.second);
core_limbs_idx.push_back({(size_t)std::distance(core_joints.begin(), it1),
(size_t)std::distance(core_joints.begin(), it2)});
}
// Undistort 2D poses
#pragma omp parallel for
for (size_t i = 0; i < cameras.size(); ++i)
{
undistort_poses(poses_2d_mats[i], internal_cameras[i]);
internal_cameras[i].update_projection_matrix();
}
// Project last 3D poses to 2D
std::vector<std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>>> last_poses_2d;
if (!last_poses_3d.empty())
{
// Select core joints
std::vector<cv::Mat> last_core_poses;
last_core_poses.resize(last_poses_3d.size());
#pragma omp parallel for
for (size_t i = 0; i < last_poses_3d.size(); ++i)
{
cv::Mat &pose = last_poses_3d[i];
std::vector<int> dims = {(int)core_joint_idx.size(), 4};
cv::Mat last_poses_core(dims, pose.type());
for (size_t j = 0; j < core_joint_idx.size(); ++j)
{
pose.row(core_joint_idx[j]).copyTo(last_poses_core.row(j));
}
last_core_poses[i] = last_poses_core;
}
// Project
last_poses_2d.resize(cameras.size());
#pragma omp parallel for
for (size_t i = 0; i < cameras.size(); ++i)
{
auto [poses2d, dists] = project_poses(last_core_poses, internal_cameras[i], true);
last_poses_2d[i] = std::make_tuple(poses2d, dists);
}
}
// Check matches to old poses
double threshold = min_score - 0.2;
std::map<size_t, std::map<size_t, std::vector<size_t>>> scored_pasts;
if (!last_poses_3d.empty())
{
// Calculate index pairs and initialize vectors
std::vector<std::array<size_t, 3>> indices_ijk;
for (size_t i = 0; i < cameras.size(); ++i)
{
size_t num_last_persons = std::get<0>(last_poses_2d[i]).size();
scored_pasts[i] = std::map<size_t, std::vector<size_t>>();
for (size_t j = 0; j < num_last_persons; ++j)
{
size_t num_new_persons = poses_2d_mats[i].size();
scored_pasts[i][j] = std::vector<size_t>();
for (size_t k = 0; k < num_new_persons; ++k)
{
indices_ijk.push_back({i, j, k});
}
}
}
std::vector<std::array<size_t, 2>> indices_ik;
for (size_t i = 0; i < cameras.size(); ++i)
{
size_t num_new_persons = poses_2d_mats[i].size();
for (size_t k = 0; k < num_new_persons; ++k)
{
indices_ik.push_back({i, k});
}
}
// Precalculate core poses
std::vector<cv::Mat> poses_2d_mats_core_list;
poses_2d_mats_core_list.resize(indices_ik.size());
std::vector<std::vector<size_t>> mats_core_map;
mats_core_map.resize(cameras.size());
for (size_t i = 0; i < cameras.size(); ++i)
{
size_t num_new_persons = poses_2d_mats[i].size();
for (size_t k = 0; k < num_new_persons; ++k)
{
mats_core_map[i].push_back(0);
}
}
#pragma omp parallel for
for (size_t e = 0; e < indices_ik.size(); ++e)
{
const auto [i, k] = indices_ik[e];
const cv::Mat &pose = poses_2d_mats[i][k];
std::vector<int> dims = {(int)core_joint_idx.size(), 3};
cv::Mat pose_core(dims, pose.type());
for (size_t j = 0; j < core_joint_idx.size(); ++j)
{
pose.row(core_joint_idx[j]).copyTo(pose_core.row(j));
}
poses_2d_mats_core_list[e] = pose_core;
mats_core_map[i][k] = e;
}
// Calculate matching score
#pragma omp parallel for
for (size_t e = 0; e < indices_ijk.size(); ++e)
{
const auto [i, j, k] = indices_ijk[e];
const cv::Mat &last_pose = std::get<0>(last_poses_2d[i])[j];
const cv::Mat &last_dist = std::get<1>(last_poses_2d[i])[j];
const cv::Mat &new_pose = poses_2d_mats_core_list[mats_core_map[i][k]];
double score = calc_pose_score(new_pose, last_pose, last_dist, internal_cameras[i]);
if (score > threshold)
{
#pragma omp critical
{
scored_pasts[i][j].push_back(k);
}
}
}
}
// Create pairs of persons
// Checks if the person was already matched to the last frame and if so only creates pairs
// with those, else it creates all possible pairs
std::vector<int> num_persons_sum;
for (size_t i = 0; i < cameras.size(); ++i)
{
int nsum = poses_2d[i].size();
if (i > 0)
{
nsum += num_persons_sum[i - 1];
}
num_persons_sum.push_back(nsum);
}
std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> all_pairs;
std::vector<std::array<size_t, 4>> indices;
for (size_t i = 0; i < cameras.size(); ++i)
{
for (size_t j = i + 1; j < cameras.size(); ++j)
{
for (size_t k = 0; k < poses_2d[i].size(); ++k)
{
for (size_t l = 0; l < poses_2d[j].size(); ++l)
{
indices.push_back({i, j, k, l});
}
}
}
}
#pragma omp parallel for ordered schedule(dynamic)
for (size_t e = 0; e < indices.size(); ++e)
{
const auto [i, j, k, l] = indices[e];
int pid1 = num_persons_sum[i] + k;
int pid2 = num_persons_sum[k] + l;
bool match = false;
if (!last_poses_3d.empty())
{
for (size_t m = 0; m < last_poses_3d.size(); ++m)
{
auto &smi = scored_pasts[i][m];
auto &smj = scored_pasts[j][m];
bool in_smi = std::find(smi.begin(), smi.end(), k) != smi.end();
bool in_smj = std::find(smj.begin(), smj.end(), l) != smj.end();
if (in_smi && in_smj)
{
match = true;
auto item = std::make_pair(
std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2));
#pragma omp ordered
all_pairs.push_back(item);
}
else if (in_smi || in_smj)
{
match = true;
}
}
}
if (!match)
{
auto item = std::make_pair(
std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2));
// Needed to prevent randomized grouping/merging with slightly different results
#pragma omp ordered
all_pairs.push_back(item);
}
}
// Calculate pair scores
std::vector<std::pair<cv::Mat, double>> all_scored_poses;
all_scored_poses.resize(all_pairs.size());
#pragma omp parallel for
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const auto &pids = all_pairs[i].first;
// Extract camera parameters
const auto &cam1 = internal_cameras[std::get<0>(pids)];
const auto &cam2 = internal_cameras[std::get<1>(pids)];
// Extract 2D poses
const cv::Mat &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)];
const cv::Mat &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)];
// Select core joints
std::vector<int> dims = {(int)core_joint_idx.size(), 3};
cv::Mat pose1_core(dims, pose1.type());
cv::Mat pose2_core(dims, pose2.type());
for (size_t j = 0; j < core_joint_idx.size(); ++j)
{
size_t idx = core_joint_idx[j];
pose1.row(idx).copyTo(pose1_core.row(j));
pose2.row(idx).copyTo(pose2_core.row(j));
}
// Triangulate and score
auto [pose3d, score] = triangulate_and_score(
pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx);
all_scored_poses[i] = std::make_pair(pose3d, score);
}
// Drop low scoring poses
std::vector<size_t> drop_indices;
for (size_t i = 0; i < all_scored_poses.size(); ++i)
{
if (all_scored_poses[i].second < min_score)
{
drop_indices.push_back(i);
}
}
if (!drop_indices.empty())
{
for (size_t i = drop_indices.size(); i > 0; --i)
{
all_scored_poses.erase(all_scored_poses.begin() + drop_indices[i - 1]);
all_pairs.erase(all_pairs.begin() + drop_indices[i - 1]);
}
}
// Group pairs that share a person
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> groups;
groups = calc_grouping(all_pairs, all_scored_poses, min_score);
// Calculate full 3D poses
std::vector<cv::Mat> all_full_poses;
all_full_poses.resize(all_pairs.size());
#pragma omp parallel for
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const auto &pids = all_pairs[i].first;
const auto &cam1 = internal_cameras[std::get<0>(pids)];
const auto &cam2 = internal_cameras[std::get<1>(pids)];
const auto &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)];
const auto &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)];
auto [pose3d, score] = triangulate_and_score(
pose1, pose2, cam1, cam2, roomparams, {});
all_full_poses[i] = (pose3d);
}
// Merge groups
std::vector<cv::Mat> all_merged_poses;
all_merged_poses.resize(groups.size());
#pragma omp parallel for
for (size_t i = 0; i < groups.size(); ++i)
{
const auto &group = groups[i];
std::vector<cv::Mat> poses;
poses.reserve(std::get<2>(group).size());
for (const auto &idx : std::get<2>(group))
{
poses.push_back(all_full_poses[idx]);
}
auto merged_pose = merge_group(poses, min_score);
all_merged_poses[i] = (merged_pose);
}
last_poses_3d = all_merged_poses;
// Convert to output format
std::vector<std::vector<std::array<double, 4>>> poses_3d;
poses_3d.reserve(all_merged_poses.size());
for (size_t i = 0; i < all_merged_poses.size(); ++i)
{
const auto &mat = all_merged_poses[i];
const double *mat_ptr = mat.ptr<double>(0);
std::vector<std::array<double, 4>> pose;
size_t num_joints = mat.rows;
pose.reserve(num_joints);
size_t num_valid = 0;
for (size_t j = 0; j < num_joints; ++j)
{
std::array<double, 4> point;
for (size_t k = 0; k < 4; ++k)
{
point[k] = mat_ptr[j * 4 + k];
}
pose.push_back(point);
if (point[3] > min_score)
{
num_valid++;
}
}
if (num_valid > 0)
{
poses_3d.push_back(std::move(pose));
}
}
return poses_3d;
}
// =================================================================================================
void TriangulatorInternal::reset()
{
last_poses_3d.clear();
}
// =================================================================================================
void TriangulatorInternal::undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam)
{
int width = icam.cam.width;
int height = icam.cam.height;
// Undistort camera matrix
cv::Mat newK = cv::getOptimalNewCameraMatrix(
icam.K, icam.DC, cv::Size(width, height), 1, cv::Size(width, height));
for (size_t p = 0; p < poses.size(); ++p)
{
// Extract the (x, y) coordinates
cv::Mat points = poses[p].colRange(0, 2).clone();
points = points.reshape(2);
// Undistort the points
cv::undistortPoints(points, points, icam.K, icam.DC, cv::noArray(), newK);
// Update the original poses with the undistorted points
points = points.reshape(1);
points.copyTo(poses[p].colRange(0, 2));
// Mask out points that are far outside the image (points slightly outside are still valid)
double mask_offset = (width + height) / 40.0;
int num_joints = poses[p].rows;
double *poses_ptr = poses[p].ptr<double>(0);
for (int j = 0; j < num_joints; ++j)
{
double x = poses_ptr[j * 3 + 0];
double y = poses_ptr[j * 3 + 1];
bool in_x = x >= -mask_offset && x < width + mask_offset;
bool in_y = y >= -mask_offset && y < height + mask_offset;
if (!in_x || !in_y)
{
poses_ptr[j * 3 + 0] = 0;
poses_ptr[j * 3 + 1] = 0;
poses_ptr[j * 3 + 2] = 0;
}
}
}
// Update the camera matrix
icam.K = newK.clone();
icam.DC = cv::Mat::zeros(5, 1, CV_64F);
}
// =================================================================================================
std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>> TriangulatorInternal::project_poses(
const std::vector<cv::Mat> &bodies3D, const CameraInternal &icam, bool calc_dists)
{
size_t num_persons = bodies3D.size();
size_t num_joints = bodies3D[0].rows;
std::vector<cv::Mat> bodies2D_list(num_persons);
std::vector<cv::Mat> dists_list(num_persons);
cv::Mat T_repeated = cv::repeat(icam.T, 1, num_joints);
for (size_t i = 0; i < num_persons; ++i)
{
const cv::Mat &body3D = bodies3D[i];
// Split up vector
cv::Mat points3d = body3D.colRange(0, 3).clone();
// Project from world to camera coordinate system
cv::Mat xyz = (icam.R * (points3d.t() - T_repeated)).t();
// Set points behind the camera to zero
double *xyz_ptr = xyz.ptr<double>(0);
for (size_t i = 0; i < num_joints; ++i)
{
if (xyz_ptr[i * 3 + 2] <= 0)
{
xyz_ptr[i * 3 + 0] = 0;
xyz_ptr[i * 3 + 1] = 0;
xyz_ptr[i * 3 + 2] = 0;
}
}
// Calculate distance from camera center
cv::Mat dists;
if (calc_dists)
{
dists = xyz.mul(xyz);
cv::reduce(dists, dists, 1, cv::REDUCE_SUM, CV_64F);
cv::sqrt(dists, dists);
}
// Project to camera system
cv::Mat uv;
if (icam.cam.type == "fisheye")
{
}
else
{
cv::Mat DCc = icam.DC.rowRange(0, 5).clone();
cv::projectPoints(
xyz, cv::Mat::zeros(3, 1, CV_64F), cv::Mat::zeros(3, 1, CV_64F), icam.K, DCc, uv);
}
uv = uv.reshape(1, {xyz.rows, 2});
// Add scores again
std::vector<int> dimsB = {(int)num_joints, 3};
cv::Mat bodies2D = cv::Mat(dimsB, CV_64F);
double *bodies2D_ptr = bodies2D.ptr<double>(0);
const double *uv_ptr = uv.ptr<double>(0);
const double *bodies3D_ptr = body3D.ptr<double>(0);
for (size_t i = 0; i < num_joints; ++i)
{
bodies2D_ptr[i * 3 + 0] = uv_ptr[i * 2 + 0];
bodies2D_ptr[i * 3 + 1] = uv_ptr[i * 2 + 1];
bodies2D_ptr[i * 3 + 2] = bodies3D_ptr[i * 4 + 3];
}
// Filter invalid projections
for (size_t i = 0; i < num_joints; ++i)
{
double x = bodies2D_ptr[i * 3 + 0];
double y = bodies2D_ptr[i * 3 + 1];
bool in_x = x >= 0 && x < icam.cam.width;
bool in_y = y >= 0 && y < icam.cam.height;
if (!in_x || !in_y)
{
bodies2D_ptr[i * 3 + 0] = 0;
bodies2D_ptr[i * 3 + 1] = 0;
bodies2D_ptr[i * 3 + 2] = 0;
}
}
// Store results
bodies2D_list[i] = bodies2D;
dists_list[i] = dists;
}
return std::make_tuple(bodies2D_list, dists_list);
}
// =================================================================================================
double TriangulatorInternal::calc_pose_score(
const cv::Mat &pose1,
const cv::Mat &pose2,
const cv::Mat &dist1,
const CameraInternal &icam)
{
// Create mask for valid points
double min_score = 0.1;
cv::Mat mask = (pose1.col(2) > min_score) & (pose2.col(2) > min_score);
int valid_count = cv::countNonZero(mask);
// Drop if not enough valid points
if (valid_count < 3)
{
return 0.0;
}
// Calculate scores
double iscale = (icam.cam.width + icam.cam.height) / 2;
cv::Mat scores = score_projection(pose1, pose2, dist1, mask, iscale);
// Drop lowest scores
size_t drop_k = static_cast<size_t>(pose1.rows * 0.2);
size_t min_k = 3;
std::vector<double> scores_vec;
const double *scores_ptr = scores.ptr<double>(0);
const uchar *mask_ptr = mask.ptr<uchar>(0);
for (int i = 0; i < scores.rows; ++i)
{
if (mask_ptr[i] > 0)
{
scores_vec.push_back(scores_ptr[i]);
}
}
std::sort(scores_vec.begin(), scores_vec.end());
drop_k = (scores_vec.size() >= min_k) ? std::min(drop_k, scores_vec.size() - min_k) : 0;
if (drop_k > 0)
{
scores_vec.erase(scores_vec.begin(), scores_vec.begin() + drop_k);
}
// Calculate final score
double score = 0.0;
size_t n_items = scores_vec.size();
for (size_t i = 0; i < n_items; i++)
{
score += scores_vec[i];
}
if (n_items > 0)
{
score /= (double)n_items;
}
return score;
}
// =================================================================================================
cv::Mat TriangulatorInternal::score_projection(
const cv::Mat &pose1,
const cv::Mat &repro1,
const cv::Mat &dists1,
const cv::Mat &mask,
double iscale)
{
double min_score = 0.1;
// Calculate error
cv::Mat error1 = pose1.colRange(0, 2) - repro1.colRange(0, 2);
error1 = error1.mul(error1);
error1 = error1.col(0) + error1.col(1);
cv::sqrt(error1, error1);
error1.setTo(0.0, ~mask);
// Set errors of invisible reprojections to a high value
double penalty = iscale;
cv::Mat mask_invisible = (repro1.col(2) < min_score) & mask;
error1.setTo(penalty, mask_invisible);
// Scale error by image size and distance to the camera
error1 = cv::max(0, cv::min(error1, (iscale / 4.0))) / iscale;
cv::Scalar mean_dist = cv::mean(dists1, mask);
double dscale1 = std::sqrt(mean_dist[0] / 3.5);
error1 *= dscale1;
// Convert errors to a score
cv::Mat score1 = 1.0 / (1.0 + error1 * 10);
return score1;
}
// =================================================================================================
std::pair<cv::Mat, double> TriangulatorInternal::triangulate_and_score(
const cv::Mat &pose1,
const cv::Mat &pose2,
const CameraInternal &cam1,
const CameraInternal &cam2,
const std::array<std::array<double, 3>, 2> &roomparams,
const std::vector<std::array<size_t, 2>> &core_limbs_idx)
{
// Mask out invisible joints
double min_score = 0.1;
cv::Mat mask1a = (pose1.col(2) >= min_score);
cv::Mat mask2a = (pose2.col(2) >= min_score);
const cv::Mat mask = mask1a & mask2a;
const uchar *mask_ptr = mask.ptr<uchar>(0);
// If too few joints are visible, return a low score
int num_visible = cv::countNonZero(mask);
if (num_visible < 3)
{
std::vector<int> dims = {(int)pose1.rows, 4};
cv::Mat pose3d(dims, CV_64F, cv::Scalar(0));
double score = 0.0;
return std::make_pair(pose3d, score);
}
// Extract coordinates
std::vector<int> dimsA = {2, num_visible};
cv::Mat points1 = cv::Mat(dimsA, CV_64F);
cv::Mat points2 = cv::Mat(dimsA, CV_64F);
const double *pose1_ptr = pose1.ptr<double>(0);
const double *pose2_ptr = pose2.ptr<double>(0);
double *points1_ptr = points1.ptr<double>(0);
double *points2_ptr = points2.ptr<double>(0);
int idx = 0;
for (int i = 0; i < pose1.rows; ++i)
{
if (mask_ptr[i] > 0)
{
points1_ptr[idx + 0 * num_visible] = pose1_ptr[i * 3 + 0];
points1_ptr[idx + 1 * num_visible] = pose1_ptr[i * 3 + 1];
points2_ptr[idx + 0 * num_visible] = pose2_ptr[i * 3 + 0];
points2_ptr[idx + 1 * num_visible] = pose2_ptr[i * 3 + 1];
idx++;
}
}
// Triangulate points
cv::Mat points4d_h, points3d;
cv::triangulatePoints(cam1.P, cam2.P, points1, points2, points4d_h);
cv::convertPointsFromHomogeneous(points4d_h.t(), points3d);
// Create the 3D pose matrix
std::vector<int> dimsB = {(int)pose1.rows, 4};
cv::Mat pose3d = cv::Mat(dimsB, CV_64F, cv::Scalar(0));
const double *points3d_ptr = points3d.ptr<double>(0);
double *pose3d_ptr = pose3d.ptr<double>(0);
idx = 0;
for (int i = 0; i < pose1.rows; ++i)
{
if (mask_ptr[i] > 0)
{
pose3d_ptr[i * 4 + 0] = points3d_ptr[idx * 3 + 0];
pose3d_ptr[i * 4 + 1] = points3d_ptr[idx * 3 + 1];
pose3d_ptr[i * 4 + 2] = points3d_ptr[idx * 3 + 2];
pose3d_ptr[i * 4 + 3] = 1.0;
idx++;
}
}
// Check if mean of the points is inside the room bounds
std::array<double, 3> mean = {0, 0, 0};
for (int i = 0; i < pose1.rows; ++i)
{
if (mask_ptr[i] > 0)
{
for (int j = 0; j < 3; ++j)
{
mean[j] += pose3d_ptr[i * 4 + j];
}
}
}
for (int j = 0; j < 3; ++j)
{
mean[j] /= num_visible;
}
std::array<double, 3> center = roomparams[0];
std::array<double, 3> size = roomparams[1];
for (int i = 0; i < 3; ++i)
{
if (mean[i] > center[i] + size[i] / 2 ||
mean[i] < center[i] - size[i] / 2)
{
// Very low score if outside room
pose3d.col(3).setTo(0.001);
return {pose3d, 0.001};
}
}
// Calculate reprojections
std::vector<cv::Mat> poses_3d = {pose3d};
cv::Mat repro1, dists1, repro2, dists2;
auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true);
auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true);
repro1 = repro1s[0];
dists1 = dists1s[0];
repro2 = repro2s[0];
dists2 = dists2s[0];
// Calculate scores for each view
double iscale = (cam1.cam.width + cam1.cam.height) / 2.0;
cv::Mat score1 = score_projection(pose1, repro1, dists1, mask, iscale);
cv::Mat score2 = score_projection(pose2, repro2, dists2, mask, iscale);
// Combine scores
cv::Mat scores = (score1 + score2) / 2.0;
const double *scores_ptr = scores.ptr<double>(0);
// Add scores to 3D pose
for (int i = 0; i < pose1.rows; ++i)
{
if (mask_ptr[i] > 0)
{
pose3d_ptr[i * 4 + 3] = scores_ptr[i];
}
}
// Set scores outside the room to a low value
double wdist = 0.1;
for (int i = 0; i < pose1.rows; ++i)
{
if (mask_ptr[i] > 0)
{
for (int j = 0; j < 3; ++j)
{
if (pose3d_ptr[i * 4 + j] > center[j] + size[j] / 2 + wdist ||
pose3d_ptr[i * 4 + j] < center[j] - size[j] / 2 - wdist)
{
pose3d_ptr[i * 4 + 3] = 0.001;
}
}
}
}
// Filter clearly wrong limbs
if (core_limbs_idx.size() > 0)
{
double max_length = 0.9;
std::vector<size_t> invalid_joints;
for (size_t i = 0; i < core_limbs_idx.size(); ++i)
{
auto limb = core_limbs_idx[i];
if (pose3d_ptr[limb[0] * 4 + 3] > min_score &&
pose3d_ptr[limb[1] * 4 + 3] > min_score)
{
double dx = pose3d_ptr[limb[0] * 4 + 0] - pose3d_ptr[limb[1] * 4 + 0];
double dy = pose3d_ptr[limb[0] * 4 + 1] - pose3d_ptr[limb[1] * 4 + 1];
double dz = pose3d_ptr[limb[0] * 4 + 2] - pose3d_ptr[limb[1] * 4 + 2];
double length = std::sqrt(dx * dx + dy * dy + dz * dz);
if (length > max_length)
{
invalid_joints.push_back(limb[1]);
}
}
}
for (size_t i = 0; i < invalid_joints.size(); ++i)
{
pose3d_ptr[invalid_joints[i] * 4 + 3] = 0.001;
}
}
// Drop lowest scores
size_t drop_k = static_cast<size_t>(pose1.rows * 0.2);
size_t min_k = 3;
std::vector<double> scores_vec;
for (int i = 0; i < pose1.rows; ++i)
{
if (pose3d_ptr[i * 4 + 3] > min_score)
{
scores_vec.push_back(pose3d_ptr[i * 4 + 3]);
}
}
std::sort(scores_vec.begin(), scores_vec.end());
drop_k = (scores_vec.size() >= min_k) ? std::min(drop_k, scores_vec.size() - min_k) : 0;
if (drop_k > 0)
{
scores_vec.erase(scores_vec.begin(), scores_vec.begin() + drop_k);
}
// Calculate final score
double score = 0.0;
size_t n_items = scores_vec.size();
for (size_t i = 0; i < n_items; i++)
{
score += scores_vec[i];
}
if (n_items > 0)
{
score /= (double)n_items;
}
return std::make_pair(pose3d, score);
}
// =================================================================================================
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> TriangulatorInternal::calc_grouping(
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
const std::vector<std::pair<cv::Mat, double>> &all_scored_poses,
double min_score)
{
double max_center_dist = 0.6;
double max_joint_avg_dist = 0.3;
// Calculate pose centers
std::vector<cv::Point3d> centers;
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const cv::Mat &pose_3d = all_scored_poses[i].first;
size_t num_joints = pose_3d.rows;
const double *pose_3d_ptr = pose_3d.ptr<double>(0);
cv::Point3d center(0, 0, 0);
size_t num_valid = 0;
for (size_t j = 0; j < num_joints; ++j)
{
if (pose_3d_ptr[j * 4 + 3] > min_score)
{
center.x += pose_3d_ptr[j * 4 + 0];
center.y += pose_3d_ptr[j * 4 + 1];
center.z += pose_3d_ptr[j * 4 + 2];
num_valid++;
}
}
if (num_valid > 0)
{
center.x /= num_valid;
center.y /= num_valid;
center.z /= num_valid;
}
centers.push_back(center);
}
// Calculate Groups
// defined as a tuple of center, pose, and all-pairs-indices of members
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> groups;
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const cv::Mat &pose_3d = all_scored_poses[i].first;
size_t num_joints = pose_3d.rows;
const double *pose_3d_ptr = pose_3d.ptr<double>(0);
const cv::Point3d &center = centers[i];
double best_dist = std::numeric_limits<double>::infinity();
int best_group = -1;
for (size_t j = 0; j < groups.size(); ++j)
{
auto &group = groups[j];
cv::Point3d &group_center = std::get<0>(group);
cv::Mat &group_pose = std::get<1>(group);
const double *group_pose_ptr = group_pose.ptr<double>(0);
// Check if the center is close enough
if (cv::norm(group_center - center) < max_center_dist)
{
// Calculate average joint distance
std::vector<double> dists;
for (size_t row = 0; row < num_joints; row++)
{
if (pose_3d_ptr[row * 4 + 3] > min_score &&
group_pose_ptr[row * 4 + 3] > min_score)
{
double dx = pose_3d_ptr[row * 4 + 0] - group_pose_ptr[row * 4 + 0];
double dy = pose_3d_ptr[row * 4 + 1] - group_pose_ptr[row * 4 + 1];
double dz = pose_3d_ptr[row * 4 + 2] - group_pose_ptr[row * 4 + 2];
double dist = std::sqrt(dx * dx + dy * dy + dz * dz);
dists.push_back(dist);
}
}
double dist = std::numeric_limits<double>::infinity();
if (!dists.empty())
{
dist = std::accumulate(dists.begin(), dists.end(), 0.0) / dists.size();
}
// Check if the average joint distance is close enough
if (dist < max_joint_avg_dist && dist < best_dist)
{
best_dist = dist;
best_group = j;
}
}
}
if (best_group == -1)
{
// Create a new group
std::vector<int> new_indices{static_cast<int>(i)};
groups.emplace_back(center, pose_3d.clone(), new_indices);
}
else
{
// Update existing group
auto &group = groups[best_group];
cv::Point3d &group_center = std::get<0>(group);
cv::Mat &group_pose = std::get<1>(group);
std::vector<int> &group_indices = std::get<2>(group);
double n_elems = group_indices.size();
group_center = (group_center * n_elems + center) / (n_elems + 1);
cv::Mat new_pose = group_pose.clone();
const double *group_pose_ptr = group_pose.ptr<double>(0);
const double *pose_3d_ptr = pose_3d.ptr<double>(0);
double *new_pose_ptr = new_pose.ptr<double>(0);
for (size_t row = 0; row < num_joints; row++)
{
new_pose_ptr[row * 4 + 0] =
(group_pose_ptr[row * 4 + 0] * n_elems + pose_3d_ptr[row * 4 + 0]) /
(n_elems + 1);
new_pose_ptr[row * 4 + 1] =
(group_pose_ptr[row * 4 + 1] * n_elems + pose_3d_ptr[row * 4 + 1]) /
(n_elems + 1);
new_pose_ptr[row * 4 + 2] =
(group_pose_ptr[row * 4 + 2] * n_elems + pose_3d_ptr[row * 4 + 2]) /
(n_elems + 1);
new_pose_ptr[row * 4 + 3] =
(group_pose_ptr[row * 4 + 3] * n_elems + pose_3d_ptr[row * 4 + 3]) /
(n_elems + 1);
}
group_pose = new_pose;
group_indices.push_back(i);
}
}
return groups;
}
// =================================================================================================
cv::Mat TriangulatorInternal::merge_group(const std::vector<cv::Mat> &poses_3d, double min_score)
{
int num_poses = poses_3d.size();
int num_joints = poses_3d[0].rows;
// Merge poses to create initial pose
// Use only those triangulations with a high score
cv::Mat sum_poses = cv::Mat::zeros(poses_3d[0].size(), poses_3d[0].type());
double *sum_poses_ptr = sum_poses.ptr<double>(0);
std::vector<int> sum_mask(num_joints, 0);
for (int i = 0; i < num_poses; ++i)
{
const cv::Mat &pose = poses_3d[i];
const double *pose_ptr = pose.ptr<double>(0);
for (int j = 0; j < num_joints; ++j)
{
if (pose_ptr[j * 4 + 3] > min_score)
{
for (int k = 0; k < 4; ++k)
{
sum_poses_ptr[j * 4 + k] += pose_ptr[j * 4 + k];
}
sum_mask[j]++;
}
}
}
cv::Mat initial_pose_3d = cv::Mat::zeros(poses_3d[0].size(), poses_3d[0].type());
double *initial_pose_3d_ptr = initial_pose_3d.ptr<double>(0);
for (int j = 0; j < num_joints; ++j)
{
if (sum_mask[j] > 0)
{
for (int k = 0; k < 4; ++k)
{
initial_pose_3d_ptr[j * 4 + k] = sum_poses_ptr[j * 4 + k] / sum_mask[j];
}
}
}
// Use center as default if the initial pose is empty
std::vector<bool> jmask(num_joints, false);
cv::Point3d center(0, 0, 0);
int valid_joints = 0;
for (int j = 0; j < num_joints; ++j)
{
if (initial_pose_3d_ptr[j * 4 + 3] > min_score)
{
jmask[j] = true;
center.x += initial_pose_3d_ptr[j * 4 + 0];
center.y += initial_pose_3d_ptr[j * 4 + 1];
center.z += initial_pose_3d_ptr[j * 4 + 2];
valid_joints++;
}
}
if (valid_joints > 0)
{
center *= 1.0 / valid_joints;
}
for (int j = 0; j < num_joints; ++j)
{
if (!jmask[j])
{
initial_pose_3d_ptr[j * 4 + 0] = center.x;
initial_pose_3d_ptr[j * 4 + 1] = center.y;
initial_pose_3d_ptr[j * 4 + 2] = center.z;
}
}
// Drop joints with low scores and filter outlying joints using distance threshold
double offset = 0.1;
double max_dist = 1.2;
cv::Mat mask = cv::Mat::zeros(num_poses, num_joints, CV_8U);
cv::Mat distances = cv::Mat::zeros(num_poses, num_joints, CV_64F);
double *distances_ptr = distances.ptr<double>(0);
u_char *mask_ptr = mask.ptr<u_char>(0);
for (int i = 0; i < num_poses; ++i)
{
const cv::Mat &pose = poses_3d[i];
const double *pose_ptr = pose.ptr<double>(0);
for (int j = 0; j < num_joints; ++j)
{
double dx = pose_ptr[j * 4 + 0] - initial_pose_3d_ptr[j * 4 + 0];
double dy = pose_ptr[j * 4 + 1] - initial_pose_3d_ptr[j * 4 + 1];
double dz = pose_ptr[j * 4 + 2] - initial_pose_3d_ptr[j * 4 + 2];
double dist = std::sqrt(dx * dx + dy * dy + dz * dz);
distances_ptr[i * num_joints + j] = dist;
if (dist <= max_dist && pose_ptr[j * 4 + 3] > (min_score - offset))
{
mask_ptr[i * num_joints + j] = 1;
}
}
}
// Select the best-k proposals for each joint that are closest to the initial pose
int keep_best = 3;
cv::Mat best_k_mask = cv::Mat::zeros(num_poses, num_joints, CV_8U);
for (int j = 0; j < num_joints; ++j)
{
std::vector<std::pair<double, int>> valid_indices;
for (int i = 0; i < num_poses; ++i)
{
if (mask_ptr[i * num_joints + j])
{
auto item = std::make_pair(distances_ptr[i * num_joints + j], i);
valid_indices.push_back(item);
}
}
std::sort(valid_indices.begin(), valid_indices.end());
for (int k = 0; k < std::min(keep_best, static_cast<int>(valid_indices.size())); ++k)
{
best_k_mask.at<uchar>(valid_indices[k].second, j) = 1;
}
}
// Combine masks
mask = mask & best_k_mask;
mask_ptr = mask.ptr<u_char>(0);
// Compute the final pose
sum_poses = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
sum_poses_ptr = sum_poses.ptr<double>(0);
sum_mask = std::vector<int>(num_joints, 0);
for (int i = 0; i < num_poses; ++i)
{
const cv::Mat &pose = poses_3d[i];
const double *pose_ptr = pose.ptr<double>(0);
for (int j = 0; j < num_joints; ++j)
{
if (mask_ptr[i * num_joints + j] > 0)
{
for (int k = 0; k < 4; ++k)
{
sum_poses_ptr[j * 4 + k] += pose_ptr[j * 4 + k];
}
sum_mask[j]++;
}
}
}
cv::Mat final_pose_3d = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
double *final_pose_3d_ptr = final_pose_3d.ptr<double>(0);
for (int j = 0; j < num_joints; ++j)
{
if (sum_mask[j] > 0)
{
for (int k = 0; k < 4; ++k)
{
final_pose_3d_ptr[j * 4 + k] = sum_poses_ptr[j * 4 + k] / sum_mask[j];
}
}
}
return final_pose_3d;
}