#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>
#include <numeric>
#include <stdexcept>

#include <opencv2/opencv.hpp>

#include "camera.hpp"
#include "triangulator.hpp"

// =================================================================================================
// =================================================================================================

[[maybe_unused]] static void print_2d_mat(const cv::Mat &mat)
{
    // Ensure the matrix is 2D
    if (mat.dims != 2)
    {
        std::cerr << "Error: The matrix is not 2D." << std::endl;
        return;
    }

    // Retrieve matrix dimensions
    int rows = mat.rows;
    int cols = mat.cols;

    // Print the matrix in a NumPy-like style
    std::cout << "cv::Mat('shape': (" << rows << ", " << cols << ")";
    std::cout << ", 'data': [" << std::endl;

    for (int i = 0; i < rows; ++i)
    {
        std::cout << " [";
        for (int j = 0; j < cols; ++j)
        {
            std::cout << std::fixed << std::setprecision(3) << mat.at<double>(i, j);
            if (j < cols - 1)
            {
                std::cout << ", ";
            }
        }
        std::cout << "]";
        if (i < rows - 1)
        {
            std::cout << "," << std::endl;
        }
    }
    std::cout << "])" << std::endl;
}

// =================================================================================================

[[maybe_unused]] static void print_allpairs(
    const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs)
{
    std::cout << "All pairs: [" << std::endl;
    for (const auto &pair : all_pairs)
    {
        const auto &tuple_part = pair.first;
        const auto &pair_part = pair.second;

        // Print the tuple part
        std::cout << "("
                  << std::get<0>(tuple_part) << ", "
                  << std::get<1>(tuple_part) << ", "
                  << std::get<2>(tuple_part) << ", "
                  << std::get<3>(tuple_part) << ")";

        // Print the pair part
        std::cout << ", ("
                  << pair_part.first << ", "
                  << pair_part.second << ")"
                  << std::endl;
    }
    std::cout << "]" << std::endl;
}

// =================================================================================================
// =================================================================================================

CameraInternal::CameraInternal(const Camera &cam)
{
    this->cam = cam;

    // Convert the camera parameters to cv::Mat for OpenCV (deep copies of the caller's data)
    K = cv::Mat(3, 3, CV_64FC1, const_cast<double *>(&cam.K[0][0])).clone();
    DC = cv::Mat(static_cast<int>(cam.DC.size()), 1, CV_64FC1, const_cast<double *>(cam.DC.data())).clone();
    R = cv::Mat(3, 3, CV_64FC1, const_cast<double *>(&cam.R[0][0])).clone();
    T = cv::Mat(3, 1, CV_64FC1, const_cast<double *>(&cam.T[0][0])).clone();
}

// =================================================================================================

void CameraInternal::update_projection_matrix()
{
    // Calculate the OpenCV-style projection matrix P = K * [R | -R*T].
    // Note: this assumes T holds the camera position in world coordinates, so that a world
    // point X maps to camera coordinates as R * (X - T), consistent with project_poses().
    cv::Mat Tr, RT;
    Tr = R * (T * -1);
    cv::hconcat(R, Tr, RT);
    P = K * RT;
}

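// Example (illustrative only): with this convention a world point X projects to
// u ~ P * [X; 1] = K * R * (X - T). The camera center X = T maps to the degenerate point
// [0, 0, 0]^T, and a point one unit in front of an identity camera (R = I, T = 0,
// X = [0, 0, 1]^T) projects to the principal point (cx, cy).
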
// =================================================================================================
// =================================================================================================

TriangulatorInternal::TriangulatorInternal(double min_score)
{
    this->min_score = min_score;
}

// =================================================================================================

std::vector<std::vector<std::array<double, 4>>> TriangulatorInternal::triangulate_poses(
    const std::vector<std::vector<std::vector<std::array<double, 3>>>> &poses_2d,
    const std::vector<Camera> &cameras,
    const std::array<std::array<double, 3>, 2> &roomparams,
    const std::vector<std::string> &joint_names)
{
    // Check inputs
    if (poses_2d.empty())
    {
        throw std::invalid_argument("No 2D poses provided.");
    }
    if (poses_2d.size() != cameras.size())
    {
        throw std::invalid_argument("Number of cameras and 2D poses must be the same.");
    }
    if (!poses_2d[0].empty() && joint_names.size() != poses_2d[0][0].size())
    {
        throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
    }
    for (const auto &joint : core_joints)
    {
        if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end())
        {
            throw std::invalid_argument("Core joint '" + joint + "' not found in joint names.");
        }
    }

    // Convert inputs to internal formats
    std::vector<std::vector<cv::Mat>> poses_2d_mats;
    poses_2d_mats.reserve(cameras.size());
    for (size_t i = 0; i < cameras.size(); ++i)
    {
        size_t num_persons = poses_2d[i].size();
        size_t num_joints = num_persons > 0 ? poses_2d[i][0].size() : 0;
        std::vector<cv::Mat> camera_poses;
        camera_poses.reserve(num_persons);

        for (size_t j = 0; j < num_persons; ++j)
        {
            std::vector<int> dims = {(int)num_joints, 3};
            cv::Mat pose_mat(dims, CV_64F);
            for (size_t k = 0; k < num_joints; ++k)
            {
                for (size_t l = 0; l < 3; ++l)
                {
                    pose_mat.at<double>(k, l) = poses_2d[i][j][k][l];
                }
            }
            camera_poses.push_back(pose_mat);
        }
        poses_2d_mats.push_back(camera_poses);
    }
    std::vector<CameraInternal> internal_cameras;
    for (size_t i = 0; i < cameras.size(); ++i)
    {
        internal_cameras.emplace_back(cameras[i]);
    }
    std::vector<size_t> core_joint_idx;
    for (const auto &joint : core_joints)
    {
        auto it = std::find(joint_names.begin(), joint_names.end(), joint);
        core_joint_idx.push_back(std::distance(joint_names.begin(), it));
    }

    // Undistort 2D poses
    for (size_t i = 0; i < cameras.size(); ++i)
    {
        undistort_poses(poses_2d_mats[i], internal_cameras[i]);
        internal_cameras[i].update_projection_matrix();
    }

    // Create pairs of persons
    // Checks if the person was already matched to the last frame and if so only creates pairs
    // with those, else it creates all possible pairs.
    // Note: matching against the previous frame is not implemented here, so 'match' is always
    // false and every cross-camera pair is created.
    std::vector<int> num_persons;
    for (size_t i = 0; i < cameras.size(); ++i)
    {
        num_persons.push_back(poses_2d[i].size());
    }
    std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> all_pairs;
    for (size_t i = 0; i < cameras.size(); ++i)
    {
        for (size_t j = i + 1; j < cameras.size(); ++j)
        {
            for (size_t k = 0; k < poses_2d[i].size(); ++k)
            {
                for (size_t l = 0; l < poses_2d[j].size(); ++l)
                {
                    // Global person ids: offset the per-camera index by the number of persons
                    // seen by all previous cameras
                    int pid1 = std::accumulate(num_persons.begin(), num_persons.begin() + i, 0) + k;
                    int pid2 = std::accumulate(num_persons.begin(), num_persons.begin() + j, 0) + l;
                    bool match = false;

                    if (!match)
                    {
                        all_pairs.emplace_back(
                            std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2));
                    }
                }
            }
        }
    }

    // Calculate pair scores
    std::vector<std::pair<cv::Mat, double>> all_scored_poses;
    all_scored_poses.reserve(all_pairs.size());
    for (size_t i = 0; i < all_pairs.size(); ++i)
    {
        const auto &pids = all_pairs[i].first;

        // Extract camera parameters
        const auto &cam1 = internal_cameras[std::get<0>(pids)];
        const auto &cam2 = internal_cameras[std::get<1>(pids)];

        // Extract 2D poses
        const cv::Mat &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)];
        const cv::Mat &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)];

        // Select core joints
        std::vector<int> dims = {(int)core_joint_idx.size(), 3};
        cv::Mat pose1_core(dims, pose1.type());
        cv::Mat pose2_core(dims, pose2.type());
        for (size_t j = 0; j < core_joint_idx.size(); ++j)
        {
            size_t idx = core_joint_idx[j];
            pose1.row(idx).copyTo(pose1_core.row(j));
            pose2.row(idx).copyTo(pose2_core.row(j));
        }

        // Triangulate and score
        auto [pose3d, score] = triangulate_and_score(pose1_core, pose2_core, cam1, cam2, roomparams);
        all_scored_poses.emplace_back(pose3d, score);
    }

    // Drop low scoring poses
    std::vector<size_t> drop_indices;
    for (size_t i = 0; i < all_scored_poses.size(); ++i)
    {
        if (all_scored_poses[i].second < min_score)
        {
            drop_indices.push_back(i);
        }
    }
    // Erase from the back so the remaining indices stay valid
    // (an unsigned countdown loop with 'i >= 0' would never terminate).
    for (auto it = drop_indices.rbegin(); it != drop_indices.rend(); ++it)
    {
        all_scored_poses.erase(all_scored_poses.begin() + *it);
        all_pairs.erase(all_pairs.begin() + *it);
    }

    // Group pairs that share a person
    std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> groups;
    groups = calc_grouping(all_pairs, all_scored_poses, min_score);

    // Calculate full 3D poses
    std::vector<cv::Mat> all_full_poses;
    all_full_poses.reserve(all_pairs.size());
    for (size_t i = 0; i < all_pairs.size(); ++i)
    {
        const auto &pids = all_pairs[i].first;
        const auto &cam1 = internal_cameras[std::get<0>(pids)];
        const auto &cam2 = internal_cameras[std::get<1>(pids)];
        const auto &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)];
        const auto &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)];

        auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams);
        all_full_poses.push_back(pose3d);
    }

    // Merge groups
    std::vector<cv::Mat> all_merged_poses;
    all_merged_poses.reserve(groups.size());
    for (size_t i = 0; i < groups.size(); ++i)
    {
        const auto &group = groups[i];
        std::vector<cv::Mat> poses;
        for (const auto &idx : std::get<2>(group))
        {
            poses.push_back(all_full_poses[idx]);
        }

        auto merged_pose = merge_group(poses, min_score);
        all_merged_poses.push_back(merged_pose);
    }

    // Convert to output format
    std::vector<std::vector<std::array<double, 4>>> poses_3d;
    poses_3d.reserve(all_merged_poses.size());
    for (size_t i = 0; i < all_merged_poses.size(); ++i)
    {
        const auto &mat = all_merged_poses[i];
        std::vector<std::array<double, 4>> pose;
        size_t num_joints = mat.rows;
        pose.reserve(num_joints);

        for (size_t j = 0; j < num_joints; ++j)
        {
            std::array<double, 4> point;
            for (size_t k = 0; k < 4; ++k)
            {
                point[k] = mat.at<double>(j, k);
            }
            pose.push_back(point);
        }

        poses_3d.push_back(std::move(pose));
    }

    return poses_3d;
}

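// -------------------------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the implementation). It assumes a Camera can be filled
// with the fields referenced in this file (K, DC, R, T, width, height, type), that poses_2d is
// laid out as [camera][person][joint] = {x, y, score} (which is how it is read above), and that
// joint_names matches the joint order and contains all core_joints. load_calibration() and
// run_2d_pose_estimation() are hypothetical helpers. The result is one merged pose per group,
// laid out as [person][joint] = {x, y, z, score}.
//
//     TriangulatorInternal triangulator(0.5);                       // example min_score
//     std::vector<Camera> cameras = load_calibration();             // hypothetical helper
//     auto poses_2d = run_2d_pose_estimation(frames);               // hypothetical helper
//     std::array<std::array<double, 3>, 2> room = {{{0, 0, 1}, {5, 6, 3}}};  // center, size
//     auto poses_3d = triangulator.triangulate_poses(poses_2d, cameras, room, joint_names);
// -------------------------------------------------------------------------------------------------
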
// =================================================================================================

void TriangulatorInternal::reset()
{
    last_poses_3d.clear();
}

// =================================================================================================

void TriangulatorInternal::undistort_points(cv::Mat &points, CameraInternal &icam)
{
    int width = icam.cam.width;
    int height = icam.cam.height;

    // Undistort camera matrix
    cv::Mat newK = cv::getOptimalNewCameraMatrix(
        icam.K, icam.DC, cv::Size(width, height), 1, cv::Size(width, height));

    // Undistort points
    cv::undistortPoints(points, points, icam.K, icam.DC, cv::noArray(), newK);

    // Update the camera parameters: from here on the camera is treated as distortion-free with
    // the new intrinsics, which is consistent with how update_projection_matrix() and
    // project_poses() use them afterwards.
    icam.K = newK;
    icam.DC = cv::Mat::zeros(5, 1, CV_64F);
}

// =================================================================================================

void TriangulatorInternal::undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam)
{
    for (size_t p = 0; p < poses.size(); ++p)
    {
        int num_joints = poses[p].rows;

        // Extract the (x, y) coordinates
        std::vector<int> dims = {num_joints, 2};
        cv::Mat points(dims, CV_64F);
        for (int j = 0; j < num_joints; ++j)
        {
            points.at<double>(j, 0) = poses[p].at<double>(j, 0);
            points.at<double>(j, 1) = poses[p].at<double>(j, 1);
        }

        // Undistort the points
        undistort_points(points, icam);

        // Update the original poses with the undistorted points
        for (int j = 0; j < num_joints; ++j)
        {
            poses[p].at<double>(j, 0) = points.at<double>(j, 0);
            poses[p].at<double>(j, 1) = points.at<double>(j, 1);
        }

        // Mask out points that are far outside the image (points slightly outside are still valid)
        double offset = (icam.cam.width + icam.cam.height) / 40.0;
        for (int j = 0; j < num_joints; ++j)
        {
            double x = poses[p].at<double>(j, 0);
            double y = poses[p].at<double>(j, 1);
            bool in_x = x >= -offset && x < icam.cam.width + offset;
            bool in_y = y >= -offset && y < icam.cam.height + offset;
            if (!in_x || !in_y)
            {
                poses[p].at<double>(j, 0) = 0;
                poses[p].at<double>(j, 1) = 0;
                poses[p].at<double>(j, 2) = 0;
            }
        }
    }
}

// =================================================================================================

std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>> TriangulatorInternal::project_poses(
    const std::vector<cv::Mat> &bodies3D, const CameraInternal &icam, bool calc_dists)
{
    size_t num_persons = bodies3D.size();
    std::vector<cv::Mat> bodies2D_list(num_persons);
    std::vector<cv::Mat> dists_list(num_persons);

    for (size_t i = 0; i < num_persons; ++i)
    {
        const cv::Mat &body3D = bodies3D[i];
        size_t num_joints = body3D.size[0];

        // Split up vector
        std::vector<int> dimsA = {(int)num_joints, 3};
        cv::Mat points3d(dimsA, CV_64F);
        for (size_t j = 0; j < num_joints; ++j)
        {
            points3d.at<double>(j, 0) = body3D.at<double>(j, 0);
            points3d.at<double>(j, 1) = body3D.at<double>(j, 1);
            points3d.at<double>(j, 2) = body3D.at<double>(j, 2);
        }

        // Project from world to camera coordinate system
        cv::Mat T_repeated = cv::repeat(icam.T, 1, (int)num_joints);
        cv::Mat xyz = (icam.R * (points3d.t() - T_repeated)).t();

        // Set points behind the camera to zero
        for (size_t j = 0; j < num_joints; ++j)
        {
            if (xyz.at<double>(j, 2) <= 0)
            {
                xyz.at<double>(j, 0) = 0;
                xyz.at<double>(j, 1) = 0;
                xyz.at<double>(j, 2) = 0;
            }
        }

        // Calculate distance from camera center
        cv::Mat dists;
        if (calc_dists)
        {
            dists = xyz.mul(xyz);
            cv::reduce(dists, dists, 1, cv::REDUCE_SUM, CV_64F);
            cv::sqrt(dists, dists);
        }

        // Project to camera system
        cv::Mat uv;
        if (icam.cam.type == "fisheye")
        {
            // Assumption: fisheye cameras follow the cv::fisheye model and provide at least four
            // distortion coefficients. The original branch was empty, which left uv unset.
            cv::Mat DCc = icam.DC.rowRange(0, 4).clone();
            cv::fisheye::projectPoints(
                xyz.reshape(3, xyz.rows), uv,
                cv::Mat::zeros(3, 1, CV_64F), cv::Mat::zeros(3, 1, CV_64F), icam.K, DCc);
        }
        else
        {
            cv::Mat DCc = icam.DC.rowRange(0, 5).clone();
            cv::projectPoints(
                xyz, cv::Mat::zeros(3, 1, CV_64F), cv::Mat::zeros(3, 1, CV_64F), icam.K, DCc, uv);
        }
        uv = uv.reshape(1, {xyz.rows, 2});

        // Add scores again
        std::vector<int> dimsB = {(int)num_joints, 3};
        cv::Mat bodies2D(dimsB, CV_64F);
        for (size_t j = 0; j < num_joints; ++j)
        {
            bodies2D.at<double>(j, 0) = uv.at<double>(j, 0);
            bodies2D.at<double>(j, 1) = uv.at<double>(j, 1);
            bodies2D.at<double>(j, 2) = body3D.at<double>(j, 3);
        }

        // Filter invalid projections
        cv::Mat valid_x = (bodies2D.col(0) >= 0) & (bodies2D.col(0) < icam.cam.width);
        cv::Mat valid_y = (bodies2D.col(1) >= 0) & (bodies2D.col(1) < icam.cam.height);
        cv::Mat vis = valid_x & valid_y;
        for (size_t j = 0; j < num_joints; ++j)
        {
            if (!vis.at<uchar>(j))
            {
                bodies2D.at<double>(j, 0) = 0;
                bodies2D.at<double>(j, 1) = 0;
                bodies2D.at<double>(j, 2) = 0;
            }
        }

        // Store results
        bodies2D_list[i] = bodies2D;
        dists_list[i] = dists;
    }

    return std::make_tuple(bodies2D_list, dists_list);
}

// =================================================================================================

cv::Mat TriangulatorInternal::score_projection(
    const cv::Mat &pose1,
    const cv::Mat &repro1,
    const cv::Mat &dists1,
    const cv::Mat &mask,
    double iscale)
{
    double min_score = 0.1;
    double penalty = iscale;

    // Calculate error
    cv::Mat diff = pose1.colRange(0, 2) - repro1.colRange(0, 2);
    cv::Mat error1;
    error1 = diff.mul(diff);
    cv::reduce(error1, error1, 1, cv::REDUCE_SUM, CV_64F);
    cv::sqrt(error1, error1);
    error1.setTo(0.0, ~mask);

    // Set errors of invisible reprojections to a high value
    cv::Mat mask_invisible = (repro1.col(2) < min_score);
    error1.setTo(penalty, mask_invisible);

    // Scale error by image size and distance to the camera
    error1 = cv::min(error1, (iscale / 4.0)) / iscale;

    // Compute scaling factor
    double dscale1 = std::sqrt(cv::mean(dists1).val[0] / 3.5);

    // Scale errors
    error1 *= dscale1;

    // Convert errors to a score
    cv::Mat score1 = 1.0 / (1.0 + error1 * 10);

    return score1;
}

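// Note (summary of the formula above): with reprojection error e in pixels, image scale
// s = (width + height) / 2 and mean camera distance d, each joint is scored as
//     score = 1 / (1 + 10 * sqrt(d / 3.5) * min(e, s / 4) / s),
// so a perfect reprojection gives 1.0 and larger errors (capped at s / 4) give progressively
// lower scores, more aggressively for joints that are far away from the camera.
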
// =================================================================================================

std::pair<cv::Mat, double> TriangulatorInternal::triangulate_and_score(
    const cv::Mat &pose1,
    const cv::Mat &pose2,
    const CameraInternal &cam1,
    const CameraInternal &cam2,
    const std::array<std::array<double, 3>, 2> &roomparams)
{
    // Mask out invisible joints
    double min_score = 0.1;
    cv::Mat mask1a = (pose1.col(2) >= min_score);
    cv::Mat mask2a = (pose2.col(2) >= min_score);
    cv::Mat mask = mask1a & mask2a;

    // If too few joints are visible, return a low score
    int num_visible = cv::countNonZero(mask);
    if (num_visible < 3)
    {
        std::vector<int> dims = {pose1.rows, 4};
        cv::Mat pose3d(dims, CV_64F, cv::Scalar(0));
        double score = 0.0;
        return std::make_pair(pose3d, score);
    }

    // Triangulate points (only the joints that are visible in both views)
    std::vector<int> dimsA = {2, num_visible};
    cv::Mat points1(dimsA, CV_64F);
    cv::Mat points2(dimsA, CV_64F);
    int idx = 0;
    for (int i = 0; i < pose1.rows; ++i)
    {
        if (mask.at<uchar>(i) > 0)
        {
            points1.at<double>(0, idx) = pose1.at<double>(i, 0);
            points1.at<double>(1, idx) = pose1.at<double>(i, 1);
            points2.at<double>(0, idx) = pose2.at<double>(i, 0);
            points2.at<double>(1, idx) = pose2.at<double>(i, 1);
            idx++;
        }
    }

    cv::Mat points3d_h, points3d;
    cv::triangulatePoints(cam1.P, cam2.P, points1, points2, points3d_h);
    cv::convertPointsFromHomogeneous(points3d_h.t(), points3d);

    // Create the 3D pose matrix; rows of invisible joints stay at zero
    std::vector<int> dimsB = {pose1.rows, 4};
    cv::Mat pose3d(dimsB, CV_64F, cv::Scalar(0));
    idx = 0;
    for (int i = 0; i < pose1.rows; ++i)
    {
        if (mask.at<uchar>(i) > 0)
        {
            // points3d only holds the triangulated (visible) joints, so it is indexed with the
            // running counter, not with the joint index.
            cv::Point3d pt = points3d.at<cv::Point3d>(idx);
            pose3d.at<double>(i, 0) = pt.x;
            pose3d.at<double>(i, 1) = pt.y;
            pose3d.at<double>(i, 2) = pt.z;
            pose3d.at<double>(i, 3) = 1.0;
            idx++;
        }
    }

    // Check if the triangulated points are inside the room bounds
    // (only the visible joints are used; the zeroed rows would skew the bounds)
    cv::Mat visible_xyz = points3d.reshape(1, num_visible);
    cv::Mat mean, mins, maxs;
    cv::reduce(visible_xyz, mean, 0, cv::REDUCE_AVG);
    cv::reduce(visible_xyz, mins, 0, cv::REDUCE_MIN);
    cv::reduce(visible_xyz, maxs, 0, cv::REDUCE_MAX);
    double wdist = 0.1;
    std::array<double, 3> center = roomparams[0];
    std::array<double, 3> size = roomparams[1];
    for (int i = 0; i < 3; ++i)
    {
        if (mean.at<double>(i) > center[i] + size[i] / 2 ||
            mean.at<double>(i) < center[i] - size[i] / 2 ||
            maxs.at<double>(i) > center[i] + size[i] / 2 + wdist ||
            mins.at<double>(i) < center[i] - size[i] / 2 - wdist)
        {
            // Very low score if outside room
            return {pose3d, 0.001};
        }
    }

    // Calculate reprojections
    std::vector<cv::Mat> poses_3d = {pose3d};
    cv::Mat repro1, dists1, repro2, dists2;
    auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true);
    auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true);
    repro1 = repro1s[0];
    dists1 = dists1s[0];
    repro2 = repro2s[0];
    dists2 = dists2s[0];

    // Calculate scores for each view
    double iscale = (cam1.cam.width + cam1.cam.height) / 2.0;
    cv::Mat score1 = score_projection(pose1, repro1, dists1, mask, iscale);
    cv::Mat score2 = score_projection(pose2, repro2, dists2, mask, iscale);

    // Combine scores
    cv::Mat scores = (score1 + score2) / 2.0;

    // Add scores to 3D pose
    for (int i = 0; i < pose1.rows; ++i)
    {
        if (mask.at<uchar>(i) > 0)
        {
            pose3d.at<double>(i, 3) = scores.at<double>(i);
        }
    }

    // Drop lowest scores
    int drop_k = static_cast<int>(pose1.rows * 0.2);
    std::sort(scores.begin<double>(), scores.end<double>());

    // Calculate final score
    double score = 0.0;
    for (int i = drop_k; i < scores.rows; i++)
    {
        score += scores.at<double>(i);
    }
    score /= (pose1.rows - drop_k);

    return std::make_pair(pose3d, score);
}

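// Worked example for the room check above: with roomparams = {{0, 0, 1}, {5, 6, 3}} (center and
// size per axis) and wdist = 0.1, the per-joint extrema must stay within x in [-2.6, 2.6],
// y in [-3.1, 3.1] and z in [-0.6, 2.6], while the pose mean must stay inside the box itself;
// otherwise the pair is kept but scored 0.001. Units follow the extrinsic calibration
// (typically metres).
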
// =================================================================================================

std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> TriangulatorInternal::calc_grouping(
    const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
    const std::vector<std::pair<cv::Mat, double>> &all_scored_poses,
    double min_score)
{
    double max_center_dist = 0.6;
    double max_joint_avg_dist = 0.3;

    // Nothing to group if all pairs were dropped
    if (all_pairs.empty())
    {
        return {};
    }
    size_t num_joints = all_scored_poses[0].first.rows;

    // Calculate pose centers
    std::vector<cv::Point3d> centers;
    for (size_t i = 0; i < all_pairs.size(); ++i)
    {
        const cv::Mat &pose_3d = all_scored_poses[i].first;
        cv::Point3d center(0, 0, 0);
        size_t num_valid = 0;
        for (size_t j = 0; j < num_joints; ++j)
        {
            if (pose_3d.at<double>(j, 3) > min_score)
            {
                center.x += pose_3d.at<double>(j, 0);
                center.y += pose_3d.at<double>(j, 1);
                center.z += pose_3d.at<double>(j, 2);
                num_valid++;
            }
        }
        if (num_valid > 0)
        {
            center.x /= num_valid;
            center.y /= num_valid;
            center.z /= num_valid;
        }
        centers.push_back(center);
    }

    // Calculate groups
    // A group is defined as a tuple of center, pose, and all-pairs-indices of members
    std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> groups;
    for (size_t i = 0; i < all_pairs.size(); ++i)
    {
        const cv::Mat &pose_3d = all_scored_poses[i].first;
        const cv::Point3d &center = centers[i];
        double best_dist = std::numeric_limits<double>::infinity();
        int best_group = -1;

        for (size_t j = 0; j < groups.size(); ++j)
        {
            auto &group = groups[j];
            cv::Point3d &group_center = std::get<0>(group);
            cv::Mat &group_pose = std::get<1>(group);

            // Check if the center is close enough
            if (cv::norm(group_center - center) < max_center_dist)
            {
                // Calculate average joint distance
                std::vector<double> dists;
                for (size_t row = 0; row < num_joints; row++)
                {
                    if (pose_3d.at<double>(row, 3) > min_score && group_pose.at<double>(row, 3) > min_score)
                    {
                        cv::Point3d p1(pose_3d.at<double>(row, 0), pose_3d.at<double>(row, 1), pose_3d.at<double>(row, 2));
                        cv::Point3d p2(group_pose.at<double>(row, 0), group_pose.at<double>(row, 1), group_pose.at<double>(row, 2));
                        dists.push_back(cv::norm(p1 - p2));
                    }
                }
                double dist = std::numeric_limits<double>::infinity();
                if (!dists.empty())
                {
                    dist = std::accumulate(dists.begin(), dists.end(), 0.0) / dists.size();
                }

                // Check if the average joint distance is close enough
                if (dist < max_joint_avg_dist && dist < best_dist)
                {
                    best_dist = dist;
                    best_group = j;
                }
            }
        }

        if (best_group == -1)
        {
            // Create a new group
            std::vector<int> new_indices{static_cast<int>(i)};
            groups.emplace_back(center, pose_3d.clone(), new_indices);
        }
        else
        {
            // Update existing group with a running average of center and joints
            auto &group = groups[best_group];
            cv::Point3d &group_center = std::get<0>(group);
            cv::Mat &group_pose = std::get<1>(group);
            std::vector<int> &group_indices = std::get<2>(group);

            double n_elems = group_indices.size();
            group_center = (group_center * n_elems + center) / (n_elems + 1);

            cv::Mat new_pose = group_pose.clone();
            for (size_t row = 0; row < num_joints; row++)
            {
                new_pose.at<double>(row, 0) = (group_pose.at<double>(row, 0) * n_elems + pose_3d.at<double>(row, 0)) / (n_elems + 1);
                new_pose.at<double>(row, 1) = (group_pose.at<double>(row, 1) * n_elems + pose_3d.at<double>(row, 1)) / (n_elems + 1);
                new_pose.at<double>(row, 2) = (group_pose.at<double>(row, 2) * n_elems + pose_3d.at<double>(row, 2)) / (n_elems + 1);
                new_pose.at<double>(row, 3) = (group_pose.at<double>(row, 3) * n_elems + pose_3d.at<double>(row, 3)) / (n_elems + 1);
            }
            group_pose = new_pose;
            group_indices.push_back(i);
        }
    }
    return groups;
}

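// Note on the grouping above: a candidate pose joins an existing group only if its center lies
// within max_center_dist (0.6) of the group center and the mean distance of the jointly valid
// joints is below max_joint_avg_dist (0.3); otherwise it seeds a new group. Group centers and
// joints are running averages over all members. The thresholds are in the units of the
// extrinsic calibration (assumed to be metres here).
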
// =================================================================================================

cv::Mat TriangulatorInternal::merge_group(const std::vector<cv::Mat> &poses_3d, double min_score)
{
    int num_poses = static_cast<int>(poses_3d.size());
    int num_joints = poses_3d[0].rows;

    // Merge poses to create initial pose
    // Use only those triangulations with a high score
    cv::Mat sum_poses = cv::Mat::zeros(poses_3d[0].size(), poses_3d[0].type());
    cv::Mat sum_mask = cv::Mat::zeros(poses_3d[0].size(), CV_32S);
    for (const auto &pose : poses_3d)
    {
        for (int j = 0; j < num_joints; ++j)
        {
            if (pose.at<double>(j, 3) > min_score)
            {
                sum_poses.row(j) += pose.row(j);
                sum_mask.at<int>(j, 3) += 1;
            }
        }
    }
    cv::Mat initial_pose_3d = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
    for (int j = 0; j < num_joints; ++j)
    {
        if (sum_mask.at<int>(j, 3) > 0)
        {
            initial_pose_3d.row(j) = sum_poses.row(j) / sum_mask.at<int>(j, 3);
        }
    }

    // Use the center as a default for joints that are missing from the initial pose
    cv::Mat jmask = cv::Mat::zeros(num_joints, 1, CV_8U);
    cv::Point3d center(0, 0, 0);
    int valid_joints = 0;
    for (int j = 0; j < num_joints; ++j)
    {
        if (initial_pose_3d.at<double>(j, 3) > 0.0)
        {
            jmask.at<uchar>(j) = 1;
            center += cv::Point3d(initial_pose_3d.at<double>(j, 0), initial_pose_3d.at<double>(j, 1), initial_pose_3d.at<double>(j, 2));
            valid_joints++;
        }
    }
    if (valid_joints > 0)
    {
        center *= 1.0 / valid_joints;
    }
    for (int j = 0; j < num_joints; ++j)
    {
        if (jmask.at<uchar>(j) == 0)
        {
            initial_pose_3d.at<double>(j, 0) = center.x;
            initial_pose_3d.at<double>(j, 1) = center.y;
            initial_pose_3d.at<double>(j, 2) = center.z;
        }
    }

    // Drop joints with low scores and filter outlying joints using distance threshold
    double offset = 0.1;
    double max_dist = 1.2;
    cv::Mat mask = cv::Mat::zeros(num_poses, num_joints, CV_8U);
    cv::Mat distances = cv::Mat::zeros(num_poses, num_joints, CV_64F);
    for (int i = 0; i < num_poses; ++i)
    {
        for (int j = 0; j < num_joints; ++j)
        {
            cv::Point3d joint_i(poses_3d[i].at<double>(j, 0), poses_3d[i].at<double>(j, 1), poses_3d[i].at<double>(j, 2));
            cv::Point3d joint_initial(initial_pose_3d.at<double>(j, 0), initial_pose_3d.at<double>(j, 1), initial_pose_3d.at<double>(j, 2));
            double distance = cv::norm(joint_i - joint_initial);
            distances.at<double>(i, j) = distance;
            if (distance <= max_dist && poses_3d[i].at<double>(j, 3) > (min_score - offset))
            {
                mask.at<uchar>(i, j) = 1;
            }
        }
    }

    // Select the best-k proposals for each joint that are closest to the initial pose
    int keep_best = 3;
    cv::Mat best_k_mask = cv::Mat::zeros(num_poses, num_joints, CV_8U);
    for (int j = 0; j < num_joints; ++j)
    {
        std::vector<std::pair<double, int>> valid_indices;
        for (int i = 0; i < num_poses; ++i)
        {
            if (mask.at<uchar>(i, j))
            {
                valid_indices.push_back({distances.at<double>(i, j), i});
            }
        }
        std::sort(valid_indices.begin(), valid_indices.end());
        for (int k = 0; k < std::min(keep_best, static_cast<int>(valid_indices.size())); ++k)
        {
            best_k_mask.at<uchar>(valid_indices[k].second, j) = 1;
        }
    }

    // Combine masks
    mask = mask & best_k_mask;

    // Compute the final pose
    sum_poses = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
    sum_mask = cv::Mat::zeros(sum_mask.size(), CV_32S);
    for (int i = 0; i < num_poses; ++i)
    {
        for (int j = 0; j < num_joints; ++j)
        {
            if (mask.at<uchar>(i, j))
            {
                sum_poses.row(j) += poses_3d[i].row(j);
                sum_mask.at<int>(j, 3) += 1;
            }
        }
    }
    cv::Mat final_pose_3d = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
    for (int j = 0; j < num_joints; ++j)
    {
        if (sum_mask.at<int>(j, 3) > 0)
        {
            final_pose_3d.row(j) = sum_poses.row(j) / sum_mask.at<int>(j, 3);
        }
    }

    return final_pose_3d;
}