#include #include #include #include "camera.hpp" #include "triangulator.hpp" // ================================================================================================= // ================================================================================================= [[maybe_unused]] static void print_2d_mat(const cv::Mat &mat) { // Ensure the matrix is 2D if (mat.dims != 2) { std::cerr << "Error: The matrix is not 2D." << std::endl; return; } // Retrieve matrix dimensions int rows = mat.rows; int cols = mat.cols; // Print the matrix in a NumPy-like style std::cout << "cv::Mat('shape': (" << rows << ", " << cols << ")"; std::cout << ", 'data': [" << std::endl; for (int i = 0; i < rows; ++i) { std::cout << " ["; for (int j = 0; j < cols; ++j) { std::cout << std::fixed << std::setprecision(3) << mat.at(i, j); if (j < cols - 1) { std::cout << ", "; } } std::cout << "]"; if (i < rows - 1) { std::cout << "," << std::endl; } } std::cout << "])" << std::endl; } // ================================================================================================= [[maybe_unused]] static void print_allpairs( const std::vector, std::pair>> &all_pairs) { std::cout << "All pairs: [" << std::endl; for (const auto &pair : all_pairs) { const auto &tuple_part = pair.first; const auto &pair_part = pair.second; // Print the tuple part std::cout << "(" << std::get<0>(tuple_part) << ", " << std::get<1>(tuple_part) << ", " << std::get<2>(tuple_part) << ", " << std::get<3>(tuple_part) << ")"; // Print the pair part std::cout << ", (" << pair_part.first << ", " << pair_part.second << ")" << std::endl; } std::cout << "]" << std::endl; } // ================================================================================================= // ================================================================================================= CameraInternal::CameraInternal(const Camera &cam) { this->cam = cam; // Convert camera matrix to cv::Mat for OpenCV K = cv::Mat(3, 3, CV_64FC1, const_cast(&cam.K[0][0])).clone(); DC = cv::Mat(cam.DC.size(), 1, CV_64FC1, const_cast(cam.DC.data())).clone(); R = cv::Mat(3, 3, CV_64FC1, const_cast(&cam.R[0][0])).clone(); T = cv::Mat(3, 1, CV_64FC1, const_cast(&cam.T[0][0])).clone(); } // ================================================================================================= void CameraInternal::update_projection_matrix() { // Calculate opencv-style projection matrix cv::Mat Tr, RT; Tr = R * (T * -1); cv::hconcat(R, Tr, RT); P = K * RT; } // ================================================================================================= // ================================================================================================= TriangulatorInternal::TriangulatorInternal(double min_score) { this->min_score = min_score; } // ================================================================================================= std::vector>> TriangulatorInternal::triangulate_poses( const std::vector>>> &poses_2d, const std::vector &cameras, const std::array, 2> &roomparams, const std::vector &joint_names) { // Check inputs if (poses_2d.empty()) { throw std::invalid_argument("No 2D poses provided."); } if (poses_2d.size() != cameras.size()) { throw std::invalid_argument("Number of cameras and 2D poses must be the same."); } if (joint_names.size() != poses_2d[0][0].size()) { throw std::invalid_argument("Number of joint names and 2D poses must be the same."); } for (const auto &joint : core_joints) { if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end()) { throw std::invalid_argument("Core joint '" + joint + "' not found in joint names."); } } // Convert inputs to internal formats std::vector> poses_2d_mats; poses_2d_mats.reserve(cameras.size()); for (size_t i = 0; i < cameras.size(); ++i) { size_t num_persons = poses_2d[i].size(); size_t num_joints = poses_2d[i][0].size(); std::vector camera_poses; camera_poses.reserve(num_persons); for (size_t j = 0; j < num_persons; ++j) { std::vector dims = {(int)num_joints, 3}; cv::Mat pose_mat = cv::Mat(dims, CV_64F); // Use pointer to copy data efficiently double *mat_ptr = pose_mat.ptr(0); for (size_t k = 0; k < num_joints; ++k) { const auto &joint = poses_2d[i][j][k]; mat_ptr[k * 3 + 0] = joint[0]; mat_ptr[k * 3 + 1] = joint[1]; mat_ptr[k * 3 + 2] = joint[2]; } camera_poses.push_back(std::move(pose_mat)); } poses_2d_mats.push_back(std::move(camera_poses)); } std::vector internal_cameras; for (size_t i = 0; i < cameras.size(); ++i) { internal_cameras.emplace_back(cameras[i]); } std::vector core_joint_idx; for (const auto &joint : core_joints) { auto it = std::find(joint_names.begin(), joint_names.end(), joint); core_joint_idx.push_back(std::distance(joint_names.begin(), it)); } std::vector> core_limbs_idx; for (const auto &limb : core_limbs) { auto it1 = std::find(core_joints.begin(), core_joints.end(), limb.first); auto it2 = std::find(core_joints.begin(), core_joints.end(), limb.second); core_limbs_idx.push_back({(size_t)std::distance(core_joints.begin(), it1), (size_t)std::distance(core_joints.begin(), it2)}); } // Undistort 2D poses #pragma omp parallel for for (size_t i = 0; i < cameras.size(); ++i) { undistort_poses(poses_2d_mats[i], internal_cameras[i]); internal_cameras[i].update_projection_matrix(); } // Project last 3D poses to 2D std::vector, std::vector>> last_poses_2d; if (!last_poses_3d.empty()) { // Select core joints std::vector last_core_poses; last_core_poses.resize(last_poses_3d.size()); #pragma omp parallel for for (size_t i = 0; i < last_poses_3d.size(); ++i) { cv::Mat &pose = last_poses_3d[i]; std::vector dims = {(int)core_joint_idx.size(), 4}; cv::Mat last_poses_core(dims, pose.type()); for (size_t j = 0; j < core_joint_idx.size(); ++j) { pose.row(core_joint_idx[j]).copyTo(last_poses_core.row(j)); } last_core_poses[i] = last_poses_core; } // Project last_poses_2d.resize(cameras.size()); #pragma omp parallel for for (size_t i = 0; i < cameras.size(); ++i) { auto [poses2d, dists] = project_poses(last_core_poses, internal_cameras[i], true); last_poses_2d[i] = std::make_tuple(poses2d, dists); } } // Check matches to old poses double threshold = min_score - 0.2; std::map>> scored_pasts; if (!last_poses_3d.empty()) { // Calculate index pairs and initialize vectors std::vector> indices_ijk; for (size_t i = 0; i < cameras.size(); ++i) { size_t num_last_persons = std::get<0>(last_poses_2d[i]).size(); scored_pasts[i] = std::map>(); for (size_t j = 0; j < num_last_persons; ++j) { size_t num_new_persons = poses_2d_mats[i].size(); scored_pasts[i][j] = std::vector(); for (size_t k = 0; k < num_new_persons; ++k) { indices_ijk.push_back({i, j, k}); } } } std::vector> indices_ik; for (size_t i = 0; i < cameras.size(); ++i) { size_t num_new_persons = poses_2d_mats[i].size(); for (size_t k = 0; k < num_new_persons; ++k) { indices_ik.push_back({i, k}); } } // Precalculate core poses std::vector poses_2d_mats_core_list; poses_2d_mats_core_list.resize(indices_ik.size()); std::vector> mats_core_map; mats_core_map.resize(cameras.size()); for (size_t i = 0; i < cameras.size(); ++i) { size_t num_new_persons = poses_2d_mats[i].size(); for (size_t k = 0; k < num_new_persons; ++k) { mats_core_map[i].push_back(0); } } #pragma omp parallel for for (size_t e = 0; e < indices_ik.size(); ++e) { const auto [i, k] = indices_ik[e]; const cv::Mat &pose = poses_2d_mats[i][k]; std::vector dims = {(int)core_joint_idx.size(), 3}; cv::Mat pose_core(dims, pose.type()); for (size_t j = 0; j < core_joint_idx.size(); ++j) { pose.row(core_joint_idx[j]).copyTo(pose_core.row(j)); } poses_2d_mats_core_list[e] = pose_core; mats_core_map[i][k] = e; } // Calculate matching score #pragma omp parallel for for (size_t e = 0; e < indices_ijk.size(); ++e) { const auto [i, j, k] = indices_ijk[e]; const cv::Mat &last_pose = std::get<0>(last_poses_2d[i])[j]; const cv::Mat &last_dist = std::get<1>(last_poses_2d[i])[j]; const cv::Mat &new_pose = poses_2d_mats_core_list[mats_core_map[i][k]]; double score = calc_pose_score(new_pose, last_pose, last_dist, internal_cameras[i]); if (score > threshold) { #pragma omp critical { scored_pasts[i][j].push_back(k); } } } } // Create pairs of persons // Checks if the person was already matched to the last frame and if so only creates pairs // with those, else it creates all possible pairs std::vector num_persons_sum; for (size_t i = 0; i < cameras.size(); ++i) { int nsum = poses_2d[i].size(); if (i > 0) { nsum += num_persons_sum[i - 1]; } num_persons_sum.push_back(nsum); } std::vector, std::pair>> all_pairs; std::vector> indices; for (size_t i = 0; i < cameras.size(); ++i) { for (size_t j = i + 1; j < cameras.size(); ++j) { for (size_t k = 0; k < poses_2d[i].size(); ++k) { for (size_t l = 0; l < poses_2d[j].size(); ++l) { indices.push_back({i, j, k, l}); } } } } #pragma omp parallel for ordered schedule(dynamic) for (size_t e = 0; e < indices.size(); ++e) { const auto [i, j, k, l] = indices[e]; int pid1 = num_persons_sum[i] + k; int pid2 = num_persons_sum[k] + l; bool match = false; if (!last_poses_3d.empty()) { for (size_t m = 0; m < last_poses_3d.size(); ++m) { auto &smi = scored_pasts[i][m]; auto &smj = scored_pasts[j][m]; bool in_smi = std::find(smi.begin(), smi.end(), k) != smi.end(); bool in_smj = std::find(smj.begin(), smj.end(), l) != smj.end(); if (in_smi && in_smj) { match = true; auto item = std::make_pair( std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2)); #pragma omp ordered all_pairs.push_back(item); } else if (in_smi || in_smj) { match = true; } } } if (!match) { auto item = std::make_pair( std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2)); // Needed to prevent randomized grouping/merging with slightly different results #pragma omp ordered all_pairs.push_back(item); } } // Calculate pair scores std::vector> all_scored_poses; all_scored_poses.resize(all_pairs.size()); #pragma omp parallel for for (size_t i = 0; i < all_pairs.size(); ++i) { const auto &pids = all_pairs[i].first; // Extract camera parameters const auto &cam1 = internal_cameras[std::get<0>(pids)]; const auto &cam2 = internal_cameras[std::get<1>(pids)]; // Extract 2D poses const cv::Mat &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)]; const cv::Mat &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)]; // Select core joints std::vector dims = {(int)core_joint_idx.size(), 3}; cv::Mat pose1_core(dims, pose1.type()); cv::Mat pose2_core(dims, pose2.type()); for (size_t j = 0; j < core_joint_idx.size(); ++j) { size_t idx = core_joint_idx[j]; pose1.row(idx).copyTo(pose1_core.row(j)); pose2.row(idx).copyTo(pose2_core.row(j)); } // Triangulate and score auto [pose3d, score] = triangulate_and_score( pose1_core, pose2_core, cam1, cam2, roomparams, core_limbs_idx); all_scored_poses[i] = std::make_pair(pose3d, score); } // Drop low scoring poses std::vector drop_indices; for (size_t i = 0; i < all_scored_poses.size(); ++i) { if (all_scored_poses[i].second < min_score) { drop_indices.push_back(i); } } if (!drop_indices.empty()) { for (size_t i = drop_indices.size(); i > 0; --i) { all_scored_poses.erase(all_scored_poses.begin() + drop_indices[i - 1]); all_pairs.erase(all_pairs.begin() + drop_indices[i - 1]); } } // Group pairs that share a person std::vector>> groups; groups = calc_grouping(all_pairs, all_scored_poses, min_score); // Calculate full 3D poses std::vector all_full_poses; all_full_poses.resize(all_pairs.size()); #pragma omp parallel for for (size_t i = 0; i < all_pairs.size(); ++i) { const auto &pids = all_pairs[i].first; const auto &cam1 = internal_cameras[std::get<0>(pids)]; const auto &cam2 = internal_cameras[std::get<1>(pids)]; const auto &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)]; const auto &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)]; auto [pose3d, score] = triangulate_and_score( pose1, pose2, cam1, cam2, roomparams, {}); all_full_poses[i] = (pose3d); } // Merge groups std::vector all_merged_poses; all_merged_poses.resize(groups.size()); #pragma omp parallel for for (size_t i = 0; i < groups.size(); ++i) { const auto &group = groups[i]; std::vector poses; poses.reserve(std::get<2>(group).size()); for (const auto &idx : std::get<2>(group)) { poses.push_back(all_full_poses[idx]); } auto merged_pose = merge_group(poses, min_score); all_merged_poses[i] = (merged_pose); } last_poses_3d = all_merged_poses; // Convert to output format std::vector>> poses_3d; poses_3d.reserve(all_merged_poses.size()); for (size_t i = 0; i < all_merged_poses.size(); ++i) { const auto &mat = all_merged_poses[i]; const double *mat_ptr = mat.ptr(0); std::vector> pose; size_t num_joints = mat.rows; pose.reserve(num_joints); size_t num_valid = 0; for (size_t j = 0; j < num_joints; ++j) { std::array point; for (size_t k = 0; k < 4; ++k) { point[k] = mat_ptr[j * 4 + k]; } pose.push_back(point); if (point[3] > min_score) { num_valid++; } } if (num_valid > 0) { poses_3d.push_back(std::move(pose)); } } return poses_3d; } // ================================================================================================= void TriangulatorInternal::reset() { last_poses_3d.clear(); } // ================================================================================================= void TriangulatorInternal::undistort_poses(std::vector &poses, CameraInternal &icam) { int width = icam.cam.width; int height = icam.cam.height; // Undistort camera matrix cv::Mat newK = cv::getOptimalNewCameraMatrix( icam.K, icam.DC, cv::Size(width, height), 1, cv::Size(width, height)); for (size_t p = 0; p < poses.size(); ++p) { // Extract the (x, y) coordinates cv::Mat points = poses[p].colRange(0, 2).clone(); points = points.reshape(2); // Undistort the points cv::undistortPoints(points, points, icam.K, icam.DC, cv::noArray(), newK); // Update the original poses with the undistorted points points = points.reshape(1); points.copyTo(poses[p].colRange(0, 2)); // Mask out points that are far outside the image (points slightly outside are still valid) double mask_offset = (width + height) / 40.0; int num_joints = poses[p].rows; double *poses_ptr = poses[p].ptr(0); for (int j = 0; j < num_joints; ++j) { double x = poses_ptr[j * 3 + 0]; double y = poses_ptr[j * 3 + 1]; bool in_x = x >= -mask_offset && x < width + mask_offset; bool in_y = y >= -mask_offset && y < height + mask_offset; if (!in_x || !in_y) { poses_ptr[j * 3 + 0] = 0; poses_ptr[j * 3 + 1] = 0; poses_ptr[j * 3 + 2] = 0; } } } // Update the camera matrix icam.K = newK.clone(); icam.DC = cv::Mat::zeros(5, 1, CV_64F); } // ================================================================================================= std::tuple, std::vector> TriangulatorInternal::project_poses( const std::vector &bodies3D, const CameraInternal &icam, bool calc_dists) { size_t num_persons = bodies3D.size(); size_t num_joints = bodies3D[0].rows; std::vector bodies2D_list(num_persons); std::vector dists_list(num_persons); cv::Mat T_repeated = cv::repeat(icam.T, 1, num_joints); for (size_t i = 0; i < num_persons; ++i) { const cv::Mat &body3D = bodies3D[i]; // Split up vector cv::Mat points3d = body3D.colRange(0, 3).clone(); // Project from world to camera coordinate system cv::Mat xyz = (icam.R * (points3d.t() - T_repeated)).t(); // Set points behind the camera to zero double *xyz_ptr = xyz.ptr(0); for (size_t i = 0; i < num_joints; ++i) { if (xyz_ptr[i * 3 + 2] <= 0) { xyz_ptr[i * 3 + 0] = 0; xyz_ptr[i * 3 + 1] = 0; xyz_ptr[i * 3 + 2] = 0; } } // Calculate distance from camera center cv::Mat dists; if (calc_dists) { dists = xyz.mul(xyz); cv::reduce(dists, dists, 1, cv::REDUCE_SUM, CV_64F); cv::sqrt(dists, dists); } // Project to camera system cv::Mat uv; if (icam.cam.type == "fisheye") { } else { cv::Mat DCc = icam.DC.rowRange(0, 5).clone(); cv::projectPoints( xyz, cv::Mat::zeros(3, 1, CV_64F), cv::Mat::zeros(3, 1, CV_64F), icam.K, DCc, uv); } uv = uv.reshape(1, {xyz.rows, 2}); // Add scores again std::vector dimsB = {(int)num_joints, 3}; cv::Mat bodies2D = cv::Mat(dimsB, CV_64F); double *bodies2D_ptr = bodies2D.ptr(0); const double *uv_ptr = uv.ptr(0); const double *bodies3D_ptr = body3D.ptr(0); for (size_t i = 0; i < num_joints; ++i) { bodies2D_ptr[i * 3 + 0] = uv_ptr[i * 2 + 0]; bodies2D_ptr[i * 3 + 1] = uv_ptr[i * 2 + 1]; bodies2D_ptr[i * 3 + 2] = bodies3D_ptr[i * 4 + 3]; } // Filter invalid projections for (size_t i = 0; i < num_joints; ++i) { double x = bodies2D_ptr[i * 3 + 0]; double y = bodies2D_ptr[i * 3 + 1]; bool in_x = x >= 0 && x < icam.cam.width; bool in_y = y >= 0 && y < icam.cam.height; if (!in_x || !in_y) { bodies2D_ptr[i * 3 + 0] = 0; bodies2D_ptr[i * 3 + 1] = 0; bodies2D_ptr[i * 3 + 2] = 0; } } // Store results bodies2D_list[i] = bodies2D; dists_list[i] = dists; } return std::make_tuple(bodies2D_list, dists_list); } // ================================================================================================= double TriangulatorInternal::calc_pose_score( const cv::Mat &pose1, const cv::Mat &pose2, const cv::Mat &dist1, const CameraInternal &icam) { // Create mask for valid points double min_score = 0.1; cv::Mat mask = (pose1.col(2) > min_score) & (pose2.col(2) > min_score); int valid_count = cv::countNonZero(mask); // Drop if not enough valid points if (valid_count < 3) { return 0.0; } // Calculate scores double iscale = (icam.cam.width + icam.cam.height) / 2; cv::Mat scores = score_projection(pose1, pose2, dist1, mask, iscale); // Drop lowest scores size_t drop_k = static_cast(pose1.rows * 0.2); size_t min_k = 3; std::vector scores_vec; const double *scores_ptr = scores.ptr(0); const uchar *mask_ptr = mask.ptr(0); for (int i = 0; i < scores.rows; ++i) { if (mask_ptr[i] > 0) { scores_vec.push_back(scores_ptr[i]); } } std::sort(scores_vec.begin(), scores_vec.end()); drop_k = (scores_vec.size() >= min_k) ? std::min(drop_k, scores_vec.size() - min_k) : 0; if (drop_k > 0) { scores_vec.erase(scores_vec.begin(), scores_vec.begin() + drop_k); } // Calculate final score double score = 0.0; size_t n_items = scores_vec.size(); for (size_t i = 0; i < n_items; i++) { score += scores_vec[i]; } if (n_items > 0) { score /= (double)n_items; } return score; } // ================================================================================================= cv::Mat TriangulatorInternal::score_projection( const cv::Mat &pose1, const cv::Mat &repro1, const cv::Mat &dists1, const cv::Mat &mask, double iscale) { double min_score = 0.1; // Calculate error cv::Mat error1 = pose1.colRange(0, 2) - repro1.colRange(0, 2); error1 = error1.mul(error1); error1 = error1.col(0) + error1.col(1); cv::sqrt(error1, error1); error1.setTo(0.0, ~mask); // Set errors of invisible reprojections to a high value double penalty = iscale; cv::Mat mask_invisible = (repro1.col(2) < min_score) & mask; error1.setTo(penalty, mask_invisible); // Scale error by image size and distance to the camera error1 = cv::max(0, cv::min(error1, (iscale / 4.0))) / iscale; cv::Scalar mean_dist = cv::mean(dists1, mask); double dscale1 = std::sqrt(mean_dist[0] / 3.5); error1 *= dscale1; // Convert errors to a score cv::Mat score1 = 1.0 / (1.0 + error1 * 10); return score1; } // ================================================================================================= std::pair TriangulatorInternal::triangulate_and_score( const cv::Mat &pose1, const cv::Mat &pose2, const CameraInternal &cam1, const CameraInternal &cam2, const std::array, 2> &roomparams, const std::vector> &core_limbs_idx) { // Mask out invisible joints double min_score = 0.1; cv::Mat mask1a = (pose1.col(2) >= min_score); cv::Mat mask2a = (pose2.col(2) >= min_score); const cv::Mat mask = mask1a & mask2a; const uchar *mask_ptr = mask.ptr(0); // If too few joints are visible, return a low score int num_visible = cv::countNonZero(mask); if (num_visible < 3) { std::vector dims = {(int)pose1.rows, 4}; cv::Mat pose3d(dims, CV_64F, cv::Scalar(0)); double score = 0.0; return std::make_pair(pose3d, score); } // Extract coordinates std::vector dimsA = {2, num_visible}; cv::Mat points1 = cv::Mat(dimsA, CV_64F); cv::Mat points2 = cv::Mat(dimsA, CV_64F); const double *pose1_ptr = pose1.ptr(0); const double *pose2_ptr = pose2.ptr(0); double *points1_ptr = points1.ptr(0); double *points2_ptr = points2.ptr(0); int idx = 0; for (int i = 0; i < pose1.rows; ++i) { if (mask_ptr[i] > 0) { points1_ptr[idx + 0 * num_visible] = pose1_ptr[i * 3 + 0]; points1_ptr[idx + 1 * num_visible] = pose1_ptr[i * 3 + 1]; points2_ptr[idx + 0 * num_visible] = pose2_ptr[i * 3 + 0]; points2_ptr[idx + 1 * num_visible] = pose2_ptr[i * 3 + 1]; idx++; } } // Triangulate points cv::Mat points4d_h, points3d; cv::triangulatePoints(cam1.P, cam2.P, points1, points2, points4d_h); cv::convertPointsFromHomogeneous(points4d_h.t(), points3d); // Create the 3D pose matrix std::vector dimsB = {(int)pose1.rows, 4}; cv::Mat pose3d = cv::Mat(dimsB, CV_64F, cv::Scalar(0)); const double *points3d_ptr = points3d.ptr(0); double *pose3d_ptr = pose3d.ptr(0); idx = 0; for (int i = 0; i < pose1.rows; ++i) { if (mask_ptr[i] > 0) { pose3d_ptr[i * 4 + 0] = points3d_ptr[idx * 3 + 0]; pose3d_ptr[i * 4 + 1] = points3d_ptr[idx * 3 + 1]; pose3d_ptr[i * 4 + 2] = points3d_ptr[idx * 3 + 2]; pose3d_ptr[i * 4 + 3] = 1.0; idx++; } } // Check if mean of the points is inside the room bounds std::array mean = {0, 0, 0}; for (int i = 0; i < pose1.rows; ++i) { if (mask_ptr[i] > 0) { for (int j = 0; j < 3; ++j) { mean[j] += pose3d_ptr[i * 4 + j]; } } } for (int j = 0; j < 3; ++j) { mean[j] /= num_visible; } std::array center = roomparams[0]; std::array size = roomparams[1]; for (int i = 0; i < 3; ++i) { if (mean[i] > center[i] + size[i] / 2 || mean[i] < center[i] - size[i] / 2) { // Very low score if outside room pose3d.col(3).setTo(0.001); return {pose3d, 0.001}; } } // Calculate reprojections std::vector poses_3d = {pose3d}; cv::Mat repro1, dists1, repro2, dists2; auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true); auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true); repro1 = repro1s[0]; dists1 = dists1s[0]; repro2 = repro2s[0]; dists2 = dists2s[0]; // Calculate scores for each view double iscale = (cam1.cam.width + cam1.cam.height) / 2.0; cv::Mat score1 = score_projection(pose1, repro1, dists1, mask, iscale); cv::Mat score2 = score_projection(pose2, repro2, dists2, mask, iscale); // Combine scores cv::Mat scores = (score1 + score2) / 2.0; const double *scores_ptr = scores.ptr(0); // Add scores to 3D pose for (int i = 0; i < pose1.rows; ++i) { if (mask_ptr[i] > 0) { pose3d_ptr[i * 4 + 3] = scores_ptr[i]; } } // Set scores outside the room to a low value double wdist = 0.1; for (int i = 0; i < pose1.rows; ++i) { if (mask_ptr[i] > 0) { for (int j = 0; j < 3; ++j) { if (pose3d_ptr[i * 4 + j] > center[j] + size[j] / 2 + wdist || pose3d_ptr[i * 4 + j] < center[j] - size[j] / 2 - wdist) { pose3d_ptr[i * 4 + 3] = 0.001; } } } } // Filter clearly wrong limbs if (core_limbs_idx.size() > 0) { double max_length = 0.9; std::vector invalid_joints; for (size_t i = 0; i < core_limbs_idx.size(); ++i) { auto limb = core_limbs_idx[i]; if (pose3d_ptr[limb[0] * 4 + 3] > min_score && pose3d_ptr[limb[1] * 4 + 3] > min_score) { double dx = pose3d_ptr[limb[0] * 4 + 0] - pose3d_ptr[limb[1] * 4 + 0]; double dy = pose3d_ptr[limb[0] * 4 + 1] - pose3d_ptr[limb[1] * 4 + 1]; double dz = pose3d_ptr[limb[0] * 4 + 2] - pose3d_ptr[limb[1] * 4 + 2]; double length = std::sqrt(dx * dx + dy * dy + dz * dz); if (length > max_length) { invalid_joints.push_back(limb[1]); } } } for (size_t i = 0; i < invalid_joints.size(); ++i) { pose3d_ptr[invalid_joints[i] * 4 + 3] = 0.001; } } // Drop lowest scores size_t drop_k = static_cast(pose1.rows * 0.2); size_t min_k = 3; std::vector scores_vec; for (int i = 0; i < pose1.rows; ++i) { if (pose3d_ptr[i * 4 + 3] > min_score) { scores_vec.push_back(pose3d_ptr[i * 4 + 3]); } } std::sort(scores_vec.begin(), scores_vec.end()); drop_k = (scores_vec.size() >= min_k) ? std::min(drop_k, scores_vec.size() - min_k) : 0; if (drop_k > 0) { scores_vec.erase(scores_vec.begin(), scores_vec.begin() + drop_k); } // Calculate final score double score = 0.0; size_t n_items = scores_vec.size(); for (size_t i = 0; i < n_items; i++) { score += scores_vec[i]; } if (n_items > 0) { score /= (double)n_items; } return std::make_pair(pose3d, score); } // ================================================================================================= std::vector>> TriangulatorInternal::calc_grouping( const std::vector, std::pair>> &all_pairs, const std::vector> &all_scored_poses, double min_score) { double max_center_dist = 0.6; double max_joint_avg_dist = 0.3; // Calculate pose centers std::vector centers; for (size_t i = 0; i < all_pairs.size(); ++i) { const cv::Mat &pose_3d = all_scored_poses[i].first; size_t num_joints = pose_3d.rows; const double *pose_3d_ptr = pose_3d.ptr(0); cv::Point3d center(0, 0, 0); size_t num_valid = 0; for (size_t j = 0; j < num_joints; ++j) { if (pose_3d_ptr[j * 4 + 3] > min_score) { center.x += pose_3d_ptr[j * 4 + 0]; center.y += pose_3d_ptr[j * 4 + 1]; center.z += pose_3d_ptr[j * 4 + 2]; num_valid++; } } if (num_valid > 0) { center.x /= num_valid; center.y /= num_valid; center.z /= num_valid; } centers.push_back(center); } // Calculate Groups // defined as a tuple of center, pose, and all-pairs-indices of members std::vector>> groups; for (size_t i = 0; i < all_pairs.size(); ++i) { const cv::Mat &pose_3d = all_scored_poses[i].first; size_t num_joints = pose_3d.rows; const double *pose_3d_ptr = pose_3d.ptr(0); const cv::Point3d ¢er = centers[i]; double best_dist = std::numeric_limits::infinity(); int best_group = -1; for (size_t j = 0; j < groups.size(); ++j) { auto &group = groups[j]; cv::Point3d &group_center = std::get<0>(group); cv::Mat &group_pose = std::get<1>(group); const double *group_pose_ptr = group_pose.ptr(0); // Check if the center is close enough if (cv::norm(group_center - center) < max_center_dist) { // Calculate average joint distance std::vector dists; for (size_t row = 0; row < num_joints; row++) { if (pose_3d_ptr[row * 4 + 3] > min_score && group_pose_ptr[row * 4 + 3] > min_score) { double dx = pose_3d_ptr[row * 4 + 0] - group_pose_ptr[row * 4 + 0]; double dy = pose_3d_ptr[row * 4 + 1] - group_pose_ptr[row * 4 + 1]; double dz = pose_3d_ptr[row * 4 + 2] - group_pose_ptr[row * 4 + 2]; double dist = std::sqrt(dx * dx + dy * dy + dz * dz); dists.push_back(dist); } } double dist = std::numeric_limits::infinity(); if (!dists.empty()) { dist = std::accumulate(dists.begin(), dists.end(), 0.0) / dists.size(); } // Check if the average joint distance is close enough if (dist < max_joint_avg_dist && dist < best_dist) { best_dist = dist; best_group = j; } } } if (best_group == -1) { // Create a new group std::vector new_indices{static_cast(i)}; groups.emplace_back(center, pose_3d.clone(), new_indices); } else { // Update existing group auto &group = groups[best_group]; cv::Point3d &group_center = std::get<0>(group); cv::Mat &group_pose = std::get<1>(group); std::vector &group_indices = std::get<2>(group); double n_elems = group_indices.size(); group_center = (group_center * n_elems + center) / (n_elems + 1); cv::Mat new_pose = group_pose.clone(); const double *group_pose_ptr = group_pose.ptr(0); const double *pose_3d_ptr = pose_3d.ptr(0); double *new_pose_ptr = new_pose.ptr(0); for (size_t row = 0; row < num_joints; row++) { new_pose_ptr[row * 4 + 0] = (group_pose_ptr[row * 4 + 0] * n_elems + pose_3d_ptr[row * 4 + 0]) / (n_elems + 1); new_pose_ptr[row * 4 + 1] = (group_pose_ptr[row * 4 + 1] * n_elems + pose_3d_ptr[row * 4 + 1]) / (n_elems + 1); new_pose_ptr[row * 4 + 2] = (group_pose_ptr[row * 4 + 2] * n_elems + pose_3d_ptr[row * 4 + 2]) / (n_elems + 1); new_pose_ptr[row * 4 + 3] = (group_pose_ptr[row * 4 + 3] * n_elems + pose_3d_ptr[row * 4 + 3]) / (n_elems + 1); } group_pose = new_pose; group_indices.push_back(i); } } return groups; } // ================================================================================================= cv::Mat TriangulatorInternal::merge_group(const std::vector &poses_3d, double min_score) { int num_poses = poses_3d.size(); int num_joints = poses_3d[0].rows; // Merge poses to create initial pose // Use only those triangulations with a high score cv::Mat sum_poses = cv::Mat::zeros(poses_3d[0].size(), poses_3d[0].type()); double *sum_poses_ptr = sum_poses.ptr(0); std::vector sum_mask(num_joints, 0); for (int i = 0; i < num_poses; ++i) { const cv::Mat &pose = poses_3d[i]; const double *pose_ptr = pose.ptr(0); for (int j = 0; j < num_joints; ++j) { if (pose_ptr[j * 4 + 3] > min_score) { for (int k = 0; k < 4; ++k) { sum_poses_ptr[j * 4 + k] += pose_ptr[j * 4 + k]; } sum_mask[j]++; } } } cv::Mat initial_pose_3d = cv::Mat::zeros(poses_3d[0].size(), poses_3d[0].type()); double *initial_pose_3d_ptr = initial_pose_3d.ptr(0); for (int j = 0; j < num_joints; ++j) { if (sum_mask[j] > 0) { for (int k = 0; k < 4; ++k) { initial_pose_3d_ptr[j * 4 + k] = sum_poses_ptr[j * 4 + k] / sum_mask[j]; } } } // Use center as default if the initial pose is empty std::vector jmask(num_joints, false); cv::Point3d center(0, 0, 0); int valid_joints = 0; for (int j = 0; j < num_joints; ++j) { if (initial_pose_3d_ptr[j * 4 + 3] > min_score) { jmask[j] = true; center.x += initial_pose_3d_ptr[j * 4 + 0]; center.y += initial_pose_3d_ptr[j * 4 + 1]; center.z += initial_pose_3d_ptr[j * 4 + 2]; valid_joints++; } } if (valid_joints > 0) { center *= 1.0 / valid_joints; } for (int j = 0; j < num_joints; ++j) { if (!jmask[j]) { initial_pose_3d_ptr[j * 4 + 0] = center.x; initial_pose_3d_ptr[j * 4 + 1] = center.y; initial_pose_3d_ptr[j * 4 + 2] = center.z; } } // Drop joints with low scores and filter outlying joints using distance threshold double offset = 0.1; double max_dist = 1.2; cv::Mat mask = cv::Mat::zeros(num_poses, num_joints, CV_8U); cv::Mat distances = cv::Mat::zeros(num_poses, num_joints, CV_64F); double *distances_ptr = distances.ptr(0); u_char *mask_ptr = mask.ptr(0); for (int i = 0; i < num_poses; ++i) { const cv::Mat &pose = poses_3d[i]; const double *pose_ptr = pose.ptr(0); for (int j = 0; j < num_joints; ++j) { double dx = pose_ptr[j * 4 + 0] - initial_pose_3d_ptr[j * 4 + 0]; double dy = pose_ptr[j * 4 + 1] - initial_pose_3d_ptr[j * 4 + 1]; double dz = pose_ptr[j * 4 + 2] - initial_pose_3d_ptr[j * 4 + 2]; double dist = std::sqrt(dx * dx + dy * dy + dz * dz); distances_ptr[i * num_joints + j] = dist; if (dist <= max_dist && pose_ptr[j * 4 + 3] > (min_score - offset)) { mask_ptr[i * num_joints + j] = 1; } } } // Select the best-k proposals for each joint that are closest to the initial pose int keep_best = 3; cv::Mat best_k_mask = cv::Mat::zeros(num_poses, num_joints, CV_8U); for (int j = 0; j < num_joints; ++j) { std::vector> valid_indices; for (int i = 0; i < num_poses; ++i) { if (mask_ptr[i * num_joints + j]) { auto item = std::make_pair(distances_ptr[i * num_joints + j], i); valid_indices.push_back(item); } } std::sort(valid_indices.begin(), valid_indices.end()); for (int k = 0; k < std::min(keep_best, static_cast(valid_indices.size())); ++k) { best_k_mask.at(valid_indices[k].second, j) = 1; } } // Combine masks mask = mask & best_k_mask; mask_ptr = mask.ptr(0); // Compute the final pose sum_poses = cv::Mat::zeros(sum_poses.size(), sum_poses.type()); sum_poses_ptr = sum_poses.ptr(0); sum_mask = std::vector(num_joints, 0); for (int i = 0; i < num_poses; ++i) { const cv::Mat &pose = poses_3d[i]; const double *pose_ptr = pose.ptr(0); for (int j = 0; j < num_joints; ++j) { if (mask_ptr[i * num_joints + j] > 0) { for (int k = 0; k < 4; ++k) { sum_poses_ptr[j * 4 + k] += pose_ptr[j * 4 + k]; } sum_mask[j]++; } } } cv::Mat final_pose_3d = cv::Mat::zeros(sum_poses.size(), sum_poses.type()); double *final_pose_3d_ptr = final_pose_3d.ptr(0); for (int j = 0; j < num_joints; ++j) { if (sum_mask[j] > 0) { for (int k = 0; k < 4; ++k) { final_pose_3d_ptr[j * 4 + k] = sum_poses_ptr[j * 4 + k] / sum_mask[j]; } } } return final_pose_3d; }