Implemented custom midpoint triangulation.

This commit is contained in:
Daniel
2025-02-28 10:29:25 +01:00
parent 608f89d6b6
commit 0f2d597899
2 changed files with 298 additions and 196 deletions

View File

@ -294,73 +294,73 @@ Results of the model in various experiments on different datasets. \
```json
{
"img_loading": 0.0418024,
"demosaicing": 0.000712412,
"avg_time_2d": 0.014805,
"avg_time_3d": 0.000291986,
"fps": 63.2536
"img_loading": 0.180589,
"demosaicing": 0.000695076,
"avg_time_2d": 0.0152607,
"avg_time_3d": 0.000150192,
"fps": 62.0888
}
{
"triangulator_calls": 301,
"init_time": 6.44811e-06,
"undistort_time": 4.05236e-05,
"project_time": 3.04309e-06,
"match_time": 1.07992e-05,
"pairs_time": 6.09893e-06,
"pair_scoring_time": 6.0547e-05,
"grouping_time": 7.94037e-06,
"full_time": 5.80405e-05,
"merge_time": 1.34738e-05,
"post_time": 9.23313e-06,
"convert_time": 1.58432e-07,
"total_time": 0.000216593
"init_time": 3.53967e-06,
"undistort_time": 3.48582e-05,
"project_time": 2.18348e-06,
"match_time": 8.45481e-06,
"pairs_time": 4.53164e-06,
"pair_scoring_time": 3.10183e-05,
"grouping_time": 4.6499e-06,
"full_time": 3.33672e-05,
"merge_time": 1.02807e-05,
"post_time": 7.00402e-06,
"convert_time": 1.11306e-07,
"total_time": 0.000140236
}
{
"person_nums": {
"total_frames": 301,
"total_labels": 477,
"total_preds": 828,
"total_preds": 829,
"considered_empty": 0,
"valid_preds": 477,
"invalid_preds": 351,
"invalid_preds": 352,
"missing": 0,
"invalid_fraction": 0.42391,
"precision": 0.57609,
"invalid_fraction": 0.42461,
"precision": 0.57539,
"recall": 1.0,
"f1": 0.73103,
"non_empty": 828
"f1": 0.73047,
"non_empty": 829
},
"mpjpe": {
"count": 477,
"mean": 0.047978,
"median": 0.042546,
"std": 0.014958,
"sem": 0.000686,
"min": 0.03014,
"max": 0.12344,
"mean": 0.047984,
"median": 0.042648,
"std": 0.014812,
"sem": 0.000679,
"min": 0.03012,
"max": 0.116312,
"recall-0.025": 0.0,
"recall-0.05": 0.698113,
"recall-0.1": 0.987421,
"recall-0.05": 0.70021,
"recall-0.1": 0.985325,
"recall-0.15": 1.0,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 477,
"ap-0.025": 0.0,
"ap-0.05": 0.387605,
"ap-0.1": 0.730843,
"ap-0.15": 0.744044,
"ap-0.25": 0.744044,
"ap-0.5": 0.744044
"ap-0.05": 0.39114,
"ap-0.1": 0.735434,
"ap-0.15": 0.751482,
"ap-0.25": 0.751482,
"ap-0.5": 0.751482
},
"head": {
"count": 477,
"mean": 0.054194,
"median": 0.050192,
"std": 0.024884,
"sem": 0.001141,
"min": 0.005604,
"max": 0.180414,
"recall-0.025": 0.083857,
"mean": 0.054212,
"median": 0.050157,
"std": 0.024854,
"sem": 0.001139,
"min": 0.005599,
"max": 0.180565,
"recall-0.025": 0.081761,
"recall-0.05": 0.496855,
"recall-0.1": 0.937107,
"recall-0.15": 0.995807,
@ -370,15 +370,15 @@ Results of the model in various experiments on different datasets. \
},
"shoulder_left": {
"count": 477,
"mean": 0.042406,
"median": 0.036996,
"std": 0.020494,
"sem": 0.000939,
"min": 0.004232,
"max": 0.136479,
"recall-0.025": 0.163522,
"recall-0.05": 0.72956,
"recall-0.1": 0.987421,
"mean": 0.042435,
"median": 0.03702,
"std": 0.02058,
"sem": 0.000943,
"min": 0.00431,
"max": 0.136587,
"recall-0.025": 0.161426,
"recall-0.05": 0.727463,
"recall-0.1": 0.985325,
"recall-0.15": 1.0,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
@ -386,14 +386,14 @@ Results of the model in various experiments on different datasets. \
},
"shoulder_right": {
"count": 477,
"mean": 0.049613,
"median": 0.045991,
"std": 0.023012,
"sem": 0.001055,
"min": 0.00538,
"max": 0.147114,
"mean": 0.049634,
"median": 0.045795,
"std": 0.023121,
"sem": 0.00106,
"min": 0.00535,
"max": 0.14745,
"recall-0.025": 0.100629,
"recall-0.05": 0.555556,
"recall-0.05": 0.559748,
"recall-0.1": 0.955975,
"recall-0.15": 1.0,
"recall-0.25": 1.0,
@ -402,13 +402,13 @@ Results of the model in various experiments on different datasets. \
},
"elbow_left": {
"count": 477,
"mean": 0.040782,
"median": 0.032111,
"std": 0.029225,
"sem": 0.00134,
"min": 0.003363,
"max": 0.326353,
"recall-0.025": 0.312369,
"mean": 0.040763,
"median": 0.032063,
"std": 0.029259,
"sem": 0.001341,
"min": 0.003449,
"max": 0.326227,
"recall-0.025": 0.316562,
"recall-0.05": 0.756813,
"recall-0.1": 0.953878,
"recall-0.15": 0.997904,
@ -418,29 +418,29 @@ Results of the model in various experiments on different datasets. \
},
"elbow_right": {
"count": 477,
"mean": 0.053348,
"median": 0.044418,
"std": 0.040864,
"sem": 0.001873,
"min": 0.003281,
"max": 0.243895,
"recall-0.025": 0.253669,
"mean": 0.053368,
"median": 0.045043,
"std": 0.040851,
"sem": 0.001872,
"min": 0.003529,
"max": 0.244051,
"recall-0.025": 0.255765,
"recall-0.05": 0.561845,
"recall-0.1": 0.901468,
"recall-0.15": 0.955975,
"recall-0.15": 0.958071,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 477
},
"wrist_left": {
"count": 477,
"mean": 0.060086,
"median": 0.053969,
"std": 0.038695,
"sem": 0.001774,
"min": 0.002109,
"max": 0.322924,
"recall-0.025": 0.129979,
"mean": 0.060002,
"median": 0.053953,
"std": 0.03861,
"sem": 0.00177,
"min": 0.002051,
"max": 0.322481,
"recall-0.025": 0.132075,
"recall-0.05": 0.404612,
"recall-0.1": 0.907757,
"recall-0.15": 0.960168,
@ -450,31 +450,31 @@ Results of the model in various experiments on different datasets. \
},
"wrist_right": {
"count": 477,
"mean": 0.05937,
"median": 0.054488,
"std": 0.034178,
"sem": 0.001567,
"min": 0.009831,
"max": 0.371597,
"recall-0.025": 0.109015,
"recall-0.05": 0.419287,
"mean": 0.059207,
"median": 0.054405,
"std": 0.033578,
"sem": 0.001539,
"min": 0.009618,
"max": 0.371667,
"recall-0.025": 0.115304,
"recall-0.05": 0.415094,
"recall-0.1": 0.899371,
"recall-0.15": 0.979036,
"recall-0.15": 0.981132,
"recall-0.25": 0.997904,
"recall-0.5": 1.0,
"num_labels": 477
},
"hip_left": {
"count": 477,
"mean": 0.048082,
"median": 0.042309,
"std": 0.02636,
"sem": 0.001208,
"min": 0.006447,
"max": 0.14256,
"recall-0.025": 0.186583,
"mean": 0.047948,
"median": 0.042251,
"std": 0.026295,
"sem": 0.001205,
"min": 0.006475,
"max": 0.145903,
"recall-0.025": 0.188679,
"recall-0.05": 0.618449,
"recall-0.1": 0.951782,
"recall-0.1": 0.953878,
"recall-0.15": 1.0,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
@ -482,15 +482,15 @@ Results of the model in various experiments on different datasets. \
},
"hip_right": {
"count": 477,
"mean": 0.057967,
"median": 0.0564,
"std": 0.023728,
"sem": 0.001088,
"min": 0.00421,
"max": 0.132307,
"mean": 0.058483,
"median": 0.05753,
"std": 0.023762,
"sem": 0.001089,
"min": 0.005137,
"max": 0.132318,
"recall-0.025": 0.098532,
"recall-0.05": 0.396226,
"recall-0.1": 0.9413,
"recall-0.05": 0.39413,
"recall-0.1": 0.943396,
"recall-0.15": 1.0,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
@ -498,13 +498,13 @@ Results of the model in various experiments on different datasets. \
},
"knee_left": {
"count": 477,
"mean": 0.040396,
"median": 0.037751,
"std": 0.02446,
"sem": 0.001121,
"min": 0.004904,
"max": 0.190671,
"recall-0.025": 0.259958,
"mean": 0.040438,
"median": 0.03808,
"std": 0.024403,
"sem": 0.001118,
"min": 0.004928,
"max": 0.190069,
"recall-0.025": 0.257862,
"recall-0.05": 0.748428,
"recall-0.1": 0.974843,
"recall-0.15": 0.989518,
@ -514,15 +514,15 @@ Results of the model in various experiments on different datasets. \
},
"knee_right": {
"count": 477,
"mean": 0.04018,
"median": 0.036019,
"std": 0.023349,
"sem": 0.00107,
"min": 0.007466,
"max": 0.1848,
"recall-0.025": 0.312369,
"recall-0.05": 0.712788,
"recall-0.1": 0.974843,
"mean": 0.040168,
"median": 0.03623,
"std": 0.023114,
"sem": 0.001059,
"min": 0.00733,
"max": 0.184933,
"recall-0.025": 0.310273,
"recall-0.05": 0.708595,
"recall-0.1": 0.976939,
"recall-0.15": 0.997904,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
@ -530,14 +530,14 @@ Results of the model in various experiments on different datasets. \
},
"ankle_left": {
"count": 477,
"mean": 0.036352,
"median": 0.027965,
"std": 0.030885,
"sem": 0.001416,
"min": 0.004731,
"max": 0.223705,
"recall-0.025": 0.429769,
"recall-0.05": 0.815514,
"mean": 0.036353,
"median": 0.028172,
"std": 0.030783,
"sem": 0.001411,
"min": 0.004787,
"max": 0.223747,
"recall-0.025": 0.433962,
"recall-0.05": 0.81761,
"recall-0.1": 0.945493,
"recall-0.15": 0.983229,
"recall-0.25": 1.0,
@ -546,34 +546,34 @@ Results of the model in various experiments on different datasets. \
},
"ankle_right": {
"count": 477,
"mean": 0.040931,
"median": 0.030927,
"std": 0.037943,
"sem": 0.001739,
"min": 0.003325,
"max": 0.272891,
"mean": 0.040777,
"median": 0.030897,
"std": 0.037254,
"sem": 0.001708,
"min": 0.003323,
"max": 0.27012,
"recall-0.025": 0.303983,
"recall-0.05": 0.802935,
"recall-0.1": 0.930818,
"recall-0.15": 0.968553,
"recall-0.25": 0.995807,
"recall-0.25": 0.997904,
"recall-0.5": 1.0,
"num_labels": 477
},
"joint_recalls": {
"num_labels": 6201,
"recall-0.025": 0.20997,
"recall-0.05": 0.61571,
"recall-0.025": 0.21093,
"recall-0.05": 0.6149,
"recall-0.1": 0.94275,
"recall-0.15": 0.98645,
"recall-0.25": 0.99839,
"recall-0.25": 0.99871,
"recall-0.5": 1.0
}
}
{
"total_parts": 6678,
"correct_parts": 6618,
"pcp": 0.991015
"correct_parts": 6619,
"pcp": 0.991165
}
```

View File

@ -412,7 +412,6 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
// Calculate pair scores
std::vector<std::pair<std::vector<std::array<float, 4>>, float>> all_scored_poses;
all_scored_poses.resize(all_pairs.size());
#pragma omp parallel for
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const auto &pids = all_pairs[i].first;
@ -479,7 +478,6 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
// Calculate full 3D poses
std::vector<std::vector<std::array<float, 4>>> all_full_poses;
all_full_poses.resize(all_pairs.size());
#pragma omp parallel for
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const auto &pids = all_pairs[i].first;
@ -1010,6 +1008,155 @@ std::vector<float> TriangulatorInternal::score_projection(
// =================================================================================================
/* Invert a 3x3 matrix through its adjugate.
 * A matrix whose determinant has a magnitude below 1e-6 is treated as singular
 * and the all-zero matrix is returned instead of an inverse. */
std::array<std::array<float, 3>, 3> invert3x3(const std::array<std::array<float, 3>, 3> &M)
{
    // See: https://scicomp.stackexchange.com/a/29206
    using Mat3 = std::array<std::array<float, 3>, 3>;

    // Build the adjugate (transposed cofactor matrix). Walking the indices cyclically
    // makes the alternating cofactor signs fall out of the operand order.
    Mat3 adjugate{};
    for (std::size_t r = 0; r < 3; ++r)
    {
        const std::size_t r1 = (r + 1) % 3;
        const std::size_t r2 = (r + 2) % 3;
        for (std::size_t c = 0; c < 3; ++c)
        {
            const std::size_t c1 = (c + 1) % 3;
            const std::size_t c2 = (c + 2) % 3;
            adjugate[r][c] = M[c1][r1] * M[c2][r2] - M[c1][r2] * M[c2][r1];
        }
    }

    // Laplace expansion along the first row; the first adjugate column holds its cofactors.
    const float determinant =
        M[0][0] * adjugate[0][0] + M[0][1] * adjugate[1][0] + M[0][2] * adjugate[2][0];

    Mat3 result{}; // value-initialised, i.e. all zeros
    if (std::fabs(determinant) < 1e-6f)
    {
        return result;
    }

    const float scale = 1.0f / determinant;
    for (std::size_t r = 0; r < 3; ++r)
    {
        for (std::size_t c = 0; c < 3; ++c)
        {
            result[r][c] = adjugate[r][c] * scale;
        }
    }
    return result;
}
/* Return the transpose of a 3x3 matrix (rows and columns swapped) */
std::array<std::array<float, 3>, 3> transpose3x3(const std::array<std::array<float, 3>, 3> &M)
{
    std::array<std::array<float, 3>, 3> flipped{};
    for (std::size_t row = 0; row < 3; ++row)
    {
        for (std::size_t col = 0; col < 3; ++col)
        {
            flipped[col][row] = M[row][col];
        }
    }
    return flipped;
}
/* Scalar (dot) product of two 3-vectors */
float dot(const std::array<float, 3> &a, const std::array<float, 3> &b)
{
    // Seed with the first product so the summation order stays x, then y, then z.
    float sum = a[0] * b[0];
    for (std::size_t k = 1; k < 3; ++k)
    {
        sum += a[k] * b[k];
    }
    return sum;
}
/* Vector (cross) product a x b of two 3-vectors */
std::array<float, 3> cross(const std::array<float, 3> &a, const std::array<float, 3> &b)
{
    std::array<float, 3> out{};
    for (std::size_t k = 0; k < 3; ++k)
    {
        // Component k combines the two following components, taken cyclically.
        const std::size_t k1 = (k + 1) % 3;
        const std::size_t k2 = (k + 2) % 3;
        out[k] = a[k1] * b[k2] - a[k2] * b[k1];
    }
    return out;
}
/* Component-wise sum a + b of two 3-vectors */
std::array<float, 3> add(const std::array<float, 3> &a, const std::array<float, 3> &b)
{
    std::array<float, 3> total{};
    for (std::size_t k = 0; k < 3; ++k)
    {
        total[k] = a[k] + b[k];
    }
    return total;
}
/* Component-wise difference a - b of two 3-vectors */
std::array<float, 3> subtract(const std::array<float, 3> &a, const std::array<float, 3> &b)
{
    std::array<float, 3> diff{};
    for (std::size_t k = 0; k < 3; ++k)
    {
        diff[k] = a[k] - b[k];
    }
    return diff;
}
/* Scale every component of a 3-vector by the scalar s */
std::array<float, 3> multiply(const std::array<float, 3> &a, float s)
{
    std::array<float, 3> scaled{};
    for (std::size_t k = 0; k < 3; ++k)
    {
        scaled[k] = a[k] * s;
    }
    return scaled;
}
/* Scale a 3-vector to unit length.
 * Vectors with a length below 1e-8 are returned unchanged to avoid dividing by ~0. */
std::array<float, 3> normalize(const std::array<float, 3> &v)
{
    const float length = std::sqrt(dot(v, v));
    const bool too_short = length < 1e-8f;
    return too_short ? v : multiply(v, 1.0f / length);
}
/* Matrix-vector product M * v for a 3x3 matrix and a 3-vector */
std::array<float, 3> mat_mul_vec(
    const std::array<std::array<float, 3>, 3> &M, const std::array<float, 3> &v)
{
    std::array<float, 3> product{};
    for (std::size_t row = 0; row < 3; ++row)
    {
        const std::array<float, 3> &m = M[row];
        product[row] = m[0] * v[0] + m[1] * v[1] + m[2] * v[2];
    }
    return product;
}
/* Compute the camera center and the unit-length viewing ray through an image point.
 * Returns the tuple (C, rayDir) with
 *   C      = -Rᵀ * (R * (-T))
 *   rayDir = normalize(Rᵀ * K⁻¹ * [pt.x, pt.y, 1]ᵀ)
 * where R, T and K are read from icam.cam.
 * NOTE(review): for an orthonormal R the center expression reduces to T itself;
 * confirm the R/T convention of the camera struct before simplifying. */
std::tuple<std::array<float, 3>, std::array<float, 3>> calc_center_and_ray(
    const CameraInternal &icam,
    const std::array<float, 2> &pt)
{
    const auto &cam = icam.cam;

    // Rᵀ is shared by the center and the ray computation
    const auto R_t = transpose3x3(cam.R);

    // t = R * (-T), with T taken from the first column of cam.T
    const std::array<float, 3> translation = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
    const std::array<float, 3> t = mat_mul_vec(cam.R, multiply(translation, -1.0f));

    // Camera center: C = -Rᵀ * t
    const std::array<float, 3> center = multiply(mat_mul_vec(R_t, t), -1.0f);

    // Ray direction: back-project the homogeneous pixel through K⁻¹, then rotate by Rᵀ.
    // invert3x3 yields a zero matrix for a singular K, which gives a zero direction here.
    const std::array<float, 3> pixel_h = {pt[0], pt[1], 1.0f};
    const auto K_inverse = invert3x3(cam.K);
    const auto back_projected = mat_mul_vec(K_inverse, pixel_h);
    const auto direction = mat_mul_vec(R_t, back_projected);

    return std::make_tuple(center, normalize(direction));
}
/* Triangulate one 2D correspondence from two cameras.
 * Each image point is turned into a ray (camera center + direction); the result is the
 * midpoint of the shortest segment connecting the two rays.
 * Note: the ray parameters are quotients whose denominators vanish when the two ray
 * directions are parallel; no guard is applied for that case. */
std::array<float, 3> triangulate_midpoint(
    const CameraInternal &icam1,
    const CameraInternal &icam2,
    const std::array<float, 2> &pt1,
    const std::array<float, 2> &pt2)
{
    // See: https://en.wikipedia.org/wiki/Skew_lines#Nearest_points
    using Vec3 = std::array<float, 3>;

    // Ray origin (camera center) and direction for each view
    const auto [origin_a, dir_a] = calc_center_and_ray(icam1, pt1);
    const auto [origin_b, dir_b] = calc_center_and_ray(icam2, pt2);

    // Common perpendicular of both rays, and the normals of the planes that
    // contain one ray together with that perpendicular
    const Vec3 perpendicular = cross(dir_a, dir_b);
    const Vec3 plane_a = cross(dir_a, perpendicular);
    const Vec3 plane_b = cross(dir_b, perpendicular);

    // Point on the ray (origin, dir) nearest to the other ray: intersect it with the
    // plane through other_origin whose normal is other_plane
    const auto nearest_point = [](const Vec3 &origin, const Vec3 &dir,
                                  const Vec3 &other_origin, const Vec3 &other_plane)
    {
        const float s = dot(subtract(other_origin, origin), other_plane) / dot(dir, other_plane);
        return add(origin, multiply(dir, s));
    };

    const Vec3 closest_a = nearest_point(origin_a, dir_a, origin_b, plane_b);
    const Vec3 closest_b = nearest_point(origin_b, dir_b, origin_a, plane_a);

    // Halfway between the two closest points
    return multiply(add(closest_a, closest_b), 0.5f);
}
// =================================================================================================
std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
const std::vector<std::array<float, 3>> &pose1,
const std::vector<std::array<float, 3>> &pose2,
@ -1052,63 +1199,18 @@ std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triang
return std::make_pair(empty, score);
}
// Extract coordinates of visible joints
std::vector<std::array<float, 2>> points1;
std::vector<std::array<float, 2>> points2;
points1.reserve(num_visible);
points2.reserve(num_visible);
for (size_t i = 0; i < num_joints; ++i)
{
if (mask[i])
{
points1.push_back({pose1[i][0], pose1[i][1]});
points2.push_back({pose2[i][0], pose2[i][1]});
}
}
// Convert vectors to mats
cv::Mat points1_mat(2, num_visible, CV_32F);
cv::Mat points2_mat(2, num_visible, CV_32F);
float *p1_ptr = points1_mat.ptr<float>(0);
float *p2_ptr = points2_mat.ptr<float>(0);
for (int i = 0; i < num_visible; ++i)
{
p1_ptr[i + 0 * num_visible] = points1[i][0];
p1_ptr[i + 1 * num_visible] = points1[i][1];
p2_ptr[i + 0 * num_visible] = points2[i][0];
p2_ptr[i + 1 * num_visible] = points2[i][1];
}
// Triangulate points
cv::Mat points4d_h;
cv::triangulatePoints(cam1.P, cam2.P, points1_mat, points2_mat, points4d_h);
// Convert homogeneous coordinates to 3D
std::vector<std::array<float, 3>> points_3d;
points_3d.reserve(num_visible);
const float *p4_ptr = points4d_h.ptr<float>(0);
for (int i = 0; i < points4d_h.cols; ++i)
{
float w = p4_ptr[i + 3 * num_visible];
std::array<float, 3> pt = {
p4_ptr[i + 0 * num_visible] / w,
p4_ptr[i + 1 * num_visible] / w,
p4_ptr[i + 2 * num_visible] / w};
points_3d.push_back(std::move(pt));
}
// Create the 3D pose
// Use midpoint triangulation instead of cv::triangulatePoints because it is much faster,
// while having almost the same accuracy.
std::vector<std::array<float, 4>> pose3d(num_joints, {0.0, 0.0, 0.0, 0.0});
int idx = 0;
for (size_t i = 0; i < num_joints; ++i)
{
if (mask[i])
{
pose3d[i][0] = points_3d[idx][0];
pose3d[i][1] = points_3d[idx][1];
pose3d[i][2] = points_3d[idx][2];
pose3d[i][3] = 1.0;
++idx;
auto &pt1 = pose1[i];
auto &pt2 = pose2[i];
std::array<float, 3> pt3d = triangulate_midpoint(
cam1, cam2, {pt1[0], pt1[1]}, {pt2[0], pt2[1]});
pose3d[i] = {pt3d[0], pt3d[1], pt3d[2], 1.0};
}
}