Implemented custom midpoint triangulation.

2025-02-28 10:29:25 +01:00
parent 608f89d6b6
commit 0f2d597899
2 changed files with 298 additions and 196 deletions
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
@ -294,73 +294,73 @@ Results of the model in various experiments on different datasets. \

 ```json
 {
-  "img_loading": 0.0418024,
-  "demosaicing": 0.000712412,
-  "avg_time_2d": 0.014805,
-  "avg_time_3d": 0.000291986,
-  "fps": 63.2536
+  "img_loading": 0.180589,
+  "demosaicing": 0.000695076,
+  "avg_time_2d": 0.0152607,
+  "avg_time_3d": 0.000150192,
+  "fps": 62.0888
 }
 {
  "triangulator_calls": 301,
-  "init_time": 6.44811e-06,
-  "undistort_time": 4.05236e-05,
-  "project_time": 3.04309e-06,
-  "match_time": 1.07992e-05,
-  "pairs_time": 6.09893e-06,
-  "pair_scoring_time": 6.0547e-05,
-  "grouping_time": 7.94037e-06,
-  "full_time": 5.80405e-05,
-  "merge_time": 1.34738e-05,
-  "post_time": 9.23313e-06,
-  "convert_time": 1.58432e-07,
-  "total_time": 0.000216593
+  "init_time": 3.53967e-06,
+  "undistort_time": 3.48582e-05,
+  "project_time": 2.18348e-06,
+  "match_time": 8.45481e-06,
+  "pairs_time": 4.53164e-06,
+  "pair_scoring_time": 3.10183e-05,
+  "grouping_time": 4.6499e-06,
+  "full_time": 3.33672e-05,
+  "merge_time": 1.02807e-05,
+  "post_time": 7.00402e-06,
+  "convert_time": 1.11306e-07,
+  "total_time": 0.000140236
 }
 {
  "person_nums": {
    "total_frames": 301,
    "total_labels": 477,
-    "total_preds": 828,
+    "total_preds": 829,
    "considered_empty": 0,
    "valid_preds": 477,
-    "invalid_preds": 351,
+    "invalid_preds": 352,
    "missing": 0,
-    "invalid_fraction": 0.42391,
-    "precision": 0.57609,
+    "invalid_fraction": 0.42461,
+    "precision": 0.57539,
    "recall": 1.0,
-    "f1": 0.73103,
-    "non_empty": 828
+    "f1": 0.73047,
+    "non_empty": 829
  },
  "mpjpe": {
    "count": 477,
-    "mean": 0.047978,
-    "median": 0.042546,
-    "std": 0.014958,
-    "sem": 0.000686,
-    "min": 0.03014,
-    "max": 0.12344,
+    "mean": 0.047984,
+    "median": 0.042648,
+    "std": 0.014812,
+    "sem": 0.000679,
+    "min": 0.03012,
+    "max": 0.116312,
    "recall-0.025": 0.0,
-    "recall-0.05": 0.698113,
-    "recall-0.1": 0.987421,
+    "recall-0.05": 0.70021,
+    "recall-0.1": 0.985325,
    "recall-0.15": 1.0,
    "recall-0.25": 1.0,
    "recall-0.5": 1.0,
    "num_labels": 477,
    "ap-0.025": 0.0,
-    "ap-0.05": 0.387605,
-    "ap-0.1": 0.730843,
-    "ap-0.15": 0.744044,
-    "ap-0.25": 0.744044,
-    "ap-0.5": 0.744044
+    "ap-0.05": 0.39114,
+    "ap-0.1": 0.735434,
+    "ap-0.15": 0.751482,
+    "ap-0.25": 0.751482,
+    "ap-0.5": 0.751482
  },
  "head": {
    "count": 477,
-    "mean": 0.054194,
-    "median": 0.050192,
-    "std": 0.024884,
-    "sem": 0.001141,
-    "min": 0.005604,
-    "max": 0.180414,
-    "recall-0.025": 0.083857,
+    "mean": 0.054212,
+    "median": 0.050157,
+    "std": 0.024854,
+    "sem": 0.001139,
+    "min": 0.005599,
+    "max": 0.180565,
+    "recall-0.025": 0.081761,
    "recall-0.05": 0.496855,
    "recall-0.1": 0.937107,
    "recall-0.15": 0.995807,
@ -370,15 +370,15 @@ Results of the model in various experiments on different datasets. \
  },
  "shoulder_left": {
    "count": 477,
-    "mean": 0.042406,
-    "median": 0.036996,
-    "std": 0.020494,
-    "sem": 0.000939,
-    "min": 0.004232,
-    "max": 0.136479,
-    "recall-0.025": 0.163522,
-    "recall-0.05": 0.72956,
-    "recall-0.1": 0.987421,
+    "mean": 0.042435,
+    "median": 0.03702,
+    "std": 0.02058,
+    "sem": 0.000943,
+    "min": 0.00431,
+    "max": 0.136587,
+    "recall-0.025": 0.161426,
+    "recall-0.05": 0.727463,
+    "recall-0.1": 0.985325,
    "recall-0.15": 1.0,
    "recall-0.25": 1.0,
    "recall-0.5": 1.0,
@ -386,14 +386,14 @@ Results of the model in various experiments on different datasets. \
  },
  "shoulder_right": {
    "count": 477,
-    "mean": 0.049613,
-    "median": 0.045991,
-    "std": 0.023012,
-    "sem": 0.001055,
-    "min": 0.00538,
-    "max": 0.147114,
+    "mean": 0.049634,
+    "median": 0.045795,
+    "std": 0.023121,
+    "sem": 0.00106,
+    "min": 0.00535,
+    "max": 0.14745,
    "recall-0.025": 0.100629,
-    "recall-0.05": 0.555556,
+    "recall-0.05": 0.559748,
    "recall-0.1": 0.955975,
    "recall-0.15": 1.0,
    "recall-0.25": 1.0,
@ -402,13 +402,13 @@ Results of the model in various experiments on different datasets. \
  },
  "elbow_left": {
    "count": 477,
-    "mean": 0.040782,
-    "median": 0.032111,
-    "std": 0.029225,
-    "sem": 0.00134,
-    "min": 0.003363,
-    "max": 0.326353,
-    "recall-0.025": 0.312369,
+    "mean": 0.040763,
+    "median": 0.032063,
+    "std": 0.029259,
+    "sem": 0.001341,
+    "min": 0.003449,
+    "max": 0.326227,
+    "recall-0.025": 0.316562,
    "recall-0.05": 0.756813,
    "recall-0.1": 0.953878,
    "recall-0.15": 0.997904,
@ -418,29 +418,29 @@ Results of the model in various experiments on different datasets. \
  },
  "elbow_right": {
    "count": 477,
-    "mean": 0.053348,
-    "median": 0.044418,
-    "std": 0.040864,
-    "sem": 0.001873,
-    "min": 0.003281,
-    "max": 0.243895,
-    "recall-0.025": 0.253669,
+    "mean": 0.053368,
+    "median": 0.045043,
+    "std": 0.040851,
+    "sem": 0.001872,
+    "min": 0.003529,
+    "max": 0.244051,
+    "recall-0.025": 0.255765,
    "recall-0.05": 0.561845,
    "recall-0.1": 0.901468,
-    "recall-0.15": 0.955975,
+    "recall-0.15": 0.958071,
    "recall-0.25": 1.0,
    "recall-0.5": 1.0,
    "num_labels": 477
  },
  "wrist_left": {
    "count": 477,
-    "mean": 0.060086,
-    "median": 0.053969,
-    "std": 0.038695,
-    "sem": 0.001774,
-    "min": 0.002109,
-    "max": 0.322924,
-    "recall-0.025": 0.129979,
+    "mean": 0.060002,
+    "median": 0.053953,
+    "std": 0.03861,
+    "sem": 0.00177,
+    "min": 0.002051,
+    "max": 0.322481,
+    "recall-0.025": 0.132075,
    "recall-0.05": 0.404612,
    "recall-0.1": 0.907757,
    "recall-0.15": 0.960168,
@ -450,31 +450,31 @@ Results of the model in various experiments on different datasets. \
  },
  "wrist_right": {
    "count": 477,
-    "mean": 0.05937,
-    "median": 0.054488,
-    "std": 0.034178,
-    "sem": 0.001567,
-    "min": 0.009831,
-    "max": 0.371597,
-    "recall-0.025": 0.109015,
-    "recall-0.05": 0.419287,
+    "mean": 0.059207,
+    "median": 0.054405,
+    "std": 0.033578,
+    "sem": 0.001539,
+    "min": 0.009618,
+    "max": 0.371667,
+    "recall-0.025": 0.115304,
+    "recall-0.05": 0.415094,
    "recall-0.1": 0.899371,
-    "recall-0.15": 0.979036,
+    "recall-0.15": 0.981132,
    "recall-0.25": 0.997904,
    "recall-0.5": 1.0,
    "num_labels": 477
  },
  "hip_left": {
    "count": 477,
-    "mean": 0.048082,
-    "median": 0.042309,
-    "std": 0.02636,
-    "sem": 0.001208,
-    "min": 0.006447,
-    "max": 0.14256,
-    "recall-0.025": 0.186583,
+    "mean": 0.047948,
+    "median": 0.042251,
+    "std": 0.026295,
+    "sem": 0.001205,
+    "min": 0.006475,
+    "max": 0.145903,
+    "recall-0.025": 0.188679,
    "recall-0.05": 0.618449,
-    "recall-0.1": 0.951782,
+    "recall-0.1": 0.953878,
    "recall-0.15": 1.0,
    "recall-0.25": 1.0,
    "recall-0.5": 1.0,
@ -482,15 +482,15 @@ Results of the model in various experiments on different datasets. \
  },
  "hip_right": {
    "count": 477,
-    "mean": 0.057967,
-    "median": 0.0564,
-    "std": 0.023728,
-    "sem": 0.001088,
-    "min": 0.00421,
-    "max": 0.132307,
+    "mean": 0.058483,
+    "median": 0.05753,
+    "std": 0.023762,
+    "sem": 0.001089,
+    "min": 0.005137,
+    "max": 0.132318,
    "recall-0.025": 0.098532,
-    "recall-0.05": 0.396226,
-    "recall-0.1": 0.9413,
+    "recall-0.05": 0.39413,
+    "recall-0.1": 0.943396,
    "recall-0.15": 1.0,
    "recall-0.25": 1.0,
    "recall-0.5": 1.0,
@ -498,13 +498,13 @@ Results of the model in various experiments on different datasets. \
  },
  "knee_left": {
    "count": 477,
-    "mean": 0.040396,
-    "median": 0.037751,
-    "std": 0.02446,
-    "sem": 0.001121,
-    "min": 0.004904,
-    "max": 0.190671,
-    "recall-0.025": 0.259958,
+    "mean": 0.040438,
+    "median": 0.03808,
+    "std": 0.024403,
+    "sem": 0.001118,
+    "min": 0.004928,
+    "max": 0.190069,
+    "recall-0.025": 0.257862,
    "recall-0.05": 0.748428,
    "recall-0.1": 0.974843,
    "recall-0.15": 0.989518,
@ -514,15 +514,15 @@ Results of the model in various experiments on different datasets. \
  },
  "knee_right": {
    "count": 477,
-    "mean": 0.04018,
-    "median": 0.036019,
-    "std": 0.023349,
-    "sem": 0.00107,
-    "min": 0.007466,
-    "max": 0.1848,
-    "recall-0.025": 0.312369,
-    "recall-0.05": 0.712788,
-    "recall-0.1": 0.974843,
+    "mean": 0.040168,
+    "median": 0.03623,
+    "std": 0.023114,
+    "sem": 0.001059,
+    "min": 0.00733,
+    "max": 0.184933,
+    "recall-0.025": 0.310273,
+    "recall-0.05": 0.708595,
+    "recall-0.1": 0.976939,
    "recall-0.15": 0.997904,
    "recall-0.25": 1.0,
    "recall-0.5": 1.0,
@ -530,14 +530,14 @@ Results of the model in various experiments on different datasets. \
  },
  "ankle_left": {
    "count": 477,
-    "mean": 0.036352,
-    "median": 0.027965,
-    "std": 0.030885,
-    "sem": 0.001416,
-    "min": 0.004731,
-    "max": 0.223705,
-    "recall-0.025": 0.429769,
-    "recall-0.05": 0.815514,
+    "mean": 0.036353,
+    "median": 0.028172,
+    "std": 0.030783,
+    "sem": 0.001411,
+    "min": 0.004787,
+    "max": 0.223747,
+    "recall-0.025": 0.433962,
+    "recall-0.05": 0.81761,
    "recall-0.1": 0.945493,
    "recall-0.15": 0.983229,
    "recall-0.25": 1.0,
@ -546,34 +546,34 @@ Results of the model in various experiments on different datasets. \
  },
  "ankle_right": {
    "count": 477,
-    "mean": 0.040931,
-    "median": 0.030927,
-    "std": 0.037943,
-    "sem": 0.001739,
-    "min": 0.003325,
-    "max": 0.272891,
+    "mean": 0.040777,
+    "median": 0.030897,
+    "std": 0.037254,
+    "sem": 0.001708,
+    "min": 0.003323,
+    "max": 0.27012,
    "recall-0.025": 0.303983,
    "recall-0.05": 0.802935,
    "recall-0.1": 0.930818,
    "recall-0.15": 0.968553,
-    "recall-0.25": 0.995807,
+    "recall-0.25": 0.997904,
    "recall-0.5": 1.0,
    "num_labels": 477
  },
  "joint_recalls": {
    "num_labels": 6201,
-    "recall-0.025": 0.20997,
-    "recall-0.05": 0.61571,
+    "recall-0.025": 0.21093,
+    "recall-0.05": 0.6149,
    "recall-0.1": 0.94275,
    "recall-0.15": 0.98645,
-    "recall-0.25": 0.99839,
+    "recall-0.25": 0.99871,
    "recall-0.5": 1.0
  }
 }
 {
  "total_parts": 6678,
-  "correct_parts": 6618,
-  "pcp": 0.991015
+  "correct_parts": 6619,
+  "pcp": 0.991165
 }
 ```

--- a/rpt/triangulator.cpp
+++ b/rpt/triangulator.cpp
@ -412,7 +412,6 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
    // Calculate pair scores
    std::vector<std::pair<std::vector<std::array<float, 4>>, float>> all_scored_poses;
    all_scored_poses.resize(all_pairs.size());
-    #pragma omp parallel for
    for (size_t i = 0; i < all_pairs.size(); ++i)
    {
        const auto &pids = all_pairs[i].first;
@ -479,7 +478,6 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
    // Calculate full 3D poses
    std::vector<std::vector<std::array<float, 4>>> all_full_poses;
    all_full_poses.resize(all_pairs.size());
-    #pragma omp parallel for
    for (size_t i = 0; i < all_pairs.size(); ++i)
    {
        const auto &pids = all_pairs[i].first;
@ -1010,6 +1008,155 @@ std::vector<float> TriangulatorInternal::score_projection(

 // =================================================================================================

+/* Invert a 3x3 matrix as adjugate / determinant.
+ * When the determinant magnitude is below 1e-6 the matrix is treated as singular
+ * and an all-zero matrix is returned instead. */
+std::array<std::array<float, 3>, 3> invert3x3(const std::array<std::array<float, 3>, 3> &M)
+{
+    // See: https://scicomp.stackexchange.com/a/29206
+
+    using Mat3 = std::array<std::array<float, 3>, 3>;
+
+    // Adjugate (transposed cofactor matrix), filled entry by entry
+    Mat3 cof{};
+    cof[0][0] = M[1][1] * M[2][2] - M[1][2] * M[2][1];
+    cof[0][1] = M[0][2] * M[2][1] - M[0][1] * M[2][2];
+    cof[0][2] = M[0][1] * M[1][2] - M[0][2] * M[1][1];
+    cof[1][0] = M[1][2] * M[2][0] - M[1][0] * M[2][2];
+    cof[1][1] = M[0][0] * M[2][2] - M[0][2] * M[2][0];
+    cof[1][2] = M[0][2] * M[1][0] - M[0][0] * M[1][2];
+    cof[2][0] = M[1][0] * M[2][1] - M[1][1] * M[2][0];
+    cof[2][1] = M[0][1] * M[2][0] - M[0][0] * M[2][1];
+    cof[2][2] = M[0][0] * M[1][1] - M[0][1] * M[1][0];
+
+    // Expansion along the first row of M, reusing the first adjugate column
+    const float determinant =
+        M[0][0] * cof[0][0] + M[0][1] * cof[1][0] + M[0][2] * cof[2][0];
+    if (std::fabs(determinant) < 1e-6f)
+    {
+        return Mat3{};
+    }
+
+    const float scale = 1.0f / determinant;
+    Mat3 result{};
+    for (size_t r = 0; r < 3; ++r)
+    {
+        for (size_t c = 0; c < 3; ++c)
+        {
+            result[r][c] = cof[r][c] * scale;
+        }
+    }
+    return result;
+}
+
+/* Return the transpose of a 3x3 matrix (rows and columns swapped). */
+std::array<std::array<float, 3>, 3> transpose3x3(const std::array<std::array<float, 3>, 3> &M)
+{
+    std::array<std::array<float, 3>, 3> flipped{};
+    for (size_t row = 0; row < 3; ++row)
+    {
+        for (size_t col = 0; col < 3; ++col)
+        {
+            flipped[row][col] = M[col][row];
+        }
+    }
+    return flipped;
+}
+
+/* Dot product of two 3-vectors. */
+float dot(const std::array<float, 3> &a, const std::array<float, 3> &b)
+{
+    // Accumulate left to right, in the same order as a[0]*b[0] + a[1]*b[1] + a[2]*b[2]
+    float sum = a[0] * b[0];
+    sum += a[1] * b[1];
+    sum += a[2] * b[2];
+    return sum;
+}
+/* Cross product a x b of two 3-vectors. */
+std::array<float, 3> cross(const std::array<float, 3> &a, const std::array<float, 3> &b)
+{
+    const float cx = a[1] * b[2] - a[2] * b[1];
+    const float cy = a[2] * b[0] - a[0] * b[2];
+    const float cz = a[0] * b[1] - a[1] * b[0];
+    return {cx, cy, cz};
+}
+/* Component-wise sum of two 3-vectors. */
+std::array<float, 3> add(const std::array<float, 3> &a, const std::array<float, 3> &b)
+{
+    std::array<float, 3> sum{};
+    for (size_t k = 0; k < 3; ++k)
+    {
+        sum[k] = a[k] + b[k];
+    }
+    return sum;
+}
+/* Component-wise difference a - b of two 3-vectors. */
+std::array<float, 3> subtract(const std::array<float, 3> &a, const std::array<float, 3> &b)
+{
+    std::array<float, 3> diff{};
+    for (size_t k = 0; k < 3; ++k)
+    {
+        diff[k] = a[k] - b[k];
+    }
+    return diff;
+}
+/* Scale every component of a 3-vector by the scalar s. */
+std::array<float, 3> multiply(const std::array<float, 3> &a, float s)
+{
+    std::array<float, 3> scaled{};
+    for (size_t k = 0; k < 3; ++k)
+    {
+        scaled[k] = a[k] * s;
+    }
+    return scaled;
+}
+/* Scale a 3-vector to unit length.
+ * Vectors whose length is below 1e-8 are returned unchanged to avoid dividing by ~0. */
+std::array<float, 3> normalize(const std::array<float, 3> &v)
+{
+    const float length = std::sqrt(dot(v, v));
+    const bool too_short = length < 1e-8f;
+    return too_short ? v : multiply(v, 1.0f / length);
+}
+/* Matrix-vector product M * v for a 3x3 matrix and a 3-vector. */
+std::array<float, 3> mat_mul_vec(
+    const std::array<std::array<float, 3>, 3> &M, const std::array<float, 3> &v)
+{
+    std::array<float, 3> product{};
+    for (size_t r = 0; r < 3; ++r)
+    {
+        // Each output component is the dot product of one matrix row with v
+        const std::array<float, 3> &row = M[r];
+        product[r] = row[0] * v[0] + row[1] * v[1] + row[2] * v[2];
+    }
+    return product;
+}
+
+/* Back-project a pixel into a ray: returns (camera center, ray direction).
+ *
+ * The translation is formed as t = R * (-T), the center as C = -Rᵀ * t, and the direction
+ * as Rᵀ * K⁻¹ * (u, v, 1) passed through normalize().
+ * NOTE(review): if R is orthonormal, C reduces to T itself - confirm whether the round trip
+ * through t is intentional. */
+std::tuple<std::array<float, 3>, std::array<float, 3>> calc_center_and_ray(
+    const CameraInternal &icam,
+    const std::array<float, 2> &pt)
+{
+    const auto &cam = icam.cam;
+    const auto rot_t = transpose3x3(cam.R);
+
+    // Translation: t = R * (-T)
+    const std::array<float, 3> raw_t = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
+    const std::array<float, 3> translation = mat_mul_vec(cam.R, multiply(raw_t, -1.0f));
+
+    // Camera center: C = -Rᵀ * t
+    const std::array<float, 3> center = multiply(mat_mul_vec(rot_t, translation), -1.0f);
+
+    // Ray direction: homogeneous pixel -> K⁻¹ -> Rᵀ -> normalize
+    const std::array<float, 3> pixel_h = {pt[0], pt[1], 1.0f};
+    const std::array<float, 3> unprojected = mat_mul_vec(invert3x3(cam.K), pixel_h);
+    const std::array<float, 3> direction = normalize(mat_mul_vec(rot_t, unprojected));
+
+    return std::make_tuple(center, direction);
+}
+
+/* Triangulate one 3D point from a pair of 2D observations in two cameras.
+ *
+ * Each observation is turned into a ray (camera center + direction) by calc_center_and_ray().
+ * The result is the midpoint of the shortest segment connecting the two rays.
+ *
+ * Degenerate case: when the rays are (numerically) parallel, or a direction is zero, the
+ * closest-point formula divides by zero and would yield NaNs. The first camera center is then
+ * paired with its orthogonal projection onto the second ray, so the result stays finite. */
+std::array<float, 3> triangulate_midpoint(
+    const CameraInternal &icam1,
+    const CameraInternal &icam2,
+    const std::array<float, 2> &pt1,
+    const std::array<float, 2> &pt2)
+{
+    // See: https://en.wikipedia.org/wiki/Skew_lines#Nearest_points
+
+    // Obtain the camera centers and ray directions for both views
+    auto [p1, d1] = calc_center_and_ray(icam1, pt1);
+    auto [p2, d2] = calc_center_and_ray(icam2, pt2);
+
+    // Compute the perpendicular plane vectors
+    const std::array<float, 3> n = cross(d1, d2);
+    const std::array<float, 3> n1 = cross(d1, n);
+    const std::array<float, 3> n2 = cross(d2, n);
+
+    // Both denominators have magnitude |d1 x d2|^2, so they vanish together for parallel rays
+    const float denom1 = dot(d1, n2);
+    const float denom2 = dot(d2, n1);
+    constexpr float min_denom = 1e-12f;
+
+    std::array<float, 3> c1 = p1;
+    std::array<float, 3> c2 = p2;
+    if (std::fabs(denom1) < min_denom || std::fabs(denom2) < min_denom)
+    {
+        // No unique closest pair: keep c1 = p1 and project it onto line 2.
+        // If d2 itself is zero the projection is undefined and c2 stays at p2.
+        const float d2_len_sq = dot(d2, d2);
+        if (d2_len_sq > 0.0f)
+        {
+            const float s = dot(subtract(p1, p2), d2) / d2_len_sq;
+            c2 = add(p2, multiply(d2, s));
+        }
+    }
+    else
+    {
+        // Calculate point on Line 1 nearest to Line 2
+        const float t1 = dot(subtract(p2, p1), n2) / denom1;
+        c1 = add(p1, multiply(d1, t1));
+
+        // Calculate point on Line 2 nearest to Line 1
+        const float t2 = dot(subtract(p1, p2), n1) / denom2;
+        c2 = add(p2, multiply(d2, t2));
+    }
+
+    // Compute midpoint between c1 and c2.
+    return multiply(add(c1, c2), 0.5f);
+}
+
+// =================================================================================================
+
 std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triangulate_and_score(
    const std::vector<std::array<float, 3>> &pose1,
    const std::vector<std::array<float, 3>> &pose2,
@ -1052,63 +1199,18 @@ std::pair<std::vector<std::array<float, 4>>, float> TriangulatorInternal::triang
        return std::make_pair(empty, score);
    }

-    // Extract coordinates of visible joints
-    std::vector<std::array<float, 2>> points1;
-    std::vector<std::array<float, 2>> points2;
-    points1.reserve(num_visible);
-    points2.reserve(num_visible);
-    for (size_t i = 0; i < num_joints; ++i)
-    {
-        if (mask[i])
-        {
-            points1.push_back({pose1[i][0], pose1[i][1]});
-            points2.push_back({pose2[i][0], pose2[i][1]});
-        }
-    }
-
-    // Convert vectors to mats
-    cv::Mat points1_mat(2, num_visible, CV_32F);
-    cv::Mat points2_mat(2, num_visible, CV_32F);
-    float *p1_ptr = points1_mat.ptr<float>(0);
-    float *p2_ptr = points2_mat.ptr<float>(0);
-    for (int i = 0; i < num_visible; ++i)
-    {
-        p1_ptr[i + 0 * num_visible] = points1[i][0];
-        p1_ptr[i + 1 * num_visible] = points1[i][1];
-        p2_ptr[i + 0 * num_visible] = points2[i][0];
-        p2_ptr[i + 1 * num_visible] = points2[i][1];
-    }
-
-    // Triangulate points
-    cv::Mat points4d_h;
-    cv::triangulatePoints(cam1.P, cam2.P, points1_mat, points2_mat, points4d_h);
-
-    // Convert homogeneous coordinates to 3D
-    std::vector<std::array<float, 3>> points_3d;
-    points_3d.reserve(num_visible);
-    const float *p4_ptr = points4d_h.ptr<float>(0);
-    for (int i = 0; i < points4d_h.cols; ++i)
-    {
-        float w = p4_ptr[i + 3 * num_visible];
-        std::array<float, 3> pt = {
-            p4_ptr[i + 0 * num_visible] / w,
-            p4_ptr[i + 1 * num_visible] / w,
-            p4_ptr[i + 2 * num_visible] / w};
-        points_3d.push_back(std::move(pt));
-    }
-
-    // Create the 3D pose
+    // Use midpoint triangulation instead of cv::triangulatePoints because it is much faster,
+    // while having almost the same accuracy.
    std::vector<std::array<float, 4>> pose3d(num_joints, {0.0, 0.0, 0.0, 0.0});
-    int idx = 0;
    for (size_t i = 0; i < num_joints; ++i)
    {
        if (mask[i])
        {
-            pose3d[i][0] = points_3d[idx][0];
-            pose3d[i][1] = points_3d[idx][1];
-            pose3d[i][2] = points_3d[idx][2];
-            pose3d[i][3] = 1.0;
-            ++idx;
+            auto &pt1 = pose1[i];
+            auto &pt2 = pose2[i];
+            std::array<float, 3> pt3d = triangulate_midpoint(
+                cam1, cam2, {pt1[0], pt1[1]}, {pt2[0], pt2[1]});
+            pose3d[i] = {pt3d[0], pt3d[1], pt3d[2], 1.0};
        }
    }