Faster camera matrix undistortion.

2025-03-03 11:01:50 +01:00
parent 2e1d401dd4
commit 7b99a38ce2
3 changed files with 81 additions and 77 deletions
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
@@ -294,26 +294,26 @@ Results of the model in various experiments on different datasets. \

 ```json
 {
-  "img_loading": 0.0424103,
-  "demosaicing": 0.000724716,
-  "avg_time_2d": 0.01494,
-  "avg_time_3d": 0.000128772,
-  "fps": 63.3173
+  "img_loading": 0.0419109,
+  "demosaicing": 0.000697378,
+  "avg_time_2d": 0.0148942,
+  "avg_time_3d": 0.000115663,
+  "fps": 63.6649
 }
 {
  "triangulator_calls": 301,
-  "init_time": 1.60891e-06,
-  "undistort_time": 2.57178e-05,
-  "project_time": 2.22848e-06,
-  "match_time": 8.41567e-06,
-  "pairs_time": 4.53139e-06,
-  "pair_scoring_time": 2.67118e-05,
-  "grouping_time": 4.63213e-06,
-  "full_time": 2.72313e-05,
-  "merge_time": 1.03292e-05,
-  "post_time": 7.36791e-06,
-  "convert_time": 1.27439e-07,
-  "total_time": 0.00011914
+  "init_time": 2.90153e-06,
+  "undistort_time": 1.41506e-05,
+  "project_time": 2.27745e-06,
+  "match_time": 8.70995e-06,
+  "pairs_time": 4.43637e-06,
+  "pair_scoring_time": 2.61534e-05,
+  "grouping_time": 4.53385e-06,
+  "full_time": 2.64184e-05,
+  "merge_time": 1.0161e-05,
+  "post_time": 6.56943e-06,
+  "convert_time": 1.31748e-07,
+  "total_time": 0.000106678
 }
 {
  "person_nums": {
@@ -332,10 +332,10 @@ Results of the model in various experiments on different datasets. \
  },
  "mpjpe": {
    "count": 477,
-    "mean": 0.047983,
+    "mean": 0.048001,
    "median": 0.042569,
-    "std": 0.01486,
-    "sem": 0.000681,
+    "std": 0.014925,
+    "sem": 0.000684,
    "min": 0.03012,
    "max": 0.116311,
    "recall-0.025": 0.0,
@@ -346,11 +346,11 @@ Results of the model in various experiments on different datasets. \
    "recall-0.5": 1.0,
    "num_labels": 477,
    "ap-0.025": 0.0,
-    "ap-0.05": 0.389102,
-    "ap-0.1": 0.729848,
-    "ap-0.15": 0.747198,
-    "ap-0.25": 0.747198,
-    "ap-0.5": 0.747198
+    "ap-0.05": 0.389107,
+    "ap-0.1": 0.731078,
+    "ap-0.15": 0.74682,
+    "ap-0.25": 0.74682,
+    "ap-0.5": 0.74682
  },
  "head": {
    "count": 477,
@@ -390,7 +390,7 @@ Results of the model in various experiments on different datasets. \
    "median": 0.045796,
    "std": 0.02312,
    "sem": 0.00106,
-    "min": 0.005349,
+    "min": 0.005348,
    "max": 0.147448,
    "recall-0.025": 0.100629,
    "recall-0.05": 0.561845,
@@ -450,26 +450,26 @@ Results of the model in various experiments on different datasets. \
  },
  "wrist_right": {
    "count": 477,
-    "mean": 0.059177,
+    "mean": 0.059427,
    "median": 0.054405,
-    "std": 0.033566,
-    "sem": 0.001538,
+    "std": 0.034224,
+    "sem": 0.001569,
    "min": 0.009618,
    "max": 0.371666,
-    "recall-0.025": 0.115304,
+    "recall-0.025": 0.113208,
    "recall-0.05": 0.415094,
    "recall-0.1": 0.899371,
-    "recall-0.15": 0.981132,
+    "recall-0.15": 0.979036,
    "recall-0.25": 0.997904,
    "recall-0.5": 1.0,
    "num_labels": 477
  },
  "hip_left": {
    "count": 477,
-    "mean": 0.048042,
+    "mean": 0.048028,
    "median": 0.042252,
-    "std": 0.026486,
-    "sem": 0.001214,
+    "std": 0.026459,
+    "sem": 0.001213,
    "min": 0.006475,
    "max": 0.145904,
    "recall-0.025": 0.190776,
@@ -562,7 +562,7 @@ Results of the model in various experiments on different datasets. \
  },
  "joint_recalls": {
    "num_labels": 6201,
-    "recall-0.025": 0.21158,
+    "recall-0.025": 0.21142,
    "recall-0.05": 0.61538,
    "recall-0.1": 0.94275,
    "recall-0.15": 0.98645,
--- a/rpt/triangulator.cpp
+++ b/rpt/triangulator.cpp
@@ -85,13 +85,23 @@
 CameraInternal::CameraInternal(const Camera &cam)
 {
    this->cam = cam;
-
-    this->invK = invert3x3(cam.K);
    this->invR = transpose3x3(cam.R);

    // Camera center:
    // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
    this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
+
+    // Undistort camera matrix
+    // As with the undistortion, the own implementation avoids some overhead compared to OpenCV
+    if (cam.type == "fisheye")
+    {
+        newK = calc_optimal_camera_matrix_fisheye(1.0, {cam.width, cam.height});
+    }
+    else
+    {
+        newK = calc_optimal_camera_matrix_pinhole(1.0, {cam.width, cam.height});
+    }
+    this->invK = invert3x3(newK);
 }

 // =================================================================================================
@@ -366,13 +376,19 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
    }

    // Define key points
-    const size_t N = 9;
+    // Calculate only the contour points of the image, and use fewer points;
+    // the corners and edge midpoints should be enough if the camera has no strange distortions
+    const size_t N = 3;
    std::vector<std ::array<float, 2>> pts;
-    pts.reserve(N * N);
+    pts.reserve(4 * (N - 1));
    for (size_t y = 0; y < N; ++y)
    {
        for (size_t x = 0; x < N; ++x)
        {
+            if (x != 0 && x != N - 1 && y != 0 && y != N - 1)
+            {
+                continue;
+            }
            pts.push_back({x * (w - 1) / (N - 1), y * (h - 1) / (N - 1)});
        }
    }
@@ -406,24 +422,36 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
    {
        for (size_t x = 0; x < N; ++x)
        {
+            if (x != 0 && x != N - 1 && y != 0 && y != N - 1)
+            {
+                continue;
+            }
+
            auto &pt = pts[k];
            k += 1;

-            oX0 = std::min(oX0, pt[0]);
-            oX1 = std::max(oX1, pt[0]);
-            oY0 = std::min(oY0, pt[1]);
-            oY1 = std::max(oY1, pt[1]);
-
            if (x == 0)
+            {
+                oX0 = std::min(oX0, pt[0]);
                iX0 = std::max(iX0, pt[0]);
+            }
            if (x == N - 1)
+            {
+                oX1 = std::max(oX1, pt[0]);
                iX1 = std::min(iX1, pt[0]);
+            }
            if (y == 0)
+            {
+                oY0 = std::min(oY0, pt[1]);
                iY0 = std::max(iY0, pt[1]);
+            }
            if (y == N - 1)
+            {
+                oY1 = std::max(oY1, pt[1]);
                iY1 = std::min(iY1, pt[1]);
            }
        }
+    }
    float inner_width = iX1 - iX0;
    float inner_height = iY1 - iY0;
    float outer_width = oX1 - oX0;
@@ -916,29 +944,14 @@ void TriangulatorInternal::print_stats()
 void TriangulatorInternal::undistort_poses(
    std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam)
 {
-    int width = icam.cam.width;
-    int height = icam.cam.height;
-
-    // Undistort camera matrix
-    // As with the undistortion, the own implementation avoids some overhead compared to OpenCV
-    std::array<std::array<float, 3>, 3> newK;
-    if (icam.cam.type == "fisheye")
-    {
-        newK = icam.calc_optimal_camera_matrix_fisheye(1.0, {width, height});
-    }
-    else
-    {
-        newK = icam.calc_optimal_camera_matrix_pinhole(1.0, {width, height});
-    }
-
    float ifx_old = 1.0 / icam.cam.K[0][0];
    float ify_old = 1.0 / icam.cam.K[1][1];
    float cx_old = icam.cam.K[0][2];
    float cy_old = icam.cam.K[1][2];
-    float fx_new = newK[0][0];
-    float fy_new = newK[1][1];
-    float cx_new = newK[0][2];
-    float cy_new = newK[1][2];
+    float fx_new = icam.newK[0][0];
+    float fy_new = icam.newK[1][1];
+    float cx_new = icam.newK[0][2];
+    float cy_new = icam.newK[1][2];

    // Undistort all the points
    size_t num_persons = poses_2d.size();
@@ -971,6 +984,8 @@ void TriangulatorInternal::undistort_poses(
    }

    // Mask out points that are far outside the image (points slightly outside are still valid)
+    int width = icam.cam.width;
+    int height = icam.cam.height;
    float mask_offset = (width + height) / 20.0;
    for (size_t i = 0; i < num_persons; ++i)
    {
@@ -985,18 +1000,6 @@ void TriangulatorInternal::undistort_poses(
            }
        }
    }
-
-    // Update the camera intrinsics
-    icam.cam.K = newK;
-    icam.invK = CameraInternal::invert3x3(newK);
-    if (icam.cam.type == "fisheye")
-    {
-        icam.cam.DC = {0.0, 0.0, 0.0, 0.0};
-    }
-    else
-    {
-        icam.cam.DC = {0.0, 0.0, 0.0, 0.0, 0.0};
-    }
 }

 // =================================================================================================
@@ -1017,7 +1020,7 @@ TriangulatorInternal::project_poses(
    all_dists.resize(num_persons);

    // Get camera parameters
-    const std::array<std::array<float, 3>, 3> &K = icam.cam.K;
+    const std::array<std::array<float, 3>, 3> &K = icam.newK;
    const std::array<std::array<float, 3>, 3> &R = icam.cam.R;
    const std::array<std::array<float, 1>, 3> &T = icam.cam.T;

--- a/rpt/triangulator.hpp
+++ b/rpt/triangulator.hpp
@@ -15,9 +15,10 @@ public:

    Camera cam;

-    std::array<std::array<float, 3>, 3> invK;
    std::array<std::array<float, 3>, 3> invR;
    std::array<float, 3> center;
+    std::array<std::array<float, 3>, 3> newK;
+    std::array<std::array<float, 3>, 3> invK;

    static std::array<std::array<float, 3>, 3> transpose3x3(
        const std::array<std::array<float, 3>, 3> &M);