diff --git a/media/RESULTS.md b/media/RESULTS.md index d7d92e7..ca2fd90 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -294,26 +294,26 @@ Results of the model in various experiments on different datasets. \ ```json { - "img_loading": 0.0424103, - "demosaicing": 0.000724716, - "avg_time_2d": 0.01494, - "avg_time_3d": 0.000128772, - "fps": 63.3173 + "img_loading": 0.0419109, + "demosaicing": 0.000697378, + "avg_time_2d": 0.0148942, + "avg_time_3d": 0.000115663, + "fps": 63.6649 } { "triangulator_calls": 301, - "init_time": 1.60891e-06, - "undistort_time": 2.57178e-05, - "project_time": 2.22848e-06, - "match_time": 8.41567e-06, - "pairs_time": 4.53139e-06, - "pair_scoring_time": 2.67118e-05, - "grouping_time": 4.63213e-06, - "full_time": 2.72313e-05, - "merge_time": 1.03292e-05, - "post_time": 7.36791e-06, - "convert_time": 1.27439e-07, - "total_time": 0.00011914 + "init_time": 2.90153e-06, + "undistort_time": 1.41506e-05, + "project_time": 2.27745e-06, + "match_time": 8.70995e-06, + "pairs_time": 4.43637e-06, + "pair_scoring_time": 2.61534e-05, + "grouping_time": 4.53385e-06, + "full_time": 2.64184e-05, + "merge_time": 1.0161e-05, + "post_time": 6.56943e-06, + "convert_time": 1.31748e-07, + "total_time": 0.000106678 } { "person_nums": { @@ -332,10 +332,10 @@ Results of the model in various experiments on different datasets. \ }, "mpjpe": { "count": 477, - "mean": 0.047983, + "mean": 0.048001, "median": 0.042569, - "std": 0.01486, - "sem": 0.000681, + "std": 0.014925, + "sem": 0.000684, "min": 0.03012, "max": 0.116311, "recall-0.025": 0.0, @@ -346,11 +346,11 @@ Results of the model in various experiments on different datasets. \ "recall-0.5": 1.0, "num_labels": 477, "ap-0.025": 0.0, - "ap-0.05": 0.389102, - "ap-0.1": 0.729848, - "ap-0.15": 0.747198, - "ap-0.25": 0.747198, - "ap-0.5": 0.747198 + "ap-0.05": 0.389107, + "ap-0.1": 0.731078, + "ap-0.15": 0.74682, + "ap-0.25": 0.74682, + "ap-0.5": 0.74682 }, "head": { "count": 477, @@ -390,7 +390,7 @@ Results of the model in various experiments on different datasets. \ "median": 0.045796, "std": 0.02312, "sem": 0.00106, - "min": 0.005349, + "min": 0.005348, "max": 0.147448, "recall-0.025": 0.100629, "recall-0.05": 0.561845, @@ -450,26 +450,26 @@ Results of the model in various experiments on different datasets. \ }, "wrist_right": { "count": 477, - "mean": 0.059177, + "mean": 0.059427, "median": 0.054405, - "std": 0.033566, - "sem": 0.001538, + "std": 0.034224, + "sem": 0.001569, "min": 0.009618, "max": 0.371666, - "recall-0.025": 0.115304, + "recall-0.025": 0.113208, "recall-0.05": 0.415094, "recall-0.1": 0.899371, - "recall-0.15": 0.981132, + "recall-0.15": 0.979036, "recall-0.25": 0.997904, "recall-0.5": 1.0, "num_labels": 477 }, "hip_left": { "count": 477, - "mean": 0.048042, + "mean": 0.048028, "median": 0.042252, - "std": 0.026486, - "sem": 0.001214, + "std": 0.026459, + "sem": 0.001213, "min": 0.006475, "max": 0.145904, "recall-0.025": 0.190776, @@ -562,7 +562,7 @@ Results of the model in various experiments on different datasets. \ }, "joint_recalls": { "num_labels": 6201, - "recall-0.025": 0.21158, + "recall-0.025": 0.21142, "recall-0.05": 0.61538, "recall-0.1": 0.94275, "recall-0.15": 0.98645, diff --git a/rpt/triangulator.cpp b/rpt/triangulator.cpp index 56581f1..81a89b1 100644 --- a/rpt/triangulator.cpp +++ b/rpt/triangulator.cpp @@ -85,13 +85,23 @@ CameraInternal::CameraInternal(const Camera &cam) { this->cam = cam; - - this->invK = invert3x3(cam.K); this->invR = transpose3x3(cam.R); // Camera center: // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]}; + + // Undistort camera matrix + // As with the undistortion, the own implementation avoids some overhead compared to OpenCV + if (cam.type == "fisheye") + { + newK = calc_optimal_camera_matrix_fisheye(1.0, {cam.width, cam.height}); + } + else + { + newK = calc_optimal_camera_matrix_pinhole(1.0, {cam.width, cam.height}); + } + this->invK = invert3x3(newK); } // ================================================================================================= @@ -366,13 +376,19 @@ std::array, 3> CameraInternal::calc_optimal_camera_matrix_p } // Define key points - const size_t N = 9; + // Calculate only the contour points of the image, and use less points, + // the edges and centers should be enough if the camera has no strange distortions + const size_t N = 3; std::vector> pts; - pts.reserve(N * N); + pts.reserve(4 * (N - 1)); for (size_t y = 0; y < N; ++y) { for (size_t x = 0; x < N; ++x) { + if (x != 0 && x != N - 1 && y != 0 && y != N - 1) + { + continue; + } pts.push_back({x * (w - 1) / (N - 1), y * (h - 1) / (N - 1)}); } } @@ -406,22 +422,34 @@ std::array, 3> CameraInternal::calc_optimal_camera_matrix_p { for (size_t x = 0; x < N; ++x) { + if (x != 0 && x != N - 1 && y != 0 && y != N - 1) + { + continue; + } + auto &pt = pts[k]; k += 1; - oX0 = std::min(oX0, pt[0]); - oX1 = std::max(oX1, pt[0]); - oY0 = std::min(oY0, pt[1]); - oY1 = std::max(oY1, pt[1]); - if (x == 0) + { + oX0 = std::min(oX0, pt[0]); iX0 = std::max(iX0, pt[0]); + } if (x == N - 1) + { + oX1 = std::max(oX1, pt[0]); iX1 = std::min(iX1, pt[0]); + } if (y == 0) + { + oY0 = std::min(oY0, pt[1]); iY0 = std::max(iY0, pt[1]); + } if (y == N - 1) + { + oY1 = std::max(oY1, pt[1]); iY1 = std::min(iY1, pt[1]); + } } } float inner_width = iX1 - iX0; @@ -916,29 +944,14 @@ void TriangulatorInternal::print_stats() void TriangulatorInternal::undistort_poses( std::vector>> &poses_2d, CameraInternal &icam) { - int width = icam.cam.width; - int height = icam.cam.height; - - // Undistort camera matrix - // As with the undistortion, the own implementation avoids some overhead compared to OpenCV - std::array, 3> newK; - if (icam.cam.type == "fisheye") - { - newK = icam.calc_optimal_camera_matrix_fisheye(1.0, {width, height}); - } - else - { - newK = icam.calc_optimal_camera_matrix_pinhole(1.0, {width, height}); - } - float ifx_old = 1.0 / icam.cam.K[0][0]; float ify_old = 1.0 / icam.cam.K[1][1]; float cx_old = icam.cam.K[0][2]; float cy_old = icam.cam.K[1][2]; - float fx_new = newK[0][0]; - float fy_new = newK[1][1]; - float cx_new = newK[0][2]; - float cy_new = newK[1][2]; + float fx_new = icam.newK[0][0]; + float fy_new = icam.newK[1][1]; + float cx_new = icam.newK[0][2]; + float cy_new = icam.newK[1][2]; // Undistort all the points size_t num_persons = poses_2d.size(); @@ -971,6 +984,8 @@ void TriangulatorInternal::undistort_poses( } // Mask out points that are far outside the image (points slightly outside are still valid) + int width = icam.cam.width; + int height = icam.cam.height; float mask_offset = (width + height) / 20.0; for (size_t i = 0; i < num_persons; ++i) { @@ -985,18 +1000,6 @@ void TriangulatorInternal::undistort_poses( } } } - - // Update the camera intrinsics - icam.cam.K = newK; - icam.invK = CameraInternal::invert3x3(newK); - if (icam.cam.type == "fisheye") - { - icam.cam.DC = {0.0, 0.0, 0.0, 0.0}; - } - else - { - icam.cam.DC = {0.0, 0.0, 0.0, 0.0, 0.0}; - } } // ================================================================================================= @@ -1017,7 +1020,7 @@ TriangulatorInternal::project_poses( all_dists.resize(num_persons); // Get camera parameters - const std::array, 3> &K = icam.cam.K; + const std::array, 3> &K = icam.newK; const std::array, 3> &R = icam.cam.R; const std::array, 3> &T = icam.cam.T; diff --git a/rpt/triangulator.hpp b/rpt/triangulator.hpp index 88f6a53..62c3615 100644 --- a/rpt/triangulator.hpp +++ b/rpt/triangulator.hpp @@ -15,9 +15,10 @@ public: Camera cam; - std::array, 3> invK; std::array, 3> invR; std::array center; + std::array, 3> newK; + std::array, 3> invK; static std::array, 3> transpose3x3( const std::array, 3> &M);