diff --git a/media/RESULTS.md b/media/RESULTS.md index 09d8759..b2062a5 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -294,73 +294,73 @@ Results of the model in various experiments on different datasets. \ ```json { - "img_loading": 0.0418024, - "demosaicing": 0.000712412, - "avg_time_2d": 0.014805, - "avg_time_3d": 0.000291986, - "fps": 63.2536 + "img_loading": 0.180589, + "demosaicing": 0.000695076, + "avg_time_2d": 0.0152607, + "avg_time_3d": 0.000150192, + "fps": 62.0888 } { "triangulator_calls": 301, - "init_time": 6.44811e-06, - "undistort_time": 4.05236e-05, - "project_time": 3.04309e-06, - "match_time": 1.07992e-05, - "pairs_time": 6.09893e-06, - "pair_scoring_time": 6.0547e-05, - "grouping_time": 7.94037e-06, - "full_time": 5.80405e-05, - "merge_time": 1.34738e-05, - "post_time": 9.23313e-06, - "convert_time": 1.58432e-07, - "total_time": 0.000216593 + "init_time": 3.53967e-06, + "undistort_time": 3.48582e-05, + "project_time": 2.18348e-06, + "match_time": 8.45481e-06, + "pairs_time": 4.53164e-06, + "pair_scoring_time": 3.10183e-05, + "grouping_time": 4.6499e-06, + "full_time": 3.33672e-05, + "merge_time": 1.02807e-05, + "post_time": 7.00402e-06, + "convert_time": 1.11306e-07, + "total_time": 0.000140236 } { "person_nums": { "total_frames": 301, "total_labels": 477, - "total_preds": 828, + "total_preds": 829, "considered_empty": 0, "valid_preds": 477, - "invalid_preds": 351, + "invalid_preds": 352, "missing": 0, - "invalid_fraction": 0.42391, - "precision": 0.57609, + "invalid_fraction": 0.42461, + "precision": 0.57539, "recall": 1.0, - "f1": 0.73103, - "non_empty": 828 + "f1": 0.73047, + "non_empty": 829 }, "mpjpe": { "count": 477, - "mean": 0.047978, - "median": 0.042546, - "std": 0.014958, - "sem": 0.000686, - "min": 0.03014, - "max": 0.12344, + "mean": 0.047984, + "median": 0.042648, + "std": 0.014812, + "sem": 0.000679, + "min": 0.03012, + "max": 0.116312, "recall-0.025": 0.0, - "recall-0.05": 0.698113, - "recall-0.1": 0.987421, + "recall-0.05": 0.70021, + "recall-0.1": 0.985325, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 477, "ap-0.025": 0.0, - "ap-0.05": 0.387605, - "ap-0.1": 0.730843, - "ap-0.15": 0.744044, - "ap-0.25": 0.744044, - "ap-0.5": 0.744044 + "ap-0.05": 0.39114, + "ap-0.1": 0.735434, + "ap-0.15": 0.751482, + "ap-0.25": 0.751482, + "ap-0.5": 0.751482 }, "head": { "count": 477, - "mean": 0.054194, - "median": 0.050192, - "std": 0.024884, - "sem": 0.001141, - "min": 0.005604, - "max": 0.180414, - "recall-0.025": 0.083857, + "mean": 0.054212, + "median": 0.050157, + "std": 0.024854, + "sem": 0.001139, + "min": 0.005599, + "max": 0.180565, + "recall-0.025": 0.081761, "recall-0.05": 0.496855, "recall-0.1": 0.937107, "recall-0.15": 0.995807, @@ -370,15 +370,15 @@ Results of the model in various experiments on different datasets. \ }, "shoulder_left": { "count": 477, - "mean": 0.042406, - "median": 0.036996, - "std": 0.020494, - "sem": 0.000939, - "min": 0.004232, - "max": 0.136479, - "recall-0.025": 0.163522, - "recall-0.05": 0.72956, - "recall-0.1": 0.987421, + "mean": 0.042435, + "median": 0.03702, + "std": 0.02058, + "sem": 0.000943, + "min": 0.00431, + "max": 0.136587, + "recall-0.025": 0.161426, + "recall-0.05": 0.727463, + "recall-0.1": 0.985325, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -386,14 +386,14 @@ Results of the model in various experiments on different datasets. \ }, "shoulder_right": { "count": 477, - "mean": 0.049613, - "median": 0.045991, - "std": 0.023012, - "sem": 0.001055, - "min": 0.00538, - "max": 0.147114, + "mean": 0.049634, + "median": 0.045795, + "std": 0.023121, + "sem": 0.00106, + "min": 0.00535, + "max": 0.14745, "recall-0.025": 0.100629, - "recall-0.05": 0.555556, + "recall-0.05": 0.559748, "recall-0.1": 0.955975, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -402,13 +402,13 @@ Results of the model in various experiments on different datasets. \ }, "elbow_left": { "count": 477, - "mean": 0.040782, - "median": 0.032111, - "std": 0.029225, - "sem": 0.00134, - "min": 0.003363, - "max": 0.326353, - "recall-0.025": 0.312369, + "mean": 0.040763, + "median": 0.032063, + "std": 0.029259, + "sem": 0.001341, + "min": 0.003449, + "max": 0.326227, + "recall-0.025": 0.316562, "recall-0.05": 0.756813, "recall-0.1": 0.953878, "recall-0.15": 0.997904, @@ -418,29 +418,29 @@ Results of the model in various experiments on different datasets. \ }, "elbow_right": { "count": 477, - "mean": 0.053348, - "median": 0.044418, - "std": 0.040864, - "sem": 0.001873, - "min": 0.003281, - "max": 0.243895, - "recall-0.025": 0.253669, + "mean": 0.053368, + "median": 0.045043, + "std": 0.040851, + "sem": 0.001872, + "min": 0.003529, + "max": 0.244051, + "recall-0.025": 0.255765, "recall-0.05": 0.561845, "recall-0.1": 0.901468, - "recall-0.15": 0.955975, + "recall-0.15": 0.958071, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 477 }, "wrist_left": { "count": 477, - "mean": 0.060086, - "median": 0.053969, - "std": 0.038695, - "sem": 0.001774, - "min": 0.002109, - "max": 0.322924, - "recall-0.025": 0.129979, + "mean": 0.060002, + "median": 0.053953, + "std": 0.03861, + "sem": 0.00177, + "min": 0.002051, + "max": 0.322481, + "recall-0.025": 0.132075, "recall-0.05": 0.404612, "recall-0.1": 0.907757, "recall-0.15": 0.960168, @@ -450,31 +450,31 @@ Results of the model in various experiments on different datasets. \ }, "wrist_right": { "count": 477, - "mean": 0.05937, - "median": 0.054488, - "std": 0.034178, - "sem": 0.001567, - "min": 0.009831, - "max": 0.371597, - "recall-0.025": 0.109015, - "recall-0.05": 0.419287, + "mean": 0.059207, + "median": 0.054405, + "std": 0.033578, + "sem": 0.001539, + "min": 0.009618, + "max": 0.371667, + "recall-0.025": 0.115304, + "recall-0.05": 0.415094, "recall-0.1": 0.899371, - "recall-0.15": 0.979036, + "recall-0.15": 0.981132, "recall-0.25": 0.997904, "recall-0.5": 1.0, "num_labels": 477 }, "hip_left": { "count": 477, - "mean": 0.048082, - "median": 0.042309, - "std": 0.02636, - "sem": 0.001208, - "min": 0.006447, - "max": 0.14256, - "recall-0.025": 0.186583, + "mean": 0.047948, + "median": 0.042251, + "std": 0.026295, + "sem": 0.001205, + "min": 0.006475, + "max": 0.145903, + "recall-0.025": 0.188679, "recall-0.05": 0.618449, - "recall-0.1": 0.951782, + "recall-0.1": 0.953878, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -482,15 +482,15 @@ Results of the model in various experiments on different datasets. \ }, "hip_right": { "count": 477, - "mean": 0.057967, - "median": 0.0564, - "std": 0.023728, - "sem": 0.001088, - "min": 0.00421, - "max": 0.132307, + "mean": 0.058483, + "median": 0.05753, + "std": 0.023762, + "sem": 0.001089, + "min": 0.005137, + "max": 0.132318, "recall-0.025": 0.098532, - "recall-0.05": 0.396226, - "recall-0.1": 0.9413, + "recall-0.05": 0.39413, + "recall-0.1": 0.943396, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -498,13 +498,13 @@ Results of the model in various experiments on different datasets. \ }, "knee_left": { "count": 477, - "mean": 0.040396, - "median": 0.037751, - "std": 0.02446, - "sem": 0.001121, - "min": 0.004904, - "max": 0.190671, - "recall-0.025": 0.259958, + "mean": 0.040438, + "median": 0.03808, + "std": 0.024403, + "sem": 0.001118, + "min": 0.004928, + "max": 0.190069, + "recall-0.025": 0.257862, "recall-0.05": 0.748428, "recall-0.1": 0.974843, "recall-0.15": 0.989518, @@ -514,15 +514,15 @@ Results of the model in various experiments on different datasets. \ }, "knee_right": { "count": 477, - "mean": 0.04018, - "median": 0.036019, - "std": 0.023349, - "sem": 0.00107, - "min": 0.007466, - "max": 0.1848, - "recall-0.025": 0.312369, - "recall-0.05": 0.712788, - "recall-0.1": 0.974843, + "mean": 0.040168, + "median": 0.03623, + "std": 0.023114, + "sem": 0.001059, + "min": 0.00733, + "max": 0.184933, + "recall-0.025": 0.310273, + "recall-0.05": 0.708595, + "recall-0.1": 0.976939, "recall-0.15": 0.997904, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -530,14 +530,14 @@ Results of the model in various experiments on different datasets. \ }, "ankle_left": { "count": 477, - "mean": 0.036352, - "median": 0.027965, - "std": 0.030885, - "sem": 0.001416, - "min": 0.004731, - "max": 0.223705, - "recall-0.025": 0.429769, - "recall-0.05": 0.815514, + "mean": 0.036353, + "median": 0.028172, + "std": 0.030783, + "sem": 0.001411, + "min": 0.004787, + "max": 0.223747, + "recall-0.025": 0.433962, + "recall-0.05": 0.81761, "recall-0.1": 0.945493, "recall-0.15": 0.983229, "recall-0.25": 1.0, @@ -546,34 +546,34 @@ Results of the model in various experiments on different datasets. \ }, "ankle_right": { "count": 477, - "mean": 0.040931, - "median": 0.030927, - "std": 0.037943, - "sem": 0.001739, - "min": 0.003325, - "max": 0.272891, + "mean": 0.040777, + "median": 0.030897, + "std": 0.037254, + "sem": 0.001708, + "min": 0.003323, + "max": 0.27012, "recall-0.025": 0.303983, "recall-0.05": 0.802935, "recall-0.1": 0.930818, "recall-0.15": 0.968553, - "recall-0.25": 0.995807, + "recall-0.25": 0.997904, "recall-0.5": 1.0, "num_labels": 477 }, "joint_recalls": { "num_labels": 6201, - "recall-0.025": 0.20997, - "recall-0.05": 0.61571, + "recall-0.025": 0.21093, + "recall-0.05": 0.6149, "recall-0.1": 0.94275, "recall-0.15": 0.98645, - "recall-0.25": 0.99839, + "recall-0.25": 0.99871, "recall-0.5": 1.0 } } { "total_parts": 6678, - "correct_parts": 6618, - "pcp": 0.991015 + "correct_parts": 6619, + "pcp": 0.991165 } ``` diff --git a/rpt/triangulator.cpp b/rpt/triangulator.cpp index 0f3e6c7..ea2f7cd 100644 --- a/rpt/triangulator.cpp +++ b/rpt/triangulator.cpp @@ -412,7 +412,6 @@ std::vector>> TriangulatorInternal::triangulate // Calculate pair scores std::vector>, float>> all_scored_poses; all_scored_poses.resize(all_pairs.size()); - #pragma omp parallel for for (size_t i = 0; i < all_pairs.size(); ++i) { const auto &pids = all_pairs[i].first; @@ -479,7 +478,6 @@ std::vector>> TriangulatorInternal::triangulate // Calculate full 3D poses std::vector>> all_full_poses; all_full_poses.resize(all_pairs.size()); - #pragma omp parallel for for (size_t i = 0; i < all_pairs.size(); ++i) { const auto &pids = all_pairs[i].first; @@ -1010,6 +1008,155 @@ std::vector TriangulatorInternal::score_projection( // ================================================================================================= +/* Compute the inverse using the adjugate method */ +std::array, 3> invert3x3(const std::array, 3> &M) +{ + // See: https://scicomp.stackexchange.com/a/29206 + + std::array, 3> adj = { + {{ + M[1][1] * M[2][2] - M[1][2] * M[2][1], + M[0][2] * M[2][1] - M[0][1] * M[2][2], + M[0][1] * M[1][2] - M[0][2] * M[1][1], + }, + { + M[1][2] * M[2][0] - M[1][0] * M[2][2], + M[0][0] * M[2][2] - M[0][2] * M[2][0], + M[0][2] * M[1][0] - M[0][0] * M[1][2], + }, + { + M[1][0] * M[2][1] - M[1][1] * M[2][0], + M[0][1] * M[2][0] - M[0][0] * M[2][1], + M[0][0] * M[1][1] - M[0][1] * M[1][0], + }}}; + + float det = M[0][0] * adj[0][0] + M[0][1] * adj[1][0] + M[0][2] * adj[2][0]; + if (std::fabs(det) < 1e-6f) + { + return {{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}; + } + + float idet = 1.0f / det; + std::array, 3> inv = { + {{ + adj[0][0] * idet, + adj[0][1] * idet, + adj[0][2] * idet, + }, + { + adj[1][0] * idet, + adj[1][1] * idet, + adj[1][2] * idet, + }, + { + adj[2][0] * idet, + adj[2][1] * idet, + adj[2][2] * idet, + }}}; + + return inv; +} + +std::array, 3> transpose3x3(const std::array, 3> &M) +{ + return {{{M[0][0], M[1][0], M[2][0]}, + {M[0][1], M[1][1], M[2][1]}, + {M[0][2], M[1][2], M[2][2]}}}; +} + +float dot(const std::array &a, const std::array &b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; +} +std::array cross(const std::array &a, const std::array &b) +{ + return {a[1] * b[2] - a[2] * b[1], + a[2] * b[0] - a[0] * b[2], + a[0] * b[1] - a[1] * b[0]}; +} +std::array add(const std::array &a, const std::array &b) +{ + return {a[0] + b[0], a[1] + b[1], a[2] + b[2]}; +} +std::array subtract(const std::array &a, const std::array &b) +{ + return {a[0] - b[0], a[1] - b[1], a[2] - b[2]}; +} +std::array multiply(const std::array &a, float s) +{ + return {a[0] * s, a[1] * s, a[2] * s}; +} +std::array normalize(const std::array &v) +{ + float norm = std::sqrt(dot(v, v)); + if (norm < 1e-8f) + return v; + return multiply(v, 1.0f / norm); +} +std::array mat_mul_vec( + const std::array, 3> &M, const std::array &v) +{ + std::array res = {M[0][0] * v[0] + M[0][1] * v[1] + M[0][2] * v[2], + M[1][0] * v[0] + M[1][1] * v[1] + M[1][2] * v[2], + M[2][0] * v[0] + M[2][1] * v[1] + M[2][2] * v[2]}; + return res; +} + +/* Compute camera center and corresponding ray direction */ +std::tuple, std::array> calc_center_and_ray( + const CameraInternal &icam, + const std::array &pt) +{ + // Compute Rᵀ and t + auto R_transpose = transpose3x3(icam.cam.R); + std::array t = {icam.cam.T[0][0], icam.cam.T[1][0], icam.cam.T[2][0]}; + t = mat_mul_vec(icam.cam.R, multiply(t, -1.0f)); + + // Camera center: C = -Rᵀ * t + auto C = multiply(mat_mul_vec(R_transpose, t), -1.0f); + + // Compute ray direction: + std::array uv1 = {pt[0], pt[1], 1.0f}; + auto K_inv = invert3x3(icam.cam.K); + auto d = mat_mul_vec(R_transpose, mat_mul_vec(K_inv, uv1)); + auto rayDir = normalize(d); + + return std::make_tuple(C, rayDir); +} + +/* Triangulate two points by computing their two rays and the midpoint of their closest approach */ +std::array triangulate_midpoint( + const CameraInternal &icam1, + const CameraInternal &icam2, + const std::array &pt1, + const std::array &pt2) +{ + // See: https://en.wikipedia.org/wiki/Skew_lines#Nearest_points + + // Obtain the camera centers and ray directions for both views + auto [p1, d1] = calc_center_and_ray(icam1, pt1); + auto [p2, d2] = calc_center_and_ray(icam2, pt2); + + // Compute the perpendicular plane vectors + std::array n = cross(d1, d2); + std::array n1 = cross(d1, n); + std::array n2 = cross(d2, n); + + // Calculate point on Line 1 nearest to Line 2 + float t1 = dot(subtract(p2, p1), n2) / dot(d1, n2); + std::array c1 = add(p1, multiply(d1, t1)); + + // Calculate point on Line 2 nearest to Line 1 + float t2 = dot(subtract(p1, p2), n1) / dot(d2, n1); + std::array c2 = add(p2, multiply(d2, t2)); + + // Compute midpoint between c1 and c2. + std::array midpoint = multiply(add(c1, c2), 0.5); + return midpoint; +} + +// ================================================================================================= + std::pair>, float> TriangulatorInternal::triangulate_and_score( const std::vector> &pose1, const std::vector> &pose2, @@ -1052,63 +1199,18 @@ std::pair>, float> TriangulatorInternal::triang return std::make_pair(empty, score); } - // Extract coordinates of visible joints - std::vector> points1; - std::vector> points2; - points1.reserve(num_visible); - points2.reserve(num_visible); - for (size_t i = 0; i < num_joints; ++i) - { - if (mask[i]) - { - points1.push_back({pose1[i][0], pose1[i][1]}); - points2.push_back({pose2[i][0], pose2[i][1]}); - } - } - - // Convert vectors to mats - cv::Mat points1_mat(2, num_visible, CV_32F); - cv::Mat points2_mat(2, num_visible, CV_32F); - float *p1_ptr = points1_mat.ptr(0); - float *p2_ptr = points2_mat.ptr(0); - for (int i = 0; i < num_visible; ++i) - { - p1_ptr[i + 0 * num_visible] = points1[i][0]; - p1_ptr[i + 1 * num_visible] = points1[i][1]; - p2_ptr[i + 0 * num_visible] = points2[i][0]; - p2_ptr[i + 1 * num_visible] = points2[i][1]; - } - - // Triangulate points - cv::Mat points4d_h; - cv::triangulatePoints(cam1.P, cam2.P, points1_mat, points2_mat, points4d_h); - - // Convert homogeneous coordinates to 3D - std::vector> points_3d; - points_3d.reserve(num_visible); - const float *p4_ptr = points4d_h.ptr(0); - for (int i = 0; i < points4d_h.cols; ++i) - { - float w = p4_ptr[i + 3 * num_visible]; - std::array pt = { - p4_ptr[i + 0 * num_visible] / w, - p4_ptr[i + 1 * num_visible] / w, - p4_ptr[i + 2 * num_visible] / w}; - points_3d.push_back(std::move(pt)); - } - - // Create the 3D pose + // Use midpoint triangulation instead of cv::triangulatePoints because it is much faster, + // while having almost the same accuracy. std::vector> pose3d(num_joints, {0.0, 0.0, 0.0, 0.0}); - int idx = 0; for (size_t i = 0; i < num_joints; ++i) { if (mask[i]) { - pose3d[i][0] = points_3d[idx][0]; - pose3d[i][1] = points_3d[idx][1]; - pose3d[i][2] = points_3d[idx][2]; - pose3d[i][3] = 1.0; - ++idx; + auto &pt1 = pose1[i]; + auto &pt2 = pose2[i]; + std::array pt3d = triangulate_midpoint( + cam1, cam2, {pt1[0], pt1[1]}, {pt2[0], pt2[1]}); + pose3d[i] = {pt3d[0], pt3d[1], pt3d[2], 1.0}; } }