diff --git a/media/RESULTS.md b/media/RESULTS.md index 02ca0e8..962af93 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -1091,75 +1091,75 @@ Results of the model in various experiments on different datasets. \ ```json { - "img_loading": 0.0476801, - "demosaicing": 0.00107258, - "avg_time_2d": 0.0169037, - "avg_time_3d": 0.000441429, - "fps": 54.2957 + "img_loading": 0.0479787, + "demosaicing": 0.00105853, + "avg_time_2d": 0.0168711, + "avg_time_3d": 0.000490739, + "fps": 54.2878 } { "triangulator_calls": 420, - "init_time": 9.63167e-06, - "undistort_time": 3.56479e-05, - "project_time": 4.58269e-05, - "match_time": 2.47299e-05, - "pairs_time": 4.47937e-05, - "pair_scoring_time": 0.000102304, - "grouping_time": 1.5427e-05, - "full_time": 8.66486e-05, - "merge_time": 1.18494e-05, - "post_time": 1.38253e-05, - "convert_time": 7.38645e-07, - "total_time": 0.000391801 + "init_time": 1.06278e-05, + "undistort_time": 2.61993e-05, + "project_time": 5.91157e-05, + "match_time": 7.7537e-05, + "pairs_time": 4.91138e-05, + "pair_scoring_time": 9.39502e-05, + "grouping_time": 1.86132e-05, + "full_time": 8.07886e-05, + "merge_time": 9.90888e-06, + "post_time": 1.35556e-05, + "convert_time": 7.28681e-07, + "total_time": 0.000440487 } { "person_nums": { "total_frames": 420, "total_labels": 1466, - "total_preds": 1503, + "total_preds": 1502, "considered_empty": 0, "valid_preds": 1462, - "invalid_preds": 41, + "invalid_preds": 40, "missing": 4, - "invalid_fraction": 0.02728, - "precision": 0.97272, + "invalid_fraction": 0.02663, + "precision": 0.97337, "recall": 0.99727, - "f1": 0.98484, - "non_empty": 1503 + "f1": 0.98518, + "non_empty": 1502 }, "mpjpe": { "count": 1462, - "mean": 0.033844, - "median": 0.030313, - "std": 0.015707, - "sem": 0.000411, + "mean": 0.032346, + "median": 0.029628, + "std": 0.014512, + "sem": 0.00038, "min": 0.010671, - "max": 0.133636, - "recall-0.025": 0.317872, - "recall-0.05": 0.866303, - "recall-0.1": 0.991132, + "max": 0.136736, + "recall-0.025": 0.339018, + "recall-0.05": 0.900409, + "recall-0.1": 0.99045, "recall-0.15": 0.997271, "recall-0.25": 0.997271, "recall-0.5": 0.997271, "num_labels": 1466, - "ap-0.025": 0.172492, - "ap-0.05": 0.816785, - "ap-0.1": 0.975993, - "ap-0.15": 0.986826, - "ap-0.25": 0.986826, - "ap-0.5": 0.986826 + "ap-0.025": 0.181063, + "ap-0.05": 0.865292, + "ap-0.1": 0.979068, + "ap-0.15": 0.989802, + "ap-0.25": 0.989802, + "ap-0.5": 0.989802 }, "nose": { "count": 1461, - "mean": 0.015367, - "median": 0.011619, - "std": 0.017247, - "sem": 0.000451, + "mean": 0.01573, + "median": 0.011646, + "std": 0.018004, + "sem": 0.000471, "min": 0.001311, "max": 0.276143, - "recall-0.025": 0.905673, - "recall-0.05": 0.966507, - "recall-0.1": 0.994532, + "recall-0.025": 0.899522, + "recall-0.05": 0.963773, + "recall-0.1": 0.992481, "recall-0.15": 0.995899, "recall-0.25": 0.996582, "recall-0.5": 0.998633, @@ -1167,14 +1167,14 @@ Results of the model in various experiments on different datasets. \ }, "shoulder_left": { "count": 1462, - "mean": 0.01665, - "median": 0.014603, - "std": 0.011003, + "mean": 0.016823, + "median": 0.014758, + "std": 0.010996, "sem": 0.000288, "min": 0.000954, "max": 0.103637, - "recall-0.025": 0.841064, - "recall-0.05": 0.982265, + "recall-0.025": 0.8397, + "recall-0.05": 0.982947, "recall-0.1": 0.996589, "recall-0.15": 0.997271, "recall-0.25": 0.997271, @@ -1183,14 +1183,14 @@ Results of the model in various experiments on different datasets. \ }, "shoulder_right": { "count": 1461, - "mean": 0.016883, - "median": 0.014712, - "std": 0.011866, - "sem": 0.000311, + "mean": 0.016777, + "median": 0.014699, + "std": 0.011399, + "sem": 0.000298, "min": 0.001164, "max": 0.156188, - "recall-0.025": 0.83686, - "recall-0.05": 0.980205, + "recall-0.025": 0.833447, + "recall-0.05": 0.983618, "recall-0.1": 0.996587, "recall-0.15": 0.996587, "recall-0.25": 0.99727, @@ -1199,14 +1199,14 @@ Results of the model in various experiments on different datasets. \ }, "elbow_left": { "count": 1461, - "mean": 0.022321, - "median": 0.016639, - "std": 0.019089, - "sem": 0.0005, - "min": 0.00117, + "mean": 0.022172, + "median": 0.016399, + "std": 0.019077, + "sem": 0.000499, + "min": 0.000543, "max": 0.210066, - "recall-0.025": 0.733788, - "recall-0.05": 0.915358, + "recall-0.025": 0.735154, + "recall-0.05": 0.916724, "recall-0.1": 0.990444, "recall-0.15": 0.996587, "recall-0.25": 0.99727, @@ -1215,15 +1215,15 @@ Results of the model in various experiments on different datasets. \ }, "elbow_right": { "count": 1461, - "mean": 0.02069, - "median": 0.01607, - "std": 0.015618, - "sem": 0.000409, + "mean": 0.021149, + "median": 0.015999, + "std": 0.016746, + "sem": 0.000438, "min": 0.001472, "max": 0.162788, - "recall-0.025": 0.779904, - "recall-0.05": 0.939166, - "recall-0.1": 0.996582, + "recall-0.025": 0.780588, + "recall-0.05": 0.926863, + "recall-0.1": 0.995899, "recall-0.15": 0.997949, "recall-0.25": 0.998633, "recall-0.5": 0.998633, @@ -1231,63 +1231,63 @@ Results of the model in various experiments on different datasets. \ }, "wrist_left": { "count": 1432, - "mean": 0.035516, - "median": 0.016786, - "std": 0.05517, - "sem": 0.001458, + "mean": 0.035971, + "median": 0.016823, + "std": 0.055313, + "sem": 0.001462, "min": 0.000898, - "max": 0.460746, - "recall-0.025": 0.672245, - "recall-0.05": 0.84728, - "recall-0.1": 0.906555, - "recall-0.15": 0.956764, - "recall-0.25": 0.975593, + "max": 0.450938, + "recall-0.025": 0.67364, + "recall-0.05": 0.843794, + "recall-0.1": 0.904463, + "recall-0.15": 0.953975, + "recall-0.25": 0.974895, "recall-0.5": 0.998605, "num_labels": 1434 }, "wrist_right": { "count": 1455, - "mean": 0.024709, - "median": 0.016377, - "std": 0.027789, - "sem": 0.000729, + "mean": 0.026926, + "median": 0.016801, + "std": 0.033427, + "sem": 0.000877, "min": 0.001361, "max": 0.280646, - "recall-0.025": 0.711538, - "recall-0.05": 0.901786, - "recall-0.1": 0.974588, - "recall-0.15": 0.989011, + "recall-0.025": 0.690934, + "recall-0.05": 0.888736, + "recall-0.1": 0.964973, + "recall-0.15": 0.980769, "recall-0.25": 0.997253, "recall-0.5": 0.999313, "num_labels": 1456 }, "hip_left": { "count": 1461, - "mean": 0.035145, - "median": 0.031973, - "std": 0.019347, - "sem": 0.000506, + "mean": 0.034771, + "median": 0.031898, + "std": 0.019216, + "sem": 0.000503, "min": 0.00101, "max": 0.181992, - "recall-0.025": 0.326962, - "recall-0.05": 0.837543, - "recall-0.1": 0.990444, - "recall-0.15": 0.996587, + "recall-0.025": 0.32628, + "recall-0.05": 0.845051, + "recall-0.1": 0.989761, + "recall-0.15": 0.995904, "recall-0.25": 0.99727, "recall-0.5": 0.99727, "num_labels": 1465 }, "hip_right": { "count": 1462, - "mean": 0.037983, - "median": 0.032385, - "std": 0.025351, - "sem": 0.000663, + "mean": 0.037383, + "median": 0.032592, + "std": 0.024222, + "sem": 0.000634, "min": 0.002509, "max": 0.281736, - "recall-0.025": 0.31719, - "recall-0.05": 0.797408, - "recall-0.1": 0.969986, + "recall-0.025": 0.318554, + "recall-0.05": 0.802183, + "recall-0.1": 0.976126, "recall-0.15": 0.993861, "recall-0.25": 0.995907, "recall-0.5": 0.997271, @@ -1295,82 +1295,82 @@ Results of the model in various experiments on different datasets. \ }, "knee_left": { "count": 1461, - "mean": 0.039577, - "median": 0.033011, - "std": 0.034872, - "sem": 0.000913, - "min": 0.004244, + "mean": 0.038795, + "median": 0.032848, + "std": 0.034462, + "sem": 0.000902, + "min": 0.003309, "max": 0.473605, - "recall-0.025": 0.283276, - "recall-0.05": 0.787031, - "recall-0.1": 0.967918, - "recall-0.15": 0.985666, + "recall-0.025": 0.287372, + "recall-0.05": 0.799317, + "recall-0.1": 0.978157, + "recall-0.15": 0.9843, "recall-0.25": 0.990444, "recall-0.5": 0.99727, "num_labels": 1465 }, "knee_right": { "count": 1455, - "mean": 0.039525, - "median": 0.031801, - "std": 0.029897, - "sem": 0.000784, + "mean": 0.038488, + "median": 0.031543, + "std": 0.026577, + "sem": 0.000697, "min": 0.003512, - "max": 0.278788, - "recall-0.025": 0.343386, - "recall-0.05": 0.74366, - "recall-0.1": 0.959561, - "recall-0.15": 0.989034, - "recall-0.25": 0.994517, + "max": 0.275123, + "recall-0.025": 0.349554, + "recall-0.05": 0.749829, + "recall-0.1": 0.964359, + "recall-0.15": 0.993146, + "recall-0.25": 0.996573, "recall-0.5": 0.997258, "num_labels": 1459 }, "ankle_left": { - "count": 1457, - "mean": 0.064506, - "median": 0.035906, - "std": 0.071268, - "sem": 0.001868, + "count": 1458, + "mean": 0.056122, + "median": 0.034021, + "std": 0.062141, + "sem": 0.001628, "min": 0.003035, - "max": 0.441184, - "recall-0.025": 0.334245, - "recall-0.05": 0.625427, - "recall-0.1": 0.805878, - "recall-0.15": 0.88175, - "recall-0.25": 0.958305, - "recall-0.5": 0.995899, + "max": 0.432301, + "recall-0.025": 0.347915, + "recall-0.05": 0.666439, + "recall-0.1": 0.855776, + "recall-0.15": 0.917977, + "recall-0.25": 0.971292, + "recall-0.5": 0.996582, "num_labels": 1463 }, "ankle_right": { "count": 1445, - "mean": 0.065253, - "median": 0.032697, - "std": 0.084609, - "sem": 0.002227, + "mean": 0.053755, + "median": 0.030905, + "std": 0.067212, + "sem": 0.001769, "min": 0.001698, - "max": 0.469631, - "recall-0.025": 0.354795, - "recall-0.05": 0.680137, - "recall-0.1": 0.819178, - "recall-0.15": 0.867808, - "recall-0.25": 0.932877, + "max": 0.489965, + "recall-0.025": 0.378082, + "recall-0.05": 0.736301, + "recall-0.1": 0.856849, + "recall-0.15": 0.903425, + "recall-0.25": 0.962329, "recall-0.5": 0.989726, "num_labels": 1460 }, "joint_recalls": { "num_labels": 18990, - "recall-0.025": 0.57177, - "recall-0.05": 0.84618, - "recall-0.1": 0.95108, - "recall-0.15": 0.97235, - "recall-0.25": 0.98673, + "recall-0.025": 0.57341, + "recall-0.05": 0.85408, + "recall-0.1": 0.9584, + "recall-0.15": 0.9772, + "recall-0.25": 0.99005, "recall-0.5": 0.99674 } } { "total_parts": 20444, - "correct_parts": 20170, - "pcp": 0.986598 + "correct_parts": 20204, + "pcp": 0.988261 } ``` diff --git a/rpt/triangulator.cpp b/rpt/triangulator.cpp index 49566a2..eaca842 100644 --- a/rpt/triangulator.cpp +++ b/rpt/triangulator.cpp @@ -640,7 +640,7 @@ void TriangulatorInternal::undistort_poses(std::vector &poses, CameraIn points.copyTo(poses[p].colRange(0, 2)); // Mask out points that are far outside the image (points slightly outside are still valid) - float mask_offset = (width + height) / 40.0; + float mask_offset = (width + height) / 20.0; int num_joints = poses[p].rows; for (int j = 0; j < num_joints; ++j) { diff --git a/scripts/utils_2d_pose.hpp b/scripts/utils_2d_pose.hpp index d5978df..aaec0ad 100644 --- a/scripts/utils_2d_pose.hpp +++ b/scripts/utils_2d_pose.hpp @@ -867,8 +867,6 @@ namespace utils_2d_pose int pad_top = paddings[2]; int box_left = box[0]; int box_top = box[1]; - int img_w = image.cols; - int img_h = image.rows; for (auto &kp : kpts) { @@ -884,10 +882,6 @@ namespace utils_2d_pose x += box_left; y += box_top; - - // Clamp to iamge region - x = std::max(0.0f, std::min(x, img_w - 1.0f)); - y = std::max(0.0f, std::min(y, img_h - 1.0f)); } } @@ -949,6 +943,37 @@ namespace utils_2d_pose // Sometimes the detection model predicts multiple boxes with different shapes for the same // person. They then result in strongly overlapping poses, which are merged here. merge_close_poses(poses, {(size_t)image.cols, (size_t)image.rows}); + + // Clip keypoints far outside the image + float mask_offset = (image.cols + image.rows) / 10.0; + for (size_t i = 0; i < poses.size(); ++i) + { + for (size_t j = 0; j < poses[i].size(); ++j) + { + auto &kp = poses[i][j]; + if (kp[0] < -mask_offset) + { + kp[0] = -mask_offset; + kp[2] = 0.001; + } + if (kp[1] < -mask_offset) + { + kp[1] = -mask_offset; + kp[2] = 0.001; + } + if (kp[0] >= image.cols + mask_offset) + { + kp[0] = image.cols + mask_offset; + kp[2] = 0.001; + } + if (kp[1] >= image.rows + mask_offset) + { + kp[1] = image.rows + mask_offset; + kp[2] = 0.001; + } + } + } + return poses; }