Allow keypoint detections outside the image.

This commit is contained in:
Daniel
2025-02-11 11:26:33 +01:00
parent 24d706d030
commit 2c994eca44
3 changed files with 182 additions and 157 deletions

View File

@ -1091,75 +1091,75 @@ Results of the model in various experiments on different datasets. \
```json ```json
{ {
"img_loading": 0.0476801, "img_loading": 0.0479787,
"demosaicing": 0.00107258, "demosaicing": 0.00105853,
"avg_time_2d": 0.0169037, "avg_time_2d": 0.0168711,
"avg_time_3d": 0.000441429, "avg_time_3d": 0.000490739,
"fps": 54.2957 "fps": 54.2878
} }
{ {
"triangulator_calls": 420, "triangulator_calls": 420,
"init_time": 9.63167e-06, "init_time": 1.06278e-05,
"undistort_time": 3.56479e-05, "undistort_time": 2.61993e-05,
"project_time": 4.58269e-05, "project_time": 5.91157e-05,
"match_time": 2.47299e-05, "match_time": 7.7537e-05,
"pairs_time": 4.47937e-05, "pairs_time": 4.91138e-05,
"pair_scoring_time": 0.000102304, "pair_scoring_time": 9.39502e-05,
"grouping_time": 1.5427e-05, "grouping_time": 1.86132e-05,
"full_time": 8.66486e-05, "full_time": 8.07886e-05,
"merge_time": 1.18494e-05, "merge_time": 9.90888e-06,
"post_time": 1.38253e-05, "post_time": 1.35556e-05,
"convert_time": 7.38645e-07, "convert_time": 7.28681e-07,
"total_time": 0.000391801 "total_time": 0.000440487
} }
{ {
"person_nums": { "person_nums": {
"total_frames": 420, "total_frames": 420,
"total_labels": 1466, "total_labels": 1466,
"total_preds": 1503, "total_preds": 1502,
"considered_empty": 0, "considered_empty": 0,
"valid_preds": 1462, "valid_preds": 1462,
"invalid_preds": 41, "invalid_preds": 40,
"missing": 4, "missing": 4,
"invalid_fraction": 0.02728, "invalid_fraction": 0.02663,
"precision": 0.97272, "precision": 0.97337,
"recall": 0.99727, "recall": 0.99727,
"f1": 0.98484, "f1": 0.98518,
"non_empty": 1503 "non_empty": 1502
}, },
"mpjpe": { "mpjpe": {
"count": 1462, "count": 1462,
"mean": 0.033844, "mean": 0.032346,
"median": 0.030313, "median": 0.029628,
"std": 0.015707, "std": 0.014512,
"sem": 0.000411, "sem": 0.00038,
"min": 0.010671, "min": 0.010671,
"max": 0.133636, "max": 0.136736,
"recall-0.025": 0.317872, "recall-0.025": 0.339018,
"recall-0.05": 0.866303, "recall-0.05": 0.900409,
"recall-0.1": 0.991132, "recall-0.1": 0.99045,
"recall-0.15": 0.997271, "recall-0.15": 0.997271,
"recall-0.25": 0.997271, "recall-0.25": 0.997271,
"recall-0.5": 0.997271, "recall-0.5": 0.997271,
"num_labels": 1466, "num_labels": 1466,
"ap-0.025": 0.172492, "ap-0.025": 0.181063,
"ap-0.05": 0.816785, "ap-0.05": 0.865292,
"ap-0.1": 0.975993, "ap-0.1": 0.979068,
"ap-0.15": 0.986826, "ap-0.15": 0.989802,
"ap-0.25": 0.986826, "ap-0.25": 0.989802,
"ap-0.5": 0.986826 "ap-0.5": 0.989802
}, },
"nose": { "nose": {
"count": 1461, "count": 1461,
"mean": 0.015367, "mean": 0.01573,
"median": 0.011619, "median": 0.011646,
"std": 0.017247, "std": 0.018004,
"sem": 0.000451, "sem": 0.000471,
"min": 0.001311, "min": 0.001311,
"max": 0.276143, "max": 0.276143,
"recall-0.025": 0.905673, "recall-0.025": 0.899522,
"recall-0.05": 0.966507, "recall-0.05": 0.963773,
"recall-0.1": 0.994532, "recall-0.1": 0.992481,
"recall-0.15": 0.995899, "recall-0.15": 0.995899,
"recall-0.25": 0.996582, "recall-0.25": 0.996582,
"recall-0.5": 0.998633, "recall-0.5": 0.998633,
@ -1167,14 +1167,14 @@ Results of the model in various experiments on different datasets. \
}, },
"shoulder_left": { "shoulder_left": {
"count": 1462, "count": 1462,
"mean": 0.01665, "mean": 0.016823,
"median": 0.014603, "median": 0.014758,
"std": 0.011003, "std": 0.010996,
"sem": 0.000288, "sem": 0.000288,
"min": 0.000954, "min": 0.000954,
"max": 0.103637, "max": 0.103637,
"recall-0.025": 0.841064, "recall-0.025": 0.8397,
"recall-0.05": 0.982265, "recall-0.05": 0.982947,
"recall-0.1": 0.996589, "recall-0.1": 0.996589,
"recall-0.15": 0.997271, "recall-0.15": 0.997271,
"recall-0.25": 0.997271, "recall-0.25": 0.997271,
@ -1183,14 +1183,14 @@ Results of the model in various experiments on different datasets. \
}, },
"shoulder_right": { "shoulder_right": {
"count": 1461, "count": 1461,
"mean": 0.016883, "mean": 0.016777,
"median": 0.014712, "median": 0.014699,
"std": 0.011866, "std": 0.011399,
"sem": 0.000311, "sem": 0.000298,
"min": 0.001164, "min": 0.001164,
"max": 0.156188, "max": 0.156188,
"recall-0.025": 0.83686, "recall-0.025": 0.833447,
"recall-0.05": 0.980205, "recall-0.05": 0.983618,
"recall-0.1": 0.996587, "recall-0.1": 0.996587,
"recall-0.15": 0.996587, "recall-0.15": 0.996587,
"recall-0.25": 0.99727, "recall-0.25": 0.99727,
@ -1199,14 +1199,14 @@ Results of the model in various experiments on different datasets. \
}, },
"elbow_left": { "elbow_left": {
"count": 1461, "count": 1461,
"mean": 0.022321, "mean": 0.022172,
"median": 0.016639, "median": 0.016399,
"std": 0.019089, "std": 0.019077,
"sem": 0.0005, "sem": 0.000499,
"min": 0.00117, "min": 0.000543,
"max": 0.210066, "max": 0.210066,
"recall-0.025": 0.733788, "recall-0.025": 0.735154,
"recall-0.05": 0.915358, "recall-0.05": 0.916724,
"recall-0.1": 0.990444, "recall-0.1": 0.990444,
"recall-0.15": 0.996587, "recall-0.15": 0.996587,
"recall-0.25": 0.99727, "recall-0.25": 0.99727,
@ -1215,15 +1215,15 @@ Results of the model in various experiments on different datasets. \
}, },
"elbow_right": { "elbow_right": {
"count": 1461, "count": 1461,
"mean": 0.02069, "mean": 0.021149,
"median": 0.01607, "median": 0.015999,
"std": 0.015618, "std": 0.016746,
"sem": 0.000409, "sem": 0.000438,
"min": 0.001472, "min": 0.001472,
"max": 0.162788, "max": 0.162788,
"recall-0.025": 0.779904, "recall-0.025": 0.780588,
"recall-0.05": 0.939166, "recall-0.05": 0.926863,
"recall-0.1": 0.996582, "recall-0.1": 0.995899,
"recall-0.15": 0.997949, "recall-0.15": 0.997949,
"recall-0.25": 0.998633, "recall-0.25": 0.998633,
"recall-0.5": 0.998633, "recall-0.5": 0.998633,
@ -1231,63 +1231,63 @@ Results of the model in various experiments on different datasets. \
}, },
"wrist_left": { "wrist_left": {
"count": 1432, "count": 1432,
"mean": 0.035516, "mean": 0.035971,
"median": 0.016786, "median": 0.016823,
"std": 0.05517, "std": 0.055313,
"sem": 0.001458, "sem": 0.001462,
"min": 0.000898, "min": 0.000898,
"max": 0.460746, "max": 0.450938,
"recall-0.025": 0.672245, "recall-0.025": 0.67364,
"recall-0.05": 0.84728, "recall-0.05": 0.843794,
"recall-0.1": 0.906555, "recall-0.1": 0.904463,
"recall-0.15": 0.956764, "recall-0.15": 0.953975,
"recall-0.25": 0.975593, "recall-0.25": 0.974895,
"recall-0.5": 0.998605, "recall-0.5": 0.998605,
"num_labels": 1434 "num_labels": 1434
}, },
"wrist_right": { "wrist_right": {
"count": 1455, "count": 1455,
"mean": 0.024709, "mean": 0.026926,
"median": 0.016377, "median": 0.016801,
"std": 0.027789, "std": 0.033427,
"sem": 0.000729, "sem": 0.000877,
"min": 0.001361, "min": 0.001361,
"max": 0.280646, "max": 0.280646,
"recall-0.025": 0.711538, "recall-0.025": 0.690934,
"recall-0.05": 0.901786, "recall-0.05": 0.888736,
"recall-0.1": 0.974588, "recall-0.1": 0.964973,
"recall-0.15": 0.989011, "recall-0.15": 0.980769,
"recall-0.25": 0.997253, "recall-0.25": 0.997253,
"recall-0.5": 0.999313, "recall-0.5": 0.999313,
"num_labels": 1456 "num_labels": 1456
}, },
"hip_left": { "hip_left": {
"count": 1461, "count": 1461,
"mean": 0.035145, "mean": 0.034771,
"median": 0.031973, "median": 0.031898,
"std": 0.019347, "std": 0.019216,
"sem": 0.000506, "sem": 0.000503,
"min": 0.00101, "min": 0.00101,
"max": 0.181992, "max": 0.181992,
"recall-0.025": 0.326962, "recall-0.025": 0.32628,
"recall-0.05": 0.837543, "recall-0.05": 0.845051,
"recall-0.1": 0.990444, "recall-0.1": 0.989761,
"recall-0.15": 0.996587, "recall-0.15": 0.995904,
"recall-0.25": 0.99727, "recall-0.25": 0.99727,
"recall-0.5": 0.99727, "recall-0.5": 0.99727,
"num_labels": 1465 "num_labels": 1465
}, },
"hip_right": { "hip_right": {
"count": 1462, "count": 1462,
"mean": 0.037983, "mean": 0.037383,
"median": 0.032385, "median": 0.032592,
"std": 0.025351, "std": 0.024222,
"sem": 0.000663, "sem": 0.000634,
"min": 0.002509, "min": 0.002509,
"max": 0.281736, "max": 0.281736,
"recall-0.025": 0.31719, "recall-0.025": 0.318554,
"recall-0.05": 0.797408, "recall-0.05": 0.802183,
"recall-0.1": 0.969986, "recall-0.1": 0.976126,
"recall-0.15": 0.993861, "recall-0.15": 0.993861,
"recall-0.25": 0.995907, "recall-0.25": 0.995907,
"recall-0.5": 0.997271, "recall-0.5": 0.997271,
@ -1295,82 +1295,82 @@ Results of the model in various experiments on different datasets. \
}, },
"knee_left": { "knee_left": {
"count": 1461, "count": 1461,
"mean": 0.039577, "mean": 0.038795,
"median": 0.033011, "median": 0.032848,
"std": 0.034872, "std": 0.034462,
"sem": 0.000913, "sem": 0.000902,
"min": 0.004244, "min": 0.003309,
"max": 0.473605, "max": 0.473605,
"recall-0.025": 0.283276, "recall-0.025": 0.287372,
"recall-0.05": 0.787031, "recall-0.05": 0.799317,
"recall-0.1": 0.967918, "recall-0.1": 0.978157,
"recall-0.15": 0.985666, "recall-0.15": 0.9843,
"recall-0.25": 0.990444, "recall-0.25": 0.990444,
"recall-0.5": 0.99727, "recall-0.5": 0.99727,
"num_labels": 1465 "num_labels": 1465
}, },
"knee_right": { "knee_right": {
"count": 1455, "count": 1455,
"mean": 0.039525, "mean": 0.038488,
"median": 0.031801, "median": 0.031543,
"std": 0.029897, "std": 0.026577,
"sem": 0.000784, "sem": 0.000697,
"min": 0.003512, "min": 0.003512,
"max": 0.278788, "max": 0.275123,
"recall-0.025": 0.343386, "recall-0.025": 0.349554,
"recall-0.05": 0.74366, "recall-0.05": 0.749829,
"recall-0.1": 0.959561, "recall-0.1": 0.964359,
"recall-0.15": 0.989034, "recall-0.15": 0.993146,
"recall-0.25": 0.994517, "recall-0.25": 0.996573,
"recall-0.5": 0.997258, "recall-0.5": 0.997258,
"num_labels": 1459 "num_labels": 1459
}, },
"ankle_left": { "ankle_left": {
"count": 1457, "count": 1458,
"mean": 0.064506, "mean": 0.056122,
"median": 0.035906, "median": 0.034021,
"std": 0.071268, "std": 0.062141,
"sem": 0.001868, "sem": 0.001628,
"min": 0.003035, "min": 0.003035,
"max": 0.441184, "max": 0.432301,
"recall-0.025": 0.334245, "recall-0.025": 0.347915,
"recall-0.05": 0.625427, "recall-0.05": 0.666439,
"recall-0.1": 0.805878, "recall-0.1": 0.855776,
"recall-0.15": 0.88175, "recall-0.15": 0.917977,
"recall-0.25": 0.958305, "recall-0.25": 0.971292,
"recall-0.5": 0.995899, "recall-0.5": 0.996582,
"num_labels": 1463 "num_labels": 1463
}, },
"ankle_right": { "ankle_right": {
"count": 1445, "count": 1445,
"mean": 0.065253, "mean": 0.053755,
"median": 0.032697, "median": 0.030905,
"std": 0.084609, "std": 0.067212,
"sem": 0.002227, "sem": 0.001769,
"min": 0.001698, "min": 0.001698,
"max": 0.469631, "max": 0.489965,
"recall-0.025": 0.354795, "recall-0.025": 0.378082,
"recall-0.05": 0.680137, "recall-0.05": 0.736301,
"recall-0.1": 0.819178, "recall-0.1": 0.856849,
"recall-0.15": 0.867808, "recall-0.15": 0.903425,
"recall-0.25": 0.932877, "recall-0.25": 0.962329,
"recall-0.5": 0.989726, "recall-0.5": 0.989726,
"num_labels": 1460 "num_labels": 1460
}, },
"joint_recalls": { "joint_recalls": {
"num_labels": 18990, "num_labels": 18990,
"recall-0.025": 0.57177, "recall-0.025": 0.57341,
"recall-0.05": 0.84618, "recall-0.05": 0.85408,
"recall-0.1": 0.95108, "recall-0.1": 0.9584,
"recall-0.15": 0.97235, "recall-0.15": 0.9772,
"recall-0.25": 0.98673, "recall-0.25": 0.99005,
"recall-0.5": 0.99674 "recall-0.5": 0.99674
} }
} }
{ {
"total_parts": 20444, "total_parts": 20444,
"correct_parts": 20170, "correct_parts": 20204,
"pcp": 0.986598 "pcp": 0.988261
} }
``` ```

View File

@ -640,7 +640,7 @@ void TriangulatorInternal::undistort_poses(std::vector<cv::Mat> &poses, CameraIn
points.copyTo(poses[p].colRange(0, 2)); points.copyTo(poses[p].colRange(0, 2));
// Mask out points that are far outside the image (points slightly outside are still valid) // Mask out points that are far outside the image (points slightly outside are still valid)
float mask_offset = (width + height) / 40.0; float mask_offset = (width + height) / 20.0;
int num_joints = poses[p].rows; int num_joints = poses[p].rows;
for (int j = 0; j < num_joints; ++j) for (int j = 0; j < num_joints; ++j)
{ {

View File

@ -867,8 +867,6 @@ namespace utils_2d_pose
int pad_top = paddings[2]; int pad_top = paddings[2];
int box_left = box[0]; int box_left = box[0];
int box_top = box[1]; int box_top = box[1];
int img_w = image.cols;
int img_h = image.rows;
for (auto &kp : kpts) for (auto &kp : kpts)
{ {
@ -884,10 +882,6 @@ namespace utils_2d_pose
x += box_left; x += box_left;
y += box_top; y += box_top;
// Clamp to iamge region
x = std::max(0.0f, std::min(x, img_w - 1.0f));
y = std::max(0.0f, std::min(y, img_h - 1.0f));
} }
} }
@ -949,6 +943,37 @@ namespace utils_2d_pose
// Sometimes the detection model predicts multiple boxes with different shapes for the same // Sometimes the detection model predicts multiple boxes with different shapes for the same
// person. They then result in strongly overlapping poses, which are merged here. // person. They then result in strongly overlapping poses, which are merged here.
merge_close_poses(poses, {(size_t)image.cols, (size_t)image.rows}); merge_close_poses(poses, {(size_t)image.cols, (size_t)image.rows});
// Clip keypoints far outside the image
float mask_offset = (image.cols + image.rows) / 10.0;
for (size_t i = 0; i < poses.size(); ++i)
{
for (size_t j = 0; j < poses[i].size(); ++j)
{
auto &kp = poses[i][j];
if (kp[0] < -mask_offset)
{
kp[0] = -mask_offset;
kp[2] = 0.001;
}
if (kp[1] < -mask_offset)
{
kp[1] = -mask_offset;
kp[2] = 0.001;
}
if (kp[0] >= image.cols + mask_offset)
{
kp[0] = image.cols + mask_offset;
kp[2] = 0.001;
}
if (kp[1] >= image.rows + mask_offset)
{
kp[1] = image.rows + mask_offset;
kp[2] = 0.001;
}
}
}
return poses; return poses;
} }