Allow keypoint detections outside the image.

This commit is contained in:
Daniel
2025-02-11 11:26:33 +01:00
parent 24d706d030
commit 2c994eca44
3 changed files with 182 additions and 157 deletions

View File

@ -1091,75 +1091,75 @@ Results of the model in various experiments on different datasets. \
```json
{
"img_loading": 0.0476801,
"demosaicing": 0.00107258,
"avg_time_2d": 0.0169037,
"avg_time_3d": 0.000441429,
"fps": 54.2957
"img_loading": 0.0479787,
"demosaicing": 0.00105853,
"avg_time_2d": 0.0168711,
"avg_time_3d": 0.000490739,
"fps": 54.2878
}
{
"triangulator_calls": 420,
"init_time": 9.63167e-06,
"undistort_time": 3.56479e-05,
"project_time": 4.58269e-05,
"match_time": 2.47299e-05,
"pairs_time": 4.47937e-05,
"pair_scoring_time": 0.000102304,
"grouping_time": 1.5427e-05,
"full_time": 8.66486e-05,
"merge_time": 1.18494e-05,
"post_time": 1.38253e-05,
"convert_time": 7.38645e-07,
"total_time": 0.000391801
"init_time": 1.06278e-05,
"undistort_time": 2.61993e-05,
"project_time": 5.91157e-05,
"match_time": 7.7537e-05,
"pairs_time": 4.91138e-05,
"pair_scoring_time": 9.39502e-05,
"grouping_time": 1.86132e-05,
"full_time": 8.07886e-05,
"merge_time": 9.90888e-06,
"post_time": 1.35556e-05,
"convert_time": 7.28681e-07,
"total_time": 0.000440487
}
{
"person_nums": {
"total_frames": 420,
"total_labels": 1466,
"total_preds": 1503,
"total_preds": 1502,
"considered_empty": 0,
"valid_preds": 1462,
"invalid_preds": 41,
"invalid_preds": 40,
"missing": 4,
"invalid_fraction": 0.02728,
"precision": 0.97272,
"invalid_fraction": 0.02663,
"precision": 0.97337,
"recall": 0.99727,
"f1": 0.98484,
"non_empty": 1503
"f1": 0.98518,
"non_empty": 1502
},
"mpjpe": {
"count": 1462,
"mean": 0.033844,
"median": 0.030313,
"std": 0.015707,
"sem": 0.000411,
"mean": 0.032346,
"median": 0.029628,
"std": 0.014512,
"sem": 0.00038,
"min": 0.010671,
"max": 0.133636,
"recall-0.025": 0.317872,
"recall-0.05": 0.866303,
"recall-0.1": 0.991132,
"max": 0.136736,
"recall-0.025": 0.339018,
"recall-0.05": 0.900409,
"recall-0.1": 0.99045,
"recall-0.15": 0.997271,
"recall-0.25": 0.997271,
"recall-0.5": 0.997271,
"num_labels": 1466,
"ap-0.025": 0.172492,
"ap-0.05": 0.816785,
"ap-0.1": 0.975993,
"ap-0.15": 0.986826,
"ap-0.25": 0.986826,
"ap-0.5": 0.986826
"ap-0.025": 0.181063,
"ap-0.05": 0.865292,
"ap-0.1": 0.979068,
"ap-0.15": 0.989802,
"ap-0.25": 0.989802,
"ap-0.5": 0.989802
},
"nose": {
"count": 1461,
"mean": 0.015367,
"median": 0.011619,
"std": 0.017247,
"sem": 0.000451,
"mean": 0.01573,
"median": 0.011646,
"std": 0.018004,
"sem": 0.000471,
"min": 0.001311,
"max": 0.276143,
"recall-0.025": 0.905673,
"recall-0.05": 0.966507,
"recall-0.1": 0.994532,
"recall-0.025": 0.899522,
"recall-0.05": 0.963773,
"recall-0.1": 0.992481,
"recall-0.15": 0.995899,
"recall-0.25": 0.996582,
"recall-0.5": 0.998633,
@ -1167,14 +1167,14 @@ Results of the model in various experiments on different datasets. \
},
"shoulder_left": {
"count": 1462,
"mean": 0.01665,
"median": 0.014603,
"std": 0.011003,
"mean": 0.016823,
"median": 0.014758,
"std": 0.010996,
"sem": 0.000288,
"min": 0.000954,
"max": 0.103637,
"recall-0.025": 0.841064,
"recall-0.05": 0.982265,
"recall-0.025": 0.8397,
"recall-0.05": 0.982947,
"recall-0.1": 0.996589,
"recall-0.15": 0.997271,
"recall-0.25": 0.997271,
@ -1183,14 +1183,14 @@ Results of the model in various experiments on different datasets. \
},
"shoulder_right": {
"count": 1461,
"mean": 0.016883,
"median": 0.014712,
"std": 0.011866,
"sem": 0.000311,
"mean": 0.016777,
"median": 0.014699,
"std": 0.011399,
"sem": 0.000298,
"min": 0.001164,
"max": 0.156188,
"recall-0.025": 0.83686,
"recall-0.05": 0.980205,
"recall-0.025": 0.833447,
"recall-0.05": 0.983618,
"recall-0.1": 0.996587,
"recall-0.15": 0.996587,
"recall-0.25": 0.99727,
@ -1199,14 +1199,14 @@ Results of the model in various experiments on different datasets. \
},
"elbow_left": {
"count": 1461,
"mean": 0.022321,
"median": 0.016639,
"std": 0.019089,
"sem": 0.0005,
"min": 0.00117,
"mean": 0.022172,
"median": 0.016399,
"std": 0.019077,
"sem": 0.000499,
"min": 0.000543,
"max": 0.210066,
"recall-0.025": 0.733788,
"recall-0.05": 0.915358,
"recall-0.025": 0.735154,
"recall-0.05": 0.916724,
"recall-0.1": 0.990444,
"recall-0.15": 0.996587,
"recall-0.25": 0.99727,
@ -1215,15 +1215,15 @@ Results of the model in various experiments on different datasets. \
},
"elbow_right": {
"count": 1461,
"mean": 0.02069,
"median": 0.01607,
"std": 0.015618,
"sem": 0.000409,
"mean": 0.021149,
"median": 0.015999,
"std": 0.016746,
"sem": 0.000438,
"min": 0.001472,
"max": 0.162788,
"recall-0.025": 0.779904,
"recall-0.05": 0.939166,
"recall-0.1": 0.996582,
"recall-0.025": 0.780588,
"recall-0.05": 0.926863,
"recall-0.1": 0.995899,
"recall-0.15": 0.997949,
"recall-0.25": 0.998633,
"recall-0.5": 0.998633,
@ -1231,63 +1231,63 @@ Results of the model in various experiments on different datasets. \
},
"wrist_left": {
"count": 1432,
"mean": 0.035516,
"median": 0.016786,
"std": 0.05517,
"sem": 0.001458,
"mean": 0.035971,
"median": 0.016823,
"std": 0.055313,
"sem": 0.001462,
"min": 0.000898,
"max": 0.460746,
"recall-0.025": 0.672245,
"recall-0.05": 0.84728,
"recall-0.1": 0.906555,
"recall-0.15": 0.956764,
"recall-0.25": 0.975593,
"max": 0.450938,
"recall-0.025": 0.67364,
"recall-0.05": 0.843794,
"recall-0.1": 0.904463,
"recall-0.15": 0.953975,
"recall-0.25": 0.974895,
"recall-0.5": 0.998605,
"num_labels": 1434
},
"wrist_right": {
"count": 1455,
"mean": 0.024709,
"median": 0.016377,
"std": 0.027789,
"sem": 0.000729,
"mean": 0.026926,
"median": 0.016801,
"std": 0.033427,
"sem": 0.000877,
"min": 0.001361,
"max": 0.280646,
"recall-0.025": 0.711538,
"recall-0.05": 0.901786,
"recall-0.1": 0.974588,
"recall-0.15": 0.989011,
"recall-0.025": 0.690934,
"recall-0.05": 0.888736,
"recall-0.1": 0.964973,
"recall-0.15": 0.980769,
"recall-0.25": 0.997253,
"recall-0.5": 0.999313,
"num_labels": 1456
},
"hip_left": {
"count": 1461,
"mean": 0.035145,
"median": 0.031973,
"std": 0.019347,
"sem": 0.000506,
"mean": 0.034771,
"median": 0.031898,
"std": 0.019216,
"sem": 0.000503,
"min": 0.00101,
"max": 0.181992,
"recall-0.025": 0.326962,
"recall-0.05": 0.837543,
"recall-0.1": 0.990444,
"recall-0.15": 0.996587,
"recall-0.025": 0.32628,
"recall-0.05": 0.845051,
"recall-0.1": 0.989761,
"recall-0.15": 0.995904,
"recall-0.25": 0.99727,
"recall-0.5": 0.99727,
"num_labels": 1465
},
"hip_right": {
"count": 1462,
"mean": 0.037983,
"median": 0.032385,
"std": 0.025351,
"sem": 0.000663,
"mean": 0.037383,
"median": 0.032592,
"std": 0.024222,
"sem": 0.000634,
"min": 0.002509,
"max": 0.281736,
"recall-0.025": 0.31719,
"recall-0.05": 0.797408,
"recall-0.1": 0.969986,
"recall-0.025": 0.318554,
"recall-0.05": 0.802183,
"recall-0.1": 0.976126,
"recall-0.15": 0.993861,
"recall-0.25": 0.995907,
"recall-0.5": 0.997271,
@ -1295,82 +1295,82 @@ Results of the model in various experiments on different datasets. \
},
"knee_left": {
"count": 1461,
"mean": 0.039577,
"median": 0.033011,
"std": 0.034872,
"sem": 0.000913,
"min": 0.004244,
"mean": 0.038795,
"median": 0.032848,
"std": 0.034462,
"sem": 0.000902,
"min": 0.003309,
"max": 0.473605,
"recall-0.025": 0.283276,
"recall-0.05": 0.787031,
"recall-0.1": 0.967918,
"recall-0.15": 0.985666,
"recall-0.025": 0.287372,
"recall-0.05": 0.799317,
"recall-0.1": 0.978157,
"recall-0.15": 0.9843,
"recall-0.25": 0.990444,
"recall-0.5": 0.99727,
"num_labels": 1465
},
"knee_right": {
"count": 1455,
"mean": 0.039525,
"median": 0.031801,
"std": 0.029897,
"sem": 0.000784,
"mean": 0.038488,
"median": 0.031543,
"std": 0.026577,
"sem": 0.000697,
"min": 0.003512,
"max": 0.278788,
"recall-0.025": 0.343386,
"recall-0.05": 0.74366,
"recall-0.1": 0.959561,
"recall-0.15": 0.989034,
"recall-0.25": 0.994517,
"max": 0.275123,
"recall-0.025": 0.349554,
"recall-0.05": 0.749829,
"recall-0.1": 0.964359,
"recall-0.15": 0.993146,
"recall-0.25": 0.996573,
"recall-0.5": 0.997258,
"num_labels": 1459
},
"ankle_left": {
"count": 1457,
"mean": 0.064506,
"median": 0.035906,
"std": 0.071268,
"sem": 0.001868,
"count": 1458,
"mean": 0.056122,
"median": 0.034021,
"std": 0.062141,
"sem": 0.001628,
"min": 0.003035,
"max": 0.441184,
"recall-0.025": 0.334245,
"recall-0.05": 0.625427,
"recall-0.1": 0.805878,
"recall-0.15": 0.88175,
"recall-0.25": 0.958305,
"recall-0.5": 0.995899,
"max": 0.432301,
"recall-0.025": 0.347915,
"recall-0.05": 0.666439,
"recall-0.1": 0.855776,
"recall-0.15": 0.917977,
"recall-0.25": 0.971292,
"recall-0.5": 0.996582,
"num_labels": 1463
},
"ankle_right": {
"count": 1445,
"mean": 0.065253,
"median": 0.032697,
"std": 0.084609,
"sem": 0.002227,
"mean": 0.053755,
"median": 0.030905,
"std": 0.067212,
"sem": 0.001769,
"min": 0.001698,
"max": 0.469631,
"recall-0.025": 0.354795,
"recall-0.05": 0.680137,
"recall-0.1": 0.819178,
"recall-0.15": 0.867808,
"recall-0.25": 0.932877,
"max": 0.489965,
"recall-0.025": 0.378082,
"recall-0.05": 0.736301,
"recall-0.1": 0.856849,
"recall-0.15": 0.903425,
"recall-0.25": 0.962329,
"recall-0.5": 0.989726,
"num_labels": 1460
},
"joint_recalls": {
"num_labels": 18990,
"recall-0.025": 0.57177,
"recall-0.05": 0.84618,
"recall-0.1": 0.95108,
"recall-0.15": 0.97235,
"recall-0.25": 0.98673,
"recall-0.025": 0.57341,
"recall-0.05": 0.85408,
"recall-0.1": 0.9584,
"recall-0.15": 0.9772,
"recall-0.25": 0.99005,
"recall-0.5": 0.99674
}
}
{
"total_parts": 20444,
"correct_parts": 20170,
"pcp": 0.986598
"correct_parts": 20204,
"pcp": 0.988261
}
```

View File

@ -640,7 +640,7 @@ void TriangulatorInternal::undistort_poses(std::vector<cv::Mat> &poses, CameraIn
points.copyTo(poses[p].colRange(0, 2));
// Mask out points that are far outside the image (points slightly outside are still valid)
float mask_offset = (width + height) / 40.0;
float mask_offset = (width + height) / 20.0;
int num_joints = poses[p].rows;
for (int j = 0; j < num_joints; ++j)
{

View File

@ -867,8 +867,6 @@ namespace utils_2d_pose
int pad_top = paddings[2];
int box_left = box[0];
int box_top = box[1];
int img_w = image.cols;
int img_h = image.rows;
for (auto &kp : kpts)
{
@ -884,10 +882,6 @@ namespace utils_2d_pose
x += box_left;
y += box_top;
// Clamp to image region
x = std::max(0.0f, std::min(x, img_w - 1.0f));
y = std::max(0.0f, std::min(y, img_h - 1.0f));
}
}
@ -949,6 +943,37 @@ namespace utils_2d_pose
// Sometimes the detection model predicts multiple boxes with different shapes for the same
// person. They then result in strongly overlapping poses, which are merged here.
merge_close_poses(poses, {(size_t)image.cols, (size_t)image.rows});
// Clamp keypoints lying far outside the image to a margin around it and set their third component (kp[2]) to 0.001
float mask_offset = (image.cols + image.rows) / 10.0;
for (size_t i = 0; i < poses.size(); ++i)
{
for (size_t j = 0; j < poses[i].size(); ++j)
{
auto &kp = poses[i][j];
if (kp[0] < -mask_offset)
{
kp[0] = -mask_offset;
kp[2] = 0.001;
}
if (kp[1] < -mask_offset)
{
kp[1] = -mask_offset;
kp[2] = 0.001;
}
if (kp[0] >= image.cols + mask_offset)
{
kp[0] = image.cols + mask_offset;
kp[2] = 0.001;
}
if (kp[1] >= image.rows + mask_offset)
{
kp[1] = image.rows + mask_offset;
kp[2] = 0.001;
}
}
}
return poses;
}