Further small improvements.
This commit is contained in:
@ -7,12 +7,12 @@ onnx_config = dict(
|
|||||||
codebase_config = dict(
|
codebase_config = dict(
|
||||||
# For later TensorRT inference, the number of output boxes needs to be as stable as possible,
|
# For later TensorRT inference, the number of output boxes needs to be as stable as possible,
|
||||||
# because a drop in the box count leads to a re-optimization which takes a lot of time,
|
# because a drop in the box count leads to a re-optimization which takes a lot of time,
|
||||||
# therefore sort out low confidence boxes outside the model and reduce the maximum number
|
# therefore reduce the maximum number of output boxes to the smallest usable value and filter out
|
||||||
# of output boxes to the smallest usable value.
|
# low-confidence boxes outside the model.
|
||||||
post_processing=dict(
|
post_processing=dict(
|
||||||
score_threshold=0.0,
|
score_threshold=0.0,
|
||||||
confidence_threshold=0.0,
|
confidence_threshold=0.0,
|
||||||
iou_threshold=0.3,
|
iou_threshold=0.5,
|
||||||
max_output_boxes_per_class=10,
|
max_output_boxes_per_class=10,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
@ -12,7 +12,7 @@ codebase_config = dict(
|
|||||||
post_processing=dict(
|
post_processing=dict(
|
||||||
score_threshold=0.0,
|
score_threshold=0.0,
|
||||||
confidence_threshold=0.0,
|
confidence_threshold=0.0,
|
||||||
iou_threshold=0.3,
|
iou_threshold=0.5,
|
||||||
max_output_boxes_per_class=10,
|
max_output_boxes_per_class=10,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
328
media/RESULTS.md
328
media/RESULTS.md
@ -6,74 +6,74 @@ Results of the model in various experiments on different datasets.
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"avg_time_2d": 0.010846347323918747,
|
"avg_time_2d": 0.010003441875263796,
|
||||||
"avg_time_3d": 0.0003320467674126059,
|
"avg_time_3d": 0.0003245426436602059,
|
||||||
"avg_fps": 89.45828817893282
|
"avg_fps": 96.824312446218
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
"person_nums": {
|
"person_nums": {
|
||||||
"total_frames": 600,
|
"total_frames": 600,
|
||||||
"total_labels": 600,
|
"total_labels": 600,
|
||||||
"total_preds": 601,
|
"total_preds": 600,
|
||||||
"considered_empty": 0,
|
"considered_empty": 0,
|
||||||
"valid_preds": 600,
|
"valid_preds": 600,
|
||||||
"invalid_preds": 1,
|
"invalid_preds": 0,
|
||||||
"missing": 0,
|
"missing": 0,
|
||||||
"invalid_fraction": 0.00166,
|
"invalid_fraction": 0.0,
|
||||||
"precision": 0.99834,
|
"precision": 1.0,
|
||||||
"recall": 1.0,
|
"recall": 1.0,
|
||||||
"f1": 0.99917,
|
"f1": 1.0,
|
||||||
"non_empty": 601
|
"non_empty": 600
|
||||||
},
|
},
|
||||||
"mpjpe": {
|
"mpjpe": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.066093,
|
"mean": 0.067074,
|
||||||
"median": 0.058635,
|
"median": 0.058987,
|
||||||
"std": 0.027815,
|
"std": 0.027958,
|
||||||
"sem": 0.001136,
|
"sem": 0.001142,
|
||||||
"min": 0.040333,
|
"min": 0.042414,
|
||||||
"max": 0.189198,
|
"max": 0.189648,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.101667,
|
"recall-0.05": 0.061667,
|
||||||
"recall-0.1": 0.938333,
|
"recall-0.1": 0.93,
|
||||||
"recall-0.15": 0.95,
|
"recall-0.15": 0.95,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600,
|
"num_labels": 600,
|
||||||
"ap-0.025": 0.0,
|
"ap-0.025": 0.0,
|
||||||
"ap-0.05": 0.023002,
|
"ap-0.05": 0.00503,
|
||||||
"ap-0.1": 0.897991,
|
"ap-0.1": 0.887557,
|
||||||
"ap-0.15": 0.914985,
|
"ap-0.15": 0.913732,
|
||||||
"ap-0.25": 1.0,
|
"ap-0.25": 1.0,
|
||||||
"ap-0.5": 1.0
|
"ap-0.5": 1.0
|
||||||
},
|
},
|
||||||
"nose": {
|
"nose": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.114181,
|
"mean": 0.114519,
|
||||||
"median": 0.099121,
|
"median": 0.097973,
|
||||||
"std": 0.042396,
|
"std": 0.044206,
|
||||||
"sem": 0.001732,
|
"sem": 0.001806,
|
||||||
"min": 0.029365,
|
"min": 0.025858,
|
||||||
"max": 0.287428,
|
"max": 0.292026,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.011667,
|
"recall-0.05": 0.015,
|
||||||
"recall-0.1": 0.508333,
|
"recall-0.1": 0.52,
|
||||||
"recall-0.15": 0.801667,
|
"recall-0.15": 0.816667,
|
||||||
"recall-0.25": 0.991667,
|
"recall-0.25": 0.988333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"shoulder_left": {
|
"shoulder_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.03478,
|
"mean": 0.034466,
|
||||||
"median": 0.026496,
|
"median": 0.025369,
|
||||||
"std": 0.031647,
|
"std": 0.032528,
|
||||||
"sem": 0.001293,
|
"sem": 0.001329,
|
||||||
"min": 0.003155,
|
"min": 0.002782,
|
||||||
"max": 0.183779,
|
"max": 0.182086,
|
||||||
"recall-0.025": 0.455,
|
"recall-0.025": 0.483333,
|
||||||
"recall-0.05": 0.853333,
|
"recall-0.05": 0.863333,
|
||||||
"recall-0.1": 0.95,
|
"recall-0.1": 0.941667,
|
||||||
"recall-0.15": 0.966667,
|
"recall-0.15": 0.966667,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
@ -81,95 +81,95 @@ Results of the model in various experiments on different datasets.
|
|||||||
},
|
},
|
||||||
"shoulder_right": {
|
"shoulder_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.047867,
|
"mean": 0.048171,
|
||||||
"median": 0.034293,
|
"median": 0.03483,
|
||||||
"std": 0.039619,
|
"std": 0.040889,
|
||||||
"sem": 0.001619,
|
"sem": 0.001671,
|
||||||
"min": 0.005688,
|
"min": 0.003841,
|
||||||
"max": 0.254393,
|
"max": 0.258489,
|
||||||
"recall-0.025": 0.218333,
|
"recall-0.025": 0.221667,
|
||||||
"recall-0.05": 0.751667,
|
"recall-0.05": 0.751667,
|
||||||
"recall-0.1": 0.913333,
|
"recall-0.1": 0.913333,
|
||||||
"recall-0.15": 0.95,
|
"recall-0.15": 0.945,
|
||||||
"recall-0.25": 0.998333,
|
"recall-0.25": 0.998333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"elbow_left": {
|
"elbow_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.044022,
|
"mean": 0.043039,
|
||||||
"median": 0.035159,
|
"median": 0.03493,
|
||||||
"std": 0.034701,
|
"std": 0.034865,
|
||||||
"sem": 0.001418,
|
"sem": 0.001425,
|
||||||
"min": 0.002814,
|
"min": 0.002006,
|
||||||
"max": 0.194526,
|
"max": 0.197281,
|
||||||
"recall-0.025": 0.233333,
|
"recall-0.025": 0.248333,
|
||||||
"recall-0.05": 0.771667,
|
"recall-0.05": 0.805,
|
||||||
"recall-0.1": 0.943333,
|
"recall-0.1": 0.941667,
|
||||||
"recall-0.15": 0.958333,
|
"recall-0.15": 0.955,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"elbow_right": {
|
"elbow_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.04408,
|
"mean": 0.044694,
|
||||||
"median": 0.033951,
|
"median": 0.032396,
|
||||||
"std": 0.036319,
|
"std": 0.03821,
|
||||||
"sem": 0.001484,
|
"sem": 0.001561,
|
||||||
"min": 0.008171,
|
"min": 0.005657,
|
||||||
"max": 0.360134,
|
"max": 0.367138,
|
||||||
"recall-0.025": 0.265,
|
"recall-0.025": 0.24,
|
||||||
"recall-0.05": 0.78,
|
"recall-0.05": 0.791667,
|
||||||
"recall-0.1": 0.933333,
|
"recall-0.1": 0.928333,
|
||||||
"recall-0.15": 0.946667,
|
"recall-0.15": 0.943333,
|
||||||
"recall-0.25": 0.998333,
|
"recall-0.25": 0.996667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"wrist_left": {
|
"wrist_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.043753,
|
"mean": 0.043228,
|
||||||
"median": 0.027211,
|
"median": 0.024022,
|
||||||
"std": 0.044668,
|
"std": 0.047501,
|
||||||
"sem": 0.001825,
|
"sem": 0.001941,
|
||||||
"min": 0.002715,
|
"min": 0.002332,
|
||||||
"max": 0.190751,
|
"max": 0.283113,
|
||||||
"recall-0.025": 0.46,
|
"recall-0.025": 0.52,
|
||||||
"recall-0.05": 0.74,
|
"recall-0.05": 0.746667,
|
||||||
"recall-0.1": 0.891667,
|
"recall-0.1": 0.885,
|
||||||
"recall-0.15": 0.925,
|
"recall-0.15": 0.92,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 0.996667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"wrist_right": {
|
"wrist_right": {
|
||||||
"count": 600,
|
"count": 599,
|
||||||
"mean": 0.046553,
|
"mean": 0.047526,
|
||||||
"median": 0.026979,
|
"median": 0.027369,
|
||||||
"std": 0.050263,
|
"std": 0.055131,
|
||||||
"sem": 0.002054,
|
"sem": 0.002254,
|
||||||
"min": 0.003364,
|
"min": 0.001,
|
||||||
"max": 0.244861,
|
"max": 0.492857,
|
||||||
"recall-0.025": 0.46,
|
"recall-0.025": 0.451667,
|
||||||
"recall-0.05": 0.733333,
|
"recall-0.05": 0.74,
|
||||||
"recall-0.1": 0.87,
|
"recall-0.1": 0.873333,
|
||||||
"recall-0.15": 0.906667,
|
"recall-0.15": 0.906667,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 0.991667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 0.998333,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"hip_left": {
|
"hip_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.08362,
|
"mean": 0.089504,
|
||||||
"median": 0.077619,
|
"median": 0.085316,
|
||||||
"std": 0.032967,
|
"std": 0.032919,
|
||||||
"sem": 0.001347,
|
"sem": 0.001345,
|
||||||
"min": 0.018157,
|
"min": 0.011484,
|
||||||
"max": 0.240771,
|
"max": 0.236463,
|
||||||
"recall-0.025": 0.005,
|
"recall-0.025": 0.006667,
|
||||||
"recall-0.05": 0.055,
|
"recall-0.05": 0.031667,
|
||||||
"recall-0.1": 0.848333,
|
"recall-0.1": 0.815,
|
||||||
"recall-0.15": 0.951667,
|
"recall-0.15": 0.951667,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
@ -177,98 +177,98 @@ Results of the model in various experiments on different datasets.
|
|||||||
},
|
},
|
||||||
"hip_right": {
|
"hip_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.106567,
|
"mean": 0.112947,
|
||||||
"median": 0.104243,
|
"median": 0.112279,
|
||||||
"std": 0.026243,
|
"std": 0.026967,
|
||||||
"sem": 0.001072,
|
"sem": 0.001102,
|
||||||
"min": 0.035565,
|
"min": 0.041373,
|
||||||
"max": 0.245341,
|
"max": 0.235641,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.003333,
|
"recall-0.05": 0.01,
|
||||||
"recall-0.1": 0.415,
|
"recall-0.1": 0.245,
|
||||||
"recall-0.15": 0.946667,
|
"recall-0.15": 0.946667,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"knee_left": {
|
"knee_left": {
|
||||||
"count": 599,
|
"count": 600,
|
||||||
"mean": 0.063278,
|
"mean": 0.061189,
|
||||||
"median": 0.047513,
|
"median": 0.045843,
|
||||||
"std": 0.056978,
|
"std": 0.0566,
|
||||||
"sem": 0.00233,
|
"sem": 0.002313,
|
||||||
"min": 0.017587,
|
"min": 0.012587,
|
||||||
"max": 0.4004,
|
"max": 0.400213,
|
||||||
"recall-0.025": 0.038333,
|
"recall-0.025": 0.05,
|
||||||
"recall-0.05": 0.546667,
|
"recall-0.05": 0.58,
|
||||||
"recall-0.1": 0.883333,
|
"recall-0.1": 0.91,
|
||||||
"recall-0.15": 0.925,
|
"recall-0.15": 0.926667,
|
||||||
"recall-0.25": 0.978333,
|
"recall-0.25": 0.981667,
|
||||||
"recall-0.5": 0.998333,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"knee_right": {
|
"knee_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.050742,
|
"mean": 0.052612,
|
||||||
"median": 0.041408,
|
"median": 0.04423,
|
||||||
"std": 0.037974,
|
"std": 0.037278,
|
||||||
"sem": 0.001552,
|
"sem": 0.001523,
|
||||||
"min": 0.01394,
|
"min": 0.01118,
|
||||||
"max": 0.279839,
|
"max": 0.249994,
|
||||||
"recall-0.025": 0.053333,
|
"recall-0.025": 0.038333,
|
||||||
"recall-0.05": 0.75,
|
"recall-0.05": 0.736667,
|
||||||
"recall-0.1": 0.941667,
|
"recall-0.1": 0.936667,
|
||||||
"recall-0.15": 0.941667,
|
"recall-0.15": 0.94,
|
||||||
"recall-0.25": 0.996667,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"ankle_left": {
|
"ankle_left": {
|
||||||
"count": 600,
|
"count": 598,
|
||||||
"mean": 0.096717,
|
"mean": 0.095824,
|
||||||
"median": 0.085484,
|
"median": 0.084767,
|
||||||
"std": 0.043279,
|
"std": 0.048441,
|
||||||
"sem": 0.001768,
|
"sem": 0.001983,
|
||||||
"min": 0.050765,
|
"min": 0.045599,
|
||||||
"max": 0.49651,
|
"max": 0.496625,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.0,
|
"recall-0.05": 0.003333,
|
||||||
"recall-0.1": 0.825,
|
"recall-0.1": 0.843333,
|
||||||
"recall-0.15": 0.935,
|
"recall-0.15": 0.94,
|
||||||
"recall-0.25": 0.988333,
|
"recall-0.25": 0.981667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 0.996667,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"ankle_right": {
|
"ankle_right": {
|
||||||
"count": 600,
|
"count": 598,
|
||||||
"mean": 0.08227,
|
"mean": 0.080368,
|
||||||
"median": 0.068786,
|
"median": 0.067762,
|
||||||
"std": 0.049929,
|
"std": 0.045136,
|
||||||
"sem": 0.00204,
|
"sem": 0.001847,
|
||||||
"min": 0.028705,
|
"min": 0.031319,
|
||||||
"max": 0.486848,
|
"max": 0.490733,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.033333,
|
"recall-0.05": 0.028333,
|
||||||
"recall-0.1": 0.896667,
|
"recall-0.1": 0.89,
|
||||||
"recall-0.15": 0.916667,
|
"recall-0.15": 0.913333,
|
||||||
"recall-0.25": 0.985,
|
"recall-0.25": 0.983333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 0.996667,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"joint_recalls": {
|
"joint_recalls": {
|
||||||
"num_labels": 7800,
|
"num_labels": 7800,
|
||||||
"recall-0.025": 0.16782,
|
"recall-0.025": 0.17346,
|
||||||
"recall-0.05": 0.46333,
|
"recall-0.05": 0.4691,
|
||||||
"recall-0.1": 0.83154,
|
"recall-0.1": 0.81808,
|
||||||
"recall-0.15": 0.92846,
|
"recall-0.15": 0.92833,
|
||||||
"recall-0.25": 0.99462,
|
"recall-0.25": 0.99333,
|
||||||
"recall-0.5": 0.99974
|
"recall-0.5": 0.99923
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
"total_parts": 8400,
|
"total_parts": 8400,
|
||||||
"correct_parts": 8111,
|
"correct_parts": 8084,
|
||||||
"pcp": 0.965595
|
"pcp": 0.962381
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -148,7 +148,7 @@ class LetterBox:
|
|||||||
resized_img = cv2.resize(
|
resized_img = cv2.resize(
|
||||||
image,
|
image,
|
||||||
(new_w, new_h),
|
(new_w, new_h),
|
||||||
interpolation=cv2.INTER_LINEAR,
|
interpolation=cv2.INTER_NEAREST,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Optionally pad the image
|
# Optionally pad the image
|
||||||
@ -273,7 +273,7 @@ class BoxCrop:
|
|||||||
resized_img = cv2.resize(
|
resized_img = cv2.resize(
|
||||||
cropped_img,
|
cropped_img,
|
||||||
(new_w, new_h),
|
(new_w, new_h),
|
||||||
interpolation=cv2.INTER_LINEAR,
|
interpolation=cv2.INTER_NEAREST,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Optionally pad the image
|
# Optionally pad the image
|
||||||
@ -309,6 +309,10 @@ class RTMDet(BaseModel):
|
|||||||
self.conf_threshold = conf_threshold
|
self.conf_threshold = conf_threshold
|
||||||
self.letterbox = LetterBox(self.target_size, fill_value=114)
|
self.letterbox = LetterBox(self.target_size, fill_value=114)
|
||||||
|
|
||||||
|
min_area_scale = 0.025 * 0.025
|
||||||
|
img_area = self.target_size[0] * self.target_size[1]
|
||||||
|
self.min_area = img_area * min_area_scale
|
||||||
|
|
||||||
def preprocess(self, image: np.ndarray):
|
def preprocess(self, image: np.ndarray):
|
||||||
image = self.letterbox.resize_image(image)
|
image = self.letterbox.resize_image(image)
|
||||||
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
||||||
@ -326,6 +330,11 @@ class RTMDet(BaseModel):
|
|||||||
keep = boxes[:, 4] > self.conf_threshold
|
keep = boxes[:, 4] > self.conf_threshold
|
||||||
boxes = boxes[keep]
|
boxes = boxes[keep]
|
||||||
|
|
||||||
|
# Drop boxes with too small area
|
||||||
|
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
|
||||||
|
keep = areas >= self.min_area
|
||||||
|
boxes = boxes[keep]
|
||||||
|
|
||||||
paddings, scale, _ = self.letterbox.calc_params(image.shape)
|
paddings, scale, _ = self.letterbox.calc_params(image.shape)
|
||||||
|
|
||||||
boxes[:, 0] -= paddings[0]
|
boxes[:, 0] -= paddings[0]
|
||||||
|
|||||||
Reference in New Issue
Block a user