Further small improvements.

This commit is contained in:
Daniel
2024-12-06 18:15:08 +01:00
parent ee8b9bafb3
commit 23108dd594
4 changed files with 179 additions and 170 deletions

View File

@ -7,12 +7,12 @@ onnx_config = dict(
codebase_config = dict(
# For later TensorRT inference, the number of output boxes needs to be as stable as possible,
# because a drop in the box count leads to a re-optimization which takes a lot of time,
# therefore sort out low confidence boxes outside the model and reduce the maximum number
# of output boxes to the smallest usable value.
# therefore reduce the maximum number of output boxes to the smallest usable value and filter out
# low-confidence boxes outside the model.
post_processing=dict(
score_threshold=0.0,
confidence_threshold=0.0,
iou_threshold=0.3,
iou_threshold=0.5,
max_output_boxes_per_class=10,
),
)

View File

@ -12,7 +12,7 @@ codebase_config = dict(
post_processing=dict(
score_threshold=0.0,
confidence_threshold=0.0,
iou_threshold=0.3,
iou_threshold=0.5,
max_output_boxes_per_class=10,
),
)

View File

@ -6,74 +6,74 @@ Results of the model in various experiments on different datasets.
```json
{
"avg_time_2d": 0.010846347323918747,
"avg_time_3d": 0.0003320467674126059,
"avg_fps": 89.45828817893282
"avg_time_2d": 0.010003441875263796,
"avg_time_3d": 0.0003245426436602059,
"avg_fps": 96.824312446218
}
{
"person_nums": {
"total_frames": 600,
"total_labels": 600,
"total_preds": 601,
"total_preds": 600,
"considered_empty": 0,
"valid_preds": 600,
"invalid_preds": 1,
"invalid_preds": 0,
"missing": 0,
"invalid_fraction": 0.00166,
"precision": 0.99834,
"invalid_fraction": 0.0,
"precision": 1.0,
"recall": 1.0,
"f1": 0.99917,
"non_empty": 601
"f1": 1.0,
"non_empty": 600
},
"mpjpe": {
"count": 600,
"mean": 0.066093,
"median": 0.058635,
"std": 0.027815,
"sem": 0.001136,
"min": 0.040333,
"max": 0.189198,
"mean": 0.067074,
"median": 0.058987,
"std": 0.027958,
"sem": 0.001142,
"min": 0.042414,
"max": 0.189648,
"recall-0.025": 0.0,
"recall-0.05": 0.101667,
"recall-0.1": 0.938333,
"recall-0.05": 0.061667,
"recall-0.1": 0.93,
"recall-0.15": 0.95,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600,
"ap-0.025": 0.0,
"ap-0.05": 0.023002,
"ap-0.1": 0.897991,
"ap-0.15": 0.914985,
"ap-0.05": 0.00503,
"ap-0.1": 0.887557,
"ap-0.15": 0.913732,
"ap-0.25": 1.0,
"ap-0.5": 1.0
},
"nose": {
"count": 600,
"mean": 0.114181,
"median": 0.099121,
"std": 0.042396,
"sem": 0.001732,
"min": 0.029365,
"max": 0.287428,
"mean": 0.114519,
"median": 0.097973,
"std": 0.044206,
"sem": 0.001806,
"min": 0.025858,
"max": 0.292026,
"recall-0.025": 0.0,
"recall-0.05": 0.011667,
"recall-0.1": 0.508333,
"recall-0.15": 0.801667,
"recall-0.25": 0.991667,
"recall-0.05": 0.015,
"recall-0.1": 0.52,
"recall-0.15": 0.816667,
"recall-0.25": 0.988333,
"recall-0.5": 1.0,
"num_labels": 600
},
"shoulder_left": {
"count": 600,
"mean": 0.03478,
"median": 0.026496,
"std": 0.031647,
"sem": 0.001293,
"min": 0.003155,
"max": 0.183779,
"recall-0.025": 0.455,
"recall-0.05": 0.853333,
"recall-0.1": 0.95,
"mean": 0.034466,
"median": 0.025369,
"std": 0.032528,
"sem": 0.001329,
"min": 0.002782,
"max": 0.182086,
"recall-0.025": 0.483333,
"recall-0.05": 0.863333,
"recall-0.1": 0.941667,
"recall-0.15": 0.966667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
@ -81,95 +81,95 @@ Results of the model in various experiments on different datasets.
},
"shoulder_right": {
"count": 600,
"mean": 0.047867,
"median": 0.034293,
"std": 0.039619,
"sem": 0.001619,
"min": 0.005688,
"max": 0.254393,
"recall-0.025": 0.218333,
"mean": 0.048171,
"median": 0.03483,
"std": 0.040889,
"sem": 0.001671,
"min": 0.003841,
"max": 0.258489,
"recall-0.025": 0.221667,
"recall-0.05": 0.751667,
"recall-0.1": 0.913333,
"recall-0.15": 0.95,
"recall-0.15": 0.945,
"recall-0.25": 0.998333,
"recall-0.5": 1.0,
"num_labels": 600
},
"elbow_left": {
"count": 600,
"mean": 0.044022,
"median": 0.035159,
"std": 0.034701,
"sem": 0.001418,
"min": 0.002814,
"max": 0.194526,
"recall-0.025": 0.233333,
"recall-0.05": 0.771667,
"recall-0.1": 0.943333,
"recall-0.15": 0.958333,
"mean": 0.043039,
"median": 0.03493,
"std": 0.034865,
"sem": 0.001425,
"min": 0.002006,
"max": 0.197281,
"recall-0.025": 0.248333,
"recall-0.05": 0.805,
"recall-0.1": 0.941667,
"recall-0.15": 0.955,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"elbow_right": {
"count": 600,
"mean": 0.04408,
"median": 0.033951,
"std": 0.036319,
"sem": 0.001484,
"min": 0.008171,
"max": 0.360134,
"recall-0.025": 0.265,
"recall-0.05": 0.78,
"recall-0.1": 0.933333,
"recall-0.15": 0.946667,
"recall-0.25": 0.998333,
"mean": 0.044694,
"median": 0.032396,
"std": 0.03821,
"sem": 0.001561,
"min": 0.005657,
"max": 0.367138,
"recall-0.025": 0.24,
"recall-0.05": 0.791667,
"recall-0.1": 0.928333,
"recall-0.15": 0.943333,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_left": {
"count": 600,
"mean": 0.043753,
"median": 0.027211,
"std": 0.044668,
"sem": 0.001825,
"min": 0.002715,
"max": 0.190751,
"recall-0.025": 0.46,
"recall-0.05": 0.74,
"recall-0.1": 0.891667,
"recall-0.15": 0.925,
"recall-0.25": 1.0,
"mean": 0.043228,
"median": 0.024022,
"std": 0.047501,
"sem": 0.001941,
"min": 0.002332,
"max": 0.283113,
"recall-0.025": 0.52,
"recall-0.05": 0.746667,
"recall-0.1": 0.885,
"recall-0.15": 0.92,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_right": {
"count": 600,
"mean": 0.046553,
"median": 0.026979,
"std": 0.050263,
"sem": 0.002054,
"min": 0.003364,
"max": 0.244861,
"recall-0.025": 0.46,
"recall-0.05": 0.733333,
"recall-0.1": 0.87,
"count": 599,
"mean": 0.047526,
"median": 0.027369,
"std": 0.055131,
"sem": 0.002254,
"min": 0.001,
"max": 0.492857,
"recall-0.025": 0.451667,
"recall-0.05": 0.74,
"recall-0.1": 0.873333,
"recall-0.15": 0.906667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"recall-0.25": 0.991667,
"recall-0.5": 0.998333,
"num_labels": 600
},
"hip_left": {
"count": 600,
"mean": 0.08362,
"median": 0.077619,
"std": 0.032967,
"sem": 0.001347,
"min": 0.018157,
"max": 0.240771,
"recall-0.025": 0.005,
"recall-0.05": 0.055,
"recall-0.1": 0.848333,
"mean": 0.089504,
"median": 0.085316,
"std": 0.032919,
"sem": 0.001345,
"min": 0.011484,
"max": 0.236463,
"recall-0.025": 0.006667,
"recall-0.05": 0.031667,
"recall-0.1": 0.815,
"recall-0.15": 0.951667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
@ -177,98 +177,98 @@ Results of the model in various experiments on different datasets.
},
"hip_right": {
"count": 600,
"mean": 0.106567,
"median": 0.104243,
"std": 0.026243,
"sem": 0.001072,
"min": 0.035565,
"max": 0.245341,
"mean": 0.112947,
"median": 0.112279,
"std": 0.026967,
"sem": 0.001102,
"min": 0.041373,
"max": 0.235641,
"recall-0.025": 0.0,
"recall-0.05": 0.003333,
"recall-0.1": 0.415,
"recall-0.05": 0.01,
"recall-0.1": 0.245,
"recall-0.15": 0.946667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"knee_left": {
"count": 599,
"mean": 0.063278,
"median": 0.047513,
"std": 0.056978,
"sem": 0.00233,
"min": 0.017587,
"max": 0.4004,
"recall-0.025": 0.038333,
"recall-0.05": 0.546667,
"recall-0.1": 0.883333,
"recall-0.15": 0.925,
"recall-0.25": 0.978333,
"recall-0.5": 0.998333,
"count": 600,
"mean": 0.061189,
"median": 0.045843,
"std": 0.0566,
"sem": 0.002313,
"min": 0.012587,
"max": 0.400213,
"recall-0.025": 0.05,
"recall-0.05": 0.58,
"recall-0.1": 0.91,
"recall-0.15": 0.926667,
"recall-0.25": 0.981667,
"recall-0.5": 1.0,
"num_labels": 600
},
"knee_right": {
"count": 600,
"mean": 0.050742,
"median": 0.041408,
"std": 0.037974,
"sem": 0.001552,
"min": 0.01394,
"max": 0.279839,
"recall-0.025": 0.053333,
"recall-0.05": 0.75,
"recall-0.1": 0.941667,
"recall-0.15": 0.941667,
"recall-0.25": 0.996667,
"mean": 0.052612,
"median": 0.04423,
"std": 0.037278,
"sem": 0.001523,
"min": 0.01118,
"max": 0.249994,
"recall-0.025": 0.038333,
"recall-0.05": 0.736667,
"recall-0.1": 0.936667,
"recall-0.15": 0.94,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"ankle_left": {
"count": 600,
"mean": 0.096717,
"median": 0.085484,
"std": 0.043279,
"sem": 0.001768,
"min": 0.050765,
"max": 0.49651,
"count": 598,
"mean": 0.095824,
"median": 0.084767,
"std": 0.048441,
"sem": 0.001983,
"min": 0.045599,
"max": 0.496625,
"recall-0.025": 0.0,
"recall-0.05": 0.0,
"recall-0.1": 0.825,
"recall-0.15": 0.935,
"recall-0.25": 0.988333,
"recall-0.5": 1.0,
"recall-0.05": 0.003333,
"recall-0.1": 0.843333,
"recall-0.15": 0.94,
"recall-0.25": 0.981667,
"recall-0.5": 0.996667,
"num_labels": 600
},
"ankle_right": {
"count": 600,
"mean": 0.08227,
"median": 0.068786,
"std": 0.049929,
"sem": 0.00204,
"min": 0.028705,
"max": 0.486848,
"count": 598,
"mean": 0.080368,
"median": 0.067762,
"std": 0.045136,
"sem": 0.001847,
"min": 0.031319,
"max": 0.490733,
"recall-0.025": 0.0,
"recall-0.05": 0.033333,
"recall-0.1": 0.896667,
"recall-0.15": 0.916667,
"recall-0.25": 0.985,
"recall-0.5": 1.0,
"recall-0.05": 0.028333,
"recall-0.1": 0.89,
"recall-0.15": 0.913333,
"recall-0.25": 0.983333,
"recall-0.5": 0.996667,
"num_labels": 600
},
"joint_recalls": {
"num_labels": 7800,
"recall-0.025": 0.16782,
"recall-0.05": 0.46333,
"recall-0.1": 0.83154,
"recall-0.15": 0.92846,
"recall-0.25": 0.99462,
"recall-0.5": 0.99974
"recall-0.025": 0.17346,
"recall-0.05": 0.4691,
"recall-0.1": 0.81808,
"recall-0.15": 0.92833,
"recall-0.25": 0.99333,
"recall-0.5": 0.99923
}
}
{
"total_parts": 8400,
"correct_parts": 8111,
"pcp": 0.965595
"correct_parts": 8084,
"pcp": 0.962381
}
```

View File

@ -148,7 +148,7 @@ class LetterBox:
resized_img = cv2.resize(
image,
(new_w, new_h),
interpolation=cv2.INTER_LINEAR,
interpolation=cv2.INTER_NEAREST,
)
# Optionally pad the image
@ -273,7 +273,7 @@ class BoxCrop:
resized_img = cv2.resize(
cropped_img,
(new_w, new_h),
interpolation=cv2.INTER_LINEAR,
interpolation=cv2.INTER_NEAREST,
)
# Optionally pad the image
@ -309,6 +309,10 @@ class RTMDet(BaseModel):
self.conf_threshold = conf_threshold
self.letterbox = LetterBox(self.target_size, fill_value=114)
min_area_scale = 0.025 * 0.025
img_area = self.target_size[0] * self.target_size[1]
self.min_area = img_area * min_area_scale
def preprocess(self, image: np.ndarray):
image = self.letterbox.resize_image(image)
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
@ -326,6 +330,11 @@ class RTMDet(BaseModel):
keep = boxes[:, 4] > self.conf_threshold
boxes = boxes[keep]
# Drop boxes with too small area
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
keep = areas >= self.min_area
boxes = boxes[keep]
paddings, scale, _ = self.letterbox.calc_params(image.shape)
boxes[:, 0] -= paddings[0]