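Further small improvements: raise the post-processing IoU threshold from 0.3 to 0.5, switch image resizing to nearest-neighbor interpolation, drop detection boxes with too small an area in RTMDet, and update the benchmark results.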

This commit is contained in:
Daniel
2024-12-06 18:15:08 +01:00
parent ee8b9bafb3
commit 23108dd594
4 changed files with 179 additions and 170 deletions

View File

@ -7,12 +7,12 @@ onnx_config = dict(
codebase_config = dict( codebase_config = dict(
# For later TensorRT inference, the number of output boxes needs to be as stable as possible, # For later TensorRT inference, the number of output boxes needs to be as stable as possible,
# because a drop in the box count leads to a re-optimization which takes a lot of time, # because a drop in the box count leads to a re-optimization which takes a lot of time,
# therefore sort out low confidence boxes outside the model and reduce the maximum number # therefore reduce the maximum number of output boxes to the smallest usable value and sort out
# of output boxes to the smallest usable value. # low confidence boxes outside the model.
post_processing=dict( post_processing=dict(
score_threshold=0.0, score_threshold=0.0,
confidence_threshold=0.0, confidence_threshold=0.0,
iou_threshold=0.3, iou_threshold=0.5,
max_output_boxes_per_class=10, max_output_boxes_per_class=10,
), ),
) )

View File

@ -12,7 +12,7 @@ codebase_config = dict(
post_processing=dict( post_processing=dict(
score_threshold=0.0, score_threshold=0.0,
confidence_threshold=0.0, confidence_threshold=0.0,
iou_threshold=0.3, iou_threshold=0.5,
max_output_boxes_per_class=10, max_output_boxes_per_class=10,
), ),
) )

View File

@ -6,74 +6,74 @@ Results of the model in various experiments on different datasets.
```json ```json
{ {
"avg_time_2d": 0.010846347323918747, "avg_time_2d": 0.010003441875263796,
"avg_time_3d": 0.0003320467674126059, "avg_time_3d": 0.0003245426436602059,
"avg_fps": 89.45828817893282 "avg_fps": 96.824312446218
} }
{ {
"person_nums": { "person_nums": {
"total_frames": 600, "total_frames": 600,
"total_labels": 600, "total_labels": 600,
"total_preds": 601, "total_preds": 600,
"considered_empty": 0, "considered_empty": 0,
"valid_preds": 600, "valid_preds": 600,
"invalid_preds": 1, "invalid_preds": 0,
"missing": 0, "missing": 0,
"invalid_fraction": 0.00166, "invalid_fraction": 0.0,
"precision": 0.99834, "precision": 1.0,
"recall": 1.0, "recall": 1.0,
"f1": 0.99917, "f1": 1.0,
"non_empty": 601 "non_empty": 600
}, },
"mpjpe": { "mpjpe": {
"count": 600, "count": 600,
"mean": 0.066093, "mean": 0.067074,
"median": 0.058635, "median": 0.058987,
"std": 0.027815, "std": 0.027958,
"sem": 0.001136, "sem": 0.001142,
"min": 0.040333, "min": 0.042414,
"max": 0.189198, "max": 0.189648,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.101667, "recall-0.05": 0.061667,
"recall-0.1": 0.938333, "recall-0.1": 0.93,
"recall-0.15": 0.95, "recall-0.15": 0.95,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600, "num_labels": 600,
"ap-0.025": 0.0, "ap-0.025": 0.0,
"ap-0.05": 0.023002, "ap-0.05": 0.00503,
"ap-0.1": 0.897991, "ap-0.1": 0.887557,
"ap-0.15": 0.914985, "ap-0.15": 0.913732,
"ap-0.25": 1.0, "ap-0.25": 1.0,
"ap-0.5": 1.0 "ap-0.5": 1.0
}, },
"nose": { "nose": {
"count": 600, "count": 600,
"mean": 0.114181, "mean": 0.114519,
"median": 0.099121, "median": 0.097973,
"std": 0.042396, "std": 0.044206,
"sem": 0.001732, "sem": 0.001806,
"min": 0.029365, "min": 0.025858,
"max": 0.287428, "max": 0.292026,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.011667, "recall-0.05": 0.015,
"recall-0.1": 0.508333, "recall-0.1": 0.52,
"recall-0.15": 0.801667, "recall-0.15": 0.816667,
"recall-0.25": 0.991667, "recall-0.25": 0.988333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"shoulder_left": { "shoulder_left": {
"count": 600, "count": 600,
"mean": 0.03478, "mean": 0.034466,
"median": 0.026496, "median": 0.025369,
"std": 0.031647, "std": 0.032528,
"sem": 0.001293, "sem": 0.001329,
"min": 0.003155, "min": 0.002782,
"max": 0.183779, "max": 0.182086,
"recall-0.025": 0.455, "recall-0.025": 0.483333,
"recall-0.05": 0.853333, "recall-0.05": 0.863333,
"recall-0.1": 0.95, "recall-0.1": 0.941667,
"recall-0.15": 0.966667, "recall-0.15": 0.966667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
@ -81,95 +81,95 @@ Results of the model in various experiments on different datasets.
}, },
"shoulder_right": { "shoulder_right": {
"count": 600, "count": 600,
"mean": 0.047867, "mean": 0.048171,
"median": 0.034293, "median": 0.03483,
"std": 0.039619, "std": 0.040889,
"sem": 0.001619, "sem": 0.001671,
"min": 0.005688, "min": 0.003841,
"max": 0.254393, "max": 0.258489,
"recall-0.025": 0.218333, "recall-0.025": 0.221667,
"recall-0.05": 0.751667, "recall-0.05": 0.751667,
"recall-0.1": 0.913333, "recall-0.1": 0.913333,
"recall-0.15": 0.95, "recall-0.15": 0.945,
"recall-0.25": 0.998333, "recall-0.25": 0.998333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_left": { "elbow_left": {
"count": 600, "count": 600,
"mean": 0.044022, "mean": 0.043039,
"median": 0.035159, "median": 0.03493,
"std": 0.034701, "std": 0.034865,
"sem": 0.001418, "sem": 0.001425,
"min": 0.002814, "min": 0.002006,
"max": 0.194526, "max": 0.197281,
"recall-0.025": 0.233333, "recall-0.025": 0.248333,
"recall-0.05": 0.771667, "recall-0.05": 0.805,
"recall-0.1": 0.943333, "recall-0.1": 0.941667,
"recall-0.15": 0.958333, "recall-0.15": 0.955,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_right": { "elbow_right": {
"count": 600, "count": 600,
"mean": 0.04408, "mean": 0.044694,
"median": 0.033951, "median": 0.032396,
"std": 0.036319, "std": 0.03821,
"sem": 0.001484, "sem": 0.001561,
"min": 0.008171, "min": 0.005657,
"max": 0.360134, "max": 0.367138,
"recall-0.025": 0.265, "recall-0.025": 0.24,
"recall-0.05": 0.78, "recall-0.05": 0.791667,
"recall-0.1": 0.933333, "recall-0.1": 0.928333,
"recall-0.15": 0.946667, "recall-0.15": 0.943333,
"recall-0.25": 0.998333, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_left": { "wrist_left": {
"count": 600, "count": 600,
"mean": 0.043753, "mean": 0.043228,
"median": 0.027211, "median": 0.024022,
"std": 0.044668, "std": 0.047501,
"sem": 0.001825, "sem": 0.001941,
"min": 0.002715, "min": 0.002332,
"max": 0.190751, "max": 0.283113,
"recall-0.025": 0.46, "recall-0.025": 0.52,
"recall-0.05": 0.74, "recall-0.05": 0.746667,
"recall-0.1": 0.891667, "recall-0.1": 0.885,
"recall-0.15": 0.925, "recall-0.15": 0.92,
"recall-0.25": 1.0, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_right": { "wrist_right": {
"count": 600, "count": 599,
"mean": 0.046553, "mean": 0.047526,
"median": 0.026979, "median": 0.027369,
"std": 0.050263, "std": 0.055131,
"sem": 0.002054, "sem": 0.002254,
"min": 0.003364, "min": 0.001,
"max": 0.244861, "max": 0.492857,
"recall-0.025": 0.46, "recall-0.025": 0.451667,
"recall-0.05": 0.733333, "recall-0.05": 0.74,
"recall-0.1": 0.87, "recall-0.1": 0.873333,
"recall-0.15": 0.906667, "recall-0.15": 0.906667,
"recall-0.25": 1.0, "recall-0.25": 0.991667,
"recall-0.5": 1.0, "recall-0.5": 0.998333,
"num_labels": 600 "num_labels": 600
}, },
"hip_left": { "hip_left": {
"count": 600, "count": 600,
"mean": 0.08362, "mean": 0.089504,
"median": 0.077619, "median": 0.085316,
"std": 0.032967, "std": 0.032919,
"sem": 0.001347, "sem": 0.001345,
"min": 0.018157, "min": 0.011484,
"max": 0.240771, "max": 0.236463,
"recall-0.025": 0.005, "recall-0.025": 0.006667,
"recall-0.05": 0.055, "recall-0.05": 0.031667,
"recall-0.1": 0.848333, "recall-0.1": 0.815,
"recall-0.15": 0.951667, "recall-0.15": 0.951667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
@ -177,98 +177,98 @@ Results of the model in various experiments on different datasets.
}, },
"hip_right": { "hip_right": {
"count": 600, "count": 600,
"mean": 0.106567, "mean": 0.112947,
"median": 0.104243, "median": 0.112279,
"std": 0.026243, "std": 0.026967,
"sem": 0.001072, "sem": 0.001102,
"min": 0.035565, "min": 0.041373,
"max": 0.245341, "max": 0.235641,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.003333, "recall-0.05": 0.01,
"recall-0.1": 0.415, "recall-0.1": 0.245,
"recall-0.15": 0.946667, "recall-0.15": 0.946667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"knee_left": { "knee_left": {
"count": 599, "count": 600,
"mean": 0.063278, "mean": 0.061189,
"median": 0.047513, "median": 0.045843,
"std": 0.056978, "std": 0.0566,
"sem": 0.00233, "sem": 0.002313,
"min": 0.017587, "min": 0.012587,
"max": 0.4004, "max": 0.400213,
"recall-0.025": 0.038333, "recall-0.025": 0.05,
"recall-0.05": 0.546667, "recall-0.05": 0.58,
"recall-0.1": 0.883333, "recall-0.1": 0.91,
"recall-0.15": 0.925, "recall-0.15": 0.926667,
"recall-0.25": 0.978333, "recall-0.25": 0.981667,
"recall-0.5": 0.998333, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"knee_right": { "knee_right": {
"count": 600, "count": 600,
"mean": 0.050742, "mean": 0.052612,
"median": 0.041408, "median": 0.04423,
"std": 0.037974, "std": 0.037278,
"sem": 0.001552, "sem": 0.001523,
"min": 0.01394, "min": 0.01118,
"max": 0.279839, "max": 0.249994,
"recall-0.025": 0.053333, "recall-0.025": 0.038333,
"recall-0.05": 0.75, "recall-0.05": 0.736667,
"recall-0.1": 0.941667, "recall-0.1": 0.936667,
"recall-0.15": 0.941667, "recall-0.15": 0.94,
"recall-0.25": 0.996667, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"ankle_left": { "ankle_left": {
"count": 600, "count": 598,
"mean": 0.096717, "mean": 0.095824,
"median": 0.085484, "median": 0.084767,
"std": 0.043279, "std": 0.048441,
"sem": 0.001768, "sem": 0.001983,
"min": 0.050765, "min": 0.045599,
"max": 0.49651, "max": 0.496625,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.0, "recall-0.05": 0.003333,
"recall-0.1": 0.825, "recall-0.1": 0.843333,
"recall-0.15": 0.935, "recall-0.15": 0.94,
"recall-0.25": 0.988333, "recall-0.25": 0.981667,
"recall-0.5": 1.0, "recall-0.5": 0.996667,
"num_labels": 600 "num_labels": 600
}, },
"ankle_right": { "ankle_right": {
"count": 600, "count": 598,
"mean": 0.08227, "mean": 0.080368,
"median": 0.068786, "median": 0.067762,
"std": 0.049929, "std": 0.045136,
"sem": 0.00204, "sem": 0.001847,
"min": 0.028705, "min": 0.031319,
"max": 0.486848, "max": 0.490733,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.033333, "recall-0.05": 0.028333,
"recall-0.1": 0.896667, "recall-0.1": 0.89,
"recall-0.15": 0.916667, "recall-0.15": 0.913333,
"recall-0.25": 0.985, "recall-0.25": 0.983333,
"recall-0.5": 1.0, "recall-0.5": 0.996667,
"num_labels": 600 "num_labels": 600
}, },
"joint_recalls": { "joint_recalls": {
"num_labels": 7800, "num_labels": 7800,
"recall-0.025": 0.16782, "recall-0.025": 0.17346,
"recall-0.05": 0.46333, "recall-0.05": 0.4691,
"recall-0.1": 0.83154, "recall-0.1": 0.81808,
"recall-0.15": 0.92846, "recall-0.15": 0.92833,
"recall-0.25": 0.99462, "recall-0.25": 0.99333,
"recall-0.5": 0.99974 "recall-0.5": 0.99923
} }
} }
{ {
"total_parts": 8400, "total_parts": 8400,
"correct_parts": 8111, "correct_parts": 8084,
"pcp": 0.965595 "pcp": 0.962381
} }
``` ```

View File

@ -148,7 +148,7 @@ class LetterBox:
resized_img = cv2.resize( resized_img = cv2.resize(
image, image,
(new_w, new_h), (new_w, new_h),
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_NEAREST,
) )
# Optionally pad the image # Optionally pad the image
@ -273,7 +273,7 @@ class BoxCrop:
resized_img = cv2.resize( resized_img = cv2.resize(
cropped_img, cropped_img,
(new_w, new_h), (new_w, new_h),
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_NEAREST,
) )
# Optionally pad the image # Optionally pad the image
@ -309,6 +309,10 @@ class RTMDet(BaseModel):
self.conf_threshold = conf_threshold self.conf_threshold = conf_threshold
self.letterbox = LetterBox(self.target_size, fill_value=114) self.letterbox = LetterBox(self.target_size, fill_value=114)
min_area_scale = 0.025 * 0.025
img_area = self.target_size[0] * self.target_size[1]
self.min_area = img_area * min_area_scale
def preprocess(self, image: np.ndarray): def preprocess(self, image: np.ndarray):
image = self.letterbox.resize_image(image) image = self.letterbox.resize_image(image)
tensor = np.asarray(image).astype(self.input_types[0], copy=False) tensor = np.asarray(image).astype(self.input_types[0], copy=False)
@ -326,6 +330,11 @@ class RTMDet(BaseModel):
keep = boxes[:, 4] > self.conf_threshold keep = boxes[:, 4] > self.conf_threshold
boxes = boxes[keep] boxes = boxes[keep]
# Drop boxes with too small area
areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
keep = areas >= self.min_area
boxes = boxes[keep]
paddings, scale, _ = self.letterbox.calc_params(image.shape) paddings, scale, _ = self.letterbox.calc_params(image.shape)
boxes[:, 0] -= paddings[0] boxes[:, 0] -= paddings[0]