Some mixed improvements.

This commit is contained in:
Daniel
2024-12-06 13:58:34 +01:00
parent 7a253cd615
commit 108937d96c
2 changed files with 201 additions and 191 deletions

View File

@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
```json ```json
{ {
"avg_time_2d": 0.01109659348504018, "avg_time_2d": 0.010846347323918747,
"avg_time_3d": 0.00034234281313621394, "avg_time_3d": 0.0003320467674126059,
"avg_fps": 87.4207158719313 "avg_fps": 89.45828817893282
} }
{ {
"person_nums": { "person_nums": {
@ -27,149 +27,149 @@ Results of the model in various experiments on different datasets.
}, },
"mpjpe": { "mpjpe": {
"count": 600, "count": 600,
"mean": 0.06621, "mean": 0.066093,
"median": 0.058297, "median": 0.058635,
"std": 0.027913, "std": 0.027815,
"sem": 0.00114, "sem": 0.001136,
"min": 0.04047, "min": 0.040333,
"max": 0.189061, "max": 0.189198,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.098333, "recall-0.05": 0.101667,
"recall-0.1": 0.941667, "recall-0.1": 0.938333,
"recall-0.15": 0.95, "recall-0.15": 0.95,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600, "num_labels": 600,
"ap-0.025": 0.0, "ap-0.025": 0.0,
"ap-0.05": 0.018429, "ap-0.05": 0.023002,
"ap-0.1": 0.901756, "ap-0.1": 0.897991,
"ap-0.15": 0.913878, "ap-0.15": 0.914985,
"ap-0.25": 1.0, "ap-0.25": 1.0,
"ap-0.5": 1.0 "ap-0.5": 1.0
}, },
"nose": { "nose": {
"count": 600, "count": 600,
"mean": 0.113174, "mean": 0.114181,
"median": 0.098547, "median": 0.099121,
"std": 0.041425, "std": 0.042396,
"sem": 0.001693, "sem": 0.001732,
"min": 0.029421, "min": 0.029365,
"max": 0.27266, "max": 0.287428,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.01, "recall-0.05": 0.011667,
"recall-0.1": 0.515, "recall-0.1": 0.508333,
"recall-0.15": 0.81, "recall-0.15": 0.801667,
"recall-0.25": 0.991667, "recall-0.25": 0.991667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"shoulder_left": { "shoulder_left": {
"count": 600, "count": 600,
"mean": 0.034727, "mean": 0.03478,
"median": 0.026049, "median": 0.026496,
"std": 0.031822, "std": 0.031647,
"sem": 0.0013, "sem": 0.001293,
"min": 0.002176, "min": 0.003155,
"max": 0.183422, "max": 0.183779,
"recall-0.025": 0.471667, "recall-0.025": 0.455,
"recall-0.05": 0.855, "recall-0.05": 0.853333,
"recall-0.1": 0.95, "recall-0.1": 0.95,
"recall-0.15": 0.965, "recall-0.15": 0.966667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"shoulder_right": { "shoulder_right": {
"count": 600, "count": 600,
"mean": 0.04794, "mean": 0.047867,
"median": 0.034508, "median": 0.034293,
"std": 0.039316, "std": 0.039619,
"sem": 0.001606, "sem": 0.001619,
"min": 0.004604, "min": 0.005688,
"max": 0.218143, "max": 0.254393,
"recall-0.025": 0.211667, "recall-0.025": 0.218333,
"recall-0.05": 0.76, "recall-0.05": 0.751667,
"recall-0.1": 0.918333, "recall-0.1": 0.913333,
"recall-0.15": 0.946667, "recall-0.15": 0.95,
"recall-0.25": 1.0, "recall-0.25": 0.998333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_left": { "elbow_left": {
"count": 600, "count": 600,
"mean": 0.044638, "mean": 0.044022,
"median": 0.036326, "median": 0.035159,
"std": 0.034761, "std": 0.034701,
"sem": 0.00142, "sem": 0.001418,
"min": 0.003696, "min": 0.002814,
"max": 0.196813, "max": 0.194526,
"recall-0.025": 0.226667, "recall-0.025": 0.233333,
"recall-0.05": 0.778333, "recall-0.05": 0.771667,
"recall-0.1": 0.941667, "recall-0.1": 0.943333,
"recall-0.15": 0.953333, "recall-0.15": 0.958333,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_right": { "elbow_right": {
"count": 600, "count": 600,
"mean": 0.044037, "mean": 0.04408,
"median": 0.033739, "median": 0.033951,
"std": 0.036263, "std": 0.036319,
"sem": 0.001482, "sem": 0.001484,
"min": 0.007995, "min": 0.008171,
"max": 0.351118, "max": 0.360134,
"recall-0.025": 0.251667, "recall-0.025": 0.265,
"recall-0.05": 0.788333, "recall-0.05": 0.78,
"recall-0.1": 0.931667, "recall-0.1": 0.933333,
"recall-0.15": 0.945, "recall-0.15": 0.946667,
"recall-0.25": 0.998333, "recall-0.25": 0.998333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_left": { "wrist_left": {
"count": 600, "count": 600,
"mean": 0.043333, "mean": 0.043753,
"median": 0.027284, "median": 0.027211,
"std": 0.044655, "std": 0.044668,
"sem": 0.001825, "sem": 0.001825,
"min": 0.002741, "min": 0.002715,
"max": 0.185438, "max": 0.190751,
"recall-0.025": 0.458333, "recall-0.025": 0.46,
"recall-0.05": 0.745, "recall-0.05": 0.74,
"recall-0.1": 0.891667, "recall-0.1": 0.891667,
"recall-0.15": 0.923333, "recall-0.15": 0.925,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_right": { "wrist_right": {
"count": 600, "count": 600,
"mean": 0.047488, "mean": 0.046553,
"median": 0.027367, "median": 0.026979,
"std": 0.053442, "std": 0.050263,
"sem": 0.002184, "sem": 0.002054,
"min": 0.001357, "min": 0.003364,
"max": 0.465438, "max": 0.244861,
"recall-0.025": 0.446667, "recall-0.025": 0.46,
"recall-0.05": 0.738333, "recall-0.05": 0.733333,
"recall-0.1": 0.868333, "recall-0.1": 0.87,
"recall-0.15": 0.898333, "recall-0.15": 0.906667,
"recall-0.25": 0.998333, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"hip_left": { "hip_left": {
"count": 600, "count": 600,
"mean": 0.084262, "mean": 0.08362,
"median": 0.078071, "median": 0.077619,
"std": 0.032944, "std": 0.032967,
"sem": 0.001346, "sem": 0.001347,
"min": 0.022541, "min": 0.018157,
"max": 0.239428, "max": 0.240771,
"recall-0.025": 0.003333, "recall-0.025": 0.005,
"recall-0.05": 0.055, "recall-0.05": 0.055,
"recall-0.1": 0.851667, "recall-0.1": 0.848333,
"recall-0.15": 0.951667, "recall-0.15": 0.951667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
@ -177,63 +177,63 @@ Results of the model in various experiments on different datasets.
}, },
"hip_right": { "hip_right": {
"count": 600, "count": 600,
"mean": 0.106676, "mean": 0.106567,
"median": 0.103778, "median": 0.104243,
"std": 0.025796, "std": 0.026243,
"sem": 0.001054, "sem": 0.001072,
"min": 0.042573, "min": 0.035565,
"max": 0.242475, "max": 0.245341,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.003333, "recall-0.05": 0.003333,
"recall-0.1": 0.421667, "recall-0.1": 0.415,
"recall-0.15": 0.948333, "recall-0.15": 0.946667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"knee_left": { "knee_left": {
"count": 598, "count": 599,
"mean": 0.062386, "mean": 0.063278,
"median": 0.046647, "median": 0.047513,
"std": 0.055624, "std": 0.056978,
"sem": 0.002277, "sem": 0.00233,
"min": 0.012414, "min": 0.017587,
"max": 0.399633, "max": 0.4004,
"recall-0.025": 0.045, "recall-0.025": 0.038333,
"recall-0.05": 0.555, "recall-0.05": 0.546667,
"recall-0.1": 0.885, "recall-0.1": 0.883333,
"recall-0.15": 0.925, "recall-0.15": 0.925,
"recall-0.25": 0.978333, "recall-0.25": 0.978333,
"recall-0.5": 0.996667, "recall-0.5": 0.998333,
"num_labels": 600 "num_labels": 600
}, },
"knee_right": { "knee_right": {
"count": 600, "count": 600,
"mean": 0.050939, "mean": 0.050742,
"median": 0.041387, "median": 0.041408,
"std": 0.037661, "std": 0.037974,
"sem": 0.001539, "sem": 0.001552,
"min": 0.006788, "min": 0.01394,
"max": 0.268559, "max": 0.279839,
"recall-0.025": 0.045, "recall-0.025": 0.053333,
"recall-0.05": 0.73, "recall-0.05": 0.75,
"recall-0.1": 0.941667, "recall-0.1": 0.941667,
"recall-0.15": 0.943333, "recall-0.15": 0.941667,
"recall-0.25": 0.996667, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"ankle_left": { "ankle_left": {
"count": 600, "count": 600,
"mean": 0.096519, "mean": 0.096717,
"median": 0.085325, "median": 0.085484,
"std": 0.043518, "std": 0.043279,
"sem": 0.001778, "sem": 0.001768,
"min": 0.049769, "min": 0.050765,
"max": 0.494823, "max": 0.49651,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.001667, "recall-0.05": 0.0,
"recall-0.1": 0.828333, "recall-0.1": 0.825,
"recall-0.15": 0.935, "recall-0.15": 0.935,
"recall-0.25": 0.988333, "recall-0.25": 0.988333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
@ -241,34 +241,34 @@ Results of the model in various experiments on different datasets.
}, },
"ankle_right": { "ankle_right": {
"count": 600, "count": 600,
"mean": 0.082453, "mean": 0.08227,
"median": 0.068627, "median": 0.068786,
"std": 0.050525, "std": 0.049929,
"sem": 0.002064, "sem": 0.00204,
"min": 0.026098, "min": 0.028705,
"max": 0.482397, "max": 0.486848,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.035, "recall-0.05": 0.033333,
"recall-0.1": 0.896667, "recall-0.1": 0.896667,
"recall-0.15": 0.915, "recall-0.15": 0.916667,
"recall-0.25": 0.981667, "recall-0.25": 0.985,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"joint_recalls": { "joint_recalls": {
"num_labels": 7800, "num_labels": 7800,
"recall-0.025": 0.1659, "recall-0.025": 0.16782,
"recall-0.05": 0.46526, "recall-0.05": 0.46333,
"recall-0.1": 0.83359, "recall-0.1": 0.83154,
"recall-0.15": 0.92705, "recall-0.15": 0.92846,
"recall-0.25": 0.99436, "recall-0.25": 0.99462,
"recall-0.5": 0.99974 "recall-0.5": 0.99974
} }
} }
{ {
"total_parts": 8400, "total_parts": 8400,
"correct_parts": 8113, "correct_parts": 8111,
"pcp": 0.965833 "pcp": 0.965595
} }
``` ```

View File

@ -37,14 +37,16 @@ class BaseModel(ABC):
self.input_types = [] self.input_types = []
for i in range(len(input_types)): for i in range(len(input_types)):
input_type = input_types[i] input_type = input_types[i]
if input_type == "tensor(float16)": if input_type == "tensor(float32)":
itype = np.float32
elif input_type == "tensor(float16)":
itype = np.float16 itype = np.float16
elif input_type == "tensor(uint8)":
itype = np.uint8
elif input_type == "tensor(int32)": elif input_type == "tensor(int32)":
itype = np.int32 itype = np.int32
elif input_type == "tensor(uint8)":
itype = np.uint8
else: else:
itype = np.float32 raise ValueError("Undefined input type:", input_type)
self.input_types.append(itype) self.input_types.append(itype)
if warmup > 0: if warmup > 0:
@ -59,6 +61,8 @@ class BaseModel(ABC):
pass pass
def warmup(self, epoch: int): def warmup(self, epoch: int):
np.random.seed(42)
print("Running warmup for '{}' ...".format(self.__class__.__name__)) print("Running warmup for '{}' ...".format(self.__class__.__name__))
for _ in tqdm(range(epoch)): for _ in tqdm(range(epoch)):
inputs = {} inputs = {}
@ -139,20 +143,30 @@ class LetterBox:
def resize_image(self, image): def resize_image(self, image):
paddings, _, new_size = self.calc_params(image.shape) paddings, _, new_size = self.calc_params(image.shape)
target_h, target_w = self.target_size # Resize the image
canvas = np.full(
(target_h, target_w, image.shape[2]),
self.fill_value,
dtype=image.dtype,
)
new_w, new_h = new_size new_w, new_h = new_size
dx, dy = paddings[0], paddings[2] resized_img = cv2.resize(
canvas[dy : dy + new_h, dx : dx + new_w, :] = cv2.resize( image,
image, (new_w, new_h), interpolation=cv2.INTER_LINEAR (new_w, new_h),
interpolation=cv2.INTER_LINEAR,
) )
return canvas # Optionally pad the image
pad_left, pad_right, pad_top, pad_bottom = paddings
if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
final_img = resized_img
else:
final_img = cv2.copyMakeBorder(
resized_img,
pad_top,
pad_bottom,
pad_left,
pad_right,
borderType=cv2.BORDER_CONSTANT,
value=[self.fill_value, self.fill_value, self.fill_value],
)
return final_img
# ================================================================================================== # ==================================================================================================
@ -211,6 +225,7 @@ class BoxCrop:
new_end_y = min(ishape[0] - 1, end_y) new_end_y = min(ishape[0] - 1, end_y)
new_box = [new_start_x, new_start_y, new_end_x, new_end_y] new_box = [new_start_x, new_start_y, new_end_x, new_end_y]
# Calculate resized crop size
bbox_w = new_box[2] - new_box[0] bbox_w = new_box[2] - new_box[0]
bbox_h = new_box[3] - new_box[1] bbox_h = new_box[3] - new_box[1]
scale = min(target_w / bbox_w, target_h / bbox_h) scale = min(target_w / bbox_w, target_h / bbox_h)
@ -250,22 +265,33 @@ class BoxCrop:
def crop_resize_box(self, image, bbox): def crop_resize_box(self, image, bbox):
paddings, _, new_box, new_size = self.calc_params(image.shape, bbox) paddings, _, new_box, new_size = self.calc_params(image.shape, bbox)
image = image[new_box[1] : new_box[3], new_box[0] : new_box[2]] # Extract the bounding box
cropped_img = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]
th, tw = self.target_size # Resize the image
canvas = np.full( new_w, new_h = new_size
(th, tw, image.shape[2]), resized_img = cv2.resize(
self.fill_value, cropped_img,
dtype=image.dtype, (new_w, new_h),
interpolation=cv2.INTER_LINEAR,
) )
nw, nh = new_size # Optionally pad the image
dx, dy = paddings[0], paddings[2] pad_left, pad_right, pad_top, pad_bottom = paddings
canvas[dy : dy + nh, dx : dx + nw, :] = cv2.resize( if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
image, (nw, nh), interpolation=cv2.INTER_LINEAR final_img = resized_img
else:
final_img = cv2.copyMakeBorder(
resized_img,
pad_top,
pad_bottom,
pad_left,
pad_right,
borderType=cv2.BORDER_CONSTANT,
value=[self.fill_value, self.fill_value, self.fill_value],
) )
return canvas return final_img
# ================================================================================================== # ==================================================================================================
@ -308,27 +334,17 @@ class RTMDet(BaseModel):
boxes[:, 3] -= paddings[2] boxes[:, 3] -= paddings[2]
boxes = np.maximum(boxes, 0) boxes = np.maximum(boxes, 0)
th, tw = self.target_size th, tw = self.target_size
pad_w = paddings[0] + paddings[1] pad_w = paddings[0] + paddings[1]
pad_h = paddings[2] + paddings[3] pad_h = paddings[2] + paddings[3]
max_w = tw - pad_w - 1 max_w = tw - pad_w - 1
max_h = th - pad_h - 1 max_h = th - pad_h - 1
b0 = boxes[:, 0] boxes[:, 0] = np.minimum(boxes[:, 0], max_w)
b1 = boxes[:, 1] boxes[:, 1] = np.minimum(boxes[:, 1], max_h)
b2 = boxes[:, 2] boxes[:, 2] = np.minimum(boxes[:, 2], max_w)
b3 = boxes[:, 3] boxes[:, 3] = np.minimum(boxes[:, 3], max_h)
b0 = np.minimum(b0, max_w)
b1 = np.minimum(b1, max_h)
b2 = np.minimum(b2, max_w)
b3 = np.minimum(b3, max_h)
boxes[:, 0] = b0
boxes[:, 1] = b1
boxes[:, 2] = b2
boxes[:, 3] = b3
boxes[:, 0:4] /= scale boxes[:, 0:4] /= scale
return boxes return boxes
@ -342,8 +358,6 @@ class RTMPose(BaseModel):
self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0) self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
def preprocess(self, image: np.ndarray, bbox: np.ndarray): def preprocess(self, image: np.ndarray, bbox: np.ndarray):
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
tensor = np.expand_dims(tensor, axis=0)
bbox = np.asarray(bbox)[0:4] bbox = np.asarray(bbox)[0:4]
bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8]) bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
bbox = bbox.round().astype(np.int32) bbox = bbox.round().astype(np.int32)
@ -368,12 +382,8 @@ class RTMPose(BaseModel):
kp[:, 0:2] = np.maximum(kp[:, 0:2], 0) kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
max_w = image.shape[1] - 1 max_w = image.shape[1] - 1
max_h = image.shape[0] - 1 max_h = image.shape[0] - 1
b0 = kp[:, 0] kp[:, 0] = np.minimum(kp[:, 0], max_w)
b1 = kp[:, 1] kp[:, 1] = np.minimum(kp[:, 1], max_h)
b0 = np.minimum(b0, max_w)
b1 = np.minimum(b1, max_h)
kp[:, 0] = b0
kp[:, 1] = b1
return kp return kp