Some mixed improvements.
This commit is contained in:
284
media/RESULTS.md
284
media/RESULTS.md
@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
|
||||
|
||||
```json
|
||||
{
|
||||
"avg_time_2d": 0.01109659348504018,
|
||||
"avg_time_3d": 0.00034234281313621394,
|
||||
"avg_fps": 87.4207158719313
|
||||
"avg_time_2d": 0.010846347323918747,
|
||||
"avg_time_3d": 0.0003320467674126059,
|
||||
"avg_fps": 89.45828817893282
|
||||
}
|
||||
{
|
||||
"person_nums": {
|
||||
@ -27,149 +27,149 @@ Results of the model in various experiments on different datasets.
|
||||
},
|
||||
"mpjpe": {
|
||||
"count": 600,
|
||||
"mean": 0.06621,
|
||||
"median": 0.058297,
|
||||
"std": 0.027913,
|
||||
"sem": 0.00114,
|
||||
"min": 0.04047,
|
||||
"max": 0.189061,
|
||||
"mean": 0.066093,
|
||||
"median": 0.058635,
|
||||
"std": 0.027815,
|
||||
"sem": 0.001136,
|
||||
"min": 0.040333,
|
||||
"max": 0.189198,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.098333,
|
||||
"recall-0.1": 0.941667,
|
||||
"recall-0.05": 0.101667,
|
||||
"recall-0.1": 0.938333,
|
||||
"recall-0.15": 0.95,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600,
|
||||
"ap-0.025": 0.0,
|
||||
"ap-0.05": 0.018429,
|
||||
"ap-0.1": 0.901756,
|
||||
"ap-0.15": 0.913878,
|
||||
"ap-0.05": 0.023002,
|
||||
"ap-0.1": 0.897991,
|
||||
"ap-0.15": 0.914985,
|
||||
"ap-0.25": 1.0,
|
||||
"ap-0.5": 1.0
|
||||
},
|
||||
"nose": {
|
||||
"count": 600,
|
||||
"mean": 0.113174,
|
||||
"median": 0.098547,
|
||||
"std": 0.041425,
|
||||
"sem": 0.001693,
|
||||
"min": 0.029421,
|
||||
"max": 0.27266,
|
||||
"mean": 0.114181,
|
||||
"median": 0.099121,
|
||||
"std": 0.042396,
|
||||
"sem": 0.001732,
|
||||
"min": 0.029365,
|
||||
"max": 0.287428,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.01,
|
||||
"recall-0.1": 0.515,
|
||||
"recall-0.15": 0.81,
|
||||
"recall-0.05": 0.011667,
|
||||
"recall-0.1": 0.508333,
|
||||
"recall-0.15": 0.801667,
|
||||
"recall-0.25": 0.991667,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"shoulder_left": {
|
||||
"count": 600,
|
||||
"mean": 0.034727,
|
||||
"median": 0.026049,
|
||||
"std": 0.031822,
|
||||
"sem": 0.0013,
|
||||
"min": 0.002176,
|
||||
"max": 0.183422,
|
||||
"recall-0.025": 0.471667,
|
||||
"recall-0.05": 0.855,
|
||||
"mean": 0.03478,
|
||||
"median": 0.026496,
|
||||
"std": 0.031647,
|
||||
"sem": 0.001293,
|
||||
"min": 0.003155,
|
||||
"max": 0.183779,
|
||||
"recall-0.025": 0.455,
|
||||
"recall-0.05": 0.853333,
|
||||
"recall-0.1": 0.95,
|
||||
"recall-0.15": 0.965,
|
||||
"recall-0.15": 0.966667,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"shoulder_right": {
|
||||
"count": 600,
|
||||
"mean": 0.04794,
|
||||
"median": 0.034508,
|
||||
"std": 0.039316,
|
||||
"sem": 0.001606,
|
||||
"min": 0.004604,
|
||||
"max": 0.218143,
|
||||
"recall-0.025": 0.211667,
|
||||
"recall-0.05": 0.76,
|
||||
"recall-0.1": 0.918333,
|
||||
"recall-0.15": 0.946667,
|
||||
"recall-0.25": 1.0,
|
||||
"mean": 0.047867,
|
||||
"median": 0.034293,
|
||||
"std": 0.039619,
|
||||
"sem": 0.001619,
|
||||
"min": 0.005688,
|
||||
"max": 0.254393,
|
||||
"recall-0.025": 0.218333,
|
||||
"recall-0.05": 0.751667,
|
||||
"recall-0.1": 0.913333,
|
||||
"recall-0.15": 0.95,
|
||||
"recall-0.25": 0.998333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"elbow_left": {
|
||||
"count": 600,
|
||||
"mean": 0.044638,
|
||||
"median": 0.036326,
|
||||
"std": 0.034761,
|
||||
"sem": 0.00142,
|
||||
"min": 0.003696,
|
||||
"max": 0.196813,
|
||||
"recall-0.025": 0.226667,
|
||||
"recall-0.05": 0.778333,
|
||||
"recall-0.1": 0.941667,
|
||||
"recall-0.15": 0.953333,
|
||||
"mean": 0.044022,
|
||||
"median": 0.035159,
|
||||
"std": 0.034701,
|
||||
"sem": 0.001418,
|
||||
"min": 0.002814,
|
||||
"max": 0.194526,
|
||||
"recall-0.025": 0.233333,
|
||||
"recall-0.05": 0.771667,
|
||||
"recall-0.1": 0.943333,
|
||||
"recall-0.15": 0.958333,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"elbow_right": {
|
||||
"count": 600,
|
||||
"mean": 0.044037,
|
||||
"median": 0.033739,
|
||||
"std": 0.036263,
|
||||
"sem": 0.001482,
|
||||
"min": 0.007995,
|
||||
"max": 0.351118,
|
||||
"recall-0.025": 0.251667,
|
||||
"recall-0.05": 0.788333,
|
||||
"recall-0.1": 0.931667,
|
||||
"recall-0.15": 0.945,
|
||||
"mean": 0.04408,
|
||||
"median": 0.033951,
|
||||
"std": 0.036319,
|
||||
"sem": 0.001484,
|
||||
"min": 0.008171,
|
||||
"max": 0.360134,
|
||||
"recall-0.025": 0.265,
|
||||
"recall-0.05": 0.78,
|
||||
"recall-0.1": 0.933333,
|
||||
"recall-0.15": 0.946667,
|
||||
"recall-0.25": 0.998333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"wrist_left": {
|
||||
"count": 600,
|
||||
"mean": 0.043333,
|
||||
"median": 0.027284,
|
||||
"std": 0.044655,
|
||||
"mean": 0.043753,
|
||||
"median": 0.027211,
|
||||
"std": 0.044668,
|
||||
"sem": 0.001825,
|
||||
"min": 0.002741,
|
||||
"max": 0.185438,
|
||||
"recall-0.025": 0.458333,
|
||||
"recall-0.05": 0.745,
|
||||
"min": 0.002715,
|
||||
"max": 0.190751,
|
||||
"recall-0.025": 0.46,
|
||||
"recall-0.05": 0.74,
|
||||
"recall-0.1": 0.891667,
|
||||
"recall-0.15": 0.923333,
|
||||
"recall-0.15": 0.925,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"wrist_right": {
|
||||
"count": 600,
|
||||
"mean": 0.047488,
|
||||
"median": 0.027367,
|
||||
"std": 0.053442,
|
||||
"sem": 0.002184,
|
||||
"min": 0.001357,
|
||||
"max": 0.465438,
|
||||
"recall-0.025": 0.446667,
|
||||
"recall-0.05": 0.738333,
|
||||
"recall-0.1": 0.868333,
|
||||
"recall-0.15": 0.898333,
|
||||
"recall-0.25": 0.998333,
|
||||
"mean": 0.046553,
|
||||
"median": 0.026979,
|
||||
"std": 0.050263,
|
||||
"sem": 0.002054,
|
||||
"min": 0.003364,
|
||||
"max": 0.244861,
|
||||
"recall-0.025": 0.46,
|
||||
"recall-0.05": 0.733333,
|
||||
"recall-0.1": 0.87,
|
||||
"recall-0.15": 0.906667,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"hip_left": {
|
||||
"count": 600,
|
||||
"mean": 0.084262,
|
||||
"median": 0.078071,
|
||||
"std": 0.032944,
|
||||
"sem": 0.001346,
|
||||
"min": 0.022541,
|
||||
"max": 0.239428,
|
||||
"recall-0.025": 0.003333,
|
||||
"mean": 0.08362,
|
||||
"median": 0.077619,
|
||||
"std": 0.032967,
|
||||
"sem": 0.001347,
|
||||
"min": 0.018157,
|
||||
"max": 0.240771,
|
||||
"recall-0.025": 0.005,
|
||||
"recall-0.05": 0.055,
|
||||
"recall-0.1": 0.851667,
|
||||
"recall-0.1": 0.848333,
|
||||
"recall-0.15": 0.951667,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
@ -177,63 +177,63 @@ Results of the model in various experiments on different datasets.
|
||||
},
|
||||
"hip_right": {
|
||||
"count": 600,
|
||||
"mean": 0.106676,
|
||||
"median": 0.103778,
|
||||
"std": 0.025796,
|
||||
"sem": 0.001054,
|
||||
"min": 0.042573,
|
||||
"max": 0.242475,
|
||||
"mean": 0.106567,
|
||||
"median": 0.104243,
|
||||
"std": 0.026243,
|
||||
"sem": 0.001072,
|
||||
"min": 0.035565,
|
||||
"max": 0.245341,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.003333,
|
||||
"recall-0.1": 0.421667,
|
||||
"recall-0.15": 0.948333,
|
||||
"recall-0.1": 0.415,
|
||||
"recall-0.15": 0.946667,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"knee_left": {
|
||||
"count": 598,
|
||||
"mean": 0.062386,
|
||||
"median": 0.046647,
|
||||
"std": 0.055624,
|
||||
"sem": 0.002277,
|
||||
"min": 0.012414,
|
||||
"max": 0.399633,
|
||||
"recall-0.025": 0.045,
|
||||
"recall-0.05": 0.555,
|
||||
"recall-0.1": 0.885,
|
||||
"count": 599,
|
||||
"mean": 0.063278,
|
||||
"median": 0.047513,
|
||||
"std": 0.056978,
|
||||
"sem": 0.00233,
|
||||
"min": 0.017587,
|
||||
"max": 0.4004,
|
||||
"recall-0.025": 0.038333,
|
||||
"recall-0.05": 0.546667,
|
||||
"recall-0.1": 0.883333,
|
||||
"recall-0.15": 0.925,
|
||||
"recall-0.25": 0.978333,
|
||||
"recall-0.5": 0.996667,
|
||||
"recall-0.5": 0.998333,
|
||||
"num_labels": 600
|
||||
},
|
||||
"knee_right": {
|
||||
"count": 600,
|
||||
"mean": 0.050939,
|
||||
"median": 0.041387,
|
||||
"std": 0.037661,
|
||||
"sem": 0.001539,
|
||||
"min": 0.006788,
|
||||
"max": 0.268559,
|
||||
"recall-0.025": 0.045,
|
||||
"recall-0.05": 0.73,
|
||||
"mean": 0.050742,
|
||||
"median": 0.041408,
|
||||
"std": 0.037974,
|
||||
"sem": 0.001552,
|
||||
"min": 0.01394,
|
||||
"max": 0.279839,
|
||||
"recall-0.025": 0.053333,
|
||||
"recall-0.05": 0.75,
|
||||
"recall-0.1": 0.941667,
|
||||
"recall-0.15": 0.943333,
|
||||
"recall-0.15": 0.941667,
|
||||
"recall-0.25": 0.996667,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"ankle_left": {
|
||||
"count": 600,
|
||||
"mean": 0.096519,
|
||||
"median": 0.085325,
|
||||
"std": 0.043518,
|
||||
"sem": 0.001778,
|
||||
"min": 0.049769,
|
||||
"max": 0.494823,
|
||||
"mean": 0.096717,
|
||||
"median": 0.085484,
|
||||
"std": 0.043279,
|
||||
"sem": 0.001768,
|
||||
"min": 0.050765,
|
||||
"max": 0.49651,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.001667,
|
||||
"recall-0.1": 0.828333,
|
||||
"recall-0.05": 0.0,
|
||||
"recall-0.1": 0.825,
|
||||
"recall-0.15": 0.935,
|
||||
"recall-0.25": 0.988333,
|
||||
"recall-0.5": 1.0,
|
||||
@ -241,34 +241,34 @@ Results of the model in various experiments on different datasets.
|
||||
},
|
||||
"ankle_right": {
|
||||
"count": 600,
|
||||
"mean": 0.082453,
|
||||
"median": 0.068627,
|
||||
"std": 0.050525,
|
||||
"sem": 0.002064,
|
||||
"min": 0.026098,
|
||||
"max": 0.482397,
|
||||
"mean": 0.08227,
|
||||
"median": 0.068786,
|
||||
"std": 0.049929,
|
||||
"sem": 0.00204,
|
||||
"min": 0.028705,
|
||||
"max": 0.486848,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.035,
|
||||
"recall-0.05": 0.033333,
|
||||
"recall-0.1": 0.896667,
|
||||
"recall-0.15": 0.915,
|
||||
"recall-0.25": 0.981667,
|
||||
"recall-0.15": 0.916667,
|
||||
"recall-0.25": 0.985,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"joint_recalls": {
|
||||
"num_labels": 7800,
|
||||
"recall-0.025": 0.1659,
|
||||
"recall-0.05": 0.46526,
|
||||
"recall-0.1": 0.83359,
|
||||
"recall-0.15": 0.92705,
|
||||
"recall-0.25": 0.99436,
|
||||
"recall-0.025": 0.16782,
|
||||
"recall-0.05": 0.46333,
|
||||
"recall-0.1": 0.83154,
|
||||
"recall-0.15": 0.92846,
|
||||
"recall-0.25": 0.99462,
|
||||
"recall-0.5": 0.99974
|
||||
}
|
||||
}
|
||||
{
|
||||
"total_parts": 8400,
|
||||
"correct_parts": 8113,
|
||||
"pcp": 0.965833
|
||||
"correct_parts": 8111,
|
||||
"pcp": 0.965595
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@ -37,14 +37,16 @@ class BaseModel(ABC):
|
||||
self.input_types = []
|
||||
for i in range(len(input_types)):
|
||||
input_type = input_types[i]
|
||||
if input_type == "tensor(float16)":
|
||||
if input_type == "tensor(float32)":
|
||||
itype = np.float32
|
||||
elif input_type == "tensor(float16)":
|
||||
itype = np.float16
|
||||
elif input_type == "tensor(uint8)":
|
||||
itype = np.uint8
|
||||
elif input_type == "tensor(int32)":
|
||||
itype = np.int32
|
||||
elif input_type == "tensor(uint8)":
|
||||
itype = np.uint8
|
||||
else:
|
||||
itype = np.float32
|
||||
raise ValueError("Undefined input type:", input_type)
|
||||
self.input_types.append(itype)
|
||||
|
||||
if warmup > 0:
|
||||
@ -59,6 +61,8 @@ class BaseModel(ABC):
|
||||
pass
|
||||
|
||||
def warmup(self, epoch: int):
|
||||
np.random.seed(42)
|
||||
|
||||
print("Running warmup for '{}' ...".format(self.__class__.__name__))
|
||||
for _ in tqdm(range(epoch)):
|
||||
inputs = {}
|
||||
@ -139,20 +143,30 @@ class LetterBox:
|
||||
def resize_image(self, image):
|
||||
paddings, _, new_size = self.calc_params(image.shape)
|
||||
|
||||
target_h, target_w = self.target_size
|
||||
canvas = np.full(
|
||||
(target_h, target_w, image.shape[2]),
|
||||
self.fill_value,
|
||||
dtype=image.dtype,
|
||||
)
|
||||
|
||||
# Resize the image
|
||||
new_w, new_h = new_size
|
||||
dx, dy = paddings[0], paddings[2]
|
||||
canvas[dy : dy + new_h, dx : dx + new_w, :] = cv2.resize(
|
||||
image, (new_w, new_h), interpolation=cv2.INTER_LINEAR
|
||||
resized_img = cv2.resize(
|
||||
image,
|
||||
(new_w, new_h),
|
||||
interpolation=cv2.INTER_LINEAR,
|
||||
)
|
||||
|
||||
return canvas
|
||||
# Optionally pad the image
|
||||
pad_left, pad_right, pad_top, pad_bottom = paddings
|
||||
if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
|
||||
final_img = resized_img
|
||||
else:
|
||||
final_img = cv2.copyMakeBorder(
|
||||
resized_img,
|
||||
pad_top,
|
||||
pad_bottom,
|
||||
pad_left,
|
||||
pad_right,
|
||||
borderType=cv2.BORDER_CONSTANT,
|
||||
value=[self.fill_value, self.fill_value, self.fill_value],
|
||||
)
|
||||
|
||||
return final_img
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
@ -211,6 +225,7 @@ class BoxCrop:
|
||||
new_end_y = min(ishape[0] - 1, end_y)
|
||||
new_box = [new_start_x, new_start_y, new_end_x, new_end_y]
|
||||
|
||||
# Calculate resized crop size
|
||||
bbox_w = new_box[2] - new_box[0]
|
||||
bbox_h = new_box[3] - new_box[1]
|
||||
scale = min(target_w / bbox_w, target_h / bbox_h)
|
||||
@ -250,22 +265,33 @@ class BoxCrop:
|
||||
def crop_resize_box(self, image, bbox):
|
||||
paddings, _, new_box, new_size = self.calc_params(image.shape, bbox)
|
||||
|
||||
image = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]
|
||||
# Extract the bounding box
|
||||
cropped_img = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]
|
||||
|
||||
th, tw = self.target_size
|
||||
canvas = np.full(
|
||||
(th, tw, image.shape[2]),
|
||||
self.fill_value,
|
||||
dtype=image.dtype,
|
||||
# Resize the image
|
||||
new_w, new_h = new_size
|
||||
resized_img = cv2.resize(
|
||||
cropped_img,
|
||||
(new_w, new_h),
|
||||
interpolation=cv2.INTER_LINEAR,
|
||||
)
|
||||
|
||||
nw, nh = new_size
|
||||
dx, dy = paddings[0], paddings[2]
|
||||
canvas[dy : dy + nh, dx : dx + nw, :] = cv2.resize(
|
||||
image, (nw, nh), interpolation=cv2.INTER_LINEAR
|
||||
# Optionally pad the image
|
||||
pad_left, pad_right, pad_top, pad_bottom = paddings
|
||||
if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
|
||||
final_img = resized_img
|
||||
else:
|
||||
final_img = cv2.copyMakeBorder(
|
||||
resized_img,
|
||||
pad_top,
|
||||
pad_bottom,
|
||||
pad_left,
|
||||
pad_right,
|
||||
borderType=cv2.BORDER_CONSTANT,
|
||||
value=[self.fill_value, self.fill_value, self.fill_value],
|
||||
)
|
||||
|
||||
return canvas
|
||||
return final_img
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
@ -308,27 +334,17 @@ class RTMDet(BaseModel):
|
||||
boxes[:, 3] -= paddings[2]
|
||||
|
||||
boxes = np.maximum(boxes, 0)
|
||||
|
||||
th, tw = self.target_size
|
||||
pad_w = paddings[0] + paddings[1]
|
||||
pad_h = paddings[2] + paddings[3]
|
||||
max_w = tw - pad_w - 1
|
||||
max_h = th - pad_h - 1
|
||||
b0 = boxes[:, 0]
|
||||
b1 = boxes[:, 1]
|
||||
b2 = boxes[:, 2]
|
||||
b3 = boxes[:, 3]
|
||||
b0 = np.minimum(b0, max_w)
|
||||
b1 = np.minimum(b1, max_h)
|
||||
b2 = np.minimum(b2, max_w)
|
||||
b3 = np.minimum(b3, max_h)
|
||||
boxes[:, 0] = b0
|
||||
boxes[:, 1] = b1
|
||||
boxes[:, 2] = b2
|
||||
boxes[:, 3] = b3
|
||||
boxes[:, 0] = np.minimum(boxes[:, 0], max_w)
|
||||
boxes[:, 1] = np.minimum(boxes[:, 1], max_h)
|
||||
boxes[:, 2] = np.minimum(boxes[:, 2], max_w)
|
||||
boxes[:, 3] = np.minimum(boxes[:, 3], max_h)
|
||||
|
||||
boxes[:, 0:4] /= scale
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
@ -342,8 +358,6 @@ class RTMPose(BaseModel):
|
||||
self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
|
||||
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
||||
tensor = np.expand_dims(tensor, axis=0)
|
||||
bbox = np.asarray(bbox)[0:4]
|
||||
bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
|
||||
bbox = bbox.round().astype(np.int32)
|
||||
@ -368,12 +382,8 @@ class RTMPose(BaseModel):
|
||||
kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
|
||||
max_w = image.shape[1] - 1
|
||||
max_h = image.shape[0] - 1
|
||||
b0 = kp[:, 0]
|
||||
b1 = kp[:, 1]
|
||||
b0 = np.minimum(b0, max_w)
|
||||
b1 = np.minimum(b1, max_h)
|
||||
kp[:, 0] = b0
|
||||
kp[:, 1] = b1
|
||||
kp[:, 0] = np.minimum(kp[:, 0], max_w)
|
||||
kp[:, 1] = np.minimum(kp[:, 1], max_h)
|
||||
|
||||
return kp
|
||||
|
||||
|
||||
Reference in New Issue
Block a user