Use RGB input for both models (RTMDet and RTMPose).

This commit is contained in:
Daniel
2024-12-02 17:28:34 +01:00
parent dc44a71b2c
commit 36781e616b
3 changed files with 155 additions and 161 deletions

View File

@ -8,8 +8,8 @@ base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
pose_model_path = base_path + "rtmpose-m_384x288.onnx"
det_model_path = base_path + "rtmdet-nano_320x320.onnx"
norm_mean = -1 * np.array([103.53, 116.28, 123.675])
norm_std = 1.0 / np.array([57.375, 57.12, 58.395])
norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)
# ==================================================================================================
@ -24,11 +24,6 @@ def add_steps_to_onnx(model_path):
mean = norm_mean.astype(np.float32)
std = norm_std.astype(np.float32)
use_bgr = bool("rtmpose" in model_path)
if use_bgr:
mean = mean[::-1]
std = std[::-1]
mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32)
std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32)

View File

@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
```json
{
"avg_time_2d": 0.016608773651769607,
"avg_time_3d": 0.00034795211533368645,
"avg_fps": 58.97364937870487
"avg_time_2d": 0.016274028309321,
"avg_time_3d": 0.00032552096803309556,
"avg_fps": 60.24259956047411
}
{
"person_nums": {
@ -27,52 +27,52 @@ Results of the model in various experiments on different datasets.
},
"mpjpe": {
"count": 600,
"mean": 0.067471,
"median": 0.0592,
"std": 0.02795,
"sem": 0.001142,
"min": 0.042592,
"max": 0.189987,
"mean": 0.066064,
"median": 0.058463,
"std": 0.027791,
"sem": 0.001136,
"min": 0.040706,
"max": 0.189425,
"recall-0.025": 0.0,
"recall-0.05": 0.048333,
"recall-0.1": 0.925,
"recall-0.05": 0.085,
"recall-0.1": 0.936667,
"recall-0.15": 0.95,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600,
"ap-0.025": 0.0,
"ap-0.05": 0.004097,
"ap-0.1": 0.885305,
"ap-0.15": 0.915769,
"ap-0.05": 0.012704,
"ap-0.1": 0.897461,
"ap-0.15": 0.915018,
"ap-0.25": 1.0,
"ap-0.5": 1.0
},
"nose": {
"count": 600,
"mean": 0.115621,
"median": 0.100161,
"std": 0.041657,
"sem": 0.001702,
"min": 0.031411,
"max": 0.276464,
"mean": 0.114664,
"median": 0.10192,
"std": 0.040958,
"sem": 0.001673,
"min": 0.027318,
"max": 0.26417,
"recall-0.025": 0.0,
"recall-0.05": 0.01,
"recall-0.1": 0.498333,
"recall-0.15": 0.826667,
"recall-0.05": 0.006667,
"recall-0.1": 0.488333,
"recall-0.15": 0.82,
"recall-0.25": 0.993333,
"recall-0.5": 1.0,
"num_labels": 600
},
"shoulder_left": {
"count": 600,
"mean": 0.033598,
"median": 0.025444,
"std": 0.032078,
"sem": 0.001311,
"min": 0.001187,
"max": 0.181528,
"recall-0.025": 0.486667,
"recall-0.05": 0.865,
"mean": 0.034211,
"median": 0.026464,
"std": 0.031942,
"sem": 0.001305,
"min": 0.001243,
"max": 0.178564,
"recall-0.025": 0.47,
"recall-0.05": 0.863333,
"recall-0.1": 0.946667,
"recall-0.15": 0.965,
"recall-0.25": 1.0,
@ -81,30 +81,30 @@ Results of the model in various experiments on different datasets.
},
"shoulder_right": {
"count": 600,
"mean": 0.049243,
"median": 0.033956,
"std": 0.042808,
"sem": 0.001749,
"min": 0.004642,
"max": 0.255344,
"recall-0.025": 0.218333,
"recall-0.05": 0.748333,
"recall-0.1": 0.901667,
"mean": 0.049177,
"median": 0.034548,
"std": 0.042414,
"sem": 0.001733,
"min": 0.004601,
"max": 0.249061,
"recall-0.025": 0.221667,
"recall-0.05": 0.73,
"recall-0.1": 0.908333,
"recall-0.15": 0.941667,
"recall-0.25": 0.998333,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"elbow_left": {
"count": 600,
"mean": 0.043499,
"median": 0.035409,
"std": 0.034789,
"sem": 0.001421,
"min": 0.002463,
"max": 0.200682,
"recall-0.025": 0.243333,
"recall-0.05": 0.8,
"mean": 0.043333,
"median": 0.034664,
"std": 0.034544,
"sem": 0.001411,
"min": 0.002445,
"max": 0.200532,
"recall-0.025": 0.238333,
"recall-0.05": 0.796667,
"recall-0.1": 0.945,
"recall-0.15": 0.953333,
"recall-0.25": 1.0,
@ -113,162 +113,162 @@ Results of the model in various experiments on different datasets.
},
"elbow_right": {
"count": 600,
"mean": 0.043289,
"median": 0.032684,
"std": 0.035003,
"sem": 0.00143,
"min": 0.007037,
"max": 0.202309,
"recall-0.025": 0.255,
"recall-0.05": 0.805,
"recall-0.1": 0.931667,
"recall-0.15": 0.941667,
"recall-0.25": 1.0,
"mean": 0.043379,
"median": 0.033008,
"std": 0.037384,
"sem": 0.001527,
"min": 0.00441,
"max": 0.300237,
"recall-0.025": 0.241667,
"recall-0.05": 0.828333,
"recall-0.1": 0.93,
"recall-0.15": 0.94,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_left": {
"count": 600,
"mean": 0.043376,
"median": 0.027016,
"std": 0.044176,
"sem": 0.001805,
"min": 0.000972,
"max": 0.340542,
"recall-0.025": 0.466667,
"recall-0.05": 0.728333,
"mean": 0.042137,
"median": 0.026475,
"std": 0.044455,
"sem": 0.001816,
"min": 0.000734,
"max": 0.289424,
"recall-0.025": 0.476667,
"recall-0.05": 0.738333,
"recall-0.1": 0.905,
"recall-0.15": 0.941667,
"recall-0.25": 0.998333,
"recall-0.15": 0.94,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_right": {
"count": 600,
"mean": 0.044908,
"median": 0.027102,
"std": 0.052541,
"sem": 0.002147,
"min": 0.001728,
"max": 0.485231,
"recall-0.025": 0.448333,
"recall-0.05": 0.776667,
"mean": 0.044742,
"median": 0.027623,
"std": 0.050977,
"sem": 0.002083,
"min": 0.001885,
"max": 0.455832,
"recall-0.025": 0.455,
"recall-0.05": 0.753333,
"recall-0.1": 0.893333,
"recall-0.15": 0.911667,
"recall-0.25": 0.995,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"hip_left": {
"count": 600,
"mean": 0.089001,
"median": 0.085342,
"std": 0.032716,
"sem": 0.001337,
"min": 0.007027,
"max": 0.235465,
"recall-0.025": 0.008333,
"recall-0.05": 0.031667,
"recall-0.1": 0.815,
"recall-0.15": 0.948333,
"mean": 0.084994,
"median": 0.079686,
"std": 0.033001,
"sem": 0.001348,
"min": 0.010753,
"max": 0.232419,
"recall-0.025": 0.005,
"recall-0.05": 0.041667,
"recall-0.1": 0.855,
"recall-0.15": 0.95,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"hip_right": {
"count": 600,
"mean": 0.113299,
"median": 0.113584,
"std": 0.026162,
"sem": 0.001069,
"min": 0.04703,
"max": 0.230987,
"mean": 0.108772,
"median": 0.107197,
"std": 0.025158,
"sem": 0.001028,
"min": 0.051859,
"max": 0.227885,
"recall-0.025": 0.0,
"recall-0.05": 0.001667,
"recall-0.1": 0.261667,
"recall-0.05": 0.0,
"recall-0.1": 0.351667,
"recall-0.15": 0.946667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"knee_left": {
"count": 600,
"mean": 0.062069,
"median": 0.044729,
"std": 0.06187,
"sem": 0.002528,
"min": 0.017903,
"max": 0.431859,
"recall-0.025": 0.06,
"recall-0.05": 0.591667,
"recall-0.1": 0.913333,
"recall-0.15": 0.92,
"recall-0.25": 0.978333,
"recall-0.5": 1.0,
"count": 599,
"mean": 0.060126,
"median": 0.044568,
"std": 0.057251,
"sem": 0.002341,
"min": 0.015543,
"max": 0.407951,
"recall-0.025": 0.05,
"recall-0.05": 0.586667,
"recall-0.1": 0.918333,
"recall-0.15": 0.923333,
"recall-0.25": 0.98,
"recall-0.5": 0.998333,
"num_labels": 600
},
"knee_right": {
"count": 600,
"mean": 0.050915,
"median": 0.04249,
"std": 0.036278,
"sem": 0.001482,
"min": 0.015193,
"max": 0.263834,
"recall-0.025": 0.033333,
"recall-0.05": 0.766667,
"recall-0.1": 0.941667,
"recall-0.15": 0.945,
"mean": 0.050346,
"median": 0.041731,
"std": 0.03615,
"sem": 0.001477,
"min": 0.01555,
"max": 0.278599,
"recall-0.025": 0.035,
"recall-0.05": 0.756667,
"recall-0.1": 0.946667,
"recall-0.15": 0.946667,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"ankle_left": {
"count": 598,
"mean": 0.098393,
"median": 0.086077,
"std": 0.050788,
"sem": 0.002079,
"min": 0.036989,
"max": 0.49288,
"count": 599,
"mean": 0.097233,
"median": 0.085626,
"std": 0.047643,
"sem": 0.001948,
"min": 0.050047,
"max": 0.497687,
"recall-0.025": 0.0,
"recall-0.05": 0.005,
"recall-0.1": 0.83,
"recall-0.15": 0.936667,
"recall-0.25": 0.978333,
"recall-0.5": 0.996667,
"recall-0.05": 0.0,
"recall-0.1": 0.84,
"recall-0.15": 0.935,
"recall-0.25": 0.985,
"recall-0.5": 0.998333,
"num_labels": 600
},
"ankle_right": {
"count": 597,
"mean": 0.085279,
"median": 0.069562,
"std": 0.05552,
"sem": 0.002274,
"min": 0.031135,
"max": 0.445133,
"count": 599,
"mean": 0.082942,
"median": 0.068818,
"std": 0.053498,
"sem": 0.002188,
"min": 0.02884,
"max": 0.443019,
"recall-0.025": 0.0,
"recall-0.05": 0.015,
"recall-0.1": 0.878333,
"recall-0.15": 0.901667,
"recall-0.25": 0.973333,
"recall-0.5": 0.995,
"recall-0.05": 0.026667,
"recall-0.1": 0.896667,
"recall-0.15": 0.911667,
"recall-0.25": 0.978333,
"recall-0.5": 0.998333,
"num_labels": 600
},
"joint_recalls": {
"num_labels": 7800,
"recall-0.025": 0.17013,
"recall-0.05": 0.47244,
"recall-0.1": 0.81949,
"recall-0.15": 0.92897,
"recall-0.25": 0.99244,
"recall-0.5": 0.99936
"recall-0.025": 0.16859,
"recall-0.05": 0.4709,
"recall-0.1": 0.83218,
"recall-0.15": 0.92923,
"recall-0.25": 0.99385,
"recall-0.5": 0.99923
}
}
{
"total_parts": 8400,
"correct_parts": 8089,
"pcp": 0.962976
"correct_parts": 8091,
"pcp": 0.963214
}
```

View File

@ -158,7 +158,7 @@ class RTMDet(BaseModel):
image, self.dx, self.dy, self.scale = self.letterbox(
image, (tw, th), fill_value=114
)
tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1]
tensor = np.asarray(image).astype(self.input_type, copy=False)
tensor = np.expand_dims(tensor, axis=0)
return tensor
@ -363,7 +363,6 @@ def get_2d_pose(model, imgs, num_joints=17):
new_poses = []
for i in range(len(imgs)):
img = imgs[i]
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
poses = []
dets = model.predict(img)