Use RGB input for both models.

This commit is contained in:
Daniel
2024-12-02 17:28:34 +01:00
parent dc44a71b2c
commit 36781e616b
3 changed files with 155 additions and 161 deletions

View File

@ -8,8 +8,8 @@ base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
pose_model_path = base_path + "rtmpose-m_384x288.onnx" pose_model_path = base_path + "rtmpose-m_384x288.onnx"
det_model_path = base_path + "rtmdet-nano_320x320.onnx" det_model_path = base_path + "rtmdet-nano_320x320.onnx"
norm_mean = -1 * np.array([103.53, 116.28, 123.675]) norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / np.array([57.375, 57.12, 58.395]) norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)
# ================================================================================================== # ==================================================================================================
@ -24,11 +24,6 @@ def add_steps_to_onnx(model_path):
mean = norm_mean.astype(np.float32) mean = norm_mean.astype(np.float32)
std = norm_std.astype(np.float32) std = norm_std.astype(np.float32)
use_bgr = bool("rtmpose" in model_path)
if use_bgr:
mean = mean[::-1]
std = std[::-1]
mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32) mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32)
std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32) std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32)

View File

@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
```json ```json
{ {
"avg_time_2d": 0.016608773651769607, "avg_time_2d": 0.016274028309321,
"avg_time_3d": 0.00034795211533368645, "avg_time_3d": 0.00032552096803309556,
"avg_fps": 58.97364937870487 "avg_fps": 60.24259956047411
} }
{ {
"person_nums": { "person_nums": {
@ -27,52 +27,52 @@ Results of the model in various experiments on different datasets.
}, },
"mpjpe": { "mpjpe": {
"count": 600, "count": 600,
"mean": 0.067471, "mean": 0.066064,
"median": 0.0592, "median": 0.058463,
"std": 0.02795, "std": 0.027791,
"sem": 0.001142, "sem": 0.001136,
"min": 0.042592, "min": 0.040706,
"max": 0.189987, "max": 0.189425,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.048333, "recall-0.05": 0.085,
"recall-0.1": 0.925, "recall-0.1": 0.936667,
"recall-0.15": 0.95, "recall-0.15": 0.95,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600, "num_labels": 600,
"ap-0.025": 0.0, "ap-0.025": 0.0,
"ap-0.05": 0.004097, "ap-0.05": 0.012704,
"ap-0.1": 0.885305, "ap-0.1": 0.897461,
"ap-0.15": 0.915769, "ap-0.15": 0.915018,
"ap-0.25": 1.0, "ap-0.25": 1.0,
"ap-0.5": 1.0 "ap-0.5": 1.0
}, },
"nose": { "nose": {
"count": 600, "count": 600,
"mean": 0.115621, "mean": 0.114664,
"median": 0.100161, "median": 0.10192,
"std": 0.041657, "std": 0.040958,
"sem": 0.001702, "sem": 0.001673,
"min": 0.031411, "min": 0.027318,
"max": 0.276464, "max": 0.26417,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.01, "recall-0.05": 0.006667,
"recall-0.1": 0.498333, "recall-0.1": 0.488333,
"recall-0.15": 0.826667, "recall-0.15": 0.82,
"recall-0.25": 0.993333, "recall-0.25": 0.993333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"shoulder_left": { "shoulder_left": {
"count": 600, "count": 600,
"mean": 0.033598, "mean": 0.034211,
"median": 0.025444, "median": 0.026464,
"std": 0.032078, "std": 0.031942,
"sem": 0.001311, "sem": 0.001305,
"min": 0.001187, "min": 0.001243,
"max": 0.181528, "max": 0.178564,
"recall-0.025": 0.486667, "recall-0.025": 0.47,
"recall-0.05": 0.865, "recall-0.05": 0.863333,
"recall-0.1": 0.946667, "recall-0.1": 0.946667,
"recall-0.15": 0.965, "recall-0.15": 0.965,
"recall-0.25": 1.0, "recall-0.25": 1.0,
@ -81,30 +81,30 @@ Results of the model in various experiments on different datasets.
}, },
"shoulder_right": { "shoulder_right": {
"count": 600, "count": 600,
"mean": 0.049243, "mean": 0.049177,
"median": 0.033956, "median": 0.034548,
"std": 0.042808, "std": 0.042414,
"sem": 0.001749, "sem": 0.001733,
"min": 0.004642, "min": 0.004601,
"max": 0.255344, "max": 0.249061,
"recall-0.025": 0.218333, "recall-0.025": 0.221667,
"recall-0.05": 0.748333, "recall-0.05": 0.73,
"recall-0.1": 0.901667, "recall-0.1": 0.908333,
"recall-0.15": 0.941667, "recall-0.15": 0.941667,
"recall-0.25": 0.998333, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_left": { "elbow_left": {
"count": 600, "count": 600,
"mean": 0.043499, "mean": 0.043333,
"median": 0.035409, "median": 0.034664,
"std": 0.034789, "std": 0.034544,
"sem": 0.001421, "sem": 0.001411,
"min": 0.002463, "min": 0.002445,
"max": 0.200682, "max": 0.200532,
"recall-0.025": 0.243333, "recall-0.025": 0.238333,
"recall-0.05": 0.8, "recall-0.05": 0.796667,
"recall-0.1": 0.945, "recall-0.1": 0.945,
"recall-0.15": 0.953333, "recall-0.15": 0.953333,
"recall-0.25": 1.0, "recall-0.25": 1.0,
@ -113,162 +113,162 @@ Results of the model in various experiments on different datasets.
}, },
"elbow_right": { "elbow_right": {
"count": 600, "count": 600,
"mean": 0.043289, "mean": 0.043379,
"median": 0.032684, "median": 0.033008,
"std": 0.035003, "std": 0.037384,
"sem": 0.00143, "sem": 0.001527,
"min": 0.007037, "min": 0.00441,
"max": 0.202309, "max": 0.300237,
"recall-0.025": 0.255, "recall-0.025": 0.241667,
"recall-0.05": 0.805, "recall-0.05": 0.828333,
"recall-0.1": 0.931667, "recall-0.1": 0.93,
"recall-0.15": 0.941667, "recall-0.15": 0.94,
"recall-0.25": 1.0, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_left": { "wrist_left": {
"count": 600, "count": 600,
"mean": 0.043376, "mean": 0.042137,
"median": 0.027016, "median": 0.026475,
"std": 0.044176, "std": 0.044455,
"sem": 0.001805, "sem": 0.001816,
"min": 0.000972, "min": 0.000734,
"max": 0.340542, "max": 0.289424,
"recall-0.025": 0.466667, "recall-0.025": 0.476667,
"recall-0.05": 0.728333, "recall-0.05": 0.738333,
"recall-0.1": 0.905, "recall-0.1": 0.905,
"recall-0.15": 0.941667, "recall-0.15": 0.94,
"recall-0.25": 0.998333, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_right": { "wrist_right": {
"count": 600, "count": 600,
"mean": 0.044908, "mean": 0.044742,
"median": 0.027102, "median": 0.027623,
"std": 0.052541, "std": 0.050977,
"sem": 0.002147, "sem": 0.002083,
"min": 0.001728, "min": 0.001885,
"max": 0.485231, "max": 0.455832,
"recall-0.025": 0.448333, "recall-0.025": 0.455,
"recall-0.05": 0.776667, "recall-0.05": 0.753333,
"recall-0.1": 0.893333, "recall-0.1": 0.893333,
"recall-0.15": 0.911667, "recall-0.15": 0.911667,
"recall-0.25": 0.995, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"hip_left": { "hip_left": {
"count": 600, "count": 600,
"mean": 0.089001, "mean": 0.084994,
"median": 0.085342, "median": 0.079686,
"std": 0.032716, "std": 0.033001,
"sem": 0.001337, "sem": 0.001348,
"min": 0.007027, "min": 0.010753,
"max": 0.235465, "max": 0.232419,
"recall-0.025": 0.008333, "recall-0.025": 0.005,
"recall-0.05": 0.031667, "recall-0.05": 0.041667,
"recall-0.1": 0.815, "recall-0.1": 0.855,
"recall-0.15": 0.948333, "recall-0.15": 0.95,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"hip_right": { "hip_right": {
"count": 600, "count": 600,
"mean": 0.113299, "mean": 0.108772,
"median": 0.113584, "median": 0.107197,
"std": 0.026162, "std": 0.025158,
"sem": 0.001069, "sem": 0.001028,
"min": 0.04703, "min": 0.051859,
"max": 0.230987, "max": 0.227885,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.001667, "recall-0.05": 0.0,
"recall-0.1": 0.261667, "recall-0.1": 0.351667,
"recall-0.15": 0.946667, "recall-0.15": 0.946667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"knee_left": { "knee_left": {
"count": 600, "count": 599,
"mean": 0.062069, "mean": 0.060126,
"median": 0.044729, "median": 0.044568,
"std": 0.06187, "std": 0.057251,
"sem": 0.002528, "sem": 0.002341,
"min": 0.017903, "min": 0.015543,
"max": 0.431859, "max": 0.407951,
"recall-0.025": 0.06, "recall-0.025": 0.05,
"recall-0.05": 0.591667, "recall-0.05": 0.586667,
"recall-0.1": 0.913333, "recall-0.1": 0.918333,
"recall-0.15": 0.92, "recall-0.15": 0.923333,
"recall-0.25": 0.978333, "recall-0.25": 0.98,
"recall-0.5": 1.0, "recall-0.5": 0.998333,
"num_labels": 600 "num_labels": 600
}, },
"knee_right": { "knee_right": {
"count": 600, "count": 600,
"mean": 0.050915, "mean": 0.050346,
"median": 0.04249, "median": 0.041731,
"std": 0.036278, "std": 0.03615,
"sem": 0.001482, "sem": 0.001477,
"min": 0.015193, "min": 0.01555,
"max": 0.263834, "max": 0.278599,
"recall-0.025": 0.033333, "recall-0.025": 0.035,
"recall-0.05": 0.766667, "recall-0.05": 0.756667,
"recall-0.1": 0.941667, "recall-0.1": 0.946667,
"recall-0.15": 0.945, "recall-0.15": 0.946667,
"recall-0.25": 0.996667, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"ankle_left": { "ankle_left": {
"count": 598, "count": 599,
"mean": 0.098393, "mean": 0.097233,
"median": 0.086077, "median": 0.085626,
"std": 0.050788, "std": 0.047643,
"sem": 0.002079, "sem": 0.001948,
"min": 0.036989, "min": 0.050047,
"max": 0.49288, "max": 0.497687,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.005, "recall-0.05": 0.0,
"recall-0.1": 0.83, "recall-0.1": 0.84,
"recall-0.15": 0.936667, "recall-0.15": 0.935,
"recall-0.25": 0.978333, "recall-0.25": 0.985,
"recall-0.5": 0.996667, "recall-0.5": 0.998333,
"num_labels": 600 "num_labels": 600
}, },
"ankle_right": { "ankle_right": {
"count": 597, "count": 599,
"mean": 0.085279, "mean": 0.082942,
"median": 0.069562, "median": 0.068818,
"std": 0.05552, "std": 0.053498,
"sem": 0.002274, "sem": 0.002188,
"min": 0.031135, "min": 0.02884,
"max": 0.445133, "max": 0.443019,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.015, "recall-0.05": 0.026667,
"recall-0.1": 0.878333, "recall-0.1": 0.896667,
"recall-0.15": 0.901667, "recall-0.15": 0.911667,
"recall-0.25": 0.973333, "recall-0.25": 0.978333,
"recall-0.5": 0.995, "recall-0.5": 0.998333,
"num_labels": 600 "num_labels": 600
}, },
"joint_recalls": { "joint_recalls": {
"num_labels": 7800, "num_labels": 7800,
"recall-0.025": 0.17013, "recall-0.025": 0.16859,
"recall-0.05": 0.47244, "recall-0.05": 0.4709,
"recall-0.1": 0.81949, "recall-0.1": 0.83218,
"recall-0.15": 0.92897, "recall-0.15": 0.92923,
"recall-0.25": 0.99244, "recall-0.25": 0.99385,
"recall-0.5": 0.99936 "recall-0.5": 0.99923
} }
} }
{ {
"total_parts": 8400, "total_parts": 8400,
"correct_parts": 8089, "correct_parts": 8091,
"pcp": 0.962976 "pcp": 0.963214
} }
``` ```

View File

@ -158,7 +158,7 @@ class RTMDet(BaseModel):
image, self.dx, self.dy, self.scale = self.letterbox( image, self.dx, self.dy, self.scale = self.letterbox(
image, (tw, th), fill_value=114 image, (tw, th), fill_value=114
) )
tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1] tensor = np.asarray(image).astype(self.input_type, copy=False)
tensor = np.expand_dims(tensor, axis=0) tensor = np.expand_dims(tensor, axis=0)
return tensor return tensor
@ -363,7 +363,6 @@ def get_2d_pose(model, imgs, num_joints=17):
new_poses = [] new_poses = []
for i in range(len(imgs)): for i in range(len(imgs)):
img = imgs[i] img = imgs[i]
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
poses = [] poses = []
dets = model.predict(img) dets = model.predict(img)