From 36781e616bdb2b54ab4f0a5da2977de3b753af15 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 2 Dec 2024 17:28:34 +0100 Subject: [PATCH] Use rgb input for both models. --- extras/mmdeploy/add_extra_steps.py | 9 +- media/RESULTS.md | 304 ++++++++++++++--------------- scripts/utils_2d_pose_ort.py | 3 +- 3 files changed, 155 insertions(+), 161 deletions(-) diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py index 8f2af09..82b0dce 100644 --- a/extras/mmdeploy/add_extra_steps.py +++ b/extras/mmdeploy/add_extra_steps.py @@ -8,8 +8,8 @@ base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/" pose_model_path = base_path + "rtmpose-m_384x288.onnx" det_model_path = base_path + "rtmdet-nano_320x320.onnx" -norm_mean = -1 * np.array([103.53, 116.28, 123.675]) -norm_std = 1.0 / np.array([57.375, 57.12, 58.395]) +norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255) +norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255) # ================================================================================================== @@ -24,11 +24,6 @@ def add_steps_to_onnx(model_path): mean = norm_mean.astype(np.float32) std = norm_std.astype(np.float32) - use_bgr = bool("rtmpose" in model_path) - if use_bgr: - mean = mean[::-1] - std = std[::-1] - mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32) std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32) diff --git a/media/RESULTS.md b/media/RESULTS.md index 2709cc3..fbe9ca9 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.016608773651769607, - "avg_time_3d": 0.00034795211533368645, - "avg_fps": 58.97364937870487 + "avg_time_2d": 0.016274028309321, + "avg_time_3d": 0.00032552096803309556, + "avg_fps": 60.24259956047411 } { "person_nums": { @@ -27,52 +27,52 @@ Results of the model in various experiments on different datasets. }, "mpjpe": { "count": 600, - "mean": 0.067471, - "median": 0.0592, - "std": 0.02795, - "sem": 0.001142, - "min": 0.042592, - "max": 0.189987, + "mean": 0.066064, + "median": 0.058463, + "std": 0.027791, + "sem": 0.001136, + "min": 0.040706, + "max": 0.189425, "recall-0.025": 0.0, - "recall-0.05": 0.048333, - "recall-0.1": 0.925, + "recall-0.05": 0.085, + "recall-0.1": 0.936667, "recall-0.15": 0.95, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600, "ap-0.025": 0.0, - "ap-0.05": 0.004097, - "ap-0.1": 0.885305, - "ap-0.15": 0.915769, + "ap-0.05": 0.012704, + "ap-0.1": 0.897461, + "ap-0.15": 0.915018, "ap-0.25": 1.0, "ap-0.5": 1.0 }, "nose": { "count": 600, - "mean": 0.115621, - "median": 0.100161, - "std": 0.041657, - "sem": 0.001702, - "min": 0.031411, - "max": 0.276464, + "mean": 0.114664, + "median": 0.10192, + "std": 0.040958, + "sem": 0.001673, + "min": 0.027318, + "max": 0.26417, "recall-0.025": 0.0, - "recall-0.05": 0.01, - "recall-0.1": 0.498333, - "recall-0.15": 0.826667, + "recall-0.05": 0.006667, + "recall-0.1": 0.488333, + "recall-0.15": 0.82, "recall-0.25": 0.993333, "recall-0.5": 1.0, "num_labels": 600 }, "shoulder_left": { "count": 600, - "mean": 0.033598, - "median": 0.025444, - "std": 0.032078, - "sem": 0.001311, - "min": 0.001187, - "max": 0.181528, - "recall-0.025": 0.486667, - "recall-0.05": 0.865, + "mean": 0.034211, + "median": 0.026464, + "std": 0.031942, + "sem": 0.001305, + "min": 0.001243, + "max": 0.178564, + "recall-0.025": 0.47, + "recall-0.05": 0.863333, "recall-0.1": 0.946667, "recall-0.15": 0.965, "recall-0.25": 1.0, @@ -81,30 +81,30 @@ Results of the model in various experiments on different datasets. }, "shoulder_right": { "count": 600, - "mean": 0.049243, - "median": 0.033956, - "std": 0.042808, - "sem": 0.001749, - "min": 0.004642, - "max": 0.255344, - "recall-0.025": 0.218333, - "recall-0.05": 0.748333, - "recall-0.1": 0.901667, + "mean": 0.049177, + "median": 0.034548, + "std": 0.042414, + "sem": 0.001733, + "min": 0.004601, + "max": 0.249061, + "recall-0.025": 0.221667, + "recall-0.05": 0.73, + "recall-0.1": 0.908333, "recall-0.15": 0.941667, - "recall-0.25": 0.998333, + "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "elbow_left": { "count": 600, - "mean": 0.043499, - "median": 0.035409, - "std": 0.034789, - "sem": 0.001421, - "min": 0.002463, - "max": 0.200682, - "recall-0.025": 0.243333, - "recall-0.05": 0.8, + "mean": 0.043333, + "median": 0.034664, + "std": 0.034544, + "sem": 0.001411, + "min": 0.002445, + "max": 0.200532, + "recall-0.025": 0.238333, + "recall-0.05": 0.796667, "recall-0.1": 0.945, "recall-0.15": 0.953333, "recall-0.25": 1.0, @@ -113,162 +113,162 @@ Results of the model in various experiments on different datasets. }, "elbow_right": { "count": 600, - "mean": 0.043289, - "median": 0.032684, - "std": 0.035003, - "sem": 0.00143, - "min": 0.007037, - "max": 0.202309, - "recall-0.025": 0.255, - "recall-0.05": 0.805, - "recall-0.1": 0.931667, - "recall-0.15": 0.941667, - "recall-0.25": 1.0, + "mean": 0.043379, + "median": 0.033008, + "std": 0.037384, + "sem": 0.001527, + "min": 0.00441, + "max": 0.300237, + "recall-0.025": 0.241667, + "recall-0.05": 0.828333, + "recall-0.1": 0.93, + "recall-0.15": 0.94, + "recall-0.25": 0.996667, "recall-0.5": 1.0, "num_labels": 600 }, "wrist_left": { "count": 600, - "mean": 0.043376, - "median": 0.027016, - "std": 0.044176, - "sem": 0.001805, - "min": 0.000972, - "max": 0.340542, - "recall-0.025": 0.466667, - "recall-0.05": 0.728333, + "mean": 0.042137, + "median": 0.026475, + "std": 0.044455, + "sem": 0.001816, + "min": 0.000734, + "max": 0.289424, + "recall-0.025": 0.476667, + "recall-0.05": 0.738333, "recall-0.1": 0.905, - "recall-0.15": 0.941667, - "recall-0.25": 0.998333, + "recall-0.15": 0.94, + "recall-0.25": 0.996667, "recall-0.5": 1.0, "num_labels": 600 }, "wrist_right": { "count": 600, - "mean": 0.044908, - "median": 0.027102, - "std": 0.052541, - "sem": 0.002147, - "min": 0.001728, - "max": 0.485231, - "recall-0.025": 0.448333, - "recall-0.05": 0.776667, + "mean": 0.044742, + "median": 0.027623, + "std": 0.050977, + "sem": 0.002083, + "min": 0.001885, + "max": 0.455832, + "recall-0.025": 0.455, + "recall-0.05": 0.753333, "recall-0.1": 0.893333, "recall-0.15": 0.911667, - "recall-0.25": 0.995, + "recall-0.25": 0.996667, "recall-0.5": 1.0, "num_labels": 600 }, "hip_left": { "count": 600, - "mean": 0.089001, - "median": 0.085342, - "std": 0.032716, - "sem": 0.001337, - "min": 0.007027, - "max": 0.235465, - "recall-0.025": 0.008333, - "recall-0.05": 0.031667, - "recall-0.1": 0.815, - "recall-0.15": 0.948333, + "mean": 0.084994, + "median": 0.079686, + "std": 0.033001, + "sem": 0.001348, + "min": 0.010753, + "max": 0.232419, + "recall-0.025": 0.005, + "recall-0.05": 0.041667, + "recall-0.1": 0.855, + "recall-0.15": 0.95, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "hip_right": { "count": 600, - "mean": 0.113299, - "median": 0.113584, - "std": 0.026162, - "sem": 0.001069, - "min": 0.04703, - "max": 0.230987, + "mean": 0.108772, + "median": 0.107197, + "std": 0.025158, + "sem": 0.001028, + "min": 0.051859, + "max": 0.227885, "recall-0.025": 0.0, - "recall-0.05": 0.001667, - "recall-0.1": 0.261667, + "recall-0.05": 0.0, + "recall-0.1": 0.351667, "recall-0.15": 0.946667, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "knee_left": { - "count": 600, - "mean": 0.062069, - "median": 0.044729, - "std": 0.06187, - "sem": 0.002528, - "min": 0.017903, - "max": 0.431859, - "recall-0.025": 0.06, - "recall-0.05": 0.591667, - "recall-0.1": 0.913333, - "recall-0.15": 0.92, - "recall-0.25": 0.978333, - "recall-0.5": 1.0, + "count": 599, + "mean": 0.060126, + "median": 0.044568, + "std": 0.057251, + "sem": 0.002341, + "min": 0.015543, + "max": 0.407951, + "recall-0.025": 0.05, + "recall-0.05": 0.586667, + "recall-0.1": 0.918333, + "recall-0.15": 0.923333, + "recall-0.25": 0.98, + "recall-0.5": 0.998333, "num_labels": 600 }, "knee_right": { "count": 600, - "mean": 0.050915, - "median": 0.04249, - "std": 0.036278, - "sem": 0.001482, - "min": 0.015193, - "max": 0.263834, - "recall-0.025": 0.033333, - "recall-0.05": 0.766667, - "recall-0.1": 0.941667, - "recall-0.15": 0.945, + "mean": 0.050346, + "median": 0.041731, + "std": 0.03615, + "sem": 0.001477, + "min": 0.01555, + "max": 0.278599, + "recall-0.025": 0.035, + "recall-0.05": 0.756667, + "recall-0.1": 0.946667, + "recall-0.15": 0.946667, "recall-0.25": 0.996667, "recall-0.5": 1.0, "num_labels": 600 }, "ankle_left": { - "count": 598, - "mean": 0.098393, - "median": 0.086077, - "std": 0.050788, - "sem": 0.002079, - "min": 0.036989, - "max": 0.49288, + "count": 599, + "mean": 0.097233, + "median": 0.085626, + "std": 0.047643, + "sem": 0.001948, + "min": 0.050047, + "max": 0.497687, "recall-0.025": 0.0, - "recall-0.05": 0.005, - "recall-0.1": 0.83, - "recall-0.15": 0.936667, - "recall-0.25": 0.978333, - "recall-0.5": 0.996667, + "recall-0.05": 0.0, + "recall-0.1": 0.84, + "recall-0.15": 0.935, + "recall-0.25": 0.985, + "recall-0.5": 0.998333, "num_labels": 600 }, "ankle_right": { - "count": 597, - "mean": 0.085279, - "median": 0.069562, - "std": 0.05552, - "sem": 0.002274, - "min": 0.031135, - "max": 0.445133, + "count": 599, + "mean": 0.082942, + "median": 0.068818, + "std": 0.053498, + "sem": 0.002188, + "min": 0.02884, + "max": 0.443019, "recall-0.025": 0.0, - "recall-0.05": 0.015, - "recall-0.1": 0.878333, - "recall-0.15": 0.901667, - "recall-0.25": 0.973333, - "recall-0.5": 0.995, + "recall-0.05": 0.026667, + "recall-0.1": 0.896667, + "recall-0.15": 0.911667, + "recall-0.25": 0.978333, + "recall-0.5": 0.998333, "num_labels": 600 }, "joint_recalls": { "num_labels": 7800, - "recall-0.025": 0.17013, - "recall-0.05": 0.47244, - "recall-0.1": 0.81949, - "recall-0.15": 0.92897, - "recall-0.25": 0.99244, - "recall-0.5": 0.99936 + "recall-0.025": 0.16859, + "recall-0.05": 0.4709, + "recall-0.1": 0.83218, + "recall-0.15": 0.92923, + "recall-0.25": 0.99385, + "recall-0.5": 0.99923 } } { "total_parts": 8400, - "correct_parts": 8089, - "pcp": 0.962976 + "correct_parts": 8091, + "pcp": 0.963214 } ``` diff --git a/scripts/utils_2d_pose_ort.py b/scripts/utils_2d_pose_ort.py index a61cf67..8b925d8 100644 --- a/scripts/utils_2d_pose_ort.py +++ b/scripts/utils_2d_pose_ort.py @@ -158,7 +158,7 @@ class RTMDet(BaseModel): image, self.dx, self.dy, self.scale = self.letterbox( image, (tw, th), fill_value=114 ) - tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1] + tensor = np.asarray(image).astype(self.input_type, copy=False) tensor = np.expand_dims(tensor, axis=0) return tensor @@ -363,7 +363,6 @@ def get_2d_pose(model, imgs, num_joints=17): new_poses = [] for i in range(len(imgs)): img = imgs[i] - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) poses = [] dets = model.predict(img)