From dc44a71b2c9c4fd7fc4516c4143b278eecfe3aaf Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 2 Dec 2024 17:00:04 +0100 Subject: [PATCH] Some further speedups. --- extras/mmdeploy/add_extra_steps.py | 39 +++- media/RESULTS.md | 320 ++++++++++++++--------------- scripts/test_triangulate.py | 2 +- scripts/utils_2d_pose_ort.py | 20 +- 4 files changed, 202 insertions(+), 179 deletions(-) diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py index cc44fdb..8f2af09 100644 --- a/extras/mmdeploy/add_extra_steps.py +++ b/extras/mmdeploy/add_extra_steps.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import helper, numpy_helper, TensorProto +from onnx import TensorProto, helper, numpy_helper # ================================================================================================== @@ -45,10 +45,8 @@ def add_steps_to_onnx(model_path): # Define layer names, assuming the first input is the image tensor input_name = graph.input[0].name - # Set input type to always be float32 - graph.input[0].type.tensor_type.elem_type = TensorProto.FLOAT - - # Create to cast the float32 if needed + # Cast to internal type + # This has to be the first node, because tensorrt does not support uint8 layers cast_type = 10 if use_fp16 else 1 casted_output = "casted_output" cast_node = helper.make_node( @@ -58,11 +56,21 @@ def add_steps_to_onnx(model_path): to=cast_type, ) + # Node to transpose + transpose_output = "transpose_output" + transpose_node = helper.make_node( + "Transpose", + inputs=[casted_output], + outputs=[transpose_output], + perm=[0, 3, 1, 2], + name="Transpose", + ) + # Node to add mean mean_added_output = "mean_added_output" mean_add_node = helper.make_node( "Add", - inputs=[casted_output, "norm_mean"], + inputs=[transpose_output, "norm_mean"], outputs=[mean_added_output], name="Mean_Addition", ) @@ -84,13 +92,26 @@ def add_steps_to_onnx(model_path): # Add the new nodes to the graph graph.node.insert(0, cast_node) - 
graph.node.insert(1, mean_add_node) - graph.node.insert(2, std_mul_node) + graph.node.insert(1, transpose_node) + graph.node.insert(2, mean_add_node) + graph.node.insert(3, std_mul_node) - path = model_path.replace(".onnx", "_with-norm.onnx") + # Transpose the input shape + input_shape = graph.input[0].type.tensor_type.shape.dim + dims = [dim.dim_value for dim in input_shape] + for i, j in enumerate([0, 3, 1, 2]): + input_shape[j].dim_value = dims[i] + + # Set input type to uint8 + model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8 + + path = model_path.replace(".onnx", "_extra-steps.onnx") onnx.save(model, path) +# ================================================================================================== + + def main(): add_steps_to_onnx(pose_model_path) add_steps_to_onnx(det_model_path) diff --git a/media/RESULTS.md b/media/RESULTS.md index 0e7e5ae..2709cc3 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.019079747846571064, - "avg_time_3d": 0.0003823995590209961, - "avg_fps": 51.381791492991674 + "avg_time_2d": 0.016608773651769607, + "avg_time_3d": 0.00034795211533368645, + "avg_fps": 58.97364937870487 } { "person_nums": { @@ -28,247 +28,247 @@ Results of the model in various experiments on different datasets. 
"mpjpe": { "count": 600, "mean": 0.067471, - "median": 0.059656, - "std": 0.027837, - "sem": 0.001137, - "min": 0.04161, - "max": 0.191019, + "median": 0.0592, + "std": 0.02795, + "sem": 0.001142, + "min": 0.042592, + "max": 0.189987, "recall-0.025": 0.0, - "recall-0.05": 0.045, - "recall-0.1": 0.93, + "recall-0.05": 0.048333, + "recall-0.1": 0.925, "recall-0.15": 0.95, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600, "ap-0.025": 0.0, - "ap-0.05": 0.005008, - "ap-0.1": 0.890421, - "ap-0.15": 0.915856, + "ap-0.05": 0.004097, + "ap-0.1": 0.885305, + "ap-0.15": 0.915769, "ap-0.25": 1.0, "ap-0.5": 1.0 }, "nose": { "count": 600, - "mean": 0.116291, - "median": 0.101326, - "std": 0.04236, - "sem": 0.001731, - "min": 0.021073, - "max": 0.288626, - "recall-0.025": 0.001667, - "recall-0.05": 0.006667, - "recall-0.1": 0.491667, - "recall-0.15": 0.821667, - "recall-0.25": 0.99, + "mean": 0.115621, + "median": 0.100161, + "std": 0.041657, + "sem": 0.001702, + "min": 0.031411, + "max": 0.276464, + "recall-0.025": 0.0, + "recall-0.05": 0.01, + "recall-0.1": 0.498333, + "recall-0.15": 0.826667, + "recall-0.25": 0.993333, "recall-0.5": 1.0, "num_labels": 600 }, "shoulder_left": { "count": 600, - "mean": 0.033847, - "median": 0.025987, - "std": 0.031824, - "sem": 0.0013, - "min": 0.002728, - "max": 0.181894, - "recall-0.025": 0.466667, - "recall-0.05": 0.866667, - "recall-0.1": 0.948333, - "recall-0.15": 0.968333, + "mean": 0.033598, + "median": 0.025444, + "std": 0.032078, + "sem": 0.001311, + "min": 0.001187, + "max": 0.181528, + "recall-0.025": 0.486667, + "recall-0.05": 0.865, + "recall-0.1": 0.946667, + "recall-0.15": 0.965, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "shoulder_right": { "count": 600, - "mean": 0.04973, - "median": 0.034604, - "std": 0.044052, - "sem": 0.0018, - "min": 0.00395, - "max": 0.256048, - "recall-0.025": 0.211667, - "recall-0.05": 0.753333, - "recall-0.1": 0.906667, - "recall-0.15": 0.936667, + "mean": 0.049243, + "median": 
0.033956, + "std": 0.042808, + "sem": 0.001749, + "min": 0.004642, + "max": 0.255344, + "recall-0.025": 0.218333, + "recall-0.05": 0.748333, + "recall-0.1": 0.901667, + "recall-0.15": 0.941667, "recall-0.25": 0.998333, "recall-0.5": 1.0, "num_labels": 600 }, "elbow_left": { "count": 600, - "mean": 0.042919, - "median": 0.035054, - "std": 0.034438, - "sem": 0.001407, - "min": 0.00136, - "max": 0.198368, - "recall-0.025": 0.246667, - "recall-0.05": 0.825, - "recall-0.1": 0.948333, - "recall-0.15": 0.955, + "mean": 0.043499, + "median": 0.035409, + "std": 0.034789, + "sem": 0.001421, + "min": 0.002463, + "max": 0.200682, + "recall-0.025": 0.243333, + "recall-0.05": 0.8, + "recall-0.1": 0.945, + "recall-0.15": 0.953333, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "elbow_right": { "count": 600, - "mean": 0.043901, - "median": 0.033077, - "std": 0.036324, - "sem": 0.001484, - "min": 0.006465, - "max": 0.274304, - "recall-0.025": 0.241667, - "recall-0.05": 0.81, - "recall-0.1": 0.925, - "recall-0.15": 0.943333, - "recall-0.25": 0.998333, - "recall-0.5": 1.0, - "num_labels": 600 - }, - "wrist_left": { - "count": 600, - "mean": 0.043044, - "median": 0.026871, - "std": 0.04257, - "sem": 0.001739, - "min": 0.000214, - "max": 0.202968, - "recall-0.025": 0.466667, - "recall-0.05": 0.731667, - "recall-0.1": 0.903333, + "mean": 0.043289, + "median": 0.032684, + "std": 0.035003, + "sem": 0.00143, + "min": 0.007037, + "max": 0.202309, + "recall-0.025": 0.255, + "recall-0.05": 0.805, + "recall-0.1": 0.931667, "recall-0.15": 0.941667, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, + "wrist_left": { + "count": 600, + "mean": 0.043376, + "median": 0.027016, + "std": 0.044176, + "sem": 0.001805, + "min": 0.000972, + "max": 0.340542, + "recall-0.025": 0.466667, + "recall-0.05": 0.728333, + "recall-0.1": 0.905, + "recall-0.15": 0.941667, + "recall-0.25": 0.998333, + "recall-0.5": 1.0, + "num_labels": 600 + }, "wrist_right": { "count": 600, - "mean": 0.044102, 
- "median": 0.026291, - "std": 0.050657, - "sem": 0.00207, - "min": 0.003409, - "max": 0.460683, - "recall-0.025": 0.466667, + "mean": 0.044908, + "median": 0.027102, + "std": 0.052541, + "sem": 0.002147, + "min": 0.001728, + "max": 0.485231, + "recall-0.025": 0.448333, "recall-0.05": 0.776667, - "recall-0.1": 0.895, - "recall-0.15": 0.915, - "recall-0.25": 0.996667, + "recall-0.1": 0.893333, + "recall-0.15": 0.911667, + "recall-0.25": 0.995, "recall-0.5": 1.0, "num_labels": 600 }, "hip_left": { "count": 600, - "mean": 0.090158, - "median": 0.08564, - "std": 0.032791, - "sem": 0.00134, - "min": 0.014562, - "max": 0.236403, - "recall-0.025": 0.001667, - "recall-0.05": 0.036667, - "recall-0.1": 0.82, - "recall-0.15": 0.945, + "mean": 0.089001, + "median": 0.085342, + "std": 0.032716, + "sem": 0.001337, + "min": 0.007027, + "max": 0.235465, + "recall-0.025": 0.008333, + "recall-0.05": 0.031667, + "recall-0.1": 0.815, + "recall-0.15": 0.948333, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "hip_right": { "count": 600, - "mean": 0.11407, - "median": 0.114481, - "std": 0.02606, - "sem": 0.001065, - "min": 0.045083, - "max": 0.233275, + "mean": 0.113299, + "median": 0.113584, + "std": 0.026162, + "sem": 0.001069, + "min": 0.04703, + "max": 0.230987, "recall-0.025": 0.0, "recall-0.05": 0.001667, - "recall-0.1": 0.251667, - "recall-0.15": 0.948333, + "recall-0.1": 0.261667, + "recall-0.15": 0.946667, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "knee_left": { "count": 600, - "mean": 0.061433, - "median": 0.045616, - "std": 0.060807, - "sem": 0.002484, - "min": 0.018825, - "max": 0.425003, - "recall-0.025": 0.058333, - "recall-0.05": 0.593333, - "recall-0.1": 0.915, - "recall-0.15": 0.923333, - "recall-0.25": 0.98, + "mean": 0.062069, + "median": 0.044729, + "std": 0.06187, + "sem": 0.002528, + "min": 0.017903, + "max": 0.431859, + "recall-0.025": 0.06, + "recall-0.05": 0.591667, + "recall-0.1": 0.913333, + "recall-0.15": 0.92, + "recall-0.25": 
0.978333, "recall-0.5": 1.0, "num_labels": 600 }, "knee_right": { "count": 600, - "mean": 0.050724, - "median": 0.042002, - "std": 0.036168, - "sem": 0.001478, - "min": 0.016654, - "max": 0.257622, - "recall-0.025": 0.038333, - "recall-0.05": 0.76, + "mean": 0.050915, + "median": 0.04249, + "std": 0.036278, + "sem": 0.001482, + "min": 0.015193, + "max": 0.263834, + "recall-0.025": 0.033333, + "recall-0.05": 0.766667, "recall-0.1": 0.941667, "recall-0.15": 0.945, - "recall-0.25": 0.998333, + "recall-0.25": 0.996667, "recall-0.5": 1.0, "num_labels": 600 }, "ankle_left": { - "count": 599, - "mean": 0.09891, - "median": 0.086872, - "std": 0.051463, - "sem": 0.002104, - "min": 0.035674, - "max": 0.490677, - "recall-0.025": 0.0, - "recall-0.05": 0.003333, - "recall-0.1": 0.831667, - "recall-0.15": 0.936667, - "recall-0.25": 0.98, - "recall-0.5": 0.998333, - "num_labels": 600 - }, - "ankle_right": { "count": 598, - "mean": 0.085207, - "median": 0.069747, - "std": 0.054773, - "sem": 0.002242, - "min": 0.030417, - "max": 0.440549, + "mean": 0.098393, + "median": 0.086077, + "std": 0.050788, + "sem": 0.002079, + "min": 0.036989, + "max": 0.49288, "recall-0.025": 0.0, - "recall-0.05": 0.021667, - "recall-0.1": 0.88, - "recall-0.15": 0.906667, + "recall-0.05": 0.005, + "recall-0.1": 0.83, + "recall-0.15": 0.936667, "recall-0.25": 0.978333, "recall-0.5": 0.996667, "num_labels": 600 }, + "ankle_right": { + "count": 597, + "mean": 0.085279, + "median": 0.069562, + "std": 0.05552, + "sem": 0.002274, + "min": 0.031135, + "max": 0.445133, + "recall-0.025": 0.0, + "recall-0.05": 0.015, + "recall-0.1": 0.878333, + "recall-0.15": 0.901667, + "recall-0.25": 0.973333, + "recall-0.5": 0.995, + "num_labels": 600 + }, "joint_recalls": { "num_labels": 7800, - "recall-0.025": 0.16897, - "recall-0.05": 0.47551, - "recall-0.1": 0.81936, - "recall-0.15": 0.92923, - "recall-0.25": 0.99333, - "recall-0.5": 0.99949 + "recall-0.025": 0.17013, + "recall-0.05": 0.47244, + "recall-0.1": 0.81949, + 
"recall-0.15": 0.92897, + "recall-0.25": 0.99244, + "recall-0.5": 0.99936 } } { "total_parts": 8400, - "correct_parts": 8091, - "pcp": 0.963214 + "correct_parts": 8089, + "pcp": 0.962976 } ``` diff --git a/scripts/test_triangulate.py b/scripts/test_triangulate.py index 0844130..5694d42 100644 --- a/scripts/test_triangulate.py +++ b/scripts/test_triangulate.py @@ -220,7 +220,7 @@ def update_sample(sample, new_dir=""): def load_image(path: str): image = cv2.imread(path, 3) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = np.array(image, dtype=np.float32) + image = np.asarray(image, dtype=np.uint8) return image diff --git a/scripts/utils_2d_pose_ort.py b/scripts/utils_2d_pose_ort.py index d48bed4..a61cf67 100644 --- a/scripts/utils_2d_pose_ort.py +++ b/scripts/utils_2d_pose_ort.py @@ -35,6 +35,8 @@ class BaseModel(ABC): input_type = self.session.get_inputs()[0].type if input_type == "tensor(float16)": self.input_type = np.float16 + elif input_type == "tensor(uint8)": + self.input_type = np.uint8 else: self.input_type = np.float32 @@ -152,12 +154,12 @@ class RTMDet(BaseModel): return boxes[keep_indices] def preprocess(self, image: np.ndarray): - th, tw = self.input_shape[2:] + th, tw = self.input_shape[1:3] image, self.dx, self.dy, self.scale = self.letterbox( image, (tw, th), fill_value=114 ) tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1] - tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)) + tensor = np.expand_dims(tensor, axis=0) return tensor def postprocess(self, tensor: List[np.ndarray]): @@ -274,10 +276,10 @@ class RTMPose(BaseModel): return extracted_region, new_box, scale def preprocess(self, image: np.ndarray, bbox: np.ndarray): - th, tw = self.input_shape[2:] + th, tw = self.input_shape[1:3] region, self.bbox, _ = self.region_of_interest_warped(image, bbox, (tw, th)) tensor = np.asarray(region).astype(self.input_type, copy=False) - tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)) + tensor = 
np.expand_dims(tensor, axis=0) return tensor def postprocess(self, tensor: List[np.ndarray], **kwargs): @@ -285,7 +287,7 @@ class RTMPose(BaseModel): kp = np.concatenate([tensor[0][0], np.expand_dims(scores, axis=-1)], axis=-1) # See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample() - th, tw = self.input_shape[2:] + th, tw = self.input_shape[1:3] bw, bh = [self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]] kp[:, :2] /= np.array([tw, th]) kp[:, :2] *= np.array([bw, bh]) @@ -331,10 +333,10 @@ def load_model(): print("Loading onnx model ...") model = TopDown( - # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_with-norm.onnx", - "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_with-norm.onnx", - # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_with-norm.onnx", - "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_with-norm.onnx", + # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_extra-steps.onnx", + "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx", + # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_extra-steps.onnx", + "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx", conf_threshold=0.3, iou_threshold=0.3, warmup=30,