Some further speedups.

This commit is contained in:
Daniel
2024-12-02 17:00:04 +01:00
parent c8c48e4bf2
commit dc44a71b2c
4 changed files with 202 additions and 179 deletions

View File

@ -1,6 +1,6 @@
import numpy as np import numpy as np
import onnx import onnx
from onnx import helper, numpy_helper, TensorProto from onnx import TensorProto, helper, numpy_helper
# ================================================================================================== # ==================================================================================================
@ -45,10 +45,8 @@ def add_steps_to_onnx(model_path):
# Define layer names, assuming the first input is the image tensor # Define layer names, assuming the first input is the image tensor
input_name = graph.input[0].name input_name = graph.input[0].name
# Set input type to always be float32 # Cast to internal type
graph.input[0].type.tensor_type.elem_type = TensorProto.FLOAT # This has to be the first node, because tensorrt does not support uint8 layers
# Create to cast the float32 if needed
cast_type = 10 if use_fp16 else 1 cast_type = 10 if use_fp16 else 1
casted_output = "casted_output" casted_output = "casted_output"
cast_node = helper.make_node( cast_node = helper.make_node(
@ -58,11 +56,21 @@ def add_steps_to_onnx(model_path):
to=cast_type, to=cast_type,
) )
# Node to transpose
transpose_output = "transpose_output"
transpose_node = helper.make_node(
"Transpose",
inputs=[casted_output],
outputs=[transpose_output],
perm=[0, 3, 1, 2],
name="Transpose",
)
# Node to add mean # Node to add mean
mean_added_output = "mean_added_output" mean_added_output = "mean_added_output"
mean_add_node = helper.make_node( mean_add_node = helper.make_node(
"Add", "Add",
inputs=[casted_output, "norm_mean"], inputs=[transpose_output, "norm_mean"],
outputs=[mean_added_output], outputs=[mean_added_output],
name="Mean_Addition", name="Mean_Addition",
) )
@ -84,13 +92,26 @@ def add_steps_to_onnx(model_path):
# Add the new nodes to the graph # Add the new nodes to the graph
graph.node.insert(0, cast_node) graph.node.insert(0, cast_node)
graph.node.insert(1, mean_add_node) graph.node.insert(1, transpose_node)
graph.node.insert(2, std_mul_node) graph.node.insert(2, mean_add_node)
graph.node.insert(3, std_mul_node)
path = model_path.replace(".onnx", "_with-norm.onnx") # Transpose the input shape
input_shape = graph.input[0].type.tensor_type.shape.dim
dims = [dim.dim_value for dim in input_shape]
for i, j in enumerate([0, 3, 1, 2]):
input_shape[j].dim_value = dims[i]
# Set input type to uint8
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
path = model_path.replace(".onnx", "_extra-steps.onnx")
onnx.save(model, path) onnx.save(model, path)
# ==================================================================================================
def main(): def main():
add_steps_to_onnx(pose_model_path) add_steps_to_onnx(pose_model_path)
add_steps_to_onnx(det_model_path) add_steps_to_onnx(det_model_path)

View File

@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
```json ```json
{ {
"avg_time_2d": 0.019079747846571064, "avg_time_2d": 0.016608773651769607,
"avg_time_3d": 0.0003823995590209961, "avg_time_3d": 0.00034795211533368645,
"avg_fps": 51.381791492991674 "avg_fps": 58.97364937870487
} }
{ {
"person_nums": { "person_nums": {
@ -28,247 +28,247 @@ Results of the model in various experiments on different datasets.
"mpjpe": { "mpjpe": {
"count": 600, "count": 600,
"mean": 0.067471, "mean": 0.067471,
"median": 0.059656, "median": 0.0592,
"std": 0.027837, "std": 0.02795,
"sem": 0.001137, "sem": 0.001142,
"min": 0.04161, "min": 0.042592,
"max": 0.191019, "max": 0.189987,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.045, "recall-0.05": 0.048333,
"recall-0.1": 0.93, "recall-0.1": 0.925,
"recall-0.15": 0.95, "recall-0.15": 0.95,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600, "num_labels": 600,
"ap-0.025": 0.0, "ap-0.025": 0.0,
"ap-0.05": 0.005008, "ap-0.05": 0.004097,
"ap-0.1": 0.890421, "ap-0.1": 0.885305,
"ap-0.15": 0.915856, "ap-0.15": 0.915769,
"ap-0.25": 1.0, "ap-0.25": 1.0,
"ap-0.5": 1.0 "ap-0.5": 1.0
}, },
"nose": { "nose": {
"count": 600, "count": 600,
"mean": 0.116291, "mean": 0.115621,
"median": 0.101326, "median": 0.100161,
"std": 0.04236, "std": 0.041657,
"sem": 0.001731, "sem": 0.001702,
"min": 0.021073, "min": 0.031411,
"max": 0.288626, "max": 0.276464,
"recall-0.025": 0.001667, "recall-0.025": 0.0,
"recall-0.05": 0.006667, "recall-0.05": 0.01,
"recall-0.1": 0.491667, "recall-0.1": 0.498333,
"recall-0.15": 0.821667, "recall-0.15": 0.826667,
"recall-0.25": 0.99, "recall-0.25": 0.993333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"shoulder_left": { "shoulder_left": {
"count": 600, "count": 600,
"mean": 0.033847, "mean": 0.033598,
"median": 0.025987, "median": 0.025444,
"std": 0.031824, "std": 0.032078,
"sem": 0.0013, "sem": 0.001311,
"min": 0.002728, "min": 0.001187,
"max": 0.181894, "max": 0.181528,
"recall-0.025": 0.466667, "recall-0.025": 0.486667,
"recall-0.05": 0.866667, "recall-0.05": 0.865,
"recall-0.1": 0.948333, "recall-0.1": 0.946667,
"recall-0.15": 0.968333, "recall-0.15": 0.965,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"shoulder_right": { "shoulder_right": {
"count": 600, "count": 600,
"mean": 0.04973, "mean": 0.049243,
"median": 0.034604, "median": 0.033956,
"std": 0.044052, "std": 0.042808,
"sem": 0.0018, "sem": 0.001749,
"min": 0.00395, "min": 0.004642,
"max": 0.256048, "max": 0.255344,
"recall-0.025": 0.211667, "recall-0.025": 0.218333,
"recall-0.05": 0.753333, "recall-0.05": 0.748333,
"recall-0.1": 0.906667, "recall-0.1": 0.901667,
"recall-0.15": 0.936667, "recall-0.15": 0.941667,
"recall-0.25": 0.998333, "recall-0.25": 0.998333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_left": { "elbow_left": {
"count": 600, "count": 600,
"mean": 0.042919, "mean": 0.043499,
"median": 0.035054, "median": 0.035409,
"std": 0.034438, "std": 0.034789,
"sem": 0.001407, "sem": 0.001421,
"min": 0.00136, "min": 0.002463,
"max": 0.198368, "max": 0.200682,
"recall-0.025": 0.246667, "recall-0.025": 0.243333,
"recall-0.05": 0.825, "recall-0.05": 0.8,
"recall-0.1": 0.948333, "recall-0.1": 0.945,
"recall-0.15": 0.955, "recall-0.15": 0.953333,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"elbow_right": { "elbow_right": {
"count": 600, "count": 600,
"mean": 0.043901, "mean": 0.043289,
"median": 0.033077, "median": 0.032684,
"std": 0.036324, "std": 0.035003,
"sem": 0.001484, "sem": 0.00143,
"min": 0.006465, "min": 0.007037,
"max": 0.274304, "max": 0.202309,
"recall-0.025": 0.241667, "recall-0.025": 0.255,
"recall-0.05": 0.81, "recall-0.05": 0.805,
"recall-0.1": 0.925, "recall-0.1": 0.931667,
"recall-0.15": 0.943333,
"recall-0.25": 0.998333,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_left": {
"count": 600,
"mean": 0.043044,
"median": 0.026871,
"std": 0.04257,
"sem": 0.001739,
"min": 0.000214,
"max": 0.202968,
"recall-0.025": 0.466667,
"recall-0.05": 0.731667,
"recall-0.1": 0.903333,
"recall-0.15": 0.941667, "recall-0.15": 0.941667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"wrist_left": {
"count": 600,
"mean": 0.043376,
"median": 0.027016,
"std": 0.044176,
"sem": 0.001805,
"min": 0.000972,
"max": 0.340542,
"recall-0.025": 0.466667,
"recall-0.05": 0.728333,
"recall-0.1": 0.905,
"recall-0.15": 0.941667,
"recall-0.25": 0.998333,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_right": { "wrist_right": {
"count": 600, "count": 600,
"mean": 0.044102, "mean": 0.044908,
"median": 0.026291, "median": 0.027102,
"std": 0.050657, "std": 0.052541,
"sem": 0.00207, "sem": 0.002147,
"min": 0.003409, "min": 0.001728,
"max": 0.460683, "max": 0.485231,
"recall-0.025": 0.466667, "recall-0.025": 0.448333,
"recall-0.05": 0.776667, "recall-0.05": 0.776667,
"recall-0.1": 0.895, "recall-0.1": 0.893333,
"recall-0.15": 0.915, "recall-0.15": 0.911667,
"recall-0.25": 0.996667, "recall-0.25": 0.995,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"hip_left": { "hip_left": {
"count": 600, "count": 600,
"mean": 0.090158, "mean": 0.089001,
"median": 0.08564, "median": 0.085342,
"std": 0.032791, "std": 0.032716,
"sem": 0.00134, "sem": 0.001337,
"min": 0.014562, "min": 0.007027,
"max": 0.236403, "max": 0.235465,
"recall-0.025": 0.001667, "recall-0.025": 0.008333,
"recall-0.05": 0.036667, "recall-0.05": 0.031667,
"recall-0.1": 0.82, "recall-0.1": 0.815,
"recall-0.15": 0.945, "recall-0.15": 0.948333,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"hip_right": { "hip_right": {
"count": 600, "count": 600,
"mean": 0.11407, "mean": 0.113299,
"median": 0.114481, "median": 0.113584,
"std": 0.02606, "std": 0.026162,
"sem": 0.001065, "sem": 0.001069,
"min": 0.045083, "min": 0.04703,
"max": 0.233275, "max": 0.230987,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.001667, "recall-0.05": 0.001667,
"recall-0.1": 0.251667, "recall-0.1": 0.261667,
"recall-0.15": 0.948333, "recall-0.15": 0.946667,
"recall-0.25": 1.0, "recall-0.25": 1.0,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"knee_left": { "knee_left": {
"count": 600, "count": 600,
"mean": 0.061433, "mean": 0.062069,
"median": 0.045616, "median": 0.044729,
"std": 0.060807, "std": 0.06187,
"sem": 0.002484, "sem": 0.002528,
"min": 0.018825, "min": 0.017903,
"max": 0.425003, "max": 0.431859,
"recall-0.025": 0.058333, "recall-0.025": 0.06,
"recall-0.05": 0.593333, "recall-0.05": 0.591667,
"recall-0.1": 0.915, "recall-0.1": 0.913333,
"recall-0.15": 0.923333, "recall-0.15": 0.92,
"recall-0.25": 0.98, "recall-0.25": 0.978333,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"knee_right": { "knee_right": {
"count": 600, "count": 600,
"mean": 0.050724, "mean": 0.050915,
"median": 0.042002, "median": 0.04249,
"std": 0.036168, "std": 0.036278,
"sem": 0.001478, "sem": 0.001482,
"min": 0.016654, "min": 0.015193,
"max": 0.257622, "max": 0.263834,
"recall-0.025": 0.038333, "recall-0.025": 0.033333,
"recall-0.05": 0.76, "recall-0.05": 0.766667,
"recall-0.1": 0.941667, "recall-0.1": 0.941667,
"recall-0.15": 0.945, "recall-0.15": 0.945,
"recall-0.25": 0.998333, "recall-0.25": 0.996667,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 600 "num_labels": 600
}, },
"ankle_left": { "ankle_left": {
"count": 599,
"mean": 0.09891,
"median": 0.086872,
"std": 0.051463,
"sem": 0.002104,
"min": 0.035674,
"max": 0.490677,
"recall-0.025": 0.0,
"recall-0.05": 0.003333,
"recall-0.1": 0.831667,
"recall-0.15": 0.936667,
"recall-0.25": 0.98,
"recall-0.5": 0.998333,
"num_labels": 600
},
"ankle_right": {
"count": 598, "count": 598,
"mean": 0.085207, "mean": 0.098393,
"median": 0.069747, "median": 0.086077,
"std": 0.054773, "std": 0.050788,
"sem": 0.002242, "sem": 0.002079,
"min": 0.030417, "min": 0.036989,
"max": 0.440549, "max": 0.49288,
"recall-0.025": 0.0, "recall-0.025": 0.0,
"recall-0.05": 0.021667, "recall-0.05": 0.005,
"recall-0.1": 0.88, "recall-0.1": 0.83,
"recall-0.15": 0.906667, "recall-0.15": 0.936667,
"recall-0.25": 0.978333, "recall-0.25": 0.978333,
"recall-0.5": 0.996667, "recall-0.5": 0.996667,
"num_labels": 600 "num_labels": 600
}, },
"ankle_right": {
"count": 597,
"mean": 0.085279,
"median": 0.069562,
"std": 0.05552,
"sem": 0.002274,
"min": 0.031135,
"max": 0.445133,
"recall-0.025": 0.0,
"recall-0.05": 0.015,
"recall-0.1": 0.878333,
"recall-0.15": 0.901667,
"recall-0.25": 0.973333,
"recall-0.5": 0.995,
"num_labels": 600
},
"joint_recalls": { "joint_recalls": {
"num_labels": 7800, "num_labels": 7800,
"recall-0.025": 0.16897, "recall-0.025": 0.17013,
"recall-0.05": 0.47551, "recall-0.05": 0.47244,
"recall-0.1": 0.81936, "recall-0.1": 0.81949,
"recall-0.15": 0.92923, "recall-0.15": 0.92897,
"recall-0.25": 0.99333, "recall-0.25": 0.99244,
"recall-0.5": 0.99949 "recall-0.5": 0.99936
} }
} }
{ {
"total_parts": 8400, "total_parts": 8400,
"correct_parts": 8091, "correct_parts": 8089,
"pcp": 0.963214 "pcp": 0.962976
} }
``` ```

View File

@ -220,7 +220,7 @@ def update_sample(sample, new_dir=""):
def load_image(path: str): def load_image(path: str):
image = cv2.imread(path, 3) image = cv2.imread(path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = np.array(image, dtype=np.float32) image = np.asarray(image, dtype=np.uint8)
return image return image

View File

@ -35,6 +35,8 @@ class BaseModel(ABC):
input_type = self.session.get_inputs()[0].type input_type = self.session.get_inputs()[0].type
if input_type == "tensor(float16)": if input_type == "tensor(float16)":
self.input_type = np.float16 self.input_type = np.float16
elif input_type == "tensor(uint8)":
self.input_type = np.uint8
else: else:
self.input_type = np.float32 self.input_type = np.float32
@ -152,12 +154,12 @@ class RTMDet(BaseModel):
return boxes[keep_indices] return boxes[keep_indices]
def preprocess(self, image: np.ndarray): def preprocess(self, image: np.ndarray):
th, tw = self.input_shape[2:] th, tw = self.input_shape[1:3]
image, self.dx, self.dy, self.scale = self.letterbox( image, self.dx, self.dy, self.scale = self.letterbox(
image, (tw, th), fill_value=114 image, (tw, th), fill_value=114
) )
tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1] tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1]
tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)) tensor = np.expand_dims(tensor, axis=0)
return tensor return tensor
def postprocess(self, tensor: List[np.ndarray]): def postprocess(self, tensor: List[np.ndarray]):
@ -274,10 +276,10 @@ class RTMPose(BaseModel):
return extracted_region, new_box, scale return extracted_region, new_box, scale
def preprocess(self, image: np.ndarray, bbox: np.ndarray): def preprocess(self, image: np.ndarray, bbox: np.ndarray):
th, tw = self.input_shape[2:] th, tw = self.input_shape[1:3]
region, self.bbox, _ = self.region_of_interest_warped(image, bbox, (tw, th)) region, self.bbox, _ = self.region_of_interest_warped(image, bbox, (tw, th))
tensor = np.asarray(region).astype(self.input_type, copy=False) tensor = np.asarray(region).astype(self.input_type, copy=False)
tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)) tensor = np.expand_dims(tensor, axis=0)
return tensor return tensor
def postprocess(self, tensor: List[np.ndarray], **kwargs): def postprocess(self, tensor: List[np.ndarray], **kwargs):
@ -285,7 +287,7 @@ class RTMPose(BaseModel):
kp = np.concatenate([tensor[0][0], np.expand_dims(scores, axis=-1)], axis=-1) kp = np.concatenate([tensor[0][0], np.expand_dims(scores, axis=-1)], axis=-1)
# See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample() # See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample()
th, tw = self.input_shape[2:] th, tw = self.input_shape[1:3]
bw, bh = [self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]] bw, bh = [self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]]
kp[:, :2] /= np.array([tw, th]) kp[:, :2] /= np.array([tw, th])
kp[:, :2] *= np.array([bw, bh]) kp[:, :2] *= np.array([bw, bh])
@ -331,10 +333,10 @@ def load_model():
print("Loading onnx model ...") print("Loading onnx model ...")
model = TopDown( model = TopDown(
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_with-norm.onnx", # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_extra-steps.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_with-norm.onnx", "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx",
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_with-norm.onnx", # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_extra-steps.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_with-norm.onnx", "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx",
conf_threshold=0.3, conf_threshold=0.3,
iou_threshold=0.3, iou_threshold=0.3,
warmup=30, warmup=30,