Some further speedups.

This commit is contained in:
Daniel
2024-12-02 17:00:04 +01:00
parent c8c48e4bf2
commit dc44a71b2c
4 changed files with 202 additions and 179 deletions

View File

@ -1,6 +1,6 @@
import numpy as np
import onnx
from onnx import helper, numpy_helper, TensorProto
from onnx import TensorProto, helper, numpy_helper
# ==================================================================================================
@ -45,10 +45,8 @@ def add_steps_to_onnx(model_path):
# Define layer names, assuming the first input is the image tensor
input_name = graph.input[0].name
# Set input type to always be float32
graph.input[0].type.tensor_type.elem_type = TensorProto.FLOAT
# Create to cast the float32 if needed
# Cast to internal type
# This has to be the first node, because TensorRT does not support uint8 layers
cast_type = 10 if use_fp16 else 1
casted_output = "casted_output"
cast_node = helper.make_node(
@ -58,11 +56,21 @@ def add_steps_to_onnx(model_path):
to=cast_type,
)
# Node to transpose
transpose_output = "transpose_output"
transpose_node = helper.make_node(
"Transpose",
inputs=[casted_output],
outputs=[transpose_output],
perm=[0, 3, 1, 2],
name="Transpose",
)
# Node to add mean
mean_added_output = "mean_added_output"
mean_add_node = helper.make_node(
"Add",
inputs=[casted_output, "norm_mean"],
inputs=[transpose_output, "norm_mean"],
outputs=[mean_added_output],
name="Mean_Addition",
)
@ -84,13 +92,26 @@ def add_steps_to_onnx(model_path):
# Add the new nodes to the graph
graph.node.insert(0, cast_node)
graph.node.insert(1, mean_add_node)
graph.node.insert(2, std_mul_node)
graph.node.insert(1, transpose_node)
graph.node.insert(2, mean_add_node)
graph.node.insert(3, std_mul_node)
path = model_path.replace(".onnx", "_with-norm.onnx")
# Transpose the input shape
input_shape = graph.input[0].type.tensor_type.shape.dim
dims = [dim.dim_value for dim in input_shape]
for i, j in enumerate([0, 3, 1, 2]):
input_shape[j].dim_value = dims[i]
# Set input type to uint8
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
path = model_path.replace(".onnx", "_extra-steps.onnx")
onnx.save(model, path)
# ==================================================================================================
def main():
add_steps_to_onnx(pose_model_path)
add_steps_to_onnx(det_model_path)

View File

@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
```json
{
"avg_time_2d": 0.019079747846571064,
"avg_time_3d": 0.0003823995590209961,
"avg_fps": 51.381791492991674
"avg_time_2d": 0.016608773651769607,
"avg_time_3d": 0.00034795211533368645,
"avg_fps": 58.97364937870487
}
{
"person_nums": {
@ -28,247 +28,247 @@ Results of the model in various experiments on different datasets.
"mpjpe": {
"count": 600,
"mean": 0.067471,
"median": 0.059656,
"std": 0.027837,
"sem": 0.001137,
"min": 0.04161,
"max": 0.191019,
"median": 0.0592,
"std": 0.02795,
"sem": 0.001142,
"min": 0.042592,
"max": 0.189987,
"recall-0.025": 0.0,
"recall-0.05": 0.045,
"recall-0.1": 0.93,
"recall-0.05": 0.048333,
"recall-0.1": 0.925,
"recall-0.15": 0.95,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600,
"ap-0.025": 0.0,
"ap-0.05": 0.005008,
"ap-0.1": 0.890421,
"ap-0.15": 0.915856,
"ap-0.05": 0.004097,
"ap-0.1": 0.885305,
"ap-0.15": 0.915769,
"ap-0.25": 1.0,
"ap-0.5": 1.0
},
"nose": {
"count": 600,
"mean": 0.116291,
"median": 0.101326,
"std": 0.04236,
"sem": 0.001731,
"min": 0.021073,
"max": 0.288626,
"recall-0.025": 0.001667,
"recall-0.05": 0.006667,
"recall-0.1": 0.491667,
"recall-0.15": 0.821667,
"recall-0.25": 0.99,
"mean": 0.115621,
"median": 0.100161,
"std": 0.041657,
"sem": 0.001702,
"min": 0.031411,
"max": 0.276464,
"recall-0.025": 0.0,
"recall-0.05": 0.01,
"recall-0.1": 0.498333,
"recall-0.15": 0.826667,
"recall-0.25": 0.993333,
"recall-0.5": 1.0,
"num_labels": 600
},
"shoulder_left": {
"count": 600,
"mean": 0.033847,
"median": 0.025987,
"std": 0.031824,
"sem": 0.0013,
"min": 0.002728,
"max": 0.181894,
"recall-0.025": 0.466667,
"recall-0.05": 0.866667,
"recall-0.1": 0.948333,
"recall-0.15": 0.968333,
"mean": 0.033598,
"median": 0.025444,
"std": 0.032078,
"sem": 0.001311,
"min": 0.001187,
"max": 0.181528,
"recall-0.025": 0.486667,
"recall-0.05": 0.865,
"recall-0.1": 0.946667,
"recall-0.15": 0.965,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"shoulder_right": {
"count": 600,
"mean": 0.04973,
"median": 0.034604,
"std": 0.044052,
"sem": 0.0018,
"min": 0.00395,
"max": 0.256048,
"recall-0.025": 0.211667,
"recall-0.05": 0.753333,
"recall-0.1": 0.906667,
"recall-0.15": 0.936667,
"mean": 0.049243,
"median": 0.033956,
"std": 0.042808,
"sem": 0.001749,
"min": 0.004642,
"max": 0.255344,
"recall-0.025": 0.218333,
"recall-0.05": 0.748333,
"recall-0.1": 0.901667,
"recall-0.15": 0.941667,
"recall-0.25": 0.998333,
"recall-0.5": 1.0,
"num_labels": 600
},
"elbow_left": {
"count": 600,
"mean": 0.042919,
"median": 0.035054,
"std": 0.034438,
"sem": 0.001407,
"min": 0.00136,
"max": 0.198368,
"recall-0.025": 0.246667,
"recall-0.05": 0.825,
"recall-0.1": 0.948333,
"recall-0.15": 0.955,
"mean": 0.043499,
"median": 0.035409,
"std": 0.034789,
"sem": 0.001421,
"min": 0.002463,
"max": 0.200682,
"recall-0.025": 0.243333,
"recall-0.05": 0.8,
"recall-0.1": 0.945,
"recall-0.15": 0.953333,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"elbow_right": {
"count": 600,
"mean": 0.043901,
"median": 0.033077,
"std": 0.036324,
"sem": 0.001484,
"min": 0.006465,
"max": 0.274304,
"recall-0.025": 0.241667,
"recall-0.05": 0.81,
"recall-0.1": 0.925,
"recall-0.15": 0.943333,
"recall-0.25": 0.998333,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_left": {
"count": 600,
"mean": 0.043044,
"median": 0.026871,
"std": 0.04257,
"sem": 0.001739,
"min": 0.000214,
"max": 0.202968,
"recall-0.025": 0.466667,
"recall-0.05": 0.731667,
"recall-0.1": 0.903333,
"mean": 0.043289,
"median": 0.032684,
"std": 0.035003,
"sem": 0.00143,
"min": 0.007037,
"max": 0.202309,
"recall-0.025": 0.255,
"recall-0.05": 0.805,
"recall-0.1": 0.931667,
"recall-0.15": 0.941667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_left": {
"count": 600,
"mean": 0.043376,
"median": 0.027016,
"std": 0.044176,
"sem": 0.001805,
"min": 0.000972,
"max": 0.340542,
"recall-0.025": 0.466667,
"recall-0.05": 0.728333,
"recall-0.1": 0.905,
"recall-0.15": 0.941667,
"recall-0.25": 0.998333,
"recall-0.5": 1.0,
"num_labels": 600
},
"wrist_right": {
"count": 600,
"mean": 0.044102,
"median": 0.026291,
"std": 0.050657,
"sem": 0.00207,
"min": 0.003409,
"max": 0.460683,
"recall-0.025": 0.466667,
"mean": 0.044908,
"median": 0.027102,
"std": 0.052541,
"sem": 0.002147,
"min": 0.001728,
"max": 0.485231,
"recall-0.025": 0.448333,
"recall-0.05": 0.776667,
"recall-0.1": 0.895,
"recall-0.15": 0.915,
"recall-0.25": 0.996667,
"recall-0.1": 0.893333,
"recall-0.15": 0.911667,
"recall-0.25": 0.995,
"recall-0.5": 1.0,
"num_labels": 600
},
"hip_left": {
"count": 600,
"mean": 0.090158,
"median": 0.08564,
"std": 0.032791,
"sem": 0.00134,
"min": 0.014562,
"max": 0.236403,
"recall-0.025": 0.001667,
"recall-0.05": 0.036667,
"recall-0.1": 0.82,
"recall-0.15": 0.945,
"mean": 0.089001,
"median": 0.085342,
"std": 0.032716,
"sem": 0.001337,
"min": 0.007027,
"max": 0.235465,
"recall-0.025": 0.008333,
"recall-0.05": 0.031667,
"recall-0.1": 0.815,
"recall-0.15": 0.948333,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"hip_right": {
"count": 600,
"mean": 0.11407,
"median": 0.114481,
"std": 0.02606,
"sem": 0.001065,
"min": 0.045083,
"max": 0.233275,
"mean": 0.113299,
"median": 0.113584,
"std": 0.026162,
"sem": 0.001069,
"min": 0.04703,
"max": 0.230987,
"recall-0.025": 0.0,
"recall-0.05": 0.001667,
"recall-0.1": 0.251667,
"recall-0.15": 0.948333,
"recall-0.1": 0.261667,
"recall-0.15": 0.946667,
"recall-0.25": 1.0,
"recall-0.5": 1.0,
"num_labels": 600
},
"knee_left": {
"count": 600,
"mean": 0.061433,
"median": 0.045616,
"std": 0.060807,
"sem": 0.002484,
"min": 0.018825,
"max": 0.425003,
"recall-0.025": 0.058333,
"recall-0.05": 0.593333,
"recall-0.1": 0.915,
"recall-0.15": 0.923333,
"recall-0.25": 0.98,
"mean": 0.062069,
"median": 0.044729,
"std": 0.06187,
"sem": 0.002528,
"min": 0.017903,
"max": 0.431859,
"recall-0.025": 0.06,
"recall-0.05": 0.591667,
"recall-0.1": 0.913333,
"recall-0.15": 0.92,
"recall-0.25": 0.978333,
"recall-0.5": 1.0,
"num_labels": 600
},
"knee_right": {
"count": 600,
"mean": 0.050724,
"median": 0.042002,
"std": 0.036168,
"sem": 0.001478,
"min": 0.016654,
"max": 0.257622,
"recall-0.025": 0.038333,
"recall-0.05": 0.76,
"mean": 0.050915,
"median": 0.04249,
"std": 0.036278,
"sem": 0.001482,
"min": 0.015193,
"max": 0.263834,
"recall-0.025": 0.033333,
"recall-0.05": 0.766667,
"recall-0.1": 0.941667,
"recall-0.15": 0.945,
"recall-0.25": 0.998333,
"recall-0.25": 0.996667,
"recall-0.5": 1.0,
"num_labels": 600
},
"ankle_left": {
"count": 599,
"mean": 0.09891,
"median": 0.086872,
"std": 0.051463,
"sem": 0.002104,
"min": 0.035674,
"max": 0.490677,
"recall-0.025": 0.0,
"recall-0.05": 0.003333,
"recall-0.1": 0.831667,
"recall-0.15": 0.936667,
"recall-0.25": 0.98,
"recall-0.5": 0.998333,
"num_labels": 600
},
"ankle_right": {
"count": 598,
"mean": 0.085207,
"median": 0.069747,
"std": 0.054773,
"sem": 0.002242,
"min": 0.030417,
"max": 0.440549,
"mean": 0.098393,
"median": 0.086077,
"std": 0.050788,
"sem": 0.002079,
"min": 0.036989,
"max": 0.49288,
"recall-0.025": 0.0,
"recall-0.05": 0.021667,
"recall-0.1": 0.88,
"recall-0.15": 0.906667,
"recall-0.05": 0.005,
"recall-0.1": 0.83,
"recall-0.15": 0.936667,
"recall-0.25": 0.978333,
"recall-0.5": 0.996667,
"num_labels": 600
},
"ankle_right": {
"count": 597,
"mean": 0.085279,
"median": 0.069562,
"std": 0.05552,
"sem": 0.002274,
"min": 0.031135,
"max": 0.445133,
"recall-0.025": 0.0,
"recall-0.05": 0.015,
"recall-0.1": 0.878333,
"recall-0.15": 0.901667,
"recall-0.25": 0.973333,
"recall-0.5": 0.995,
"num_labels": 600
},
"joint_recalls": {
"num_labels": 7800,
"recall-0.025": 0.16897,
"recall-0.05": 0.47551,
"recall-0.1": 0.81936,
"recall-0.15": 0.92923,
"recall-0.25": 0.99333,
"recall-0.5": 0.99949
"recall-0.025": 0.17013,
"recall-0.05": 0.47244,
"recall-0.1": 0.81949,
"recall-0.15": 0.92897,
"recall-0.25": 0.99244,
"recall-0.5": 0.99936
}
}
{
"total_parts": 8400,
"correct_parts": 8091,
"pcp": 0.963214
"correct_parts": 8089,
"pcp": 0.962976
}
```

View File

@ -220,7 +220,7 @@ def update_sample(sample, new_dir=""):
def load_image(path: str):
image = cv2.imread(path, 3)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = np.array(image, dtype=np.float32)
image = np.asarray(image, dtype=np.uint8)
return image

View File

@ -35,6 +35,8 @@ class BaseModel(ABC):
input_type = self.session.get_inputs()[0].type
if input_type == "tensor(float16)":
self.input_type = np.float16
elif input_type == "tensor(uint8)":
self.input_type = np.uint8
else:
self.input_type = np.float32
@ -152,12 +154,12 @@ class RTMDet(BaseModel):
return boxes[keep_indices]
def preprocess(self, image: np.ndarray):
th, tw = self.input_shape[2:]
th, tw = self.input_shape[1:3]
image, self.dx, self.dy, self.scale = self.letterbox(
image, (tw, th), fill_value=114
)
tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1]
tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
tensor = np.expand_dims(tensor, axis=0)
return tensor
def postprocess(self, tensor: List[np.ndarray]):
@ -274,10 +276,10 @@ class RTMPose(BaseModel):
return extracted_region, new_box, scale
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
th, tw = self.input_shape[2:]
th, tw = self.input_shape[1:3]
region, self.bbox, _ = self.region_of_interest_warped(image, bbox, (tw, th))
tensor = np.asarray(region).astype(self.input_type, copy=False)
tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
tensor = np.expand_dims(tensor, axis=0)
return tensor
def postprocess(self, tensor: List[np.ndarray], **kwargs):
@ -285,7 +287,7 @@ class RTMPose(BaseModel):
kp = np.concatenate([tensor[0][0], np.expand_dims(scores, axis=-1)], axis=-1)
# See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample()
th, tw = self.input_shape[2:]
th, tw = self.input_shape[1:3]
bw, bh = [self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]]
kp[:, :2] /= np.array([tw, th])
kp[:, :2] *= np.array([bw, bh])
@ -331,10 +333,10 @@ def load_model():
print("Loading onnx model ...")
model = TopDown(
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_with-norm.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_with-norm.onnx",
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_with-norm.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_with-norm.onnx",
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_extra-steps.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx",
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_extra-steps.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx",
conf_threshold=0.3,
iou_threshold=0.3,
warmup=30,