Some further speedups.

This commit is contained in:
Daniel
2024-12-02 17:00:04 +01:00
parent c8c48e4bf2
commit dc44a71b2c
4 changed files with 202 additions and 179 deletions

View File

@ -1,6 +1,6 @@
import numpy as np
import onnx
from onnx import helper, numpy_helper, TensorProto
from onnx import TensorProto, helper, numpy_helper
# ==================================================================================================
@ -45,10 +45,8 @@ def add_steps_to_onnx(model_path):
# Define layer names, assuming the first input is the image tensor
input_name = graph.input[0].name
# Set input type to always be float32
graph.input[0].type.tensor_type.elem_type = TensorProto.FLOAT
# Create to cast the float32 if needed
# Cast to internal type
# This has to be the first node, because tensorrt does not support uint8 layers
cast_type = 10 if use_fp16 else 1
casted_output = "casted_output"
cast_node = helper.make_node(
@ -58,11 +56,21 @@ def add_steps_to_onnx(model_path):
to=cast_type,
)
# Node to transpose
transpose_output = "transpose_output"
transpose_node = helper.make_node(
"Transpose",
inputs=[casted_output],
outputs=[transpose_output],
perm=[0, 3, 1, 2],
name="Transpose",
)
# Node to add mean
mean_added_output = "mean_added_output"
mean_add_node = helper.make_node(
"Add",
inputs=[casted_output, "norm_mean"],
inputs=[transpose_output, "norm_mean"],
outputs=[mean_added_output],
name="Mean_Addition",
)
@ -84,13 +92,26 @@ def add_steps_to_onnx(model_path):
# Add the new nodes to the graph
graph.node.insert(0, cast_node)
graph.node.insert(1, mean_add_node)
graph.node.insert(2, std_mul_node)
graph.node.insert(1, transpose_node)
graph.node.insert(2, mean_add_node)
graph.node.insert(3, std_mul_node)
path = model_path.replace(".onnx", "_with-norm.onnx")
# Transpose the input shape
input_shape = graph.input[0].type.tensor_type.shape.dim
dims = [dim.dim_value for dim in input_shape]
for i, j in enumerate([0, 3, 1, 2]):
input_shape[j].dim_value = dims[i]
# Set input type to uint8
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
path = model_path.replace(".onnx", "_extra-steps.onnx")
onnx.save(model, path)
# ==================================================================================================
def main():
add_steps_to_onnx(pose_model_path)
add_steps_to_onnx(det_model_path)