Some further speedups.

2024-12-02 17:00:04 +01:00
parent c8c48e4bf2
commit dc44a71b2c
4 changed files with 202 additions and 179 deletions
--- a/extras/mmdeploy/add_extra_steps.py
+++ b/extras/mmdeploy/add_extra_steps.py
@ -1,6 +1,6 @@
 import numpy as np
 import onnx
-from onnx import helper, numpy_helper, TensorProto
+from onnx import TensorProto, helper, numpy_helper

 # ==================================================================================================

@ -45,10 +45,8 @@ def add_steps_to_onnx(model_path):
    # Define layer names, assuming the first input is the image tensor
    input_name = graph.input[0].name

-    # Set input type to always be float32
-    graph.input[0].type.tensor_type.elem_type = TensorProto.FLOAT
-
-    # Create to cast the float32 if needed
+    # Cast to internal type
+    # This has to be the first node, because tensorrt does not support uint8 layers
    cast_type = 10 if use_fp16 else 1
    casted_output = "casted_output"
    cast_node = helper.make_node(
@ -58,11 +56,21 @@ def add_steps_to_onnx(model_path):
        to=cast_type,
    )

+    # Node to transpose
+    transpose_output = "transpose_output"
+    transpose_node = helper.make_node(
+        "Transpose",
+        inputs=[casted_output],
+        outputs=[transpose_output],
+        perm=[0, 3, 1, 2],
+        name="Transpose",
+    )
+
    # Node to add mean
    mean_added_output = "mean_added_output"
    mean_add_node = helper.make_node(
        "Add",
-        inputs=[casted_output, "norm_mean"],
+        inputs=[transpose_output, "norm_mean"],
        outputs=[mean_added_output],
        name="Mean_Addition",
    )
@ -84,13 +92,26 @@ def add_steps_to_onnx(model_path):

    # Add the new nodes to the graph
    graph.node.insert(0, cast_node)
-    graph.node.insert(1, mean_add_node)
-    graph.node.insert(2, std_mul_node)
+    graph.node.insert(1, transpose_node)
+    graph.node.insert(2, mean_add_node)
+    graph.node.insert(3, std_mul_node)

-    path = model_path.replace(".onnx", "_with-norm.onnx")
+    # Transpose the input shape
+    input_shape = graph.input[0].type.tensor_type.shape.dim
+    dims = [dim.dim_value for dim in input_shape]
+    for i, j in enumerate([0, 3, 1, 2]):
+        input_shape[j].dim_value = dims[i]
+
+    # Set input type to uint8
+    model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
+
+    path = model_path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)


+# ==================================================================================================
+
+
 def main():
    add_steps_to_onnx(pose_model_path)
    add_steps_to_onnx(det_model_path)