import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper

# ==================================================================================================
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
pose_model_path = base_path + "rtmpose-m_384x288.onnx"
det_model_path = base_path + "rtmdet-nano_320x320.onnx"

# ImageNet normalization folded into an add/multiply pair: (x - mean * 255) * (1 / (std * 255))
norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)


# ==================================================================================================
def add_steps_to_onnx(model_path):
    # Load existing model
    model = onnx.load(model_path)
    graph = model.graph

    # Reshape the normalization constants so they broadcast over NCHW tensors
    mean = np.reshape(norm_mean, (1, 3, 1, 1)).astype(np.float32)
    std = np.reshape(norm_std, (1, 3, 1, 1)).astype(np.float32)

    use_fp16 = "fp16" in model_path
    if use_fp16:
        mean = mean.astype(np.float16)
        std = std.astype(np.float16)

    # Add the initializers to the graph
    mean_initializer = numpy_helper.from_array(mean, name="norm_mean")
    std_initializer = numpy_helper.from_array(std, name="norm_std")
    graph.initializer.extend([mean_initializer, std_initializer])

    # Define layer names, assuming the first input is the image tensor
    input_name = graph.input[0].name

    # Cast to the internal compute type
    # This has to be the first node, because TensorRT does not support uint8 layers
    cast_type = TensorProto.FLOAT16 if use_fp16 else TensorProto.FLOAT
    casted_output = "casted_output"
    cast_node = helper.make_node(
        "Cast",
        inputs=[input_name],
        outputs=[casted_output],
        to=cast_type,
        name="Cast",
    )

    # Node to transpose from NHWC (image layout) to NCHW (model layout)
    transpose_output = "transpose_output"
    transpose_node = helper.make_node(
        "Transpose",
        inputs=[casted_output],
        outputs=[transpose_output],
        perm=[0, 3, 1, 2],
        name="Transpose",
    )

    # Node to add the (negative) mean
    mean_added_output = "mean_added_output"
    mean_add_node = helper.make_node(
        "Add",
        inputs=[transpose_output, "norm_mean"],
        outputs=[mean_added_output],
        name="Mean_Addition",
    )

    # Node to multiply by the reciprocal std
    std_mult_output = "std_mult_output"
    std_mul_node = helper.make_node(
        "Mul",
        inputs=[mean_added_output, "norm_std"],
        outputs=[std_mult_output],
        name="Std_Multiplication",
    )

    # Rewire the original consumers of the model input to the output of the normalization
    for node in graph.node:
        for idx, input_name_in_node in enumerate(node.input):
            if input_name_in_node == input_name:
                node.input[idx] = std_mult_output

    # Add the new nodes to the graph
    graph.node.insert(0, cast_node)
    graph.node.insert(1, transpose_node)
    graph.node.insert(2, mean_add_node)
    graph.node.insert(3, std_mul_node)

    # Transpose the declared input shape from NCHW to NHWC
    input_shape = graph.input[0].type.tensor_type.shape.dim
    dims = [dim.dim_value for dim in input_shape]
    for i, j in enumerate([0, 3, 1, 2]):
        input_shape[j].dim_value = dims[i]

    # Rename the input tensor
    main_input_image_name = graph.input[0].name
    for node in graph.node:
        for idx, name in enumerate(node.input):
            if name == main_input_image_name:
                node.input[idx] = "image_input"
    graph.input[0].name = "image_input"

    # Set the input image type to uint8
    graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8

    path = model_path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)


# ==================================================================================================
def main():
    add_steps_to_onnx(pose_model_path)
    add_steps_to_onnx(det_model_path)
    add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx"))
    add_steps_to_onnx(pose_model_path.replace(".onnx", "_fp16.onnx"))


# ==================================================================================================
if __name__ == "__main__":
    main()
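

# ==================================================================================================
# Optional sanity check (a minimal sketch, not part of the original export flow). It assumes
# onnxruntime is installed and that the "_extra-steps" model has already been written. The helper
# name, its placement here, and the example call below are illustrative additions: it validates the
# modified graph and feeds a random uint8 NHWC image through it to confirm the new preprocessing
# nodes run end to end.
def sanity_check(path):
    import onnxruntime as ort  # optional dependency, only needed for this check

    model = onnx.load(path)
    onnx.checker.check_model(model)

    # After add_steps_to_onnx, the declared input shape is NHWC and the input dtype is uint8
    dims = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
    dummy = np.random.randint(0, 256, size=dims, dtype=np.uint8)

    session = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
    outputs = session.run(None, {"image_input": dummy})
    print(path, [o.shape for o in outputs])


# Example (hypothetical invocation):
# sanity_check(pose_model_path.replace(".onnx", "_extra-steps.onnx"))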