# Embeds pre-/post-processing steps (normalization, layout transpose, output
# casting and merging) directly into exported RTMDet/RTMPose ONNX models.
import re
|
|
|
|
import numpy as np
|
|
import onnx
|
|
from onnx import TensorProto, helper, numpy_helper
|
|
|
|
# ==================================================================================================

# Location of the mmdeploy-exported ONNX models.
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"

det_model_path1 = f"{base_path}rtmdet-nano_1x3x320x320.onnx"
det_model_path2 = f"{base_path}rtmdet-m_1x3x320x320.onnx"
pose_model_path1 = f"{base_path}rtmpose-m_Bx3x384x288.onnx"
pose_model_path2 = f"{base_path}rtmpose-m_1x3x384x288.onnx"
pose_model_path3 = f"{base_path}rtmpose-l_wb_Bx3x384x288.onnx"
pose_model_path4 = f"{base_path}rtmpose-l_wb_1x3x384x288.onnx"

# ImageNet normalization constants rewritten for an Add/Mul graph:
# normalized = (x + norm_mean) * norm_std  ==  (x - mean*255) / (std*255)
norm_mean = -(np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)
|
|
|
|
|
|
# ==================================================================================================


def add_steps_to_onnx(model_path):
    """Bake pre- and post-processing steps into an exported ONNX model.

    Prepends Cast -> Transpose -> Add(mean) -> Mul(1/std) nodes so the model
    accepts raw uint8 images in NHWC layout, renames the input to
    "image_input", casts every graph output to fp32, and — for paths
    containing "det" or "pose" — merges the model's two outputs into one
    tensor. The result is saved next to the source file with the filename
    dims rotated to NHWC and an "_extra-steps" suffix.

    Args:
        model_path: Path to the source .onnx file. Filename substrings
            select behavior: "fp16" -> internal fp16 dtype,
            "det"/"pose" -> which pair of outputs gets merged.
    """
    # Load existing model
    model = onnx.load(model_path)
    graph = model.graph

    mean = norm_mean.astype(np.float32)
    std = norm_std.astype(np.float32)

    # Reshape to (1, C, 1, 1) so Add/Mul broadcast over NCHW image tensors.
    mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32)
    std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32)

    # fp16 variants are detected purely by filename convention.
    use_fp16 = bool("fp16" in model_path)
    if use_fp16:
        mean = mean.astype(np.float16)
        std = std.astype(np.float16)

    # Add the initializers to the graph
    mean_initializer = numpy_helper.from_array(mean, name="norm_mean")
    std_initializer = numpy_helper.from_array(std, name="norm_std")
    graph.initializer.extend([mean_initializer, std_initializer])

    # Define layer names, assuming the first input is the image tensor
    input_name = graph.input[0].name

    # Cast to internal type
    # This has to be the first node, because tensorrt does not support uint8 layers
    # TensorProto enum values: 10 = FLOAT16, 1 = FLOAT.
    cast_type = 10 if use_fp16 else 1
    casted_output = "casted_output"
    cast_node = helper.make_node(
        "Cast",
        inputs=[input_name],
        outputs=[casted_output],
        to=cast_type,
        name="Cast_Input",
    )

    # Node to transpose NHWC (camera image layout) -> NCHW (model layout).
    transpose_output = "transpose_output"
    transpose_node = helper.make_node(
        "Transpose",
        inputs=[casted_output],
        outputs=[transpose_output],
        perm=[0, 3, 1, 2],
        name="Transpose",
    )

    # Node to add mean (norm_mean is already negated, so Add subtracts).
    mean_added_output = "mean_added_output"
    mean_add_node = helper.make_node(
        "Add",
        inputs=[transpose_output, "norm_mean"],
        outputs=[mean_added_output],
        name="Mean_Addition",
    )

    # Node to multiply by std (norm_std is already inverted, so Mul divides).
    std_mult_output = "std_mult_output"
    std_mul_node = helper.make_node(
        "Mul",
        inputs=[mean_added_output, "norm_std"],
        outputs=[std_mult_output],
        name="Std_Multiplication",
    )

    # Replace original input of the model with the output of normalization.
    # NOTE: this runs BEFORE the new nodes are inserted, so the Cast node's
    # own reference to input_name is intentionally left untouched.
    for node in graph.node:
        for idx, input_name_in_node in enumerate(node.input):
            if input_name_in_node == input_name:
                node.input[idx] = std_mult_output

    # Add the new nodes to the graph (order matters: Cast must come first).
    graph.node.insert(0, cast_node)
    graph.node.insert(1, transpose_node)
    graph.node.insert(2, mean_add_node)
    graph.node.insert(3, std_mul_node)

    # Transpose the input shape: declared dims move from NCHW to NHWC to
    # match the layout the new Transpose node expects. dims is snapshotted
    # first so the in-place writes don't read already-overwritten values.
    input_shape = graph.input[0].type.tensor_type.shape.dim
    dims = [dim.dim_value for dim in input_shape]
    for i, j in enumerate([0, 3, 1, 2]):
        input_shape[j].dim_value = dims[i]

    # Set the batch size to a defined string (dim_value == 0 means the
    # exported dim was dynamic, e.g. the "Bx3x..." variants).
    input_shape = graph.input[0].type.tensor_type.shape.dim
    if input_shape[0].dim_value == 0:
        input_shape[0].dim_param = "batch_size"

    # Rename the input tensor (now also updates the inserted Cast node).
    main_input_image_name = model.graph.input[0].name
    for node in model.graph.node:
        for idx, name in enumerate(node.input):
            if name == main_input_image_name:
                node.input[idx] = "image_input"
    model.graph.input[0].name = "image_input"

    # Set input image type to uint8 (raw camera bytes).
    model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8

    # Cast all outputs to fp32 to avoid half precision issues in cpp code.
    # Each original producer is rerouted to "<name>_internal" and a Cast
    # node re-emits the original name in fp32.
    for output in graph.output:
        orig_output_name = output.name
        internal_output_name = orig_output_name + "_internal"

        # Rename the output tensor
        for node in model.graph.node:
            for idx, name in enumerate(node.output):
                if name == orig_output_name:
                    node.output[idx] = internal_output_name

        # Insert a Cast node that casts the internal output to fp32
        cast_fp32_name = orig_output_name
        cast_node_output = helper.make_node(
            "Cast",
            inputs=[internal_output_name],
            outputs=[cast_fp32_name],
            to=1,
            name="Cast_Output_" + orig_output_name,
        )
        # Append the cast node to the graph
        graph.node.append(cast_node_output)

        # Update the output's data type info
        output.type.tensor_type.elem_type = TensorProto.FLOAT

    # Merge the two outputs into one tensor: unsqueeze the per-item scalar
    # output (labels/scores) and concatenate it onto the last axis of the
    # main output (dets/kpts).
    if "det" in model_path:
        r1_output = "dets"
        r2_output = "labels"
        out_name = "bboxes"
        out_dim = 6
    if "pose" in model_path:
        r1_output = "kpts"
        r2_output = "scores"
        out_name = "keypoints"
        out_dim = 3
    if "det" in model_path or "pose" in model_path:
        # Node to expand: (B, N) -> (B, N, 1).
        # NOTE(review): "axes" as an attribute requires Unsqueeze opset < 13;
        # opset 13+ takes axes as a second input — confirm the export opset.
        r2_expanded = r2_output + "_expanded"
        unsqueeze_node = helper.make_node(
            "Unsqueeze",
            inputs=[r2_output],
            outputs=[r2_expanded],
            axes=[2],
            name="Unsqueeze",
        )

        # Node to concatenate along the last (feature) axis.
        r12_merged = out_name
        concat_node = helper.make_node(
            "Concat",
            inputs=[r1_output, r2_expanded],
            outputs=[r12_merged],
            axis=2,
            name="Merged",
        )

        # Define the new concatenated output. Dynamic dims (value 0) are
        # declared as None so ONNX keeps them symbolic.
        merged_output = helper.make_tensor_value_info(
            r12_merged,
            TensorProto.FLOAT,
            [
                (
                    graph.input[0].type.tensor_type.shape.dim[0].dim_value
                    if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
                    else None
                ),
                (
                    graph.output[0].type.tensor_type.shape.dim[1].dim_value
                    if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
                    else None
                ),
                out_dim,
            ],
        )

        # Update the graph. The two pops assume the model has exactly two
        # outputs (true for these det/pose exports).
        graph.node.append(unsqueeze_node)
        graph.node.append(concat_node)
        graph.output.pop()
        graph.output.pop()
        graph.output.append(merged_output)

    # Rotate the dims in the filename (e.g. 1x3x320x320 -> 1x320x320x3) to
    # reflect the new NHWC input, and mark the file as post-processed.
    path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
    path = path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)
|
|
|
|
|
|
# ==================================================================================================


def main():
    """Post-process every exported model: fp32 variants first, then fp16."""
    model_paths = (
        det_model_path1,
        det_model_path2,
        pose_model_path1,
        pose_model_path2,
        pose_model_path3,
        pose_model_path4,
    )
    for model_path in model_paths:
        add_steps_to_onnx(model_path)
    for model_path in model_paths:
        add_steps_to_onnx(model_path.replace(".onnx", "_fp16.onnx"))
|
|
|
|
|
|
# ==================================================================================================

# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|