import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper

# ==================================================================================================
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
pose_model_path = base_path + "rtmpose-m_384x288.onnx"
det_model_path = base_path + "rtmdet-nano_320x320.onnx"

# ImageNet normalization folded into an add/multiply pair: (x - mean * 255) * (1 / (std * 255))
norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)


# ==================================================================================================
def add_steps_to_onnx(model_path):
    # Load existing model
    model = onnx.load(model_path)
    graph = model.graph

    # Reshape the normalization constants so they broadcast over NCHW tensors
    mean = np.reshape(norm_mean, (1, 3, 1, 1)).astype(np.float32)
    std = np.reshape(norm_std, (1, 3, 1, 1)).astype(np.float32)

    use_fp16 = "fp16" in model_path
    if use_fp16:
        mean = mean.astype(np.float16)
        std = std.astype(np.float16)

    # Add the initializers to the graph
    mean_initializer = numpy_helper.from_array(mean, name="norm_mean")
    std_initializer = numpy_helper.from_array(std, name="norm_std")
    graph.initializer.extend([mean_initializer, std_initializer])

    # Define layer names, assuming the first input is the image tensor
    input_name = graph.input[0].name

    # Cast to the internal compute type
    # This has to be the first node, because TensorRT does not support uint8 layers
    cast_type = TensorProto.FLOAT16 if use_fp16 else TensorProto.FLOAT
    casted_output = "casted_output"
    cast_node = helper.make_node(
        "Cast",
        inputs=[input_name],
        outputs=[casted_output],
        to=cast_type,
        name="Cast",
    )

    # Node to transpose from NHWC (image layout) to NCHW (model layout)
    transpose_output = "transpose_output"
    transpose_node = helper.make_node(
        "Transpose",
        inputs=[casted_output],
        outputs=[transpose_output],
        perm=[0, 3, 1, 2],
        name="Transpose",
    )

    # Node to add the (negative) mean
    mean_added_output = "mean_added_output"
    mean_add_node = helper.make_node(
        "Add",
        inputs=[transpose_output, "norm_mean"],
        outputs=[mean_added_output],
        name="Mean_Addition",
    )

    # Node to multiply by the reciprocal std
    std_mult_output = "std_mult_output"
    std_mul_node = helper.make_node(
        "Mul",
        inputs=[mean_added_output, "norm_std"],
        outputs=[std_mult_output],
        name="Std_Multiplication",
    )

    # Rewire the original consumers of the model input to the output of the normalization
    for node in graph.node:
        for idx, input_name_in_node in enumerate(node.input):
            if input_name_in_node == input_name:
                node.input[idx] = std_mult_output

    # Add the new nodes to the graph
    graph.node.insert(0, cast_node)
    graph.node.insert(1, transpose_node)
    graph.node.insert(2, mean_add_node)
    graph.node.insert(3, std_mul_node)

    # Transpose the declared input shape from NCHW to NHWC
    input_shape = graph.input[0].type.tensor_type.shape.dim
    dims = [dim.dim_value for dim in input_shape]
    for i, j in enumerate([0, 3, 1, 2]):
        input_shape[j].dim_value = dims[i]

    # Rename the input tensor
    main_input_image_name = graph.input[0].name
    for node in graph.node:
        for idx, name in enumerate(node.input):
            if name == main_input_image_name:
                node.input[idx] = "image_input"
    graph.input[0].name = "image_input"

    # Set the input image type to uint8
    graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8

    path = model_path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)


# ==================================================================================================
def main():
    add_steps_to_onnx(pose_model_path)
    add_steps_to_onnx(det_model_path)
    add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx"))
    add_steps_to_onnx(pose_model_path.replace(".onnx", "_fp16.onnx"))


# ==================================================================================================
if __name__ == "__main__":
    main()
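

# ==================================================================================================
# Optional sanity check (a minimal sketch, not part of the original export flow). It assumes
# onnxruntime is installed and that the "_extra-steps" model has already been written. The helper
# name, its placement here, and the example call below are illustrative additions: it validates the
# modified graph and feeds a random uint8 NHWC image through it to confirm the new preprocessing
# nodes run end to end.
def sanity_check(path):
    import onnxruntime as ort  # optional dependency, only needed for this check

    model = onnx.load(path)
    onnx.checker.check_model(model)

    # After add_steps_to_onnx, the declared input shape is NHWC and the input dtype is uint8
    dims = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
    dummy = np.random.randint(0, 256, size=dims, dtype=np.uint8)

    session = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
    outputs = session.run(None, {"image_input": dummy})
    print(path, [o.shape for o in outputs])


# Example (hypothetical invocation):
# sanity_check(pose_model_path.replace(".onnx", "_extra-steps.onnx"))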