Merge remote-tracking branch 'origin/ros' into jetson
@@ -70,7 +70,8 @@ mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/expor
```

```bash
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs.py
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs_pt.py
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs_tf.py
```

```bash
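After running the three export scripts above, a quick sanity check of the generated graphs is possible. A minimal sketch, assuming onnx is installed and using the exports directory from the scripts (the exact output filenames depend on the model names and are not fixed here):

```python
# Minimal sketch: validate whatever ONNX graphs the make_extra_graphs* scripts produced.
import glob

import onnx

EXPORTS_DIR = "/RapidPoseTriangulation/extras/mmdeploy/exports/"

for path in sorted(glob.glob(EXPORTS_DIR + "*.onnx")):
    model = onnx.load(path)
    onnx.checker.check_model(model)
    print(f"{path}: opset {model.opset_import[0].version}, {len(model.graph.node)} nodes - OK")
```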
@@ -54,6 +54,7 @@ def add_steps_to_onnx(model_path):
        inputs=[input_name],
        outputs=[casted_output],
        to=cast_type,
        name="Cast_Input",
    )

    # Node to transpose
@@ -118,6 +119,90 @@ def add_steps_to_onnx(model_path):
    # Set the input image type to uint8
    model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8

    # Cast all outputs to fp32 to avoid half precision issues in cpp code
    for output in graph.output:
        orig_output_name = output.name
        internal_output_name = orig_output_name + "_internal"

        # Rename the output tensor
        for node in model.graph.node:
            for idx, name in enumerate(node.output):
                if name == orig_output_name:
                    node.output[idx] = internal_output_name

        # Insert a Cast node that casts the internal output to fp32
        cast_fp32_name = orig_output_name
        cast_node_output = helper.make_node(
            "Cast",
            inputs=[internal_output_name],
            outputs=[cast_fp32_name],
            to=1,
            name="Cast_Output_" + orig_output_name,
        )
        # Append the cast node to the graph
        graph.node.append(cast_node_output)

        # Update the output's data type info
        output.type.tensor_type.elem_type = TensorProto.FLOAT

    # Merge the two outputs
    if "det" in model_path:
        r1_output = "dets"
        r2_output = "labels"
        out_name = "bboxes"
        out_dim = 6
    if "pose" in model_path:
        r1_output = "kpts"
        r2_output = "scores"
        out_name = "keypoints"
        out_dim = 3
    if "det" in model_path or "pose" in model_path:
        # Node to expand
        r2_expanded = r2_output + "_expanded"
        unsqueeze_node = helper.make_node(
            "Unsqueeze",
            inputs=[r2_output],
            outputs=[r2_expanded],
            axes=[2],
            name="Unsqueeze",
        )

        # Node to concatenate
        r12_merged = out_name
        concat_node = helper.make_node(
            "Concat",
            inputs=[r1_output, r2_expanded],
            outputs=[r12_merged],
            axis=2,
            name="Merged",
        )

        # Define the new concatenated output
        merged_output = helper.make_tensor_value_info(
            r12_merged,
            TensorProto.FLOAT,
            [
                (
                    graph.input[0].type.tensor_type.shape.dim[0].dim_value
                    if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
                    else None
                ),
                (
                    graph.output[0].type.tensor_type.shape.dim[1].dim_value
                    if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
                    else None
                ),
                out_dim,
            ],
        )

        # Update the graph
        graph.node.append(unsqueeze_node)
        graph.node.append(concat_node)
        graph.output.pop()
        graph.output.pop()
        graph.output.append(merged_output)

    path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
    path = path.replace(".onnx", "_extra-steps.onnx")
    onnx.save(model, path)
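The effect of these graph edits can be checked without running inference. A minimal sketch, assuming onnx is installed; the `end2end_extra-steps.onnx` filename is only a placeholder derived from the `_extra-steps` suffix above. The image input should report elem_type 2 (UINT8) and every output elem_type 1 (FLOAT):

```python
# Minimal sketch: inspect a patched "*_extra-steps.onnx" graph (filename assumed).
import onnx

model = onnx.load(
    "/RapidPoseTriangulation/extras/mmdeploy/exports/end2end_extra-steps.onnx"
)
onnx.checker.check_model(model)

# elem_type follows TensorProto: 1 = FLOAT, 2 = UINT8
for value in list(model.graph.input) + list(model.graph.output):
    dims = [d.dim_value or "?" for d in value.type.tensor_type.shape.dim]
    print(value.name, value.type.tensor_type.elem_type, dims)
```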
@@ -34,5 +34,8 @@ RUN pip3 install --upgrade --no-cache-dir onnxconverter_common
# Fix an error when profiling
RUN pip3 install --upgrade --no-cache-dir "onnxruntime-gpu<1.17"

RUN pip3 install --upgrade --no-cache-dir tensorflow
RUN pip3 install --upgrade --no-cache-dir tf2onnx

WORKDIR /mmdeploy/
CMD ["/bin/bash"]
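Inside a container built from this Dockerfile, the version pin and the GPU build can be verified quickly. A minimal sketch, assuming the container is started with GPU access:

```python
# Minimal sketch: confirm the onnxruntime-gpu pin (< 1.17) and that the CUDA provider is available.
import onnxruntime as ort

major, minor = (int(x) for x in ort.__version__.split(".")[:2])
assert (major, minor) < (1, 17), f"unexpected onnxruntime version: {ort.__version__}"

# CUDAExecutionProvider should be listed when the GPU build is installed correctly.
print(ort.__version__, ort.get_available_providers())
```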
extras/mmdeploy/make_extra_graphs_tf.py (new file, 276 lines)
@@ -0,0 +1,276 @@
import cv2

import numpy as np
import tensorflow as tf
import tf2onnx

# ==================================================================================================

base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
det_target_size = (320, 320)

# ==================================================================================================

class BayerToRGB(tf.keras.layers.Layer):
    """Convert Bayer image to RGB
    See: https://stanford.edu/class/ee367/reading/Demosaicing_ICASSP04.pdf
    See: https://github.com/cheind/pytorch-debayer/blob/master/debayer/modules.py#L231
    """

    def __init__(self):
        super().__init__()
        self.layout = "RGGB"
        self.max_val = 255.0

        self.kernels = tf.constant(
            np.array(
                [
                    # G at R/B locations
                    [
                        [0, 0, -1, 0, 0],
                        [0, 0, 2, 0, 0],
                        [-1, 2, 4, 2, -1],
                        [0, 0, 2, 0, 0],
                        [0, 0, -1, 0, 0],
                    ],
                    # R/B at G in R/B rows and B/R columns
                    [
                        [0, 0, 0.5, 0, 0],
                        [0, -1, 0, -1, 0],
                        [-1, 4, 5, 4, -1],
                        [0, -1, 0, -1, 0],
                        [0, 0, 0.5, 0, 0],
                    ],
                    # R/B at G in B/R rows and R/B columns
                    [
                        [0, 0, 0.5, 0, 0],
                        [0, -1, 4, -1, 0],
                        [-1, 0, 5, 0, -1],
                        [0, -1, 4, -1, 0],
                        [0, 0, 0.5, 0, 0],
                    ],
                    # R/B at B/R in B/R rows and B/R columns
                    [
                        [0, 0, -1.5, 0, 0],
                        [0, 2, 0, 2, 0],
                        [-1.5, 0, 6, 0, -1.5],
                        [0, 2, 0, 2, 0],
                        [0, 0, -1.5, 0, 0],
                    ],
                ],
                dtype=np.float32,
            )
            .reshape(1, 4, 5, 5)
            .transpose(2, 3, 0, 1)
            / 8.0
        )
        self.index = tf.constant(
            np.array(
                # Describes the kernel indices that calculate the corresponding RGB values for
                # the 2x2 layout (RGGB) sub-structure
                [
                    # Destination R
                    [
                        [4, 1],  # identity, R at G in R row B column
                        [2, 3],  # R at G in B row R column, R at B in B row R column
                    ],
                    # Destination G
                    [
                        [0, 4],
                        [4, 0],
                    ],
                    # Destination B
                    [
                        [3, 2],
                        [1, 4],
                    ],
                ]
            ).reshape(1, 3, 2, 2)
        )

    def call(self, img):
        H, W = tf.shape(img)[1], tf.shape(img)[2]

        # Pad the image
        tpad = img[:, 0:2, :, :]
        bpad = img[:, H - 2 : H, :, :]
        ipad = tf.concat([tpad, img, bpad], axis=1)
        lpad = ipad[:, :, 0:2, :]
        rpad = ipad[:, :, W - 2 : W, :]
        ipad = tf.concat([lpad, ipad, rpad], axis=2)

        # Convolve with kernels
        planes = tf.nn.conv2d(ipad, self.kernels, strides=[1, 1, 1, 1], padding="VALID")

        # Concatenate identity kernel
        planes = tf.concat([planes, img], axis=-1)

        # Gather values
        index_repeated = tf.tile(self.index, multiples=[1, 1, H // 2, W // 2])
        index_repeated = tf.transpose(index_repeated, perm=[0, 2, 3, 1])
        row_indices, col_indices = tf.meshgrid(tf.range(H), tf.range(W), indexing="ij")
        index_tensor = tf.stack([row_indices, col_indices], axis=-1)
        index_tensor = tf.expand_dims(index_tensor, axis=0)
        index_tensor = tf.expand_dims(index_tensor, axis=-2)
        index_tensor = tf.repeat(index_tensor, repeats=3, axis=-2)
        index_repeated = tf.expand_dims(index_repeated, axis=-1)
        indices = tf.concat([tf.cast(index_tensor, tf.int64), index_repeated], axis=-1)
        rgb = tf.gather_nd(planes, indices, batch_dims=1)

        if self.max_val == 255.0:
            # Make value range valid again
            rgb = tf.round(rgb)

        return rgb


# ==================================================================================================

def bayer_resize(img, size):
    """Resize a Bayer image by splitting color channels"""

    # Split the image into 4 channels
    r = img[:, 0::2, 0::2, 0]
    g1 = img[:, 0::2, 1::2, 0]
    g2 = img[:, 1::2, 0::2, 0]
    b = img[:, 1::2, 1::2, 0]
    bsplit = tf.stack([r, g1, g2, b], axis=-1)

    # Resize the image
    # Make sure the target size is divisible by 2
    size = (size[0] // 2, size[1] // 2)
    bsized = tf.image.resize(bsplit, size=size, method="bilinear")

    # Create a bayer image again
    img = tf.nn.depth_to_space(bsized, block_size=2)

    return img


# ==================================================================================================

class Letterbox(tf.keras.layers.Layer):
    def __init__(self, target_size, fill_value=128):
        """Resize and pad image while keeping aspect ratio"""
        super(Letterbox, self).__init__()

        self.b2rgb = BayerToRGB()
        self.target_size = target_size
        self.fill_value = fill_value

    def calc_params(self, ishape):
        img_h, img_w = ishape[1], ishape[2]
        target_h, target_w = self.target_size

        scale = tf.minimum(target_w / img_w, target_h / img_h)
        new_w = tf.round(tf.cast(img_w, scale.dtype) * scale)
        new_h = tf.round(tf.cast(img_h, scale.dtype) * scale)
        new_w = tf.cast(new_w, tf.int32)
        new_h = tf.cast(new_h, tf.int32)
        new_w = new_w - (new_w % 2)
        new_h = new_h - (new_h % 2)

        pad_w = target_w - new_w
        pad_h = target_h - new_h
        pad_left = tf.cast(tf.floor(tf.cast(pad_w, tf.float32) / 2.0), tf.int32)
        pad_top = tf.cast(tf.floor(tf.cast(pad_h, tf.float32) / 2.0), tf.int32)
        pad_right = pad_w - pad_left
        pad_bottom = pad_h - pad_top
        paddings = [pad_top, pad_bottom, pad_left, pad_right]

        return paddings, scale, (new_w, new_h)

    def call(self, img):
        paddings, _, (nw, nh) = self.calc_params(tf.shape(img))

        # Resize the image and convert to RGB
        img = bayer_resize(img, (nh, nw))
        img = self.b2rgb(img)

        # Pad the image
        pad_top, pad_bottom, pad_left, pad_right = paddings
        img = tf.pad(
            img,
            paddings=[[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
            mode="CONSTANT",
            constant_values=self.fill_value,
        )

        return img


# ==================================================================================================

class DetPreprocess(tf.keras.layers.Layer):
    def __init__(self, target_size, fill_value=114):
        super(DetPreprocess, self).__init__()
        self.letterbox = Letterbox(target_size, fill_value)

    def call(self, img):
        """img: tf.Tensor of shape [batch, H, W, C], dtype=tf.uint8"""

        # Cast to float32 since TensorRT does not support uint8 layers
        img = tf.cast(img, tf.float32)

        img = self.letterbox(img)
        return img


# ==================================================================================================

def rgb2bayer(img):
    bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
    bayer[0::2, 0::2] = img[0::2, 0::2, 0]
    bayer[0::2, 1::2] = img[0::2, 1::2, 1]
    bayer[1::2, 0::2] = img[1::2, 0::2, 1]
    bayer[1::2, 1::2] = img[1::2, 1::2, 2]
    return bayer


# ==================================================================================================

def main():

    img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"
    image = cv2.imread(img_path, 3)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = rgb2bayer(image)
    image = np.expand_dims(image, axis=-1)
    image = np.asarray(image, dtype=np.uint8)

    # Initialize the DetPreprocess module
    preprocess_model = tf.keras.Sequential()
    preprocess_model.add(DetPreprocess(target_size=det_target_size))
    det_dummy_input_a0 = tf.convert_to_tensor(
        np.expand_dims(image, axis=0), dtype=tf.uint8
    )
    det_dummy_output_a0 = preprocess_model(det_dummy_input_a0)
    print("\n", det_dummy_output_a0.shape, "\n")

    output_a0 = det_dummy_output_a0.numpy()
    output_a0 = np.squeeze(output_a0, axis=0)
    output_a0 = np.asarray(output_a0, dtype=np.uint8)
    output_a0 = cv2.cvtColor(output_a0, cv2.COLOR_RGB2BGR)
    cv2.imwrite(base_path + "det_preprocess.jpg", output_a0)

    # Export to ONNX
    input_signature = [tf.TensorSpec([None, None, None, 1], tf.uint8, name="x")]
    _, _ = tf2onnx.convert.from_keras(
        preprocess_model,
        input_signature,
        opset=11,
        output_path=base_path + "det_preprocess.onnx",
        target=["tensorrt"],
    )


# ==================================================================================================

if __name__ == "__main__":
    main()
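Once the script has produced det_preprocess.onnx, the export can be cross-checked against the Keras model with onnxruntime. A minimal sketch, assuming it runs in the same module as the file above (so DetPreprocess, det_target_size, and base_path are in scope); the dummy input size and tolerance are arbitrary choices:

```python
# Minimal sketch: compare the tf2onnx export against the Keras preprocessing model.
import numpy as np
import onnxruntime as ort
import tensorflow as tf

onnx_path = base_path + "det_preprocess.onnx"
sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])

# Random Bayer-pattern input with the expected [batch, H, W, 1] uint8 layout
dummy = np.random.randint(0, 256, size=(1, 480, 640, 1), dtype=np.uint8)

keras_model = tf.keras.Sequential([DetPreprocess(target_size=det_target_size)])
keras_out = keras_model(tf.convert_to_tensor(dummy)).numpy()

input_name = sess.get_inputs()[0].name  # usually "x", from the TensorSpec above
onnx_out = sess.run(None, {input_name: dummy})[0]

print("max abs difference:", np.abs(onnx_out - keras_out).max())
```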