diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py
index 82b0dce..6d7054e 100644
--- a/extras/mmdeploy/add_extra_steps.py
+++ b/extras/mmdeploy/add_extra_steps.py
@@ -1,6 +1,6 @@
 import numpy as np
 import onnx
-from onnx import TensorProto, helper, numpy_helper
+from onnx import TensorProto, compose, helper, numpy_helper
 
 
 # ==================================================================================================
@@ -97,6 +97,37 @@ def add_steps_to_onnx(model_path):
         for i, j in enumerate([0, 3, 1, 2]):
             input_shape[j].dim_value = dims[i]
 
+    if "det" in model_path:
+        # Add preprocess model to main network
+        pp1_model = onnx.load(base_path + "det_preprocess.onnx")
+        model = compose.add_prefix(model, prefix="main_")
+        pp1_model = compose.add_prefix(pp1_model, prefix="preprocess_")
+        model = compose.merge_models(
+            pp1_model,
+            model,
+            io_map=[(pp1_model.graph.output[0].name, model.graph.input[0].name)],
+        )
+
+        # Add postprocess model
+        pp2_model = onnx.load(base_path + "det_postprocess.onnx")
+        pp2_model = compose.add_prefix(pp2_model, prefix="postprocess_")
+        model = compose.merge_models(
+            model,
+            pp2_model,
+            io_map=[
+                (model.graph.output[0].name, pp2_model.graph.input[1].name),
+            ],
+        )
+
+        # Update nodes from postprocess model to use the input of the main network
+        pp2_input_image_name = pp2_model.graph.input[0].name
+        main_input_name = model.graph.input[0].name
+        for node in model.graph.node:
+            for idx, name in enumerate(node.input):
+                if name == pp2_input_image_name:
+                    node.input[idx] = main_input_name
+        model.graph.input.pop(1)
+
     # Set input type to int8
     model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
index a619c01..89764dd 100644
--- a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
@@ -3,3 +3,7 @@ _base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime.py"]
 onnx_config = dict(
     input_shape=[320, 320],
 )
+
+codebase_config = dict(
+    post_processing=dict(score_threshold=0.3, iou_threshold=0.3),
+)
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
index f4a2b8b..62dff76 100644
--- a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
@@ -3,3 +3,7 @@ _base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime-fp16.py"]
 onnx_config = dict(
     input_shape=[320, 320],
 )
+
+codebase_config = dict(
+    post_processing=dict(score_threshold=0.3, iou_threshold=0.3),
+)
diff --git a/extras/mmdeploy/make_extra_graphs.py b/extras/mmdeploy/make_extra_graphs.py
new file mode 100644
index 0000000..a961032
--- /dev/null
+++ b/extras/mmdeploy/make_extra_graphs.py
@@ -0,0 +1,161 @@
+import cv2
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# ==================================================================================================
+
+base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
+det_target_size = (320, 320)
+
+# ==================================================================================================
+
+
+class Letterbox(nn.Module):
+    def __init__(self, target_size, fill_value=128):
+        """Resize and pad image while keeping aspect ratio"""
+        super(Letterbox, self).__init__()
+
+        self.target_size = target_size
+        self.fill_value = fill_value
+
+    def calc_params(self, img):
+        ih, iw = img.shape[1:3]
+        th, tw = self.target_size
+
+        scale = torch.min(tw / iw, th / ih)
+        nw = torch.round(iw * scale)
+        nh = torch.round(ih * scale)
+
+        pad_w = tw - nw
+        pad_h = th - nh
+        pad_left = pad_w // 2
+        pad_top = pad_h // 2
+        pad_right = pad_w - pad_left
+        pad_bottom = pad_h - pad_top
+        paddings = (pad_left, pad_right, pad_top, pad_bottom)
+
+        return paddings, scale, (nw, nh)
+
+    def forward(self, img):
+        paddings, _, (nw, nh) = self.calc_params(img)
+
+        # Resize the image
+        img = img.to(torch.float32)
+        img = F.interpolate(
+            img.permute(0, 3, 1, 2), size=(nh, nw), mode="bilinear", align_corners=False
+        )
+        img = img.permute(0, 2, 3, 1)
+        img = img.round()
+
+        # Pad the image
+        img = F.pad(
+            img.permute(0, 3, 1, 2),
+            pad=paddings,
+            mode="constant",
+            value=self.fill_value,
+        )
+        img = img.permute(0, 2, 3, 1)
+        canvas = img
+
+        return canvas
+
+
+# ==================================================================================================
+
+
+class DetPreprocess(nn.Module):
+    def __init__(self, target_size, fill_value=114):
+        super(DetPreprocess, self).__init__()
+        self.letterbox = Letterbox(target_size, fill_value)
+
+    def forward(self, img):
+        # img: torch.Tensor of shape [batch, H, W, C], dtype=torch.uint8
+        img = self.letterbox(img)
+        return img
+
+
+# ==================================================================================================
+
+
+class DetPostprocess(nn.Module):
+    def __init__(self, target_size):
+        super(DetPostprocess, self).__init__()
+        self.letterbox = Letterbox(target_size)
+
+    def forward(self, img, boxes):
+        paddings, scale, _ = self.letterbox.calc_params(img)
+
+        boxes = boxes.float()
+        boxes[:, :, 0] -= paddings[0]
+        boxes[:, :, 2] -= paddings[0]
+        boxes[:, :, 1] -= paddings[2]
+        boxes[:, :, 3] -= paddings[2]
+        boxes[:, :, 0:4] /= scale
+
+        ih, iw = img.shape[1:3]
+        boxes = torch.max(boxes, torch.tensor(0))
+        b0 = boxes[:, :, 0]
+        b1 = boxes[:, :, 1]
+        b2 = boxes[:, :, 2]
+        b3 = boxes[:, :, 3]
+        b0 = torch.min(b0, iw - 1)
+        b1 = torch.min(b1, ih - 1)
+        b2 = torch.min(b2, iw - 1)
+        b3 = torch.min(b3, ih - 1)
+        boxes = torch.stack((b0, b1, b2, b3, boxes[:, :, 4]), dim=2)
+
+        return boxes
+
+
+# ==================================================================================================
+
+
+def main():
+
+    img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"
+    image = cv2.imread(img_path, cv2.IMREAD_COLOR)
+
+    # Initialize the DetPreprocess module
+    preprocess_model = DetPreprocess(target_size=det_target_size)
+    det_dummy_input_a0 = torch.from_numpy(image).unsqueeze(0)
+
+    # Export to ONNX
+    torch.onnx.export(
+        preprocess_model,
+        det_dummy_input_a0,
+        base_path + "det_preprocess.onnx",
+        opset_version=11,
+        input_names=["input_image"],
+        output_names=["preprocessed_image"],
+        dynamic_axes={
+            "input_image": {0: "batch_size", 1: "height", 2: "width"},
+            "preprocessed_image": {0: "batch_size"},
+        },
+    )
+
+    # Initialize the DetPostprocess module
+    postprocess_model = DetPostprocess(target_size=det_target_size)
+    det_dummy_input_b0 = torch.from_numpy(image).unsqueeze(0)
+    det_dummy_input_b1 = torch.rand(1, 10, 5)
+
+    # Export to ONNX
+    torch.onnx.export(
+        postprocess_model,
+        (det_dummy_input_b0, det_dummy_input_b1),
+        base_path + "det_postprocess.onnx",
+        opset_version=11,
+        input_names=["input_image", "boxes"],
+        output_names=["output_boxes"],
+        dynamic_axes={
+            "input_image": {0: "batch_size", 1: "height", 2: "width"},
+            "boxes": {0: "batch_size", 1: "num_boxes"},
+            "output_boxes": {0: "batch_size", 1: "num_boxes"},
+        },
+    )
+
+
+# ==================================================================================================
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/utils_2d_pose_ort.py b/scripts/utils_2d_pose_ort.py
index 8b925d8..2213970 100644
--- a/scripts/utils_2d_pose_ort.py
+++ b/scripts/utils_2d_pose_ort.py
@@ -31,6 +31,8 @@ class BaseModel(ABC):
 
         self.input_name = self.session.get_inputs()[0].name
         self.input_shape = self.session.get_inputs()[0].shape
+        if "batch_size" in self.input_shape:
+            self.input_shape = [1, 500, 500, 3]
 
         input_type = self.session.get_inputs()[0].type
         if input_type == "tensor(float16)":
@@ -72,116 +74,25 @@ class RTMDet(BaseModel):
         self,
         model_path: str,
         conf_threshold: float,
-        iou_threshold: float,
         warmup: int = 30,
     ):
         super(RTMDet, self).__init__(model_path, warmup)
 
         self.conf_threshold = conf_threshold
-        self.iou_threshold = iou_threshold
-        self.dx = 0
-        self.dy = 0
-        self.scale = 0
-
-    def letterbox(self, img: np.ndarray, target_size: List[int], fill_value: int = 128):
-        h, w = img.shape[:2]
-        tw, th = target_size
-
-        scale = min(tw / w, th / h)
-        nw, nh = int(w * scale), int(h * scale)
-        dx, dy = (tw - nw) // 2, (th - nh) // 2
-
-        canvas = np.full((th, tw, img.shape[2]), fill_value, dtype=img.dtype)
-        canvas[dy : dy + nh, dx : dx + nw, :] = cv2.resize(
-            img, (nw, nh), interpolation=cv2.INTER_LINEAR
-        )
-
-        return canvas, dx, dy, scale
-
-    def nms_optimized(
-        self, boxes: np.ndarray, iou_threshold: float, conf_threshold: float
-    ):
-        """
-        Perform Non-Maximum Suppression (NMS) on bounding boxes for a single class.
-        """
-
-        # Filter out boxes with low confidence scores
-        scores = boxes[:, 4]
-        keep = scores > conf_threshold
-        boxes = boxes[keep]
-        scores = scores[keep]
-
-        if boxes.shape[0] == 0:
-            return np.empty((0, 5), dtype=boxes.dtype)
-
-        # Compute the area of the bounding boxes
-        x1 = boxes[:, 0]
-        y1 = boxes[:, 1]
-        x2 = boxes[:, 2]
-        y2 = boxes[:, 3]
-        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
-
-        # Sort the boxes by scores in descending order
-        order = scores.argsort()[::-1]
-
-        keep_indices = []
-        while order.size > 0:
-            i = order[0]
-            keep_indices.append(i)
-
-            # Compute IoU of the current box with the rest
-            xx1 = np.maximum(x1[i], x1[order[1:]])
-            yy1 = np.maximum(y1[i], y1[order[1:]])
-            xx2 = np.minimum(x2[i], x2[order[1:]])
-            yy2 = np.minimum(y2[i], y2[order[1:]])
-
-            # Compute width and height of the overlapping area
-            w = np.maximum(0.0, xx2 - xx1 + 1)
-            h = np.maximum(0.0, yy2 - yy1 + 1)
-
-            # Compute the area of the intersection
-            inter = w * h
-
-            # Compute the IoU
-            iou = inter / (areas[i] + areas[order[1:]] - inter)
-
-            # Keep boxes with IoU less than the threshold
-            inds = np.where(iou <= iou_threshold)[0]
-
-            # Update the order array
-            order = order[inds + 1]
-
-        # Return the boxes that are kept
-        return boxes[keep_indices]
 
     def preprocess(self, image: np.ndarray):
-        th, tw = self.input_shape[1:3]
-        image, self.dx, self.dy, self.scale = self.letterbox(
-            image, (tw, th), fill_value=114
-        )
         tensor = np.asarray(image).astype(self.input_type, copy=False)
         tensor = np.expand_dims(tensor, axis=0)
         return tensor
 
     def postprocess(self, tensor: List[np.ndarray]):
-        boxes = np.squeeze(tensor[0], axis=0)
-        classes = np.expand_dims(np.squeeze(tensor[1], axis=0), axis=-1)
-        boxes = np.concatenate([boxes, classes], axis=-1)
+        boxes = np.squeeze(tensor[1], axis=0)
+        classes = np.squeeze(tensor[0], axis=0)
 
-        boxes = self.nms_optimized(boxes, self.iou_threshold, self.conf_threshold)
+        human_class = classes == 0
+        boxes = boxes[human_class]
 
-        if boxes.shape[0] == 0:
-            return boxes
-
-        human_class = boxes[..., -1] == 0
-        boxes = boxes[human_class][..., :4]
-
-        boxes[:, 0] -= self.dx
-        boxes[:, 2] -= self.dx
-        boxes[:, 1] -= self.dy
-        boxes[:, 3] -= self.dy
-
-        boxes = np.clip(boxes, a_min=0, a_max=None)
-        boxes[:, :4] /= self.scale
+        keep = boxes[:, 4] > self.conf_threshold
+        boxes = boxes[keep]
 
         return boxes
@@ -201,7 +112,7 @@ class RTMPose(BaseModel):
         target_size: List[int],
         padding_scale: float = 1.25,
     ):
-        start_x, start_y, end_x, end_y = box
+        start_x, start_y, end_x, end_y = box[0:4]
         target_w, target_h = target_size
 
         # Calculate original bounding box width and height
@@ -305,8 +216,7 @@ class TopDown:
         self,
         det_model_path,
         pose_model_path,
-        conf_threshold=0.6,
-        iou_threshold=0.6,
+        box_conf_threshold=0.6,
         warmup=30,
     ):
         if (not det_model_path.endswith(".onnx")) or (
@@ -314,7 +224,7 @@ class TopDown:
         ):
             raise ValueError("Only ONNX models are supported.")
 
-        self.det_model = RTMDet(det_model_path, conf_threshold, iou_threshold, warmup)
+        self.det_model = RTMDet(det_model_path, box_conf_threshold, warmup)
         self.pose_model = RTMPose(pose_model_path, warmup)
 
     def predict(self, image):
@@ -337,8 +247,7 @@ def load_model():
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx",
         # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_extra-steps.onnx",
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx",
-        conf_threshold=0.3,
-        iou_threshold=0.3,
+        box_conf_threshold=0.3,
         warmup=30,
     )
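
Note (not part of the patch): a minimal sketch for sanity-checking the merged detector graph with onnxruntime. It assumes the merged model path from load_model(), the sample image path from make_extra_graphs.py, and the output ordering consumed by RTMDet.postprocess above (outputs[0] = class ids, outputs[1] = boxes); verify these against your export before relying on it.

import cv2
import numpy as np
import onnxruntime as ort

# Assumed paths, taken from load_model() and make_extra_graphs.py in this diff
model_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx"
img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"

session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

# The merged graph takes the raw uint8 image; letterboxing now happens inside the graph
image = cv2.imread(img_path, cv2.IMREAD_COLOR)
tensor = np.expand_dims(image, axis=0)  # [1, H, W, 3], dtype=uint8

input_name = session.get_inputs()[0].name
classes, boxes = session.run(None, {input_name: tensor})  # assumed output order

# Boxes are already mapped back to original image coordinates: [x1, y1, x2, y2, score]
person = boxes[0][classes[0] == 0]
person = person[person[:, 4] > 0.3]
print(person)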