diff --git a/extras/mmdeploy/README.md b/extras/mmdeploy/README.md
index 2abffa9..fb30c9f 100644
--- a/extras/mmdeploy/README.md
+++ b/extras/mmdeploy/README.md
@@ -11,39 +11,53 @@ docker build --progress=plain -f extras/mmdeploy/dockerfile -t rpt_mmdeploy .
 ## ONNX
 
 ```bash
-export withFP16="_fp16"
-cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320$withFP16.py configs/mmdet/detection/
-
 cd /mmdeploy/
+export withFP16="_fp16"
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320"$withFP16".py configs/mmdet/detection/
+
 python3 ./tools/deploy.py \
-    configs/mmdet/detection/detection_onnxruntime_static-320x320$withFP16.py \
+    configs/mmdet/detection/detection_onnxruntime_static-320x320"$withFP16".py \
     /mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \
     https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \
     /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
     --work-dir work_dir \
     --show
 
-mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320$withFP16.onnx
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x3x320x320"$withFP16".onnx
 ```
 
 ```bash
-export withFP16="_fp16"
-cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py configs/mmpose/
-
 cd /mmdeploy/
+export withFP16="_fp16"
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py configs/mmpose/
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py configs/mmpose/
+
 python3 ./tools/deploy.py \
-    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py \
+    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
     /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
     https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
     /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
     --work-dir work_dir \
     --show
 
-mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288$withFP16.onnx
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx
+
+python3 ./tools/deploy.py \
+    configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \
+    /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
+    https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
+    /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
+    --work-dir work_dir \
+    --show
+
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx3x384x288"$withFP16".onnx
 ```
 
 ```bash
 python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs.py
+```
+
+```bash
 python3 /RapidPoseTriangulation/extras/mmdeploy/add_extra_steps.py
 ```
 
@@ -57,14 +71,17 @@ Run this directly in the inference container (the TensorRT versions need to be t
 export withFP16="_fp16"
 
 trtexec --fp16 \
-    --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320"$withFP16"_extra-steps.onnx \
+    --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.onnx \
     --saveEngine=end2end.engine
 
 mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.engine
 
 trtexec --fp16 \
-    --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288"$withFP16"_extra-steps.onnx \
-    --saveEngine=end2end.engine
+    --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx384x288x3"$withFP16"_extra-steps.onnx \
+    --saveEngine=end2end.engine \
+    --minShapes=image_input:1x384x288x3 \
+    --optShapes=image_input:1x384x288x3 \
+    --maxShapes=image_input:1x384x288x3
 
 mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x384x288x3"$withFP16"_extra-steps.engine
 ```
@@ -74,14 +91,14 @@ mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x
 
 ## Benchmark
 
 ```bash
+cd /mmdeploy/
 export withFP16="_fp16"
-cd /mmdeploy/
 python3 ./tools/profiler.py \
-    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py \
+    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
     /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
     /RapidPoseTriangulation/extras/mmdeploy/testimages/ \
-    --model /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288$withFP16.onnx \
+    --model /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx \
     --shape 384x288 \
     --device cuda \
     --warmup 50 \
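The new file names encode the input shape (`1x3x320x320` for the static detector, `Bx3x384x288` for the dynamic-batch pose model). A quick way to sanity-check that each export really matches its name is to inspect the graph inputs; the sketch below is illustrative and not part of the patch, and assumes the `exports/` paths used throughout the README:

```python
import onnx

# Not part of the patch: print each input's name and dimensions so the shape
# suffix in the file name can be checked against the actual graph.
for name in [
    "rtmdet-nano_1x3x320x320.onnx",
    "rtmpose-m_1x3x384x288.onnx",
    "rtmpose-m_Bx3x384x288.onnx",
]:
    model = onnx.load("/RapidPoseTriangulation/extras/mmdeploy/exports/" + name)
    for inp in model.graph.input:
        # dim_param holds the symbolic name of a dynamic axis (e.g. "batch"),
        # dim_value the fixed size of a static one
        dims = [d.dim_param or d.dim_value for d in inp.type.tensor_type.shape.dim]
        print(name, inp.name, dims)
```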
diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py
index 8bd7a08..6ab35fa 100644
--- a/extras/mmdeploy/add_extra_steps.py
+++ b/extras/mmdeploy/add_extra_steps.py
@@ -1,12 +1,15 @@
+import re
+
 import numpy as np
 import onnx
-from onnx import TensorProto, compose, helper, numpy_helper
+from onnx import TensorProto, helper, numpy_helper
 
 # ==================================================================================================
 
 base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
-pose_model_path = base_path + "rtmpose-m_384x288.onnx"
-det_model_path = base_path + "rtmdet-nano_320x320.onnx"
+det_model_path = base_path + "rtmdet-nano_1x3x320x320.onnx"
+pose_model_path1 = base_path + "rtmpose-m_Bx3x384x288.onnx"
+pose_model_path2 = base_path + "rtmpose-m_1x3x384x288.onnx"
 
 norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
 norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)
@@ -97,6 +100,11 @@ def add_steps_to_onnx(model_path):
     for i, j in enumerate([0, 3, 1, 2]):
         input_shape[j].dim_value = dims[i]
 
+    # Set the batch size to a defined string
+    input_shape = graph.input[0].type.tensor_type.shape.dim
+    if input_shape[0].dim_value == 0:
+        input_shape[0].dim_param = "batch_size"
+
     # Rename the input tensor
     main_input_image_name = model.graph.input[0].name
     for node in model.graph.node:
@@ -108,7 +116,8 @@ def add_steps_to_onnx(model_path):
     # Set input image type to int8
     model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
 
-    path = model_path.replace(".onnx", "_extra-steps.onnx")
+    path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
+    path = path.replace(".onnx", "_extra-steps.onnx")
     onnx.save(model, path)
 
 
@@ -116,10 +125,12 @@
 
 
 def main():
-    add_steps_to_onnx(pose_model_path)
     add_steps_to_onnx(det_model_path)
+    add_steps_to_onnx(pose_model_path1)
+    add_steps_to_onnx(pose_model_path2)
     add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx"))
-    add_steps_to_onnx(pose_model_path.replace(".onnx", "_fp16.onnx"))
+    add_steps_to_onnx(pose_model_path1.replace(".onnx", "_fp16.onnx"))
+    add_steps_to_onnx(pose_model_path2.replace(".onnx", "_fp16.onnx"))
 
 
 # ==================================================================================================
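The new `re.sub` call is what renames the exports: the extra steps fold the layout transpose into the graph, so the `_extra-steps` files take NHWC input, and the shape suffix in the file name is reordered to match. A standalone worked example of the substitution:

```python
import re

# The channel count (first "x<digits>" group) is moved behind the spatial dims,
# turning the NCHW suffix into the NHWC one used for the *_extra-steps files.
for path in ["rtmdet-nano_1x3x320x320.onnx", "rtmpose-m_Bx3x384x288.onnx"]:
    renamed = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", path)
    print(path, "->", renamed)
# rtmdet-nano_1x3x320x320.onnx -> rtmdet-nano_1x320x320x3.onnx
# rtmpose-m_Bx3x384x288.onnx   -> rtmpose-m_Bx384x288x3.onnx
```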
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
index 3918d65..d7d5b57 100644
--- a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
@@ -5,7 +5,7 @@ onnx_config = dict(
 )
 
 codebase_config = dict(
-    # For later TensorRT inference, the number of output boxes needs to be as stable as possible, 
+    # For later TensorRT inference, the number of output boxes needs to be as stable as possible,
     # because a drop in the box count leads to a re-optimization which takes a lot of time,
     # therefore reduce the maximum number of output boxes to the smallest usable value and sort out
     # low confidence boxes outside the model.
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
index a724f53..1dd243b 100644
--- a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
@@ -5,7 +5,7 @@ onnx_config = dict(
 )
 
 codebase_config = dict(
-    # For later TensorRT inference, the number of output boxes needs to be as stable as possible, 
+    # For later TensorRT inference, the number of output boxes needs to be as stable as possible,
     # because a drop in the box count leads to a re-optimization which takes a lot of time,
     # therefore reduce the maximum number of output boxes to the smallest usable value and sort out
     # low confidence boxes outside the model.
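Both config hunks only strip trailing whitespace from the comment, but the comment itself describes the design: the graph always emits its small, fixed maximum of boxes, and low-confidence rows are dropped by the caller so the output shape never changes under TensorRT. A minimal sketch of that caller-side filtering, with illustrative shapes and a hypothetical threshold:

```python
import numpy as np

def filter_boxes(dets: np.ndarray, min_score: float = 0.4) -> np.ndarray:
    # dets: (max_output_boxes, 5) rows of [x1, y1, x2, y2, score]; padded or
    # low-confidence rows carry low scores and are sorted out here, outside
    # the model, keeping the model's output count stable
    return dets[dets[:, 4] >= min_score]
```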
diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288.py
new file mode 100644
index 0000000..3d52547
--- /dev/null
+++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288.py
@@ -0,0 +1,19 @@
+_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime.py"]
+
+onnx_config = dict(
+    input_shape=[288, 384],
+    output_names=["kpts", "scores"],
+    dynamic_axes={
+        "input": {
+            0: "batch",
+        },
+        "kpts": {
+            0: "batch",
+        },
+        "scores": {
+            0: "batch",
+        },
+    },
+)
+
+codebase_config = dict(export_postprocess=True)  # export get_simcc_maximum
diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288_fp16.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288_fp16.py
new file mode 100644
index 0000000..fe0ca45
--- /dev/null
+++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288_fp16.py
@@ -0,0 +1,19 @@
+_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"]
+
+onnx_config = dict(
+    input_shape=[288, 384],
+    output_names=["kpts", "scores"],
+    dynamic_axes={
+        "input": {
+            0: "batch",
+        },
+        "kpts": {
+            0: "batch",
+        },
+        "scores": {
+            0: "batch",
+        },
+    },
+)
+
+codebase_config = dict(export_postprocess=True)  # export get_simcc_maximum
diff --git a/scripts/test_skelda_dataset.py b/scripts/test_skelda_dataset.py
index cc0b61c..aa6a148 100644
--- a/scripts/test_skelda_dataset.py
+++ b/scripts/test_skelda_dataset.py
@@ -53,6 +53,9 @@ default_min_match_score = 0.94
 # If the number of cameras is high, and the views are not occluded, use a higher value
 default_min_group_size = 1
 
+# Batch poses per image for faster processing
+# If images mostly contain only one person, disable it, since batching is slightly slower in that case
+default_batch_poses = True
 
 datasets = {
     "human36m": {
@@ -62,6 +65,7 @@ datasets = {
         "min_group_size": 1,
         "min_bbox_score": 0.4,
         "min_bbox_area": 0.1 * 0.1,
+        "batch_poses": False,
     },
     "panoptic": {
         "path": "/datasets/panoptic/skelda/test.json",
@@ -310,13 +314,14 @@ def main():
     min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
     min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
     min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
+    batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
 
     # Load 2D pose model
     whole_body = test_triangulate.whole_body
     if any((whole_body[k] for k in whole_body)):
         kpt_model = utils_2d_pose.load_wb_model()
     else:
-        kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area)
+        kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)
 
     # Manually set matplotlib backend
     try:
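The `batch_poses` flag follows the same per-dataset override pattern as the other thresholds in `test_skelda_dataset.py`: a dataset entry can pin the value, everything else falls back to the default. A reduced illustration (the `human36m` override is from the patch; the rest of the entries are trimmed):

```python
default_batch_poses = True  # batching helps when images contain several people

datasets = {
    "human36m": {"batch_poses": False},  # mostly a single person per image
    "panoptic": {},                      # no override, falls back to True
}

for name, cfg in datasets.items():
    batch_poses = cfg.get("batch_poses", default_batch_poses)
    print(name, "->", batch_poses)
```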
diff --git a/scripts/utils_2d_pose_ort.py b/scripts/utils_2d_pose_ort.py
index 1944983..7d4a91b 100644
--- a/scripts/utils_2d_pose_ort.py
+++ b/scripts/utils_2d_pose_ort.py
@@ -96,29 +96,12 @@ class BaseModel(ABC):
             if "image" in iname:
                 ishape = list(self.input_shapes[i])
                 if "batch_size" in ishape:
-                    if "TensorrtExecutionProvider" in self.providers:
-                        # Using different images sizes for TensorRT warmup takes too long
-                        ishape = [1, 1000, 1000, 3]
-                    else:
-                        ishape = [
-                            1,
-                            np.random.randint(300, 1000),
-                            np.random.randint(300, 1000),
-                            3,
-                        ]
+                    max_batch_size = 10
+                    ishape[0] = np.random.choice(
+                        list(range(1, max_batch_size + 1))
+                    )
                 tensor = np.random.random(ishape)
                 tensor = tensor * 255
-            elif "bbox" in iname:
-                tensor = np.array(
-                    [
-                        [
-                            np.random.randint(30, 100),
-                            np.random.randint(30, 100),
-                            np.random.randint(200, 300),
-                            np.random.randint(200, 300),
-                        ]
-                    ]
-                )
             else:
                 raise ValueError("Undefined input type:", iname)
 
@@ -401,35 +384,48 @@ class RTMPose(BaseModel):
         self.target_size = (384, 288)
         self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
 
-    def preprocess(self, image: np.ndarray, bbox: np.ndarray):
-        bbox = np.asarray(bbox)[0:4]
-        bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
-        bbox = bbox.round().astype(np.int32)
-        region = self.boxcrop.crop_resize_box(image, bbox)
-        tensor = np.asarray(region).astype(self.input_types[0], copy=False)
-        tensor = np.expand_dims(tensor, axis=0)
-        tensor = [tensor]
+    def preprocess(self, image: np.ndarray, bboxes: np.ndarray):
+        cutouts = []
+        for i in range(len(bboxes)):
+            bbox = np.asarray(bboxes[i])[0:4]
+            bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
+            bbox = bbox.round().astype(np.int32)
+            region = self.boxcrop.crop_resize_box(image, bbox)
+            tensor = np.asarray(region).astype(self.input_types[0], copy=False)
+            cutouts.append(tensor)
+
+        if len(bboxes) == 1:
+            cutouts = np.expand_dims(cutouts[0], axis=0)
+        else:
+            cutouts = np.stack(cutouts, axis=0)
+
+        tensor = [cutouts]
 
         return tensor
 
     def postprocess(
-        self, result: List[np.ndarray], image: np.ndarray, bbox: np.ndarray
+        self, result: List[np.ndarray], image: np.ndarray, bboxes: np.ndarray
     ):
-        scores = np.clip(result[1][0], 0, 1)
-        kp = np.concatenate([result[0][0], np.expand_dims(scores, axis=-1)], axis=-1)
+        kpts = []
+        for i in range(len(bboxes)):
+            scores = np.clip(result[1][i], 0, 1)
+            kp = np.concatenate(
+                [result[0][i], np.expand_dims(scores, axis=-1)], axis=-1
+            )
 
-        paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bbox)
-        kp[:, 0] -= paddings[0]
-        kp[:, 1] -= paddings[2]
-        kp[:, 0:2] /= scale
-        kp[:, 0] += bbox[0]
-        kp[:, 1] += bbox[1]
-        kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
-        max_w = image.shape[1] - 1
-        max_h = image.shape[0] - 1
-        kp[:, 0] = np.minimum(kp[:, 0], max_w)
-        kp[:, 1] = np.minimum(kp[:, 1], max_h)
+            paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i])
+            kp[:, 0] -= paddings[0]
+            kp[:, 1] -= paddings[2]
+            kp[:, 0:2] /= scale
+            kp[:, 0] += bbox[0]
+            kp[:, 1] += bbox[1]
+            kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
+            max_w = image.shape[1] - 1
+            max_h = image.shape[0] - 1
+            kp[:, 0] = np.minimum(kp[:, 0], max_w)
+            kp[:, 1] = np.minimum(kp[:, 1], max_h)
+            kpts.append(kp)
 
-        return kp
+        return kpts
 
 
 # ==================================================================================================
@@ -444,6 +440,8 @@ class TopDown:
         box_min_area: float,
         warmup: int = 30,
     ):
+        self.batch_poses = bool("Bx" in pose_model_path)
+
         self.det_model = RTMDet(
             det_model_path, box_conf_threshold, box_min_area, warmup
         )
@@ -451,22 +449,29 @@ class TopDown:
     def predict(self, image):
        boxes = self.det_model(image=image)
 
+        if len(boxes) == 0:
+            return []
+
         results = []
-        for i in range(boxes.shape[0]):
-            kp = self.pose_model(image=image, bbox=boxes[i])
-            results.append(kp)
+        if self.batch_poses:
+            results = self.pose_model(image=image, bboxes=boxes)
+        else:
+            for i in range(boxes.shape[0]):
+                kp = self.pose_model(image=image, bboxes=[boxes[i]])
+                results.append(kp[0])
+
         return results
 
 
 # ==================================================================================================
 
 
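Since `preprocess` and `postprocess` now simply loop over the boxes internally, the batched path and the per-box path should produce the same keypoints. A hypothetical consistency check, not part of the patch (`pose_model` is an `RTMPose` instance; the tolerance allows for small fp16 differences between batched and single-box kernels):

```python
import numpy as np

def batched_matches_looped(pose_model, image, boxes, atol=1e-3):
    # One call with all boxes vs. one call per box; both return a list of
    # (num_keypoints, 3) arrays in the same order as the input boxes
    batched = pose_model(image=image, bboxes=boxes)
    looped = [pose_model(image=image, bboxes=[box])[0] for box in boxes]
    return all(np.allclose(a, b, atol=atol) for a, b in zip(batched, looped))
```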
-def load_model(min_bbox_score=0.3, min_bbox_area=0.1 * 0.1):
+def load_model(min_bbox_score=0.3, min_bbox_area=0.1 * 0.1, batch_poses=False):
     print("Loading 2D model ...")
 
     model = TopDown(
-        "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx",
-        "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx",
+        "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx",
+        f"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_{'B' if batch_poses else '1'}x384x288x3_fp16_extra-steps.onnx",
         box_conf_threshold=min_bbox_score,
         box_min_area=min_bbox_area,
         warmup=30,
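The diff is cut off mid-call above. For context, a usage sketch of the updated loader, assuming `load_model` still returns the `TopDown` instance (as the call site in `test_skelda_dataset.py` suggests) and importing the module the way that script does:

```python
import numpy as np

import utils_2d_pose_ort as utils_2d_pose

# batch_poses=True selects the Bx384x288x3 export and runs all detected
# people through the pose model in a single batched call
kpt_model = utils_2d_pose.load_model(
    min_bbox_score=0.3, min_bbox_area=0.1 * 0.1, batch_poses=True
)

image = np.zeros((720, 1280, 3), dtype=np.uint8)  # placeholder frame
poses = kpt_model.predict(image)  # one (num_keypoints, 3) array per person
```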