diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py
index f80ea77..d3ace0e 100644
--- a/extras/mmdeploy/add_extra_steps.py
+++ b/extras/mmdeploy/add_extra_steps.py
@@ -145,6 +145,64 @@ def add_steps_to_onnx(model_path):
         # Update the output's data type info
         output.type.tensor_type.elem_type = TensorProto.FLOAT
 
+    # Merge the two outputs
+    if "det" in model_path:
+        r1_output = "dets"
+        r2_output = "labels"
+        out_name = "bboxes"
+        out_dim = 6
+    if "pose" in model_path:
+        r1_output = "kpts"
+        r2_output = "scores"
+        out_name = "keypoints"
+        out_dim = 3
+    if "det" in model_path or "pose" in model_path:
+        # Node to expand the second output: [B, N] -> [B, N, 1]
+        r2_expanded = r2_output + "_expanded"
+        unsqueeze_node = helper.make_node(
+            "Unsqueeze",
+            inputs=[r2_output],
+            outputs=[r2_expanded],
+            axes=[2],
+            name="Unsqueeze",
+        )
+
+        # Node to concatenate both outputs along the last axis
+        r12_merged = out_name
+        concat_node = helper.make_node(
+            "Concat",
+            inputs=[r1_output, r2_expanded],
+            outputs=[r12_merged],
+            axis=2,
+            name="Merged",
+        )
+
+        # Define the new concatenated output
+        merged_output = helper.make_tensor_value_info(
+            r12_merged,
+            TensorProto.FLOAT,
+            [
+                (
+                    graph.input[0].type.tensor_type.shape.dim[0].dim_value
+                    if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
+                    else None
+                ),
+                (
+                    graph.output[0].type.tensor_type.shape.dim[1].dim_value
+                    if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
+                    else None
+                ),
+                out_dim,
+            ],
+        )
+
+        # Update the graph: add the new nodes, drop the two old outputs,
+        # register the merged one
+        graph.node.append(unsqueeze_node)
+        graph.node.append(concat_node)
+        graph.output.pop()
+        graph.output.pop()
+        graph.output.append(merged_output)
+
     path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
     path = path.replace(".onnx", "_extra-steps.onnx")
     onnx.save(model, path)
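The merge above is plain graph surgery: for the detection model it expands `labels` from `[B, N]` to `[B, N, 1]` and concatenates it onto `dets` `[B, N, 5]`, giving a single `bboxes` output of shape `[B, N, 6]` (likewise `kpts` + `scores` -> `keypoints` `[B, N, 3]`). A minimal numpy sketch of what the two inserted nodes compute, with illustrative shapes; note that passing `axes` as a node attribute targets Unsqueeze from opset 12 or earlier, since opset 13 moved `axes` to a second input:

```python
import numpy as np

# Stand-ins for the detection model's two raw outputs (B=1, N=3 candidates).
dets = np.random.rand(1, 3, 5).astype(np.float32)   # (x1, y1, x2, y2, score)
labels = np.random.rand(1, 3).astype(np.float32)    # class ids, already cast to float

expanded = np.expand_dims(labels, axis=2)           # Unsqueeze(axes=[2]) -> [B, N, 1]
bboxes = np.concatenate([dets, expanded], axis=2)   # Concat(axis=2)      -> [B, N, 6]
assert bboxes.shape == (1, 3, 6)
```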
diff --git a/ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp b/ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
index 782a9f5..3b8a459 100644
--- a/ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
+++ b/ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
@@ -22,7 +22,7 @@ namespace utils_2d_pose
         explicit BaseModel(const std::string &model_path, int warmup_iterations);
         virtual ~BaseModel() = default;
 
-        std::vector<Ort::Value> call_by_image(const cv::Mat &img);
+        std::vector<std::vector<std::vector<float>>> call_by_image(const cv::Mat &img);
 
     protected:
         static Ort::Env &get_env()
@@ -200,7 +200,7 @@ namespace utils_2d_pose
 
     // =============================================================================================
 
-    std::vector<Ort::Value> BaseModel::call_by_image(const cv::Mat &img)
+    std::vector<std::vector<std::vector<float>>> BaseModel::call_by_image(const cv::Mat &img)
     {
         size_t height = img.rows;
         size_t width = img.cols;
@@ -222,8 +222,38 @@ namespace utils_2d_pose
                                                          shape.data(),
                                                          shape.size())));
 
+        // Call model
         auto outputs = call_model(input_tensors);
-        return outputs;
+
+        // Get pointer to output tensor
+        const float *tensor_data = outputs[0].GetTensorData<float>();
+        auto data_info = outputs[0].GetTensorTypeAndShapeInfo();
+        auto shape0 = data_info.GetShape();
+        size_t B = (size_t)shape0[0];
+        size_t N = (size_t)shape0[1];
+        size_t C = (size_t)shape0[2];
+
+        // Convert the flat row-major buffer to nested vectors of values
+        std::vector<std::vector<std::vector<float>>> data;
+        data.reserve(B);
+        for (size_t i = 0; i < B; i++)
+        {
+            std::vector<std::vector<float>> item;
+            item.reserve(N);
+            for (size_t j = 0; j < N; j++)
+            {
+                std::vector<float> values;
+                values.reserve(C);
+                for (size_t k = 0; k < C; k++)
+                {
+                    values.push_back(tensor_data[i * N * C + j * C + k]);
+                }
+                item.push_back(values);
+            }
+            data.push_back(item);
+        }
+
+        return data;
     }
 
     // =============================================================================================
 
@@ -566,8 +596,9 @@ namespace utils_2d_pose
         std::unique_ptr<LetterBox> letterbox;
 
         cv::Mat preprocess(const cv::Mat &image);
-        std::vector<std::vector<float>> postprocess(const std::vector<Ort::Value> &result,
-                                                    const cv::Mat &image);
+        std::vector<std::vector<float>> postprocess(
+            const std::vector<std::vector<std::vector<float>>> &result,
+            const cv::Mat &image);
         void clip_boxes(std::vector<std::vector<float>> &boxes,
                         const cv::Mat &image) const;
 
@@ -611,35 +642,21 @@ namespace utils_2d_pose
     // =============================================================================================
 
     std::vector<std::vector<float>> RTMDet::postprocess(
-        const std::vector<Ort::Value> &result,
+        const std::vector<std::vector<std::vector<float>>> &result,
         const cv::Mat &image)
     {
-        // Expected output shapes:
-        //   result[0] => shape [1, N, 5] => (x1,y1,x2,y2,score)
-        //   result[1] => shape [1, N]    => classes
+        // Expected result shape: [B, N, 6] => (x1,y1,x2,y2,score,class)
 
-        // Get pointer to boxes
-        const float *boxes_data = result[0].GetTensorData<float>();
-        const float *classes_data = result[1].GetTensorData<float>();
-        auto data_info = result[0].GetTensorTypeAndShapeInfo();
-        auto shape0 = data_info.GetShape();
-        if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 5)
-        {
-            throw std::runtime_error("parse_outputs: unexpected shape for boxes");
-        }
-        size_t N = (size_t)shape0[1];
-
-        // Extract human boxes
+        // Convert to vector of boxes
         std::vector<std::vector<float>> boxes;
-        boxes.reserve(N);
-        for (size_t i = 0; i < N; i++)
+        for (auto &item : result[0])
         {
-            float x1 = boxes_data[i * 5 + 0];
-            float y1 = boxes_data[i * 5 + 1];
-            float x2 = boxes_data[i * 5 + 2];
-            float y2 = boxes_data[i * 5 + 3];
-            float score = boxes_data[i * 5 + 4];
-            float cls = classes_data[i];
+            float x1 = item[0];
+            float y1 = item[1];
+            float x2 = item[2];
+            float y2 = item[3];
+            float score = item[4];
+            float cls = item[5];
 
             if (cls == 0)
             {
@@ -746,7 +763,7 @@ namespace utils_2d_pose
         cv::Mat preprocess(const cv::Mat &image,
                            const std::vector<std::vector<float>> &bboxes);
         std::vector<std::vector<float>> postprocess(
-            const std::vector<Ort::Value> &result,
+            const std::vector<std::vector<std::vector<float>>> &result,
             const cv::Mat &image,
             const std::vector<std::vector<float>> &bboxes);
 
@@ -801,33 +818,20 @@ namespace utils_2d_pose
     // =============================================================================================
 
     std::vector<std::vector<float>> RTMPose::postprocess(
-        const std::vector<Ort::Value> &result,
+        const std::vector<std::vector<std::vector<float>>> &result,
         const cv::Mat &image,
         const std::vector<std::vector<float>> &bboxes)
     {
-        // Expected output shapes:
-        //   result[0] => shape [1, N, 2] => (x,y)
-        //   result[1] => shape [1, N]    => scores
+        // Expected result shape: [B, N, 3] => (x,y,score)
 
-        // Get pointer to boxes
-        const float *kpts_data = result[0].GetTensorData<float>();
-        const float *scores_data = result[1].GetTensorData<float>();
-        auto data_info = result[0].GetTensorTypeAndShapeInfo();
-        auto shape0 = data_info.GetShape();
-        if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 2)
-        {
-            throw std::runtime_error("parse_outputs: unexpected shape for keypoints");
-        }
-        size_t N = (size_t)shape0[1];
-
-        // Extract human keypoints
+        // Convert to vector of keypoints
         std::vector<std::vector<float>> kpts;
-        kpts.reserve(N);
-        for (size_t i = 0; i < N; i++)
+        for (auto &item : result[0])
         {
-            float x = kpts_data[i * 2 + 0];
-            float y = kpts_data[i * 2 + 1];
-            float score = scores_data[i];
+            float x = item[0];
+            float y = item[1];
+            float score = item[2];
+
             kpts.push_back({x, y, score});
         }
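`call_by_image` now unflattens the merged output itself: ONNX Runtime hands back a flat row-major buffer for a `[B, N, C]` tensor, and the triple loop rebuilds it as nested vectors using the offset `i * N * C + j * C + k`. A quick Python sanity check of that index arithmetic, with illustrative shapes:

```python
import numpy as np

# Verify that i * N * C + j * C + k walks a [B, N, C] tensor in the same
# row-major order ONNX Runtime uses for its flat output buffer.
B, N, C = 1, 4, 6
flat = np.arange(B * N * C, dtype=np.float32)  # stand-in for GetTensorData<float>()

data = [[[float(flat[i * N * C + j * C + k]) for k in range(C)]
         for j in range(N)]
        for i in range(B)]

assert np.array_equal(np.asarray(data), flat.reshape(B, N, C))
```

Copying into nested vectors costs one allocation per row, which is negligible for the small `N` these models emit, and it frees the `postprocess` methods from holding on to `Ort::Value` objects.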
diff --git a/scripts/utils_2d_pose.py b/scripts/utils_2d_pose.py
index 63eb89e..7b364e3 100644
--- a/scripts/utils_2d_pose.py
+++ b/scripts/utils_2d_pose.py
@@ -338,9 +338,8 @@ class RTMDet(BaseModel):
     def postprocess(self, result: List[np.ndarray], image: np.ndarray):
         boxes = np.squeeze(result[0], axis=0)
-        classes = np.squeeze(result[1], axis=0)
 
-        human_class = classes[:] == 0
+        human_class = boxes[:, 5] == 0
         boxes = boxes[human_class]
 
         keep = boxes[:, 4] > self.conf_threshold
@@ -408,10 +407,7 @@ class RTMPose(BaseModel):
     ):
         kpts = []
         for i in range(len(bboxes)):
-            scores = np.clip(result[1][i], 0, 1)
-            kp = np.concatenate(
-                [result[0][i], np.expand_dims(scores, axis=-1)], axis=-1
-            )
+            kp = result[0][i]
 
             paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i])
             kp[:, 0] -= paddings[0]
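With the merged outputs, the Python side reduces to column slicing: class ids sit in column 5 of the `[N, 6]` detection array, and keypoint scores in column 2 of the `[N, 3]` pose array. One thing to note: the old RTMPose path clipped scores into `[0, 1]` with `np.clip` before concatenating, whereas the merged output is now used as-is. A made-up example of the new RTMDet filtering (values and threshold are illustrative, not real model output):

```python
import numpy as np

conf_threshold = 0.5  # stand-in for self.conf_threshold

boxes = np.array([
    [10.0, 10.0, 50.0, 80.0, 0.90, 0.0],  # person, confident -> kept
    [20.0, 15.0, 60.0, 90.0, 0.30, 0.0],  # person, low score -> dropped
    [ 5.0,  5.0, 30.0, 40.0, 0.95, 1.0],  # non-person class  -> dropped
], dtype=np.float32)                       # columns: x1, y1, x2, y2, score, class

boxes = boxes[boxes[:, 5] == 0]            # class filter, as in the new code
boxes = boxes[boxes[:, 4] > conf_threshold]
print(boxes.shape)                         # (1, 6)
```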