diff --git a/scripts/utils_2d_pose.hpp b/scripts/utils_2d_pose.hpp
index 2fd98c5..85b66a8 100644
--- a/scripts/utils_2d_pose.hpp
+++ b/scripts/utils_2d_pose.hpp
@@ -424,10 +424,10 @@ namespace utils_2d_pose
         }
 
         std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>>
-        calc_params(const cv::Mat &image, const std::array<float, 5> &bbox) const;
+        calc_params(const cv::Mat &image, const std::array<float, 6> &bbox) const;
 
         cv::Mat crop_resize_box(const cv::Mat &image,
-                                const std::array<float, 5> &bbox) const;
+                                const std::array<float, 6> &bbox) const;
 
     private:
         std::array<size_t, 2> target_size;
@@ -438,7 +438,7 @@ namespace utils_2d_pose
     // =============================================================================================
 
     std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>>
-    BoxCrop::calc_params(const cv::Mat &image, const std::array<float, 5> &bbox) const
+    BoxCrop::calc_params(const cv::Mat &image, const std::array<float, 6> &bbox) const
     {
         // Extract some params
         int img_h = image.rows;
@@ -560,7 +560,7 @@ namespace utils_2d_pose
     // =============================================================================================
 
     cv::Mat BoxCrop::crop_resize_box(const cv::Mat &image,
-                                     const std::array<float, 5> &bbox) const
+                                     const std::array<float, 6> &bbox) const
     {
         auto [paddings, _, new_box, new_size] = calc_params(image, bbox);
 
@@ -625,7 +625,7 @@ namespace utils_2d_pose
             this->letterbox = std::make_unique<LetterBox>(target_size, 114);
         }
 
-        std::vector<std::array<float, 5>> call(const cv::Mat &image);
+        std::vector<std::array<float, 6>> call(const cv::Mat &image);
 
     private:
         std::array<size_t, 2> target_size;
@@ -634,17 +634,17 @@ namespace utils_2d_pose
         std::unique_ptr<LetterBox> letterbox;
 
         cv::Mat preprocess(const cv::Mat &image);
-        std::vector<std::array<float, 5>> postprocess(
+        std::vector<std::array<float, 6>> postprocess(
             const std::vector<std::vector<std::vector<float>>> &result,
             const cv::Mat &image);
 
-        void clip_boxes(std::vector<std::array<float, 5>> &boxes,
+        void clip_boxes(std::vector<std::array<float, 6>> &boxes,
                         const cv::Mat &image) const;
     };
 
     // =============================================================================================
 
-    std::vector<std::array<float, 5>> RTMDet::call(const cv::Mat &image)
+    std::vector<std::array<float, 6>> RTMDet::call(const cv::Mat &image)
     {
         cv::Mat preprocessed = preprocess(image);
         std::vector<cv::Mat> inputs = {preprocessed};
@@ -663,14 +663,14 @@ namespace utils_2d_pose
 
     // =============================================================================================
 
-    std::vector<std::array<float, 5>> RTMDet::postprocess(
+    std::vector<std::array<float, 6>> RTMDet::postprocess(
         const std::vector<std::vector<std::vector<float>>> &result,
         const cv::Mat &image)
     {
         // Expected result shape: [B, N, 6] => (x1,y1,x2,y2,score,class)
 
         // Convert to vector of boxes
-        std::vector<std::array<float, 5>> boxes;
+        std::vector<std::array<float, 6>> boxes;
         for (auto &item : result[0])
         {
             float x1 = item[0];
@@ -679,11 +679,7 @@ namespace utils_2d_pose
             float y2 = item[3];
             float score = item[4];
             float cls = item[5];
-
-            if (cls == 0)
-            {
-                boxes.push_back({x1, y1, x2, y2, score});
-            }
+            boxes.push_back({x1, y1, x2, y2, score, cls});
         }
 
         if (boxes.empty())
@@ -721,7 +717,7 @@ namespace utils_2d_pose
 
     // =============================================================================================
 
-    void RTMDet::clip_boxes(std::vector<std::array<float, 5>> &boxes,
+    void RTMDet::clip_boxes(std::vector<std::array<float, 6>> &boxes,
                             const cv::Mat &image) const
     {
         // Get paddings, scale from letterbox
@@ -777,7 +773,7 @@ namespace utils_2d_pose
         }
 
         std::vector<std::vector<std::array<float, 3>>> call(const cv::Mat &image,
-                                                            const std::vector<std::array<float, 5>> &bboxes);
+                                                            const std::vector<std::array<float, 6>> &bboxes);
 
     private:
         std::array<size_t, 2> target_size;
@@ -785,21 +781,21 @@ namespace utils_2d_pose
 
         std::vector<cv::Mat> preprocess(
             const cv::Mat &image,
-            const std::vector<std::array<float, 5>> &bboxes);
+            const std::vector<std::array<float, 6>> &bboxes);
         std::vector<std::vector<std::array<float, 3>>> postprocess(
             const std::vector<std::vector<std::vector<float>>> &result,
             const cv::Mat &image,
-            const std::vector<std::array<float, 5>> &bboxes);
+            const std::vector<std::array<float, 6>> &bboxes);
 
         void clip_keypoints(std::vector<std::array<float, 3>> &kp,
                             const cv::Mat &image,
-                            const std::array<float, 5> &bbox) const;
+                            const std::array<float, 6> &bbox) const;
     };
 
     // =============================================================================================
 
     std::vector<std::vector<std::array<float, 3>>> RTMPose::call(
-        const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
+        const cv::Mat &image, const std::vector<std::array<float, 6>> &bboxes)
     {
         std::vector<cv::Mat> inputs = preprocess(image, bboxes);
         auto results = call_by_image(inputs);
@@ -810,7 +806,7 @@ namespace utils_2d_pose
     // =============================================================================================
 
     std::vector<cv::Mat> RTMPose::preprocess(
-        const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
+        const cv::Mat &image, const std::vector<std::array<float, 6>> &bboxes)
     {
         std::vector<cv::Mat> crops;
         for (auto &bbox : bboxes)
@@ -827,7 +823,7 @@ namespace utils_2d_pose
     std::vector<std::vector<std::array<float, 3>>> RTMPose::postprocess(
         const std::vector<std::vector<std::vector<float>>> &result,
         const cv::Mat &image,
-        const std::vector<std::array<float, 5>> &bboxes)
+        const std::vector<std::array<float, 6>> &bboxes)
     {
         // Expected result shape: [B, N, 3] => (x,y,score)
         std::vector<std::vector<std::array<float, 3>>> keypoints;
@@ -859,7 +855,7 @@ namespace utils_2d_pose
 
     void RTMPose::clip_keypoints(std::vector<std::array<float, 3>> &kpts,
                                  const cv::Mat &image,
-                                 const std::array<float, 5> &bbox) const
+                                 const std::array<float, 6> &bbox) const
     {
         // Get paddings, scale from boxcrop
         auto [paddings, scale, box, _] = boxcrop->calc_params(image, bbox);
@@ -923,6 +919,15 @@ namespace utils_2d_pose
     {
         auto bboxes = det_model->call(image);
 
+        // Drop boxes with non-person class
+        for (int i = bboxes.size() - 1; i >= 0; i--)
+        {
+            if (static_cast<int>(bboxes[i][5]) != 0)
+            {
+                bboxes.erase(bboxes.begin() + i);
+            }
+        }
+
         std::vector<std::vector<std::array<float, 3>>> poses;
         if (this->batch_poses)
         {