Filter bounding boxes later in the code.

This commit is contained in:
Daniel
2025-11-24 18:58:34 +01:00
parent 8fdf0d4096
commit 9a4e35f229

View File

@ -424,10 +424,10 @@ namespace utils_2d_pose
} }
std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>> std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>>
calc_params(const cv::Mat &image, const std::array<float, 5> &bbox) const; calc_params(const cv::Mat &image, const std::array<float, 6> &bbox) const;
cv::Mat crop_resize_box(const cv::Mat &image, cv::Mat crop_resize_box(const cv::Mat &image,
const std::array<float, 5> &bbox) const; const std::array<float, 6> &bbox) const;
private: private:
std::array<size_t, 2> target_size; std::array<size_t, 2> target_size;
@ -438,7 +438,7 @@ namespace utils_2d_pose
// ============================================================================================= // =============================================================================================
std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>> std::tuple<std::array<int, 4>, float, std::array<int, 4>, std::array<size_t, 2>>
BoxCrop::calc_params(const cv::Mat &image, const std::array<float, 5> &bbox) const BoxCrop::calc_params(const cv::Mat &image, const std::array<float, 6> &bbox) const
{ {
// Extract some params // Extract some params
int img_h = image.rows; int img_h = image.rows;
@ -560,7 +560,7 @@ namespace utils_2d_pose
// ============================================================================================= // =============================================================================================
cv::Mat BoxCrop::crop_resize_box(const cv::Mat &image, cv::Mat BoxCrop::crop_resize_box(const cv::Mat &image,
const std::array<float, 5> &bbox) const const std::array<float, 6> &bbox) const
{ {
auto [paddings, _, new_box, new_size] = calc_params(image, bbox); auto [paddings, _, new_box, new_size] = calc_params(image, bbox);
@ -625,7 +625,7 @@ namespace utils_2d_pose
this->letterbox = std::make_unique<LetterBox>(target_size, 114); this->letterbox = std::make_unique<LetterBox>(target_size, 114);
} }
std::vector<std::array<float, 5>> call(const cv::Mat &image); std::vector<std::array<float, 6>> call(const cv::Mat &image);
private: private:
std::array<size_t, 2> target_size; std::array<size_t, 2> target_size;
@ -634,17 +634,17 @@ namespace utils_2d_pose
std::unique_ptr<LetterBox> letterbox; std::unique_ptr<LetterBox> letterbox;
cv::Mat preprocess(const cv::Mat &image); cv::Mat preprocess(const cv::Mat &image);
std::vector<std::array<float, 5>> postprocess( std::vector<std::array<float, 6>> postprocess(
const std::vector<std::vector<std::vector<float>>> &result, const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image); const cv::Mat &image);
void clip_boxes(std::vector<std::array<float, 5>> &boxes, void clip_boxes(std::vector<std::array<float, 6>> &boxes,
const cv::Mat &image) const; const cv::Mat &image) const;
}; };
// ============================================================================================= // =============================================================================================
std::vector<std::array<float, 5>> RTMDet::call(const cv::Mat &image) std::vector<std::array<float, 6>> RTMDet::call(const cv::Mat &image)
{ {
cv::Mat preprocessed = preprocess(image); cv::Mat preprocessed = preprocess(image);
std::vector<cv::Mat> inputs = {preprocessed}; std::vector<cv::Mat> inputs = {preprocessed};
@ -663,14 +663,14 @@ namespace utils_2d_pose
// ============================================================================================= // =============================================================================================
std::vector<std::array<float, 5>> RTMDet::postprocess( std::vector<std::array<float, 6>> RTMDet::postprocess(
const std::vector<std::vector<std::vector<float>>> &result, const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image) const cv::Mat &image)
{ {
// Expected result shape: [B, N, 6] => (x1,y1,x2,y2,score,class) // Expected result shape: [B, N, 6] => (x1,y1,x2,y2,score,class)
// Convert to vector of boxes // Convert to vector of boxes
std::vector<std::array<float, 5>> boxes; std::vector<std::array<float, 6>> boxes;
for (auto &item : result[0]) for (auto &item : result[0])
{ {
float x1 = item[0]; float x1 = item[0];
@ -679,11 +679,7 @@ namespace utils_2d_pose
float y2 = item[3]; float y2 = item[3];
float score = item[4]; float score = item[4];
float cls = item[5]; float cls = item[5];
boxes.push_back({x1, y1, x2, y2, score, cls});
if (cls == 0)
{
boxes.push_back({x1, y1, x2, y2, score});
}
} }
if (boxes.empty()) if (boxes.empty())
@ -721,7 +717,7 @@ namespace utils_2d_pose
// ============================================================================================= // =============================================================================================
void RTMDet::clip_boxes(std::vector<std::array<float, 5>> &boxes, void RTMDet::clip_boxes(std::vector<std::array<float, 6>> &boxes,
const cv::Mat &image) const const cv::Mat &image) const
{ {
// Get paddings, scale from letterbox // Get paddings, scale from letterbox
@ -777,7 +773,7 @@ namespace utils_2d_pose
} }
std::vector<std::vector<std::array<float, 3>>> call(const cv::Mat &image, std::vector<std::vector<std::array<float, 3>>> call(const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes); const std::vector<std::array<float, 6>> &bboxes);
private: private:
std::array<size_t, 2> target_size; std::array<size_t, 2> target_size;
@ -785,21 +781,21 @@ namespace utils_2d_pose
std::vector<cv::Mat> preprocess( std::vector<cv::Mat> preprocess(
const cv::Mat &image, const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes); const std::vector<std::array<float, 6>> &bboxes);
std::vector<std::vector<std::array<float, 3>>> postprocess( std::vector<std::vector<std::array<float, 3>>> postprocess(
const std::vector<std::vector<std::vector<float>>> &result, const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image, const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes); const std::vector<std::array<float, 6>> &bboxes);
void clip_keypoints(std::vector<std::array<float, 3>> &kp, void clip_keypoints(std::vector<std::array<float, 3>> &kp,
const cv::Mat &image, const cv::Mat &image,
const std::array<float, 5> &bbox) const; const std::array<float, 6> &bbox) const;
}; };
// ============================================================================================= // =============================================================================================
std::vector<std::vector<std::array<float, 3>>> RTMPose::call( std::vector<std::vector<std::array<float, 3>>> RTMPose::call(
const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes) const cv::Mat &image, const std::vector<std::array<float, 6>> &bboxes)
{ {
std::vector<cv::Mat> inputs = preprocess(image, bboxes); std::vector<cv::Mat> inputs = preprocess(image, bboxes);
auto results = call_by_image(inputs); auto results = call_by_image(inputs);
@ -810,7 +806,7 @@ namespace utils_2d_pose
// ============================================================================================= // =============================================================================================
std::vector<cv::Mat> RTMPose::preprocess( std::vector<cv::Mat> RTMPose::preprocess(
const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes) const cv::Mat &image, const std::vector<std::array<float, 6>> &bboxes)
{ {
std::vector<cv::Mat> crops; std::vector<cv::Mat> crops;
for (auto &bbox : bboxes) for (auto &bbox : bboxes)
@ -827,7 +823,7 @@ namespace utils_2d_pose
std::vector<std::vector<std::array<float, 3>>> RTMPose::postprocess( std::vector<std::vector<std::array<float, 3>>> RTMPose::postprocess(
const std::vector<std::vector<std::vector<float>>> &result, const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image, const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes) const std::vector<std::array<float, 6>> &bboxes)
{ {
// Expected result shape: [B, N, 3] => (x,y,score) // Expected result shape: [B, N, 3] => (x,y,score)
std::vector<std::vector<std::array<float, 3>>> keypoints; std::vector<std::vector<std::array<float, 3>>> keypoints;
@ -859,7 +855,7 @@ namespace utils_2d_pose
void RTMPose::clip_keypoints(std::vector<std::array<float, 3>> &kpts, void RTMPose::clip_keypoints(std::vector<std::array<float, 3>> &kpts,
const cv::Mat &image, const cv::Mat &image,
const std::array<float, 5> &bbox) const const std::array<float, 6> &bbox) const
{ {
// Get paddings, scale from boxcrop // Get paddings, scale from boxcrop
auto [paddings, scale, box, _] = boxcrop->calc_params(image, bbox); auto [paddings, scale, box, _] = boxcrop->calc_params(image, bbox);
@ -923,6 +919,15 @@ namespace utils_2d_pose
{ {
auto bboxes = det_model->call(image); auto bboxes = det_model->call(image);
// Drop boxes with non-person class
for (int i = bboxes.size() - 1; i >= 0; i--)
{
if (static_cast<int>(bboxes[i][5]) != 0)
{
bboxes.erase(bboxes.begin() + i);
}
}
std::vector<std::vector<std::array<float, 3>>> poses; std::vector<std::vector<std::array<float, 3>>> poses;
if (this->batch_poses) if (this->batch_poses)
{ {