Batching of poses in cpp implementation.

This commit is contained in:
Daniel
2025-01-20 12:20:25 +01:00
parent 8f2322694a
commit 86d8ccf797
5 changed files with 206 additions and 117 deletions

View File

@ -24,7 +24,7 @@ Run pose estimator with ros topics as inputs and publish detected poses.
```bash
cd /RapidPoseTriangulation/ros/rpt2D_wrapper_cpp/tests/
g++ -std=c++17 -O3 -march=native -DCOMPILE_EXAMPLE_MAIN \
g++ -std=c++17 -O3 -march=native -Wall -DCOMPILE_EXAMPLE_MAIN \
$(pkg-config --cflags opencv4) \
-I /onnxruntime/include \
-I /onnxruntime/include/onnxruntime/core/session \

View File

@ -32,7 +32,7 @@ static const std::string pose_out_topic = "/poses/" + cam_id;
static const float min_bbox_score = 0.4;
static const float min_bbox_area = 0.1 * 0.1;
static const bool batch_poses = false;
static const bool batch_poses = true;
// =================================================================================================
// =================================================================================================

View File

@ -22,7 +22,8 @@ namespace utils_2d_pose
explicit BaseModel(const std::string &model_path, int warmup_iterations);
virtual ~BaseModel() = default;
std::vector<std::vector<std::vector<float>>> call_by_image(const cv::Mat &img);
std::vector<std::vector<std::vector<float>>> call_by_image(
const std::vector<cv::Mat> &images);
protected:
static Ort::Env &get_env()
@ -157,6 +158,8 @@ namespace utils_2d_pose
void BaseModel::warmup(int epoch)
{
std::vector<int> use_batch_sizes = {4, 10, 5, 1, 9, 3, 7, 2, 8, 4, 6, 1, 3, 7, 2};
std::cout << "Warmup: ";
for (int e = 0; e < epoch; e++)
{
@ -170,28 +173,32 @@ namespace utils_2d_pose
// If the model's batch dimension is not fixed to 1, cycle through preset warmup batch sizes
if (batch_size != 1)
{
throw std::runtime_error("Warmup only supports batch=1 for demonstration.");
batch_size = use_batch_sizes[e % use_batch_sizes.size()];
}
// Create a multi-channel Mat of shape (Height, Width, Channels)
cv::Mat img(height, width, CV_8UC(channels));
std::vector<cv::Mat> images;
for (int i = 0; i < batch_size; i++)
{
// Create a multi-channel Mat of shape (Height, Width, Channels)
cv::Mat img(height, width, CV_8UC(channels));
if (channels == 1)
{
cv::randu(img, 0, 256);
}
else if (channels == 3)
{
cv::randu(img, cv::Scalar(0, 0, 0), cv::Scalar(256, 256, 256));
}
else
{
throw std::runtime_error("Unsupported number of channels for warmup.");
}
// For multi-channel images, randu requires matching channels in low/high
if (channels == 1)
{
cv::randu(img, 0, 256);
}
else if (channels == 3)
{
cv::randu(img, cv::Scalar(0, 0, 0), cv::Scalar(256, 256, 256));
}
else
{
throw std::runtime_error("Unsupported number of channels for warmup.");
images.push_back(std::move(img));
}
// Call the model
auto outputs = call_by_image(img);
auto outputs = call_by_image(images);
std::cout << "#";
}
@ -200,29 +207,46 @@ namespace utils_2d_pose
// =============================================================================================
std::vector<std::vector<std::vector<float>>> BaseModel::call_by_image(const cv::Mat &img)
std::vector<std::vector<std::vector<float>>> BaseModel::call_by_image(
const std::vector<cv::Mat> &images)
{
size_t height = img.rows;
size_t width = img.cols;
size_t channels = img.channels();
size_t batch_size = images.size();
int height = images[0].rows;
int width = images[0].cols;
int channels = images[0].channels();
// Create a vector of Ort::Value for the input tensors
std::vector<Ort::Value> input_tensors;
input_tensors.reserve(input_names.size());
// Flatten into single vector
size_t single_img_size = height * width * channels;
size_t total_elems = batch_size * single_img_size;
std::vector<uint8_t> batch_data(total_elems);
for (size_t b = 0; b < batch_size; b++)
{
auto &img = images[b];
if (img.rows != height || img.cols != width || img.channels() != channels)
{
throw std::runtime_error("All images must have the same dimensions!");
}
std::memcpy(batch_data.data() + b * single_img_size, img.data, single_img_size);
}
// Flatten and create an ONNX tensor
size_t total_elems = static_cast<size_t>(height * width * channels);
// Create an onnx tensor
auto mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
auto shape = input_shapes[0];
input_tensors.push_back(
std::move(Ort::Value::CreateTensor<uint8_t>(
mem_info,
img.data,
total_elems,
shape.data(),
shape.size())));
std::vector<int64_t> shape = {
static_cast<int64_t>(batch_size),
static_cast<int64_t>(height),
static_cast<int64_t>(width),
static_cast<int64_t>(channels),
};
Ort::Value input_tensor = Ort::Value::CreateTensor<uint8_t>(
mem_info,
batch_data.data(),
total_elems,
shape.data(),
shape.size());
// Call model
std::vector<Ort::Value> input_tensors;
input_tensors.push_back(std::move(input_tensor));
auto outputs = call_model(input_tensors);
// Get pointer to output tensor
@ -612,18 +636,19 @@ namespace utils_2d_pose
std::chrono::duration<float> elapsed;
cv::Mat preprocessed = preprocess(image);
std::vector<cv::Mat> inputs = {preprocessed};
elapsed = std::chrono::high_resolution_clock::now() - stime;
std::cout << "Preprocess time: " << elapsed.count() << "s\n";
stime = std::chrono::high_resolution_clock::now();
auto inputs = call_by_image(preprocessed);
auto results = call_by_image(inputs);
elapsed = std::chrono::high_resolution_clock::now() - stime;
std::cout << "Inference time: " << elapsed.count() << "s\n";
stime = std::chrono::high_resolution_clock::now();
auto outputs = postprocess(inputs, image);
auto outputs = postprocess(results, image);
elapsed = std::chrono::high_resolution_clock::now() - stime;
std::cout << "Postprocess time: " << elapsed.count() << "s\n";
@ -754,45 +779,47 @@ namespace utils_2d_pose
this->boxcrop = std::make_unique<BoxCrop>(target_size, 1.25f, 114);
}
std::vector<std::array<float, 3>> call(const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes);
std::vector<std::vector<std::array<float, 3>>> call(const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes);
private:
std::array<size_t, 2> target_size;
std::unique_ptr<BoxCrop> boxcrop;
cv::Mat preprocess(const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes);
std::vector<std::array<float, 3>> postprocess(
std::vector<cv::Mat> preprocess(
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes);
std::vector<std::vector<std::array<float, 3>>> postprocess(
const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes);
void clip_keypoints(std::vector<std::array<float, 3>> &kp,
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes) const;
const std::array<float, 5> &bbox) const;
};
// =============================================================================================
std::vector<std::array<float, 3>> RTMPose::call(
std::vector<std::vector<std::array<float, 3>>> RTMPose::call(
const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
{
auto stime = std::chrono::high_resolution_clock::now();
std::chrono::duration<float> elapsed;
cv::Mat preprocessed = preprocess(image, bboxes);
std::vector<cv::Mat> inputs = preprocess(image, bboxes);
elapsed = std::chrono::high_resolution_clock::now() - stime;
std::cout << "Preprocess time: " << elapsed.count() << "s\n";
stime = std::chrono::high_resolution_clock::now();
auto inputs = call_by_image(preprocessed);
auto results = call_by_image(inputs);
elapsed = std::chrono::high_resolution_clock::now() - stime;
std::cout << "Inference time: " << elapsed.count() << "s\n";
stime = std::chrono::high_resolution_clock::now();
auto outputs = postprocess(inputs, image, bboxes);
auto outputs = postprocess(results, image, bboxes);
elapsed = std::chrono::high_resolution_clock::now() - stime;
std::cout << "Postprocess time: " << elapsed.count() << "s\n";
@ -802,62 +829,64 @@ namespace utils_2d_pose
// =============================================================================================
cv::Mat RTMPose::preprocess(
std::vector<cv::Mat> RTMPose::preprocess(
const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes)
{
// Assert only one box used
if (bboxes.size() != 1)
std::vector<cv::Mat> crops;
for (auto &bbox : bboxes)
{
throw std::runtime_error("Only one box is supported!");
cv::Mat cropped = boxcrop->crop_resize_box(image, bbox);
crops.push_back(std::move(cropped));
}
cv::Mat cropped = boxcrop->crop_resize_box(image, bboxes[0]);
return cropped;
return crops;
}
// =============================================================================================
std::vector<std::array<float, 3>> RTMPose::postprocess(
std::vector<std::vector<std::array<float, 3>>> RTMPose::postprocess(
const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes)
{
// Expected result shape: [B, N, 3] => (x,y,score)
// Convert to vector of keypoints
std::vector<std::array<float, 3>> kpts;
for (auto &item : result[0])
std::vector<std::vector<std::array<float, 3>>> keypoints;
for (size_t i = 0; i < result.size(); i++)
{
float x = item[0];
float y = item[1];
float score = item[2];
// Convert to vector of keypoints
std::vector<std::array<float, 3>> kpts;
for (auto &item : result[i])
{
float x = item[0];
float y = item[1];
float score = item[2];
kpts.push_back({x, y, score});
kpts.push_back({x, y, score});
}
if (!kpts.empty())
{
// Shift by boxcrop padding and scale back
clip_keypoints(kpts, image, bboxes[i]);
keypoints.push_back(std::move(kpts));
}
}
if (kpts.empty())
{
return {};
}
// Shift by boxcrop padding and scale back
clip_keypoints(kpts, image, bboxes);
return kpts;
return keypoints;
}
// =============================================================================================
void RTMPose::clip_keypoints(std::vector<std::array<float, 3>> &kpts,
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes) const
const std::array<float, 5> &bbox) const
{
// Get paddings, scale from boxcrop
auto [paddings, scale, bbox, _] = boxcrop->calc_params(image, bboxes[0]);
auto [paddings, scale, box, _] = boxcrop->calc_params(image, bbox);
int pad_left = paddings[0];
int pad_top = paddings[2];
int box_left = bbox[0];
int box_top = bbox[1];
int box_left = box[0];
int box_top = box[1];
int img_w = image.cols;
int img_h = image.rows;
@ -917,10 +946,20 @@ namespace utils_2d_pose
auto bboxes = det_model->call(image);
std::vector<std::vector<std::array<float, 3>>> keypoints;
for (auto &bbox : bboxes)
if (this->batch_poses)
{
auto kpts = pose_model->call(image, {bbox});
keypoints.push_back(std::move(kpts));
if (!bboxes.empty())
{
keypoints = std::move(pose_model->call(image, bboxes));
}
}
else
{
for (size_t i = 0; i < bboxes.size(); i++)
{
auto kpts = pose_model->call(image, {bboxes[i]});
keypoints.push_back(std::move(kpts[0]));
}
}
return keypoints;
}
@ -937,18 +976,43 @@ namespace utils_2d_pose
bool batch_poses = false)
{
std::string base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/";
std::string model_det = base_path + "rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx";
std::string model_pose1 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
std::string model_pose2 = base_path + "rtmpose-l_wb_1x384x288x3_fp16_extra-steps.onnx";
std::string path_det = base_path + "rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx";
std::string path_pose_m1 = base_path + "rtmpose-m_1x384x288x3_fp16_extra-steps.onnx";
std::string path_pose_mb = base_path + "rtmpose-m_Bx384x288x3_fp16_extra-steps.onnx";
std::string path_pose_w1 = base_path + "rtmpose-l_wb_1x384x288x3_fp16_extra-steps.onnx";
std::string path_pose_wb = base_path + "rtmpose-l_wb_Bx384x288x3_fp16_extra-steps.onnx";
std::string model_pose = whole_body ? model_pose2 : model_pose1;
this->num_joints = whole_body ? 133 : 17;
std::string path_pose;
if (!whole_body)
{
if (!batch_poses)
{
path_pose = path_pose_m1;
}
else
{
path_pose = path_pose_mb;
}
}
else
{
if (!batch_poses)
{
path_pose = path_pose_w1;
}
else
{
path_pose = path_pose_wb;
}
}
std::cout << "Loading 2D" + std::string(whole_body ? "-WB" : "") + " models ..."
<< std::endl;
this->topdown_model = std::make_unique<TopDown>(
model_det, model_pose, min_bbox_score, min_bbox_area, batch_poses, 30);
path_det, path_pose, min_bbox_score, min_bbox_area, batch_poses, 30);
std::cout << "Loaded models \n"
<< std::endl;

View File

@ -25,49 +25,72 @@ int main(int argc, char **argv)
"/RapidPoseTriangulation/data/h1/60457274-img_003201.jpg",
};
// RTMDet model1(model_path1, 0.3, 0.1 * 0.1, 30);
// RTMPose model2(model_path2, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs1 = model1.call(img);
// std::cout << "Model1 outputs: " << outputs1[0][0] << " " << outputs1[0][1] << " "
// << outputs1[0][2] << " " << outputs1[0][3] << " " << outputs1[0][4] << " "
// << std::endl;
// for (size_t j = 0; j < outputs1.size(); j++)
// std::cout << "\nTesting RTMDet and RTMPose" << std::endl;
// RTMDet model1(model_path1, 0.3, 0.1 * 0.1, 30);
// RTMPose model2(model_path2, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// std::vector<std::array<float, 5>> bboxes = {outputs1[j]};
// auto outputs2 = model2.call(img, bboxes);
// std::cout << "Model2 outputs: " << outputs2[0][0] << " " << outputs2[0][1] << " "
// << outputs2[0][2] << " " << std::endl;
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs1 = model1.call(img);
// std::cout << "Model1 outputs: " << outputs1[0][0] << " " << outputs1[0][1] << " "
// << outputs1[0][2] << " " << outputs1[0][3] << " " << outputs1[0][4] << " "
// << std::endl;
// for (size_t j = 0; j < outputs1.size(); j++)
// {
// std::vector<std::array<float, 5>> bboxes = {outputs1[j]};
// auto outputs2 = model2.call(img, bboxes);
// std::cout << "Model2 outputs: " << outputs2[0][0][0] << " "
// << outputs2[0][0][1] << " " << outputs2[0][0][2] << " " << std::endl;
// }
// }
// }
// TopDown model3(model_path1, model_path2, 0.3, 0.1 * 0.1, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// std::cout << "\nTesting TopDown" << std::endl;
// TopDown model3(model_path1, model_path2, 0.3, 0.1 * 0.1, false, 30);
// for (size_t i = 0; i < img_paths.size(); i++)
// {
// cv::Mat img = cv::imread(img_paths[i]);
// cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
// auto outputs3 = model3.predict(img);
// std::cout << "Model3 outputs: " << outputs3[0][0][0] << " " << outputs3[0][0][1] << " "
// << outputs3[0][0][2] << " " << std::endl;
// auto outputs3 = model3.predict(img);
// std::cout << "Model3 outputs: " << outputs3[0][0][0] << " "
// << outputs3[0][0][1] << " " << outputs3[0][0][2] << " " << std::endl;
// }
// }
PosePredictor model4(false, 0.3, 0.1 * 0.1, false);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
std::cout << "\nTesting PosePredictor 1" << std::endl;
PosePredictor model4(false, 0.3, 0.1 * 0.1, false);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
}
auto outputs4 = model4.predict(images);
std::cout << "Model4 outputs: " << outputs4[0][0][0][0] << " "
<< outputs4[0][0][0][1] << " " << outputs4[0][0][0][2] << " " << std::endl;
}
auto outputs4 = model4.predict(images);
std::cout << "Model4 outputs: " << outputs4[0][0][0][0] << " " << outputs4[0][0][0][1] << " "
<< outputs4[0][0][0][2] << " " << std::endl;
{
std::cout << "\nTesting PosePredictor 2" << std::endl;
PosePredictor model5(false, 0.3, 0.1 * 0.1, true);
std::vector<cv::Mat> images;
for (size_t i = 0; i < img_paths.size(); i++)
{
cv::Mat img = cv::imread(img_paths[i]);
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
images.push_back(img);
}
auto outputs5 = model5.predict(images);
std::cout << "Model5 outputs: " << outputs5[0][0][0][0] << " "
<< outputs5[0][0][0][1] << " " << outputs5[0][0][0][2] << " " << std::endl;
}
return 0;
}

View File

@ -37,7 +37,7 @@ stop_flag = False
# Model config
min_bbox_score = 0.4
min_bbox_area = 0.1 * 0.1
batch_poses = False
batch_poses = True
# ==================================================================================================
@ -169,7 +169,9 @@ def main():
# Load 2D pose model
whole_body = test_triangulate.whole_body
if any((whole_body[k] for k in whole_body)):
kpt_model = utils_2d_pose.load_wb_model()
kpt_model = utils_2d_pose.load_wb_model(
min_bbox_score, min_bbox_area, batch_poses
)
else:
kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)