Merge model outputs directly in graph.

This commit is contained in:
Daniel
2025-01-17 18:33:19 +01:00
parent 97ea039f7d
commit 8f2322694a
3 changed files with 115 additions and 57 deletions

View File

@ -22,7 +22,7 @@ namespace utils_2d_pose
explicit BaseModel(const std::string &model_path, int warmup_iterations);
virtual ~BaseModel() = default;
std::vector<Ort::Value> call_by_image(const cv::Mat &img);
std::vector<std::vector<std::vector<float>>> call_by_image(const cv::Mat &img);
protected:
static Ort::Env &get_env()
@ -200,7 +200,7 @@ namespace utils_2d_pose
// =============================================================================================
std::vector<Ort::Value> BaseModel::call_by_image(const cv::Mat &img)
std::vector<std::vector<std::vector<float>>> BaseModel::call_by_image(const cv::Mat &img)
{
size_t height = img.rows;
size_t width = img.cols;
@ -222,8 +222,38 @@ namespace utils_2d_pose
shape.data(),
shape.size())));
// Call model
auto outputs = call_model(input_tensors);
return outputs;
// Get pointer to output tensor
const float *tensor_data = outputs[0].GetTensorData<float>();
auto data_info = outputs[0].GetTensorTypeAndShapeInfo();
auto shape0 = data_info.GetShape();
size_t B = (size_t)shape0[0];
size_t N = (size_t)shape0[1];
size_t C = (size_t)shape0[2];
// Convert to vector of values
std::vector<std::vector<std::vector<float>>> data;
data.reserve(B);
for (size_t i = 0; i < B; i++)
{
std::vector<std::vector<float>> item;
item.reserve(N);
for (size_t j = 0; j < N; j++)
{
std::vector<float> values;
values.reserve(C);
for (size_t k = 0; k < C; k++)
{
values.push_back(tensor_data[i * N * C + j * C + k]);
}
item.push_back(values);
}
data.push_back(item);
}
return data;
}
// =============================================================================================
@ -566,8 +596,9 @@ namespace utils_2d_pose
std::unique_ptr<LetterBox> letterbox;
cv::Mat preprocess(const cv::Mat &image);
std::vector<std::array<float, 5>> postprocess(const std::vector<Ort::Value> &result,
const cv::Mat &image);
std::vector<std::array<float, 5>> postprocess(
const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image);
void clip_boxes(std::vector<std::array<float, 5>> &boxes,
const cv::Mat &image) const;
@ -611,35 +642,21 @@ namespace utils_2d_pose
// =============================================================================================
std::vector<std::array<float, 5>> RTMDet::postprocess(
const std::vector<Ort::Value> &result,
const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image)
{
// // Expected output shapes:
// result[0] => shape [1, N, 5] => (x1,y1,x2,y2,score)
// result[1] => shape [1, N] => classes
// Expected result shape: [B, N, 6] => (x1,y1,x2,y2,score,class)
// Get pointer to boxes
const float *boxes_data = result[0].GetTensorData<float>();
const float *classes_data = result[1].GetTensorData<float>();
auto data_info = result[0].GetTensorTypeAndShapeInfo();
auto shape0 = data_info.GetShape();
if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 5)
{
throw std::runtime_error("parse_outputs: unexpected shape for boxes");
}
size_t N = (size_t)shape0[1];
// Extract human boxes
// Convert to vector of boxes
std::vector<std::array<float, 5>> boxes;
boxes.reserve(N);
for (size_t i = 0; i < N; i++)
for (auto &item : result[0])
{
float x1 = boxes_data[i * 5 + 0];
float y1 = boxes_data[i * 5 + 1];
float x2 = boxes_data[i * 5 + 2];
float y2 = boxes_data[i * 5 + 3];
float score = boxes_data[i * 5 + 4];
float cls = classes_data[i];
float x1 = item[0];
float y1 = item[1];
float x2 = item[2];
float y2 = item[3];
float score = item[4];
float cls = item[5];
if (cls == 0)
{
@ -746,7 +763,7 @@ namespace utils_2d_pose
cv::Mat preprocess(const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes);
std::vector<std::array<float, 3>> postprocess(
const std::vector<Ort::Value> &result,
const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes);
@ -801,33 +818,20 @@ namespace utils_2d_pose
// =============================================================================================
std::vector<std::array<float, 3>> RTMPose::postprocess(
const std::vector<Ort::Value> &result,
const std::vector<std::vector<std::vector<float>>> &result,
const cv::Mat &image,
const std::vector<std::array<float, 5>> &bboxes)
{
// // Expected output shapes:
// result[0] => shape [1, N, 2] => (x,y)
// result[1] => shape [1, N] => scores
// Expected result shape: [B, N, 3] => (x,y,score)
// Get pointer to boxes
const float *kpts_data = result[0].GetTensorData<float>();
const float *scores_data = result[1].GetTensorData<float>();
auto data_info = result[0].GetTensorTypeAndShapeInfo();
auto shape0 = data_info.GetShape();
if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 2)
{
throw std::runtime_error("parse_outputs: unexpected shape for keypoints");
}
size_t N = (size_t)shape0[1];
// Extract human keypoints
// Convert to vector of keypoints
std::vector<std::array<float, 3>> kpts;
kpts.reserve(N);
for (size_t i = 0; i < N; i++)
for (auto &item : result[0])
{
float x = kpts_data[i * 2 + 0];
float y = kpts_data[i * 2 + 1];
float score = scores_data[i];
float x = item[0];
float y = item[1];
float score = item[2];
kpts.push_back({x, y, score});
}