Merge model outputs directly in graph.
This commit is contained in:
@ -145,6 +145,64 @@ def add_steps_to_onnx(model_path):
|
||||
# Update the output's data type info
|
||||
output.type.tensor_type.elem_type = TensorProto.FLOAT
|
||||
|
||||
# Merge the two outputs
|
||||
if "det" in model_path:
|
||||
r1_output = "dets"
|
||||
r2_output = "labels"
|
||||
out_name = "bboxes"
|
||||
out_dim = 6
|
||||
if "pose" in model_path:
|
||||
r1_output = "kpts"
|
||||
r2_output = "scores"
|
||||
out_name = "keypoints"
|
||||
out_dim = 3
|
||||
if "det" in model_path or "pose" in model_path:
|
||||
# Node to expand
|
||||
r2_expanded = r2_output + "_expanded"
|
||||
unsqueeze_node = helper.make_node(
|
||||
"Unsqueeze",
|
||||
inputs=[r2_output],
|
||||
outputs=[r2_expanded],
|
||||
axes=[2],
|
||||
name="Unsqueeze",
|
||||
)
|
||||
|
||||
# Node to concatenate
|
||||
r12_merged = out_name
|
||||
concat_node = helper.make_node(
|
||||
"Concat",
|
||||
inputs=[r1_output, r2_expanded],
|
||||
outputs=[r12_merged],
|
||||
axis=2,
|
||||
name="Merged",
|
||||
)
|
||||
|
||||
# Define the new concatenated output
|
||||
merged_output = helper.make_tensor_value_info(
|
||||
r12_merged,
|
||||
TensorProto.FLOAT,
|
||||
[
|
||||
(
|
||||
graph.input[0].type.tensor_type.shape.dim[0].dim_value
|
||||
if graph.input[0].type.tensor_type.shape.dim[0].dim_value > 0
|
||||
else None
|
||||
),
|
||||
(
|
||||
graph.output[0].type.tensor_type.shape.dim[1].dim_value
|
||||
if graph.output[0].type.tensor_type.shape.dim[1].dim_value > 0
|
||||
else None
|
||||
),
|
||||
out_dim,
|
||||
],
|
||||
)
|
||||
|
||||
# Update the graph
|
||||
graph.node.append(unsqueeze_node)
|
||||
graph.node.append(concat_node)
|
||||
graph.output.pop()
|
||||
graph.output.pop()
|
||||
graph.output.append(merged_output)
|
||||
|
||||
path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
|
||||
path = path.replace(".onnx", "_extra-steps.onnx")
|
||||
onnx.save(model, path)
|
||||
|
||||
@ -22,7 +22,7 @@ namespace utils_2d_pose
|
||||
explicit BaseModel(const std::string &model_path, int warmup_iterations);
|
||||
virtual ~BaseModel() = default;
|
||||
|
||||
std::vector<Ort::Value> call_by_image(const cv::Mat &img);
|
||||
std::vector<std::vector<std::vector<float>>> call_by_image(const cv::Mat &img);
|
||||
|
||||
protected:
|
||||
static Ort::Env &get_env()
|
||||
@ -200,7 +200,7 @@ namespace utils_2d_pose
|
||||
|
||||
// =============================================================================================
|
||||
|
||||
std::vector<Ort::Value> BaseModel::call_by_image(const cv::Mat &img)
|
||||
std::vector<std::vector<std::vector<float>>> BaseModel::call_by_image(const cv::Mat &img)
|
||||
{
|
||||
size_t height = img.rows;
|
||||
size_t width = img.cols;
|
||||
@ -222,8 +222,38 @@ namespace utils_2d_pose
|
||||
shape.data(),
|
||||
shape.size())));
|
||||
|
||||
// Call model
|
||||
auto outputs = call_model(input_tensors);
|
||||
return outputs;
|
||||
|
||||
// Get pointer to output tensor
|
||||
const float *tensor_data = outputs[0].GetTensorData<float>();
|
||||
auto data_info = outputs[0].GetTensorTypeAndShapeInfo();
|
||||
auto shape0 = data_info.GetShape();
|
||||
size_t B = (size_t)shape0[0];
|
||||
size_t N = (size_t)shape0[1];
|
||||
size_t C = (size_t)shape0[2];
|
||||
|
||||
// Convert to vector of values
|
||||
std::vector<std::vector<std::vector<float>>> data;
|
||||
data.reserve(B);
|
||||
for (size_t i = 0; i < B; i++)
|
||||
{
|
||||
std::vector<std::vector<float>> item;
|
||||
item.reserve(N);
|
||||
for (size_t j = 0; j < N; j++)
|
||||
{
|
||||
std::vector<float> values;
|
||||
values.reserve(C);
|
||||
for (size_t k = 0; k < C; k++)
|
||||
{
|
||||
values.push_back(tensor_data[i * N * C + j * C + k]);
|
||||
}
|
||||
item.push_back(values);
|
||||
}
|
||||
data.push_back(item);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
@ -566,8 +596,9 @@ namespace utils_2d_pose
|
||||
std::unique_ptr<LetterBox> letterbox;
|
||||
|
||||
cv::Mat preprocess(const cv::Mat &image);
|
||||
std::vector<std::array<float, 5>> postprocess(const std::vector<Ort::Value> &result,
|
||||
const cv::Mat &image);
|
||||
std::vector<std::array<float, 5>> postprocess(
|
||||
const std::vector<std::vector<std::vector<float>>> &result,
|
||||
const cv::Mat &image);
|
||||
|
||||
void clip_boxes(std::vector<std::array<float, 5>> &boxes,
|
||||
const cv::Mat &image) const;
|
||||
@ -611,35 +642,21 @@ namespace utils_2d_pose
|
||||
// =============================================================================================
|
||||
|
||||
std::vector<std::array<float, 5>> RTMDet::postprocess(
|
||||
const std::vector<Ort::Value> &result,
|
||||
const std::vector<std::vector<std::vector<float>>> &result,
|
||||
const cv::Mat &image)
|
||||
{
|
||||
// // Expected output shapes:
|
||||
// result[0] => shape [1, N, 5] => (x1,y1,x2,y2,score)
|
||||
// result[1] => shape [1, N] => classes
|
||||
// Expected result shape: [B, N, 6] => (x1,y1,x2,y2,score,class)
|
||||
|
||||
// Get pointer to boxes
|
||||
const float *boxes_data = result[0].GetTensorData<float>();
|
||||
const float *classes_data = result[1].GetTensorData<float>();
|
||||
auto data_info = result[0].GetTensorTypeAndShapeInfo();
|
||||
auto shape0 = data_info.GetShape();
|
||||
if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 5)
|
||||
{
|
||||
throw std::runtime_error("parse_outputs: unexpected shape for boxes");
|
||||
}
|
||||
size_t N = (size_t)shape0[1];
|
||||
|
||||
// Extract human boxes
|
||||
// Convert to vector of boxes
|
||||
std::vector<std::array<float, 5>> boxes;
|
||||
boxes.reserve(N);
|
||||
for (size_t i = 0; i < N; i++)
|
||||
for (auto &item : result[0])
|
||||
{
|
||||
float x1 = boxes_data[i * 5 + 0];
|
||||
float y1 = boxes_data[i * 5 + 1];
|
||||
float x2 = boxes_data[i * 5 + 2];
|
||||
float y2 = boxes_data[i * 5 + 3];
|
||||
float score = boxes_data[i * 5 + 4];
|
||||
float cls = classes_data[i];
|
||||
float x1 = item[0];
|
||||
float y1 = item[1];
|
||||
float x2 = item[2];
|
||||
float y2 = item[3];
|
||||
float score = item[4];
|
||||
float cls = item[5];
|
||||
|
||||
if (cls == 0)
|
||||
{
|
||||
@ -746,7 +763,7 @@ namespace utils_2d_pose
|
||||
|
||||
cv::Mat preprocess(const cv::Mat &image, const std::vector<std::array<float, 5>> &bboxes);
|
||||
std::vector<std::array<float, 3>> postprocess(
|
||||
const std::vector<Ort::Value> &result,
|
||||
const std::vector<std::vector<std::vector<float>>> &result,
|
||||
const cv::Mat &image,
|
||||
const std::vector<std::array<float, 5>> &bboxes);
|
||||
|
||||
@ -801,33 +818,20 @@ namespace utils_2d_pose
|
||||
// =============================================================================================
|
||||
|
||||
std::vector<std::array<float, 3>> RTMPose::postprocess(
|
||||
const std::vector<Ort::Value> &result,
|
||||
const std::vector<std::vector<std::vector<float>>> &result,
|
||||
const cv::Mat &image,
|
||||
const std::vector<std::array<float, 5>> &bboxes)
|
||||
{
|
||||
// // Expected output shapes:
|
||||
// result[0] => shape [1, N, 2] => (x,y)
|
||||
// result[1] => shape [1, N] => scores
|
||||
// Expected result shape: [B, N, 3] => (x,y,score)
|
||||
|
||||
// Get pointer to boxes
|
||||
const float *kpts_data = result[0].GetTensorData<float>();
|
||||
const float *scores_data = result[1].GetTensorData<float>();
|
||||
auto data_info = result[0].GetTensorTypeAndShapeInfo();
|
||||
auto shape0 = data_info.GetShape();
|
||||
if (shape0.size() != 3 || shape0[0] != 1 || shape0[2] != 2)
|
||||
{
|
||||
throw std::runtime_error("parse_outputs: unexpected shape for keypoints");
|
||||
}
|
||||
size_t N = (size_t)shape0[1];
|
||||
|
||||
// Extract human keypoints
|
||||
// Convert to vector of keypoints
|
||||
std::vector<std::array<float, 3>> kpts;
|
||||
kpts.reserve(N);
|
||||
for (size_t i = 0; i < N; i++)
|
||||
for (auto &item : result[0])
|
||||
{
|
||||
float x = kpts_data[i * 2 + 0];
|
||||
float y = kpts_data[i * 2 + 1];
|
||||
float score = scores_data[i];
|
||||
float x = item[0];
|
||||
float y = item[1];
|
||||
float score = item[2];
|
||||
|
||||
kpts.push_back({x, y, score});
|
||||
}
|
||||
|
||||
|
||||
@ -338,9 +338,8 @@ class RTMDet(BaseModel):
|
||||
|
||||
def postprocess(self, result: List[np.ndarray], image: np.ndarray):
|
||||
boxes = np.squeeze(result[0], axis=0)
|
||||
classes = np.squeeze(result[1], axis=0)
|
||||
|
||||
human_class = classes[:] == 0
|
||||
human_class = boxes[:, 5] == 0
|
||||
boxes = boxes[human_class]
|
||||
|
||||
keep = boxes[:, 4] > self.conf_threshold
|
||||
@ -408,10 +407,7 @@ class RTMPose(BaseModel):
|
||||
):
|
||||
kpts = []
|
||||
for i in range(len(bboxes)):
|
||||
scores = np.clip(result[1][i], 0, 1)
|
||||
kp = np.concatenate(
|
||||
[result[0][i], np.expand_dims(scores, axis=-1)], axis=-1
|
||||
)
|
||||
kp = result[0][i]
|
||||
|
||||
paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i])
|
||||
kp[:, 0] -= paddings[0]
|
||||
|
||||
Reference in New Issue
Block a user