Optional batched pose processing.

This commit is contained in:
Daniel
2024-12-18 16:22:08 +01:00
parent 7b8d209601
commit 07426fac2f
8 changed files with 151 additions and 75 deletions

View File

@ -11,39 +11,53 @@ docker build --progress=plain -f extras/mmdeploy/dockerfile -t rpt_mmdeploy .
## ONNX ## ONNX
```bash ```bash
export withFP16="_fp16"
cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320$withFP16.py configs/mmdet/detection/
cd /mmdeploy/ cd /mmdeploy/
export withFP16="_fp16"
cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320"$withFP16".py configs/mmdet/detection/
python3 ./tools/deploy.py \ python3 ./tools/deploy.py \
configs/mmdet/detection/detection_onnxruntime_static-320x320$withFP16.py \ configs/mmdet/detection/detection_onnxruntime_static-320x320"$withFP16".py \
/mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \ /mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \ https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \ --work-dir work_dir \
--show --show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320$withFP16.onnx mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x3x320x320"$withFP16".onnx
``` ```
```bash ```bash
export withFP16="_fp16"
cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py configs/mmpose/
cd /mmdeploy/ cd /mmdeploy/
export withFP16="_fp16"
cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py configs/mmpose/
cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py configs/mmpose/
python3 ./tools/deploy.py \ python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py \ configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \ /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \ https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \ --work-dir work_dir \
--show --show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288$withFP16.onnx mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx
python3 ./tools/deploy.py \
configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
/mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
--work-dir work_dir \
--show
mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx3x384x288"$withFP16".onnx
``` ```
```bash ```bash
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs.py python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs.py
```
```bash
python3 /RapidPoseTriangulation/extras/mmdeploy/add_extra_steps.py python3 /RapidPoseTriangulation/extras/mmdeploy/add_extra_steps.py
``` ```
@ -57,14 +71,17 @@ Run this directly in the inference container (the TensorRT versions need to be t
export withFP16="_fp16" export withFP16="_fp16"
trtexec --fp16 \ trtexec --fp16 \
--onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320"$withFP16"_extra-steps.onnx \ --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.onnx \
--saveEngine=end2end.engine --saveEngine=end2end.engine
mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.engine mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.engine
trtexec --fp16 \ trtexec --fp16 \
--onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288"$withFP16"_extra-steps.onnx \ --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx384x288x3"$withFP16"_extra-steps.onnx \
--saveEngine=end2end.engine --saveEngine=end2end.engine \
--minShapes=image_input:1x384x288x3 \
--optShapes=image_input:1x384x288x3 \
--maxShapes=image_input:1x384x288x3
mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x384x288x3"$withFP16"_extra-steps.engine mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x384x288x3"$withFP16"_extra-steps.engine
``` ```
@ -74,14 +91,14 @@ mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x
## Benchmark ## Benchmark
```bash ```bash
cd /mmdeploy/
export withFP16="_fp16" export withFP16="_fp16"
cd /mmdeploy/
python3 ./tools/profiler.py \ python3 ./tools/profiler.py \
configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py \ configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \
/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \ /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
/RapidPoseTriangulation/extras/mmdeploy/testimages/ \ /RapidPoseTriangulation/extras/mmdeploy/testimages/ \
--model /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288$withFP16.onnx \ --model /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx \
--shape 384x288 \ --shape 384x288 \
--device cuda \ --device cuda \
--warmup 50 \ --warmup 50 \

View File

@ -1,12 +1,15 @@
import re
import numpy as np import numpy as np
import onnx import onnx
from onnx import TensorProto, compose, helper, numpy_helper from onnx import TensorProto, helper, numpy_helper
# ================================================================================================== # ==================================================================================================
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/" base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
pose_model_path = base_path + "rtmpose-m_384x288.onnx" det_model_path = base_path + "rtmdet-nano_1x3x320x320.onnx"
det_model_path = base_path + "rtmdet-nano_320x320.onnx" pose_model_path1 = base_path + "rtmpose-m_Bx3x384x288.onnx"
pose_model_path2 = base_path + "rtmpose-m_1x3x384x288.onnx"
norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255) norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255)
norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255) norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255)
@ -97,6 +100,11 @@ def add_steps_to_onnx(model_path):
for i, j in enumerate([0, 3, 1, 2]): for i, j in enumerate([0, 3, 1, 2]):
input_shape[j].dim_value = dims[i] input_shape[j].dim_value = dims[i]
# Set the batch size to a defined string
input_shape = graph.input[0].type.tensor_type.shape.dim
if input_shape[0].dim_value == 0:
input_shape[0].dim_param = "batch_size"
# Rename the input tensor # Rename the input tensor
main_input_image_name = model.graph.input[0].name main_input_image_name = model.graph.input[0].name
for node in model.graph.node: for node in model.graph.node:
@ -108,7 +116,8 @@ def add_steps_to_onnx(model_path):
# Set input image type to uint8 # Set input image type to uint8
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8 model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
path = model_path.replace(".onnx", "_extra-steps.onnx") path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
path = path.replace(".onnx", "_extra-steps.onnx")
onnx.save(model, path) onnx.save(model, path)
@ -116,10 +125,12 @@ def add_steps_to_onnx(model_path):
def main(): def main():
add_steps_to_onnx(pose_model_path)
add_steps_to_onnx(det_model_path) add_steps_to_onnx(det_model_path)
add_steps_to_onnx(pose_model_path1)
add_steps_to_onnx(pose_model_path2)
add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx")) add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(pose_model_path.replace(".onnx", "_fp16.onnx")) add_steps_to_onnx(pose_model_path1.replace(".onnx", "_fp16.onnx"))
add_steps_to_onnx(pose_model_path2.replace(".onnx", "_fp16.onnx"))
# ================================================================================================== # ==================================================================================================

View File

@ -5,7 +5,7 @@ onnx_config = dict(
) )
codebase_config = dict( codebase_config = dict(
# For later TensorRT inference, the number of output boxes needs to be as stable as possible, # For later TensorRT inference, the number of output boxes needs to be as stable as possible,
# because a drop in the box count leads to a re-optimization which takes a lot of time, # because a drop in the box count leads to a re-optimization which takes a lot of time,
# therefore reduce the maximum number of output boxes to the smallest usable value and sort out # therefore reduce the maximum number of output boxes to the smallest usable value and sort out
# low confidence boxes outside the model. # low confidence boxes outside the model.

View File

@ -5,7 +5,7 @@ onnx_config = dict(
) )
codebase_config = dict( codebase_config = dict(
# For later TensorRT inference, the number of output boxes needs to be as stable as possible, # For later TensorRT inference, the number of output boxes needs to be as stable as possible,
# because a drop in the box count leads to a re-optimization which takes a lot of time, # because a drop in the box count leads to a re-optimization which takes a lot of time,
# therefore reduce the maximum number of output boxes to the smallest usable value and sort out # therefore reduce the maximum number of output boxes to the smallest usable value and sort out
# low confidence boxes outside the model. # low confidence boxes outside the model.

View File

@ -0,0 +1,19 @@
# Inherit from the static pose-detection config and the ONNX Runtime backend config.
_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime.py"]

onnx_config = dict(
    input_shape=[288, 384],
    output_names=["kpts", "scores"],
    # Mark axis 0 of the input and of both outputs as dynamic, named "batch".
    dynamic_axes={
        tensor_name: {0: "batch"} for tensor_name in ("input", "kpts", "scores")
    },
)

# Include the postprocessing step (get_simcc_maximum) in the export.
codebase_config = dict(export_postprocess=True)

View File

@ -0,0 +1,19 @@
# Inherit from the static pose-detection config and the ONNX Runtime FP16 backend config.
_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"]

onnx_config = dict(
    input_shape=[288, 384],
    output_names=["kpts", "scores"],
    # Mark axis 0 of the input and of both outputs as dynamic, named "batch".
    dynamic_axes={
        tensor_name: {0: "batch"} for tensor_name in ("input", "kpts", "scores")
    },
)

# Include the postprocessing step (get_simcc_maximum) in the export.
codebase_config = dict(export_postprocess=True)

View File

@ -53,6 +53,9 @@ default_min_match_score = 0.94
# If the number of cameras is high, and the views are not occluded, use a higher value # If the number of cameras is high, and the views are not occluded, use a higher value
default_min_group_size = 1 default_min_group_size = 1
# Batch poses per image for faster processing
# If there is usually only one person per image, disable it, because batching is slightly slower in that case
default_batch_poses = True
datasets = { datasets = {
"human36m": { "human36m": {
@ -62,6 +65,7 @@ datasets = {
"min_group_size": 1, "min_group_size": 1,
"min_bbox_score": 0.4, "min_bbox_score": 0.4,
"min_bbox_area": 0.1 * 0.1, "min_bbox_area": 0.1 * 0.1,
"batch_poses": False,
}, },
"panoptic": { "panoptic": {
"path": "/datasets/panoptic/skelda/test.json", "path": "/datasets/panoptic/skelda/test.json",
@ -310,13 +314,14 @@ def main():
min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size) min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score) min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area) min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
# Load 2D pose model # Load 2D pose model
whole_body = test_triangulate.whole_body whole_body = test_triangulate.whole_body
if any((whole_body[k] for k in whole_body)): if any((whole_body[k] for k in whole_body)):
kpt_model = utils_2d_pose.load_wb_model() kpt_model = utils_2d_pose.load_wb_model()
else: else:
kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area) kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses)
# Manually set matplotlib backend # Manually set matplotlib backend
try: try:

View File

@ -96,29 +96,12 @@ class BaseModel(ABC):
if "image" in iname: if "image" in iname:
ishape = list(self.input_shapes[i]) ishape = list(self.input_shapes[i])
if "batch_size" in ishape: if "batch_size" in ishape:
if "TensorrtExecutionProvider" in self.providers: max_batch_size = 10
# Using different images sizes for TensorRT warmup takes too long ishape[0] = np.random.choice(
ishape = [1, 1000, 1000, 3] list(range(1, max_batch_size + 1))
else: )
ishape = [
1,
np.random.randint(300, 1000),
np.random.randint(300, 1000),
3,
]
tensor = np.random.random(ishape) tensor = np.random.random(ishape)
tensor = tensor * 255 tensor = tensor * 255
elif "bbox" in iname:
tensor = np.array(
[
[
np.random.randint(30, 100),
np.random.randint(30, 100),
np.random.randint(200, 300),
np.random.randint(200, 300),
]
]
)
else: else:
raise ValueError("Undefined input type:", iname) raise ValueError("Undefined input type:", iname)
@ -401,35 +384,48 @@ class RTMPose(BaseModel):
self.target_size = (384, 288) self.target_size = (384, 288)
self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0) self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
def preprocess(self, image: np.ndarray, bbox: np.ndarray): def preprocess(self, image: np.ndarray, bboxes: np.ndarray):
bbox = np.asarray(bbox)[0:4] cutouts = []
bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8]) for i in range(len(bboxes)):
bbox = bbox.round().astype(np.int32) bbox = np.asarray(bboxes[i])[0:4]
region = self.boxcrop.crop_resize_box(image, bbox) bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
tensor = np.asarray(region).astype(self.input_types[0], copy=False) bbox = bbox.round().astype(np.int32)
tensor = np.expand_dims(tensor, axis=0) region = self.boxcrop.crop_resize_box(image, bbox)
tensor = [tensor] tensor = np.asarray(region).astype(self.input_types[0], copy=False)
cutouts.append(tensor)
if len(bboxes) == 1:
cutouts = np.expand_dims(cutouts[0], axis=0)
else:
cutouts = np.stack(cutouts, axis=0)
tensor = [cutouts]
return tensor return tensor
def postprocess( def postprocess(
self, result: List[np.ndarray], image: np.ndarray, bbox: np.ndarray self, result: List[np.ndarray], image: np.ndarray, bboxes: np.ndarray
): ):
scores = np.clip(result[1][0], 0, 1) kpts = []
kp = np.concatenate([result[0][0], np.expand_dims(scores, axis=-1)], axis=-1) for i in range(len(bboxes)):
scores = np.clip(result[1][i], 0, 1)
kp = np.concatenate(
[result[0][i], np.expand_dims(scores, axis=-1)], axis=-1
)
paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bbox) paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i])
kp[:, 0] -= paddings[0] kp[:, 0] -= paddings[0]
kp[:, 1] -= paddings[2] kp[:, 1] -= paddings[2]
kp[:, 0:2] /= scale kp[:, 0:2] /= scale
kp[:, 0] += bbox[0] kp[:, 0] += bbox[0]
kp[:, 1] += bbox[1] kp[:, 1] += bbox[1]
kp[:, 0:2] = np.maximum(kp[:, 0:2], 0) kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
max_w = image.shape[1] - 1 max_w = image.shape[1] - 1
max_h = image.shape[0] - 1 max_h = image.shape[0] - 1
kp[:, 0] = np.minimum(kp[:, 0], max_w) kp[:, 0] = np.minimum(kp[:, 0], max_w)
kp[:, 1] = np.minimum(kp[:, 1], max_h) kp[:, 1] = np.minimum(kp[:, 1], max_h)
kpts.append(kp)
return kp return kpts
# ================================================================================================== # ==================================================================================================
@ -444,6 +440,8 @@ class TopDown:
box_min_area: float, box_min_area: float,
warmup: int = 30, warmup: int = 30,
): ):
self.batch_poses = bool("Bx" in pose_model_path)
self.det_model = RTMDet( self.det_model = RTMDet(
det_model_path, box_conf_threshold, box_min_area, warmup det_model_path, box_conf_threshold, box_min_area, warmup
) )
@ -451,22 +449,29 @@ class TopDown:
def predict(self, image): def predict(self, image):
boxes = self.det_model(image=image) boxes = self.det_model(image=image)
if len(boxes) == 0:
return []
results = [] results = []
for i in range(boxes.shape[0]): if self.batch_poses:
kp = self.pose_model(image=image, bbox=boxes[i]) results = self.pose_model(image=image, bboxes=boxes)
results.append(kp) else:
for i in range(boxes.shape[0]):
kp = self.pose_model(image=image, bboxes=[boxes[i]])
results.append(kp[0])
return results return results
# ================================================================================================== # ==================================================================================================
def load_model(min_bbox_score=0.3, min_bbox_area=0.1 * 0.1): def load_model(min_bbox_score=0.3, min_bbox_area=0.1 * 0.1, batch_poses=False):
print("Loading 2D model ...") print("Loading 2D model ...")
model = TopDown( model = TopDown(
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx", "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx",
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx", f"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_{'B' if batch_poses else '1'}x384x288x3_fp16_extra-steps.onnx",
box_conf_threshold=min_bbox_score, box_conf_threshold=min_bbox_score,
box_min_area=min_bbox_area, box_min_area=min_bbox_area,
warmup=30, warmup=30,