From 19015c632634642d7419b8a6857052b0af9492be Mon Sep 17 00:00:00 2001
From: Daniel
Date: Fri, 29 Nov 2024 17:37:44 +0100
Subject: [PATCH] Exporting fp16 onnx models.

Add fp16 mmdeploy export configs for the detection and pose models and
document the export steps in the README. add_norm_step.py now derives the
BGR and fp16 handling from the model path and also processes the "_fp16"
exports. The EasyPose BaseModel override reads the input dtype from the
ONNX session, so preprocessing and warmup feed float16 tensors to fp16
models and float32 tensors otherwise.

---
 extras/easypose/base_model.py                 | 61 +++++++++++++++++++
 extras/easypose/detection.py                  |  2 +-
 extras/easypose/pose.py                       |  2 +-
 extras/easypose/run_container.sh              |  1 +
 extras/mmdeploy/README.md                     | 22 ++++---
 extras/mmdeploy/add_norm_step.py              | 14 ++++-
 ...tection_onnxruntime_static-320x320_fp16.py |  5 ++
 ...n_simcc_onnxruntime_static-384x288_fp16.py |  8 +++
 extras/mmdeploy/dockerfile                    |  3 +
 scripts/utils_2d_pose_ep.py                   |  2 +
 10 files changed, 107 insertions(+), 13 deletions(-)
 create mode 100644 extras/easypose/base_model.py
 create mode 100644 extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
 create mode 100644 extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py

diff --git a/extras/easypose/base_model.py b/extras/easypose/base_model.py
new file mode 100644
index 0000000..b6dc7c5
--- /dev/null
+++ b/extras/easypose/base_model.py
@@ -0,0 +1,61 @@
+import warnings
+from abc import ABC, abstractmethod
+from typing import List
+import time
+import numpy as np
+import onnxruntime as ort
+from tqdm import tqdm
+
+
+class BaseModel(ABC):
+    def __init__(self, model_path: str, device: str = 'CUDA', warmup: int = 30):
+        self.opt = ort.SessionOptions()
+
+        if device == 'CUDA':
+            provider = 'CUDAExecutionProvider'
+            if provider not in ort.get_available_providers():
+                warnings.warn("No CUDAExecutionProvider found, switched to CPUExecutionProvider.", UserWarning)
+                provider = 'CPUExecutionProvider'
+        elif device == 'CPU':
+            provider = 'CPUExecutionProvider'
+        else:
+            raise ValueError('Provider {} does not exist.'.format(device))
+
+        self.session = ort.InferenceSession(model_path,
+                                            providers=[provider],
+                                            sess_options=self.opt)
+
+        self.input_name = self.session.get_inputs()[0].name
+        self.input_shape = self.session.get_inputs()[0].shape
+
+        input_type = self.session.get_inputs()[0].type
+        if input_type == 'tensor(float16)':
+            self.input_type = np.float16
+        else:
+            self.input_type = np.float32
+
+        if warmup > 0:
+            self.warmup(warmup)
+
+    @abstractmethod
+    def preprocess(self, image: np.ndarray):
+        pass
+
+    @abstractmethod
+    def postprocess(self, tensor: List[np.ndarray]):
+        pass
+
+    def forward(self, image: np.ndarray):
+        tensor = self.preprocess(image)
+        result = self.session.run(None, {self.input_name: tensor})
+        output = self.postprocess(result)
+        return output
+
+    def warmup(self, epoch: int = 30):
+        print('{} start warmup!'.format(self.__class__.__name__))
+        tensor = np.random.random(self.input_shape).astype(self.input_type)
+        for _ in tqdm(range(epoch)):
+            self.session.run(None, {self.input_name: tensor})
+
+    def __call__(self, image: np.ndarray, *args, **kwargs):
+        return self.forward(image)
diff --git a/extras/easypose/detection.py b/extras/easypose/detection.py
index cc5f458..001d3ac 100644
--- a/extras/easypose/detection.py
+++ b/extras/easypose/detection.py
@@ -24,7 +24,7 @@ class RTMDet(BaseModel):
         tensor, self.dx, self.dy, self.scale = letterbox(
             image, (tw, th), fill_value=114
         )
-        tensor = tensor.astype(np.float32, copy=False)
+        tensor = tensor.astype(self.input_type, copy=False)
         tensor = tensor[..., ::-1]
         tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
         return tensor
diff --git a/extras/easypose/pose.py b/extras/easypose/pose.py
index fe5aa72..10d4086 100644
--- a/extras/easypose/pose.py
+++ b/extras/easypose/pose.py
@@ -45,7 +45,7 @@ class SimCC(BaseModel):
 
     def preprocess(self, image: np.ndarray):
         tensor, self.dx, self.dy, self.scale = image, 0, 0, 1
-        tensor = tensor.astype(np.float32, copy=False)
+        tensor = tensor.astype(self.input_type, copy=False)
         tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
         return tensor
 
diff --git a/extras/easypose/run_container.sh b/extras/easypose/run_container.sh
index c3536a1..e08d6bf 100644
--- a/extras/easypose/run_container.sh
+++ b/extras/easypose/run_container.sh
@@ -5,6 +5,7 @@ docker run --privileged --rm --network host -it \
     --gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
     --volume "$(pwd)"/:/RapidPoseTriangulation/ \
     --volume "$(pwd)"/extras/easypose/pipeline.py:/EasyPose/easypose/pipeline.py \
+    --volume "$(pwd)"/extras/easypose/base_model.py:/EasyPose/easypose/model/base_model.py \
     --volume "$(pwd)"/extras/easypose/detection.py:/EasyPose/easypose/model/detection.py \
     --volume "$(pwd)"/extras/easypose/pose.py:/EasyPose/easypose/model/pose.py \
     --volume "$(pwd)"/extras/easypose/utils.py:/EasyPose/easypose/model/utils.py \
diff --git a/extras/mmdeploy/README.md b/extras/mmdeploy/README.md
index cf5c094..85944bf 100644
--- a/extras/mmdeploy/README.md
+++ b/extras/mmdeploy/README.md
@@ -11,31 +11,37 @@ docker build --progress=plain -f extras/mmdeploy/dockerfile -t rpt_mmdeploy .
 ## ONNX
 
 ```bash
-cd /mmdeploy/
-cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py configs/mmdet/detection/
+export withFP16="_fp16"
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320$withFP16.py configs/mmdet/detection/
 
+cd /mmdeploy/
 python3 ./tools/deploy.py \
-    configs/mmdet/detection/detection_onnxruntime_static-320x320.py \
+    configs/mmdet/detection/detection_onnxruntime_static-320x320$withFP16.py \
     /mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \
     https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \
     /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
     --work-dir work_dir \
     --show
 
-mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320.onnx
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320$withFP16.onnx
 ```
 
 ```bash
-cd /mmdeploy/
-cp /RapidPoseTriangulation/extras/mmdeploy/pose-detection_simcc_onnxruntime_static-384x288.py configs/mmpose/
+export withFP16="_fp16"
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py configs/mmpose/
 
+cd /mmdeploy/
 python3 ./tools/deploy.py \
-    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288.py \
+    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py \
     /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
     https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
     /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
     --work-dir work_dir \
     --show
 
-mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288.onnx
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288$withFP16.onnx
+```
+
+```bash
+python3 /RapidPoseTriangulation/extras/mmdeploy/add_norm_step.py
 ```
diff --git a/extras/mmdeploy/add_norm_step.py b/extras/mmdeploy/add_norm_step.py
index a8450fb..581a4a7 100644
--- a/extras/mmdeploy/add_norm_step.py
+++ b/extras/mmdeploy/add_norm_step.py
@@ -15,7 +15,7 @@ norm_std = 1.0 / np.array([57.375, 57.12, 58.395])
 # ==================================================================================================
 
 
-def add_steps_to_onnx(model_path, use_bgr=False):
+def add_steps_to_onnx(model_path):
     # Load existing model
     model = onnx.load(model_path)
 
@@ -24,6 +24,7 @@ def add_steps_to_onnx(model_path, use_bgr=False):
 
     mean = norm_mean.astype(np.float32)
     std = norm_std.astype(np.float32)
+    use_bgr = bool("rtmpose" in model_path)
     if use_bgr:
         mean = mean[::-1]
         std = std[::-1]
@@ -31,6 +32,11 @@ def add_steps_to_onnx(model_path, use_bgr=False):
     mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32)
     std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32)
 
+    use_fp16 = bool("fp16" in model_path)
+    if use_fp16:
+        mean = mean.astype(np.float16)
+        std = std.astype(np.float16)
+
     # Add the initializers to the graph
     mean_initializer = numpy_helper.from_array(mean, name="norm_mean")
     std_initializer = numpy_helper.from_array(std, name="norm_std")
@@ -72,8 +78,10 @@ def add_steps_to_onnx(model_path, use_bgr=False):
 
 
 def main():
-    add_steps_to_onnx(pose_model_path, use_bgr=True)
-    add_steps_to_onnx(det_model_path, use_bgr=False)
+    add_steps_to_onnx(pose_model_path)
+    add_steps_to_onnx(det_model_path)
+    add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx"))
+    add_steps_to_onnx(pose_model_path.replace(".onnx", "_fp16.onnx"))
 
 
 # ==================================================================================================
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
new file mode 100644
index 0000000..f4a2b8b
--- /dev/null
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
@@ -0,0 +1,5 @@
+_base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime-fp16.py"]
+
+onnx_config = dict(
+    input_shape=[320, 320],
+)
diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py
new file mode 100644
index 0000000..1fee327
--- /dev/null
+++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py
@@ -0,0 +1,8 @@
+_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"]
+
+onnx_config = dict(
+    input_shape=[288, 384],
+    output_names=["simcc_x", "simcc_y"],
+)
+
+codebase_config = dict(export_postprocess=False)  # do not export get_simcc_maximum
diff --git a/extras/mmdeploy/dockerfile b/extras/mmdeploy/dockerfile
index 9333901..7eae439 100644
--- a/extras/mmdeploy/dockerfile
+++ b/extras/mmdeploy/dockerfile
@@ -28,5 +28,8 @@ RUN echo 'export LD_LIBRARY_PATH=/mmdeploy/../mmdeploy-dep/onnxruntime-linux-x64
 # Show images
 RUN apt-get update && apt-get install -y --no-install-recommends python3-tk
 
+# Tool for fp16 conversion
+RUN pip3 install --upgrade --no-cache-dir onnxconverter_common
+
 WORKDIR /mmdeploy/
 CMD ["/bin/bash"]
diff --git a/scripts/utils_2d_pose_ep.py b/scripts/utils_2d_pose_ep.py
index 76da721..d1ec06e 100644
--- a/scripts/utils_2d_pose_ep.py
+++ b/scripts/utils_2d_pose_ep.py
@@ -17,8 +17,10 @@ def load_model():
     # model = ep.TopDown("rtmpose_m", "SimCC", "rtmdet_s")
     model = ep.TopDown(
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_with-norm.onnx",
+        # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_with-norm.onnx",
         "SimCC",
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_with-norm.onnx",
+        # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_with-norm.onnx",
         conf_threshold=0.3,
         iou_threshold=0.3,
         warmup=10,