From 19015c632634642d7419b8a6857052b0af9492be Mon Sep 17 00:00:00 2001
From: Daniel <daniel@mail.de>
Date: Fri, 29 Nov 2024 17:37:44 +0100
Subject: [PATCH] Exporting fp16 onnx models.

---
 extras/easypose/base_model.py                 | 61 +++++++++++++++++++
 extras/easypose/detection.py                  |  2 +-
 extras/easypose/pose.py                       |  2 +-
 extras/easypose/run_container.sh              |  1 +
 extras/mmdeploy/README.md                     | 22 ++++---
 extras/mmdeploy/add_norm_step.py              | 14 ++++-
 ...tection_onnxruntime_static-320x320_fp16.py |  5 ++
 ...n_simcc_onnxruntime_static-384x288_fp16.py |  8 +++
 extras/mmdeploy/dockerfile                    |  3 +
 scripts/utils_2d_pose_ep.py                   |  2 +
 10 files changed, 107 insertions(+), 13 deletions(-)
 create mode 100644 extras/easypose/base_model.py
 create mode 100644 extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
 create mode 100644 extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py

diff --git a/extras/easypose/base_model.py b/extras/easypose/base_model.py
new file mode 100644
index 0000000..b6dc7c5
--- /dev/null
+++ b/extras/easypose/base_model.py
@@ -0,0 +1,84 @@
+import warnings
+from abc import ABC, abstractmethod
+from typing import List
+import time
+import numpy as np
+import onnxruntime as ort
+from tqdm import tqdm
+
+
+class BaseModel(ABC):
+    """Base class for ONNX Runtime models with fp16/fp32 input support.
+
+    Subclasses implement `preprocess` and `postprocess`; `forward` chains
+    them around a single inference call.
+    """
+
+    def __init__(self, model_path: str, device: str = 'CUDA', warmup: int = 30):
+        """Create the inference session and optionally warm it up.
+
+        Args:
+            model_path: Path to the ONNX model file.
+            device: 'CUDA' or 'CPU'. 'CUDA' falls back to the CPU provider
+                (with a warning) if the CUDA provider is not available.
+            warmup: Number of warmup inference runs; values <= 0 skip warmup.
+
+        Raises:
+            ValueError: If `device` is neither 'CUDA' nor 'CPU'.
+        """
+        self.opt = ort.SessionOptions()
+
+        if device == 'CUDA':
+            provider = 'CUDAExecutionProvider'
+            if provider not in ort.get_available_providers():
+                warnings.warn("No CUDAExecutionProvider found, switched to CPUExecutionProvider.", UserWarning)
+                provider = 'CPUExecutionProvider'
+        elif device == 'CPU':
+            provider = 'CPUExecutionProvider'
+        else:
+            raise ValueError('Provider {} does not exist.'.format(device))
+
+        self.session = ort.InferenceSession(model_path,
+                                            providers=[provider],
+                                            sess_options=self.opt)
+
+        model_input = self.session.get_inputs()[0]
+        self.input_name = model_input.name
+        self.input_shape = model_input.shape
+
+        # fp16 exports expect half-precision input; everything else is fed as float32.
+        if model_input.type == 'tensor(float16)':
+            self.input_type = np.float16
+        else:
+            self.input_type = np.float32
+
+        if warmup > 0:
+            self.warmup(warmup)
+
+    @abstractmethod
+    def preprocess(self, image: np.ndarray):
+        """Convert an input image into the tensor fed to the session."""
+
+    @abstractmethod
+    def postprocess(self, tensor: List[np.ndarray]):
+        """Convert the raw session outputs into the model's result."""
+
+    def forward(self, image: np.ndarray):
+        """Run preprocess -> inference -> postprocess on one image."""
+        tensor = self.preprocess(image)
+        result = self.session.run(None, {self.input_name: tensor})
+        return self.postprocess(result)
+
+    def warmup(self, epoch: int = 30):
+        """Run `epoch` inferences on random data matching the model input."""
+        print('{} start warmup!'.format(self.__class__.__name__))
+        # Dynamic axes are reported as names or None instead of ints; use size 1
+        # for those so models exported with dynamic shapes can be warmed up too.
+        shape = [dim if isinstance(dim, int) else 1 for dim in self.input_shape]
+        tensor = np.random.random(shape).astype(self.input_type)
+        for _ in tqdm(range(epoch)):
+            self.session.run(None, {self.input_name: tensor})
+
+    def __call__(self, image: np.ndarray, *args, **kwargs):
+        """Alias for `forward`; extra arguments are accepted but ignored."""
+        return self.forward(image)
diff --git a/extras/easypose/detection.py b/extras/easypose/detection.py
index cc5f458..001d3ac 100644
--- a/extras/easypose/detection.py
+++ b/extras/easypose/detection.py
@@ -24,7 +24,7 @@ class RTMDet(BaseModel):
         tensor, self.dx, self.dy, self.scale = letterbox(
             image, (tw, th), fill_value=114
         )
-        tensor = tensor.astype(np.float32, copy=False)
+        tensor = tensor.astype(self.input_type, copy=False)
         tensor = tensor[..., ::-1]
         tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
         return tensor
diff --git a/extras/easypose/pose.py b/extras/easypose/pose.py
index fe5aa72..10d4086 100644
--- a/extras/easypose/pose.py
+++ b/extras/easypose/pose.py
@@ -45,7 +45,7 @@ class SimCC(BaseModel):
 
     def preprocess(self, image: np.ndarray):
         tensor, self.dx, self.dy, self.scale = image, 0, 0, 1
-        tensor = tensor.astype(np.float32, copy=False)
+        tensor = tensor.astype(self.input_type, copy=False)
         tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
         return tensor
 
diff --git a/extras/easypose/run_container.sh b/extras/easypose/run_container.sh
index c3536a1..e08d6bf 100644
--- a/extras/easypose/run_container.sh
+++ b/extras/easypose/run_container.sh
@@ -5,6 +5,7 @@ docker run --privileged --rm --network host -it \
   --gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
   --volume "$(pwd)"/:/RapidPoseTriangulation/ \
   --volume "$(pwd)"/extras/easypose/pipeline.py:/EasyPose/easypose/pipeline.py \
+  --volume "$(pwd)"/extras/easypose/base_model.py:/EasyPose/easypose/model/base_model.py \
   --volume "$(pwd)"/extras/easypose/detection.py:/EasyPose/easypose/model/detection.py \
   --volume "$(pwd)"/extras/easypose/pose.py:/EasyPose/easypose/model/pose.py \
   --volume "$(pwd)"/extras/easypose/utils.py:/EasyPose/easypose/model/utils.py \
diff --git a/extras/mmdeploy/README.md b/extras/mmdeploy/README.md
index cf5c094..85944bf 100644
--- a/extras/mmdeploy/README.md
+++ b/extras/mmdeploy/README.md
@@ -11,31 +11,37 @@ docker build --progress=plain -f extras/mmdeploy/dockerfile -t rpt_mmdeploy .
 ## ONNX
 
 ```bash
-cd /mmdeploy/
-cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py configs/mmdet/detection/
+export withFP16="_fp16"  # use withFP16="" to export the fp32 model instead
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320$withFP16.py /mmdeploy/configs/mmdet/detection/
 
+cd /mmdeploy/
 python3 ./tools/deploy.py \
-    configs/mmdet/detection/detection_onnxruntime_static-320x320.py \
+    configs/mmdet/detection/detection_onnxruntime_static-320x320$withFP16.py \
     /mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \
     https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \
     /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
     --work-dir work_dir \
     --show
 
-mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320.onnx
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320$withFP16.onnx
 ```
 
 ```bash
-cd /mmdeploy/
-cp /RapidPoseTriangulation/extras/mmdeploy/pose-detection_simcc_onnxruntime_static-384x288.py configs/mmpose/
+export withFP16="_fp16"  # use withFP16="" to export the fp32 model instead
+cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py /mmdeploy/configs/mmpose/
 
+cd /mmdeploy/
 python3 ./tools/deploy.py \
-    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288.py \
+    configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288$withFP16.py \
     /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \
     https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \
     /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \
     --work-dir work_dir \
     --show
 
-mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288.onnx
+mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288$withFP16.onnx
+```
+
+```bash
+python3 /RapidPoseTriangulation/extras/mmdeploy/add_norm_step.py
 ```
diff --git a/extras/mmdeploy/add_norm_step.py b/extras/mmdeploy/add_norm_step.py
index a8450fb..581a4a7 100644
--- a/extras/mmdeploy/add_norm_step.py
+++ b/extras/mmdeploy/add_norm_step.py
@@ -15,7 +15,7 @@ norm_std = 1.0 / np.array([57.375, 57.12, 58.395])
 # ==================================================================================================
 
 
-def add_steps_to_onnx(model_path, use_bgr=False):
+def add_steps_to_onnx(model_path):
 
     # Load existing model
     model = onnx.load(model_path)
@@ -24,6 +24,7 @@ def add_steps_to_onnx(model_path, use_bgr=False):
     mean = norm_mean.astype(np.float32)
     std = norm_std.astype(np.float32)
 
+    use_bgr = bool("rtmpose" in model_path)
     if use_bgr:
         mean = mean[::-1]
         std = std[::-1]
@@ -31,6 +32,11 @@ def add_steps_to_onnx(model_path, use_bgr=False):
     mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32)
     std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32)
 
+    use_fp16 = bool("fp16" in model_path)
+    if use_fp16:
+        mean = mean.astype(np.float16)
+        std = std.astype(np.float16)
+
     # Add the initializers to the graph
     mean_initializer = numpy_helper.from_array(mean, name="norm_mean")
     std_initializer = numpy_helper.from_array(std, name="norm_std")
@@ -72,8 +78,22 @@ def add_steps_to_onnx(model_path, use_bgr=False):
 
 
 def main():
-    add_steps_to_onnx(pose_model_path, use_bgr=True)
-    add_steps_to_onnx(det_model_path, use_bgr=False)
+    """Add the normalization step to every exported model that is present."""
+    import os
+
+    # The fp32 and fp16 variants are exported in separate runs, so any of them
+    # may be missing; skip those instead of aborting before the others are done.
+    model_paths = [
+        pose_model_path,
+        det_model_path,
+        det_model_path.replace(".onnx", "_fp16.onnx"),
+        pose_model_path.replace(".onnx", "_fp16.onnx"),
+    ]
+    for model_path in model_paths:
+        if os.path.isfile(model_path):
+            add_steps_to_onnx(model_path)
+        else:
+            print("Skipping missing model: {}".format(model_path))
 
 
 # ==================================================================================================
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
new file mode 100644
index 0000000..f4a2b8b
--- /dev/null
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
@@ -0,0 +1,5 @@
+_base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime-fp16.py"]  # static-shape base + fp16 ONNX Runtime backend
+
+onnx_config = dict(
+    input_shape=[320, 320],  # fixed export resolution; presumably [width, height] - TODO confirm
+)
diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py
new file mode 100644
index 0000000..1fee327
--- /dev/null
+++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py
@@ -0,0 +1,8 @@
+_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"]  # static-shape base + fp16 ONNX Runtime backend
+
+onnx_config = dict(
+    input_shape=[288, 384],  # fixed export resolution; presumably [width, height] - TODO confirm
+    output_names=["simcc_x", "simcc_y"],
+)
+
+codebase_config = dict(export_postprocess=False)  # do not export get_simcc_maximum
diff --git a/extras/mmdeploy/dockerfile b/extras/mmdeploy/dockerfile
index 9333901..7eae439 100644
--- a/extras/mmdeploy/dockerfile
+++ b/extras/mmdeploy/dockerfile
@@ -28,5 +28,8 @@ RUN echo 'export LD_LIBRARY_PATH=/mmdeploy/../mmdeploy-dep/onnxruntime-linux-x64
 # Show images
 RUN apt-get update && apt-get install -y --no-install-recommends python3-tk
 
+# Tool for fp16 conversion
+RUN pip3 install --upgrade --no-cache-dir onnxconverter_common
+
 WORKDIR /mmdeploy/
 CMD ["/bin/bash"]
diff --git a/scripts/utils_2d_pose_ep.py b/scripts/utils_2d_pose_ep.py
index 76da721..d1ec06e 100644
--- a/scripts/utils_2d_pose_ep.py
+++ b/scripts/utils_2d_pose_ep.py
@@ -17,8 +17,10 @@ def load_model():
     # model = ep.TopDown("rtmpose_m", "SimCC", "rtmdet_s")
     model = ep.TopDown(
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_with-norm.onnx",
+        # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_with-norm.onnx",
         "SimCC",
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_with-norm.onnx",
+        # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_with-norm.onnx",
         conf_threshold=0.3,
         iou_threshold=0.3,
         warmup=10,