diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py
index 82b0dce..6d7054e 100644
--- a/extras/mmdeploy/add_extra_steps.py
+++ b/extras/mmdeploy/add_extra_steps.py
@@ -1,6 +1,6 @@
 import numpy as np
 import onnx
-from onnx import TensorProto, helper, numpy_helper
+from onnx import TensorProto, compose, helper, numpy_helper
 
 
 # ==================================================================================================
@@ -97,6 +97,37 @@ def add_steps_to_onnx(model_path):
         for i, j in enumerate([0, 3, 1, 2]):
             input_shape[j].dim_value = dims[i]
 
+    if "det" in model_path:
+        # Add preprocess model to main network
+        pp1_model = onnx.load(base_path + "det_preprocess.onnx")
+        model = compose.add_prefix(model, prefix="main_")
+        pp1_model = compose.add_prefix(pp1_model, prefix="preprocess_")
+        model = compose.merge_models(
+            pp1_model,
+            model,
+            io_map=[(pp1_model.graph.output[0].name, model.graph.input[0].name)],
+        )
+
+        # Add postprocess model
+        pp2_model = onnx.load(base_path + "det_postprocess.onnx")
+        pp2_model = compose.add_prefix(pp2_model, prefix="postprocess_")
+        model = compose.merge_models(
+            model,
+            pp2_model,
+            io_map=[
+                (model.graph.output[0].name, pp2_model.graph.input[1].name),
+            ],
+        )
+
+        # Update nodes from postprocess model to use the input of the main network
+        pp2_input_image_name = pp2_model.graph.input[0].name
+        main_input_name = model.graph.input[0].name
+        for node in model.graph.node:
+            for idx, name in enumerate(node.input):
+                if name == pp2_input_image_name:
+                    node.input[idx] = main_input_name
+        model.graph.input.pop(1)
+
     # Set input type to int8
     model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
index a619c01..89764dd 100644
--- a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py
@@ -3,3 +3,7 @@ _base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime.py"]
 onnx_config = dict(
     input_shape=[320, 320],
 )
+
+codebase_config = dict(
+    post_processing=dict(score_threshold=0.3, iou_threshold=0.3),
+)
diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
index f4a2b8b..62dff76 100644
--- a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
+++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py
@@ -3,3 +3,7 @@ _base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime-fp16.py"]
 onnx_config = dict(
     input_shape=[320, 320],
 )
+
+codebase_config = dict(
+    post_processing=dict(score_threshold=0.3, iou_threshold=0.3),
+)
diff --git a/extras/mmdeploy/make_extra_graphs.py b/extras/mmdeploy/make_extra_graphs.py
new file mode 100644
index 0000000..a961032
--- /dev/null
+++ b/extras/mmdeploy/make_extra_graphs.py
@@ -0,0 +1,161 @@
+import cv2
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# ==================================================================================================
+
+base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
+det_target_size = (320, 320)
+
+# ==================================================================================================
+
+
+class Letterbox(nn.Module):
+    def __init__(self, target_size, fill_value=128):
+        """Resize and pad image while keeping aspect ratio"""
+        super(Letterbox, self).__init__()
+
+        self.target_size = target_size
+        self.fill_value = fill_value
+
+    def calc_params(self, img):
+        ih, iw = img.shape[1:3]
+        th, tw = self.target_size
+
+        scale = torch.min(tw / iw, th / ih)
+        nw = torch.round(iw * scale)
+        nh = torch.round(ih * scale)
+
+        pad_w = tw - nw
+        pad_h = th - nh
+        pad_left = pad_w // 2
+        pad_top = pad_h // 2
+        pad_right = pad_w - pad_left
+        pad_bottom = pad_h - pad_top
+        paddings = (pad_left, pad_right, pad_top, pad_bottom)
+
+        return paddings, scale, (nw, nh)
+
+    def forward(self, img):
+        paddings, _, (nw, nh) = self.calc_params(img)
+
+        # Resize the image
+        img = img.to(torch.float32)
+        img = F.interpolate(
+            img.permute(0, 3, 1, 2), size=(nh, nw), mode="bilinear", align_corners=False
+        )
+        img = img.permute(0, 2, 3, 1)
+        img = img.round()
+
+        # Pad the image
+        img = F.pad(
+            img.permute(0, 3, 1, 2),
+            pad=paddings,
+            mode="constant",
+            value=self.fill_value,
+        )
+        img = img.permute(0, 2, 3, 1)
+        canvas = img
+
+        return canvas
+
+
+# ==================================================================================================
+
+
+class DetPreprocess(nn.Module):
+    def __init__(self, target_size, fill_value=114):
+        super(DetPreprocess, self).__init__()
+        self.letterbox = Letterbox(target_size, fill_value)
+
+    def forward(self, img):
+        # img: torch.Tensor of shape [batch, H, W, C], dtype=torch.uint8
+        img = self.letterbox(img)
+        return img
+
+
+# ==================================================================================================
+
+
+class DetPostprocess(nn.Module):
+    def __init__(self, target_size):
+        super(DetPostprocess, self).__init__()
+        self.letterbox = Letterbox(target_size)
+
+    def forward(self, img, boxes):
+        paddings, scale, _ = self.letterbox.calc_params(img)
+
+        boxes = boxes.float()
+        boxes[:, :, 0] -= paddings[0]
+        boxes[:, :, 2] -= paddings[0]
+        boxes[:, :, 1] -= paddings[2]
+        boxes[:, :, 3] -= paddings[2]
+        boxes[:, :, 0:4] /= scale
+
+        ih, iw = img.shape[1:3]
+        boxes = torch.max(boxes, torch.tensor(0))
+        b0 = boxes[:, :, 0]
+        b1 = boxes[:, :, 1]
+        b2 = boxes[:, :, 2]
+        b3 = boxes[:, :, 3]
+        b0 = torch.min(b0, iw - 1)
+        b1 = torch.min(b1, ih - 1)
+        b2 = torch.min(b2, iw - 1)
+        b3 = torch.min(b3, ih - 1)
+        boxes = torch.stack((b0, b1, b2, b3, boxes[:, :, 4]), dim=2)
+
+        return boxes
+
+
+# ==================================================================================================
+
+
+def main():
+
+    img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"
+    image = cv2.imread(img_path, cv2.IMREAD_COLOR)
+
+    # Initialize the DetPreprocess module
+    preprocess_model = DetPreprocess(target_size=det_target_size)
+    det_dummy_input_a0 = torch.from_numpy(image).unsqueeze(0)
+
+    # Export to ONNX
+    torch.onnx.export(
+        preprocess_model,
+        det_dummy_input_a0,
+        base_path + "det_preprocess.onnx",
+        opset_version=11,
+        input_names=["input_image"],
+        output_names=["preprocessed_image"],
+        dynamic_axes={
+            "input_image": {0: "batch_size", 1: "height", 2: "width"},
+            "preprocessed_image": {0: "batch_size"},
+        },
+    )
+
+    # Initialize the DetPostprocess module
+    postprocess_model = DetPostprocess(target_size=det_target_size)
+    det_dummy_input_b0 = torch.from_numpy(image).unsqueeze(0)
+    det_dummy_input_b1 = torch.rand(1, 10, 5)
+
+    # Export to ONNX
+    torch.onnx.export(
+        postprocess_model,
+        (det_dummy_input_b0, det_dummy_input_b1),
+        base_path + "det_postprocess.onnx",
+        opset_version=11,
+        input_names=["input_image", "boxes"],
+        output_names=["output_boxes"],
+        dynamic_axes={
+            "input_image": {0: "batch_size", 1: "height", 2: "width"},
+            "boxes": {0: "batch_size", 1: "num_boxes"},
+            "output_boxes": {0: "batch_size", 1: "num_boxes"},
+        },
+    )
+
+
+# ==================================================================================================
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/utils_2d_pose_ort.py b/scripts/utils_2d_pose_ort.py
index 8b925d8..2213970 100644
--- a/scripts/utils_2d_pose_ort.py
+++ b/scripts/utils_2d_pose_ort.py
@@ -31,6 +31,8 @@ class BaseModel(ABC):
 
         self.input_name = self.session.get_inputs()[0].name
         self.input_shape = self.session.get_inputs()[0].shape
+        if "batch_size" in self.input_shape:
+            self.input_shape = [1, 500, 500, 3]
 
         input_type = self.session.get_inputs()[0].type
         if input_type == "tensor(float16)":
@@ -72,116 +74,25 @@ class RTMDet(BaseModel):
         self,
         model_path: str,
         conf_threshold: float,
-        iou_threshold: float,
         warmup: int = 30,
     ):
         super(RTMDet, self).__init__(model_path, warmup)
 
         self.conf_threshold = conf_threshold
-        self.iou_threshold = iou_threshold
-        self.dx = 0
-        self.dy = 0
-        self.scale = 0
-
-    def letterbox(self, img: np.ndarray, target_size: List[int], fill_value: int = 128):
-        h, w = img.shape[:2]
-        tw, th = target_size
-
-        scale = min(tw / w, th / h)
-        nw, nh = int(w * scale), int(h * scale)
-        dx, dy = (tw - nw) // 2, (th - nh) // 2
-
-        canvas = np.full((th, tw, img.shape[2]), fill_value, dtype=img.dtype)
-        canvas[dy : dy + nh, dx : dx + nw, :] = cv2.resize(
-            img, (nw, nh), interpolation=cv2.INTER_LINEAR
-        )
-
-        return canvas, dx, dy, scale
-
-    def nms_optimized(
-        self, boxes: np.ndarray, iou_threshold: float, conf_threshold: float
-    ):
-        """
-        Perform Non-Maximum Suppression (NMS) on bounding boxes for a single class.
-        """
-
-        # Filter out boxes with low confidence scores
-        scores = boxes[:, 4]
-        keep = scores > conf_threshold
-        boxes = boxes[keep]
-        scores = scores[keep]
-
-        if boxes.shape[0] == 0:
-            return np.empty((0, 5), dtype=boxes.dtype)
-
-        # Compute the area of the bounding boxes
-        x1 = boxes[:, 0]
-        y1 = boxes[:, 1]
-        x2 = boxes[:, 2]
-        y2 = boxes[:, 3]
-        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
-
-        # Sort the boxes by scores in descending order
-        order = scores.argsort()[::-1]
-
-        keep_indices = []
-        while order.size > 0:
-            i = order[0]
-            keep_indices.append(i)
-
-            # Compute IoU of the current box with the rest
-            xx1 = np.maximum(x1[i], x1[order[1:]])
-            yy1 = np.maximum(y1[i], y1[order[1:]])
-            xx2 = np.minimum(x2[i], x2[order[1:]])
-            yy2 = np.minimum(y2[i], y2[order[1:]])
-
-            # Compute width and height of the overlapping area
-            w = np.maximum(0.0, xx2 - xx1 + 1)
-            h = np.maximum(0.0, yy2 - yy1 + 1)
-
-            # Compute the area of the intersection
-            inter = w * h
-
-            # Compute the IoU
-            iou = inter / (areas[i] + areas[order[1:]] - inter)
-
-            # Keep boxes with IoU less than the threshold
-            inds = np.where(iou <= iou_threshold)[0]
-
-            # Update the order array
-            order = order[inds + 1]
-
-        # Return the boxes that are kept
-        return boxes[keep_indices]
 
     def preprocess(self, image: np.ndarray):
-        th, tw = self.input_shape[1:3]
-        image, self.dx, self.dy, self.scale = self.letterbox(
-            image, (tw, th), fill_value=114
-        )
         tensor = np.asarray(image).astype(self.input_type, copy=False)
         tensor = np.expand_dims(tensor, axis=0)
         return tensor
 
     def postprocess(self, tensor: List[np.ndarray]):
-        boxes = np.squeeze(tensor[0], axis=0)
-        classes = np.expand_dims(np.squeeze(tensor[1], axis=0), axis=-1)
-        boxes = np.concatenate([boxes, classes], axis=-1)
+        boxes = np.squeeze(tensor[1], axis=0)
+        classes = np.squeeze(tensor[0], axis=0)
 
-        boxes = self.nms_optimized(boxes, self.iou_threshold, self.conf_threshold)
+        human_class = classes == 0
+        boxes = boxes[human_class]
 
-        if boxes.shape[0] == 0:
-            return boxes
-
-        human_class = boxes[..., -1] == 0
-        boxes = boxes[human_class][..., :4]
-
-        boxes[:, 0] -= self.dx
-        boxes[:, 2] -= self.dx
-        boxes[:, 1] -= self.dy
-        boxes[:, 3] -= self.dy
-
-        boxes = np.clip(boxes, a_min=0, a_max=None)
-        boxes[:, :4] /= self.scale
+        keep = boxes[:, 4] > self.conf_threshold
+        boxes = boxes[keep]
 
         return boxes
@@ -201,7 +112,7 @@ class RTMPose(BaseModel):
         target_size: List[int],
         padding_scale: float = 1.25,
     ):
-        start_x, start_y, end_x, end_y = box
+        start_x, start_y, end_x, end_y = box[0:4]
         target_w, target_h = target_size
 
         # Calculate original bounding box width and height
@@ -305,8 +216,7 @@ class TopDown:
         self,
         det_model_path,
         pose_model_path,
-        conf_threshold=0.6,
-        iou_threshold=0.6,
+        box_conf_threshold=0.6,
         warmup=30,
     ):
         if (not det_model_path.endswith(".onnx")) or (
@@ -314,7 +224,7 @@ class TopDown:
         ):
             raise ValueError("Only ONNX models are supported.")
 
-        self.det_model = RTMDet(det_model_path, conf_threshold, iou_threshold, warmup)
+        self.det_model = RTMDet(det_model_path, box_conf_threshold, warmup)
         self.pose_model = RTMPose(pose_model_path, warmup)
 
     def predict(self, image):
@@ -337,8 +247,7 @@ def load_model():
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx",
         # "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_extra-steps.onnx",
         "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx",
-        conf_threshold=0.3,
-        iou_threshold=0.3,
+        box_conf_threshold=0.3,
         warmup=30,
     )
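
Note (not part of the patch): a minimal sketch for sanity-checking the merged detector graph with onnxruntime. It assumes the merged model path from load_model(), the sample image path from make_extra_graphs.py, and the output ordering consumed by RTMDet.postprocess above (outputs[0] = class ids, outputs[1] = boxes); verify these against your export before relying on it.

import cv2
import numpy as np
import onnxruntime as ort

# Assumed paths, taken from load_model() and make_extra_graphs.py in this diff
model_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx"
img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"

session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

# The merged graph takes the raw uint8 image; letterboxing now happens inside the graph
image = cv2.imread(img_path, cv2.IMREAD_COLOR)
tensor = np.expand_dims(image, axis=0)  # [1, H, W, 3], dtype=uint8

input_name = session.get_inputs()[0].name
classes, boxes = session.run(None, {input_name: tensor})  # assumed output order

# Boxes are already mapped back to original image coordinates: [x1, y1, x2, y2, score]
person = boxes[0][classes[0] == 0]
person = person[person[:, 4] > 0.3]
print(person)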