From 93d4611a91e10d398b7b5fdb3195df29d023472a Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 29 Nov 2024 15:18:57 +0100 Subject: [PATCH] Fixed running custom onnx models. --- extras/easypose/detection.py | 19 +++-- extras/easypose/pipeline.py | 127 +++++++++++++++++++++++++++---- extras/easypose/pose.py | 10 ++- extras/easypose/run_container.sh | 6 +- extras/easypose/utils.py | 18 +++-- scripts/utils_2d_pose_ep.py | 10 ++- 6 files changed, 158 insertions(+), 32 deletions(-) diff --git a/extras/easypose/detection.py b/extras/easypose/detection.py index d15e229..a57c524 100644 --- a/extras/easypose/detection.py +++ b/extras/easypose/detection.py @@ -21,18 +21,27 @@ class RTMDet(BaseModel): def preprocess(self, image: np.ndarray): th, tw = self.input_shape[2:] - image, self.dx, self.dy, self.scale = letterbox(image, (tw, th)) - tensor = (image - np.array((103.53, 116.28, 123.675))) / np.array((57.375, 57.12, 58.395)) - tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)).astype(np.float32) + tensor, self.dx, self.dy, self.scale = letterbox( + image, (tw, th), fill_value=114 + ) + tensor -= np.array((123.675, 116.28, 103.53)) + tensor /= np.array((58.395, 57.12, 57.375)) + tensor = tensor[..., ::-1] + tensor = ( + np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)).astype(np.float32) + ) return tensor def postprocess(self, tensor: List[np.ndarray]): boxes = tensor[0] + classes = tensor[1] boxes = np.squeeze(boxes, axis=0) - boxes[..., [4, 5]] = boxes[..., [5, 4]] + classes = np.squeeze(classes, axis=0) + classes = np.expand_dims(classes, axis=-1) + boxes = np.concatenate([boxes, classes], axis=-1) boxes = nms(boxes, self.iou_threshold, self.conf_threshold) - + if boxes.shape[0] == 0: return boxes diff --git a/extras/easypose/pipeline.py b/extras/easypose/pipeline.py index e661d1b..31bf8db 100644 --- a/extras/easypose/pipeline.py +++ b/extras/easypose/pipeline.py @@ -1,5 +1,6 @@ import os +import cv2 import numpy as np from easypose import model @@ -23,6 +24,87 @@ def get_det_model(det_model_path, model_type, conf_thre, iou_thre, device, warmu return det_model +def region_of_interest_warped( + image: np.ndarray, + box: np.ndarray, + target_size=(288, 384), + padding_scale: float = 1.25, +): + start_x, start_y, end_x, end_y = box + target_w, target_h = target_size + + # Calculate original bounding box width and height + bbox_w = end_x - start_x + bbox_h = end_y - start_y + + if bbox_w <= 0 or bbox_h <= 0: + raise ValueError("Invalid bounding box!") + + # Calculate the aspect ratios + bbox_aspect = bbox_w / bbox_h + target_aspect = target_w / target_h + + # Adjust the scaled bounding box to match the target aspect ratio + if bbox_aspect > target_aspect: + adjusted_h = bbox_w / target_aspect + adjusted_w = bbox_w + else: + adjusted_w = bbox_h * target_aspect + adjusted_h = bbox_h + + # Scale the bounding box by the padding_scale + scaled_bbox_w = adjusted_w * padding_scale + scaled_bbox_h = adjusted_h * padding_scale + + # Calculate the center of the original box + center_x = (start_x + end_x) / 2.0 + center_y = (start_y + end_y) / 2.0 + + # Calculate scaled bounding box coordinates + new_start_x = center_x - scaled_bbox_w / 2.0 + new_start_y = center_y - scaled_bbox_h / 2.0 + new_end_x = center_x + scaled_bbox_w / 2.0 + new_end_y = center_y + scaled_bbox_h / 2.0 + + # Define the new box coordinates + new_box = np.array( + [new_start_x, new_start_y, new_end_x, new_end_y], dtype=np.float32 + ) + scale = target_w / scaled_bbox_w + + # Define source and destination points for affine 
transformation
+    # See: /mmpose/structures/bbox/transforms.py
+    src_pts = np.array(
+        [
+            [center_x, center_y],
+            [new_start_x, center_y],
+            [new_start_x, center_y + (center_x - new_start_x)],
+        ],
+        dtype=np.float32,
+    )
+    dst_pts = np.array(
+        [
+            [target_w * 0.5, target_h * 0.5],
+            [0, target_h * 0.5],
+            [0, target_h * 0.5 + (target_w * 0.5 - 0)],
+        ],
+        dtype=np.float32,
+    )
+
+    # Compute the affine transformation matrix
+    M = cv2.getAffineTransform(src_pts, dst_pts)
+
+    # Apply affine transformation with border filling
+    extracted_region = cv2.warpAffine(
+        image,
+        M,
+        target_size,
+        flags=cv2.INTER_LINEAR,
+    )
+
+    return extracted_region, new_box, scale
+
+
 class TopDown:
     def __init__(self,
                  pose_model_name,
@@ -32,21 +114,24 @@ class TopDown:
                  iou_threshold=0.6,
                  device='CUDA',
                  warmup=30):
-        if pose_model_name not in AvailablePoseModels.POSE_MODELS:
+        if not pose_model_name.endswith('.onnx') and pose_model_name not in AvailablePoseModels.POSE_MODELS:
             raise ValueError(
                 'The {} human pose estimation model is not in the model repository.'.format(pose_model_name))
-        if pose_model_decoder not in AvailablePoseModels.POSE_MODELS[pose_model_name]:
+        if not pose_model_name.endswith('.onnx') and pose_model_decoder not in AvailablePoseModels.POSE_MODELS[pose_model_name]:
             raise ValueError(
                 'No {} decoding head for the {} model was found in the model repository.'.format(pose_model_decoder,
                                                                                                  pose_model_name))
-        if det_model_name not in AvailableDetModels.DET_MODELS:
+        if not det_model_name.endswith('.onnx') and det_model_name not in AvailableDetModels.DET_MODELS:
             raise ValueError(
                 'The {} detection model is not in the model repository.'.format(det_model_name))

-        pose_model_dir = get_model_path(AvailablePoseModels.POSE_MODELS[pose_model_name][pose_model_decoder],
-                                        detection_model=False)
-        pose_model_path = os.path.join(pose_model_dir,
-                                       AvailablePoseModels.POSE_MODELS[pose_model_name][pose_model_decoder])
+        if not pose_model_name.endswith('.onnx'):
+            pose_model_dir = get_model_path(AvailablePoseModels.POSE_MODELS[pose_model_name][pose_model_decoder],
+                                            detection_model=False)
+            pose_model_path = os.path.join(pose_model_dir,
+                                           AvailablePoseModels.POSE_MODELS[pose_model_name][pose_model_decoder])
+        else:
+            pose_model_path = pose_model_name

         if os.path.exists(pose_model_path):
             try:
@@ -62,11 +147,17 @@ class TopDown:
                 download(url, pose_model_dir)
                 self.pose_model = get_pose_model(pose_model_path, pose_model_decoder, device, warmup)

-        det_model_dir = get_model_path(AvailableDetModels.DET_MODELS[det_model_name]['file_name'],
-                                       detection_model=True)
-        det_model_path = os.path.join(det_model_dir,
-                                      AvailableDetModels.DET_MODELS[det_model_name]['file_name'])
-        det_model_type = AvailableDetModels.DET_MODELS[det_model_name]['model_type']
+        if not det_model_name.endswith('.onnx'):
+            det_model_dir = get_model_path(AvailableDetModels.DET_MODELS[det_model_name]['file_name'],
+                                           detection_model=True)
+            det_model_path = os.path.join(det_model_dir,
+                                          AvailableDetModels.DET_MODELS[det_model_name]['file_name'])
+            det_model_type = AvailableDetModels.DET_MODELS[det_model_name]['model_type']
+        else:
+            det_model_path = det_model_name
+            if "rtmdet" in det_model_name:
+                det_model_type = 'RTMDet'
+
         if os.path.exists(det_model_path):
             try:
                 self.det_model = get_det_model(det_model_path,
@@ -102,9 +193,17 @@ class TopDown:
         for i in range(boxes.shape[0]):
             p = Person()
             p.box = boxes[i]
-            region = region_of_interest(image, p.box)
+            region, p.box, _ = region_of_interest_warped(image, p.box)
             kp = self.pose_model(region)
-            p.keypoints = restore_keypoints(p.box, kp)
+
+            # See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample()
+            th, tw = region.shape[:2]
+            bw, bh = [p.box[2] - p.box[0], p.box[3] - p.box[1]]
+            kp[:, :2] = kp[:, :2] / np.array([tw, th]) * np.array([bw, bh])
+            kp[:, :2] += np.array([p.box[0] + bw / 2, p.box[1] + bh / 2])
+            kp[:, :2] -= 0.5 * np.array([bw, bh])
+
+            p.keypoints = kp
             results.append(p)

         return results
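Note on the keypoint remapping added to TopDown.__call__ above: the three array operations reduce to a single linear map from crop coordinates back to image coordinates, since the +bw/2 and -0.5*bw terms cancel to the box origin. A minimal sketch under the same assumptions as the patch (kp is a (K, 3) array of x, y, confidence in crop pixels; box is the expanded (x0, y0, x1, y1) box returned by region_of_interest_warped); the helper name crop_to_image is illustrative, not part of EasyPose:

    import numpy as np

    def crop_to_image(kp: np.ndarray, box: np.ndarray, region_hw: tuple) -> np.ndarray:
        # kp: (K, 3) keypoints as (x, y, conf) in crop pixel coordinates.
        # box: (4,) expanded box (x0, y0, x1, y1) in image coordinates.
        th, tw = region_hw
        bw, bh = box[2] - box[0], box[3] - box[1]
        out = kp.copy()
        # Scale crop coordinates to box size, then shift by the box origin.
        # Equivalent to the patch's three steps:
        #   kp / [tw, th] * [bw, bh] + [x0 + bw/2, y0 + bh/2] - 0.5 * [bw, bh]
        out[:, :2] = out[:, :2] / np.array([tw, th]) * np.array([bw, bh])
        out[:, :2] += np.array([box[0], box[1]])
        return out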
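The detection preprocess in this patch calls letterbox(image, (tw, th), fill_value=114), a helper that is not part of the diff. Below is a sketch of what it is assumed to do: fit the image into the target size without distortion and pad the remainder with fill_value. Whether the padding is centered and whether the result is float32 are assumptions (a float32 result would also keep the in-place mean/std normalization above valid for uint8 inputs):

    import cv2
    import numpy as np

    def letterbox(image: np.ndarray, size: tuple, fill_value: int = 114):
        # Resize to fit inside (tw, th) while preserving aspect ratio, pad the rest.
        tw, th = size
        h, w = image.shape[:2]
        scale = min(tw / w, th / h)
        nw, nh = int(round(w * scale)), int(round(h * scale))
        resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_LINEAR)
        canvas = np.full((th, tw, 3), fill_value, dtype=np.float32)
        dx, dy = (tw - nw) // 2, (th - nh) // 2  # top-left offset of the content
        canvas[dy:dy + nh, dx:dx + nw] = resized
        return canvas, dx, dy, scale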
diff --git a/extras/easypose/pose.py b/extras/easypose/pose.py
index ca33247..be8a29c 100644
--- a/extras/easypose/pose.py
+++ b/extras/easypose/pose.py
@@ -44,10 +44,12 @@ class SimCC(BaseModel):
         self.scale = 0

     def preprocess(self, image: np.ndarray):
-        th, tw = self.input_shape[2:]
-        image, self.dx, self.dy, self.scale = letterbox(image, (tw, th))
-        tensor = (image - np.array((103.53, 116.28, 123.675))) / np.array((57.375, 57.12, 58.395))
-        tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)).astype(np.float32)
+        # Copy to float32 so the in-place normalization below does not fail
+        # on (or mutate) the uint8 crop passed in by the caller.
+        tensor, self.dx, self.dy, self.scale = image.astype(np.float32), 0, 0, 1
+        tensor -= np.array((123.675, 116.28, 103.53))
+        tensor /= np.array((58.395, 57.12, 57.375))
+        tensor = (
+            np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)).astype(np.float32)
+        )
         return tensor

     def postprocess(self, tensor: List[np.ndarray]):
diff --git a/extras/easypose/run_container.sh b/extras/easypose/run_container.sh
index f8811f6..c3536a1 100644
--- a/extras/easypose/run_container.sh
+++ b/extras/easypose/run_container.sh
@@ -4,8 +4,12 @@ xhost +
 docker run --privileged --rm --network host -it \
     --gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
     --volume "$(pwd)"/:/RapidPoseTriangulation/ \
+    --volume "$(pwd)"/extras/easypose/pipeline.py:/EasyPose/easypose/pipeline.py \
+    --volume "$(pwd)"/extras/easypose/detection.py:/EasyPose/easypose/model/detection.py \
+    --volume "$(pwd)"/extras/easypose/pose.py:/EasyPose/easypose/model/pose.py \
+    --volume "$(pwd)"/extras/easypose/utils.py:/EasyPose/easypose/model/utils.py \
     --volume "$(pwd)"/../datasets/:/datasets/ \
-    --volume "$(pwd)"/../skelda/:/skelda/ \
+    --volume "$(pwd)"/skelda/:/skelda/ \
     --volume /tmp/.X11-unix:/tmp/.X11-unix \
     --env DISPLAY --env QT_X11_NO_MITSHM=1 \
     rpt_easypose
diff --git a/extras/easypose/utils.py b/extras/easypose/utils.py
index c268963..bb42582 100644
--- a/extras/easypose/utils.py
+++ b/extras/easypose/utils.py
@@ -177,18 +177,22 @@ def get_real_keypoints(keypoints: np.ndarray, heatmaps: np.ndarray, img_size: Se
     return keypoints


-def simcc_decoder(simcc_x: np.ndarray,
-                  simcc_y: np.ndarray,
-                  input_size: Sequence[int],
-                  dx: int,
-                  dy: int,
-                  scale: float):
+def simcc_decoder(
+    simcc_x: np.ndarray,
+    simcc_y: np.ndarray,
+    input_size: Sequence[int],
+    dx: int,
+    dy: int,
+    scale: float,
+):
+    # See: /mmpose/codecs/utils/post_processing.py - get_simcc_maximum()
+
     x = np.argmax(simcc_x, axis=-1, keepdims=True).astype(np.float32)
     y = np.argmax(simcc_y, axis=-1, keepdims=True).astype(np.float32)
     x_conf = np.max(simcc_x, axis=-1, keepdims=True)
     y_conf = np.max(simcc_y, axis=-1, keepdims=True)
-    conf = (x_conf + y_conf) / 2
+    conf = np.minimum(x_conf, y_conf)

     x /= simcc_x.shape[-1]
     y /= simcc_y.shape[-1]
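Note on the simcc_decoder change above: mmpose's get_simcc_maximum likewise takes the per-axis argmax and keeps the smaller of the two maxima as the keypoint confidence, since a prediction is only as reliable as its weaker axis. A self-contained toy run of the decode step (sizes and values are synthetic, not the real model's):

    import numpy as np

    K, WBINS, HBINS = 3, 8, 6  # toy keypoint count and bin counts
    rng = np.random.default_rng(0)
    simcc_x = rng.random((K, WBINS)).astype(np.float32)
    simcc_y = rng.random((K, HBINS)).astype(np.float32)

    x = np.argmax(simcc_x, axis=-1, keepdims=True).astype(np.float32)
    y = np.argmax(simcc_y, axis=-1, keepdims=True).astype(np.float32)
    x_conf = np.max(simcc_x, axis=-1, keepdims=True)
    y_conf = np.max(simcc_y, axis=-1, keepdims=True)

    conf = np.minimum(x_conf, y_conf)  # weaker axis bounds the confidence
    x /= simcc_x.shape[-1]  # normalize bin index to [0, 1)
    y /= simcc_y.shape[-1]

    print(np.concatenate([x, y, conf], axis=-1))  # (K, 3): x, y, confidence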
"rtmdet_s") + model = ep.TopDown( + "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384.onnx", + "SimCC", + "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet_nano_320.onnx", + conf_threshold=0.3, + iou_threshold=0.3, + warmup=10, + ) print("Loaded mmpose model") return model