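Some mixed improvements, including: strict input-type mapping (unknown ONNX input types now raise ValueError instead of silently defaulting to float32), seeded warmup inputs, letterbox/box-crop padding via cv2.copyMakeBorder, simplified box/keypoint clipping, and removal of an unused tensor conversion in RTMPose.preprocess.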

2024-12-06 13:58:34 +01:00
parent 7a253cd615
commit 108937d96c
2 changed files with 201 additions and 191 deletions
--- a/scripts/utils_2d_pose_ort.py
+++ b/scripts/utils_2d_pose_ort.py
@@ -37,14 +37,16 @@ class BaseModel(ABC):
        self.input_types = []
        for i in range(len(input_types)):
            input_type = input_types[i]
-            if input_type == "tensor(float16)":
+            if input_type == "tensor(float32)":
+                itype = np.float32
+            elif input_type == "tensor(float16)":
                itype = np.float16
-            elif input_type == "tensor(uint8)":
-                itype = np.uint8
            elif input_type == "tensor(int32)":
                itype = np.int32
+            elif input_type == "tensor(uint8)":
+                itype = np.uint8
            else:
-                itype = np.float32
+                raise ValueError("Undefined input type:", input_type)
            self.input_types.append(itype)

        if warmup > 0:
@@ -59,6 +61,8 @@ class BaseModel(ABC):
        pass

    def warmup(self, epoch: int):
+        np.random.seed(42)
+
        print("Running warmup for '{}' ...".format(self.__class__.__name__))
        for _ in tqdm(range(epoch)):
            inputs = {}
@@ -139,20 +143,30 @@ class LetterBox:
    def resize_image(self, image):
        paddings, _, new_size = self.calc_params(image.shape)

-        target_h, target_w = self.target_size
-        canvas = np.full(
-            (target_h, target_w, image.shape[2]),
-            self.fill_value,
-            dtype=image.dtype,
-        )
-
+        # Resize the image
        new_w, new_h = new_size
-        dx, dy = paddings[0], paddings[2]
-        canvas[dy : dy + new_h, dx : dx + new_w, :] = cv2.resize(
-            image, (new_w, new_h), interpolation=cv2.INTER_LINEAR
+        resized_img = cv2.resize(
+            image,
+            (new_w, new_h),
+            interpolation=cv2.INTER_LINEAR,
        )

-        return canvas
+        # Optionally pad the image
+        pad_left, pad_right, pad_top, pad_bottom = paddings
+        if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
+            final_img = resized_img
+        else:
+            final_img = cv2.copyMakeBorder(
+                resized_img,
+                pad_top,
+                pad_bottom,
+                pad_left,
+                pad_right,
+                borderType=cv2.BORDER_CONSTANT,
+                value=[self.fill_value, self.fill_value, self.fill_value],
+            )
+
+        return final_img


 # ==================================================================================================
@@ -211,6 +225,7 @@ class BoxCrop:
        new_end_y = min(ishape[0] - 1, end_y)
        new_box = [new_start_x, new_start_y, new_end_x, new_end_y]

+        # Calculate resized crop size
        bbox_w = new_box[2] - new_box[0]
        bbox_h = new_box[3] - new_box[1]
        scale = min(target_w / bbox_w, target_h / bbox_h)
@@ -250,22 +265,33 @@ class BoxCrop:
    def crop_resize_box(self, image, bbox):
        paddings, _, new_box, new_size = self.calc_params(image.shape, bbox)

-        image = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]
+        # Extract the bounding box
+        cropped_img = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]

-        th, tw = self.target_size
-        canvas = np.full(
-            (th, tw, image.shape[2]),
-            self.fill_value,
-            dtype=image.dtype,
+        # Resize the image
+        new_w, new_h = new_size
+        resized_img = cv2.resize(
+            cropped_img,
+            (new_w, new_h),
+            interpolation=cv2.INTER_LINEAR,
        )

-        nw, nh = new_size
-        dx, dy = paddings[0], paddings[2]
-        canvas[dy : dy + nh, dx : dx + nw, :] = cv2.resize(
-            image, (nw, nh), interpolation=cv2.INTER_LINEAR
-        )
+        # Optionally pad the image
+        pad_left, pad_right, pad_top, pad_bottom = paddings
+        if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
+            final_img = resized_img
+        else:
+            final_img = cv2.copyMakeBorder(
+                resized_img,
+                pad_top,
+                pad_bottom,
+                pad_left,
+                pad_right,
+                borderType=cv2.BORDER_CONSTANT,
+                value=[self.fill_value, self.fill_value, self.fill_value],
+            )

-        return canvas
+        return final_img


 # ==================================================================================================
@@ -308,27 +334,17 @@ class RTMDet(BaseModel):
        boxes[:, 3] -= paddings[2]

        boxes = np.maximum(boxes, 0)
-
        th, tw = self.target_size
        pad_w = paddings[0] + paddings[1]
        pad_h = paddings[2] + paddings[3]
        max_w = tw - pad_w - 1
        max_h = th - pad_h - 1
-        b0 = boxes[:, 0]
-        b1 = boxes[:, 1]
-        b2 = boxes[:, 2]
-        b3 = boxes[:, 3]
-        b0 = np.minimum(b0, max_w)
-        b1 = np.minimum(b1, max_h)
-        b2 = np.minimum(b2, max_w)
-        b3 = np.minimum(b3, max_h)
-        boxes[:, 0] = b0
-        boxes[:, 1] = b1
-        boxes[:, 2] = b2
-        boxes[:, 3] = b3
+        boxes[:, 0] = np.minimum(boxes[:, 0], max_w)
+        boxes[:, 1] = np.minimum(boxes[:, 1], max_h)
+        boxes[:, 2] = np.minimum(boxes[:, 2], max_w)
+        boxes[:, 3] = np.minimum(boxes[:, 3], max_h)

        boxes[:, 0:4] /= scale
-
        return boxes


@@ -342,8 +358,6 @@ class RTMPose(BaseModel):
        self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)

    def preprocess(self, image: np.ndarray, bbox: np.ndarray):
-        tensor = np.asarray(image).astype(self.input_types[0], copy=False)
-        tensor = np.expand_dims(tensor, axis=0)
        bbox = np.asarray(bbox)[0:4]
        bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
        bbox = bbox.round().astype(np.int32)
@@ -368,12 +382,8 @@ class RTMPose(BaseModel):
        kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
        max_w = image.shape[1] - 1
        max_h = image.shape[0] - 1
-        b0 = kp[:, 0]
-        b1 = kp[:, 1]
-        b0 = np.minimum(b0, max_w)
-        b1 = np.minimum(b1, max_h)
-        kp[:, 0] = b0
-        kp[:, 1] = b1
+        kp[:, 0] = np.minimum(kp[:, 0], max_w)
+        kp[:, 1] = np.minimum(kp[:, 1], max_h)

        return kp