Moved image cropping out of the graph again.
This commit is contained in:
@ -1,6 +1,8 @@
|
||||
import math
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
from tqdm import tqdm
|
||||
@ -49,11 +51,11 @@ class BaseModel(ABC):
|
||||
self.warmup(warmup)
|
||||
|
||||
@abstractmethod
def preprocess(self, **kwargs):
    """Convert raw call-site inputs into a list of network-ready tensors."""
|
||||
|
||||
@abstractmethod
def postprocess(self, **kwargs):
    """Convert raw session outputs into the model's final result."""
|
||||
|
||||
def warmup(self, epoch: int):
|
||||
@ -97,20 +99,178 @@ class BaseModel(ABC):
|
||||
|
||||
self.session.run(None, inputs)
|
||||
|
||||
def __call__(self, **kwargs):
    """Run the full pipeline: preprocess -> ONNX session -> postprocess.

    All keyword arguments are forwarded unchanged to ``preprocess`` and
    ``postprocess``; the raw session outputs reach ``postprocess`` under
    the ``result`` keyword.

    Returns whatever ``postprocess`` produces.
    """
    tensor = self.preprocess(**kwargs)
    # Pair each declared model input name with the tensor at the same
    # position (replaces the index-based range(len(...)) loop).
    inputs = dict(zip(self.input_names, tensor))
    result = self.session.run(None, inputs)
    output = self.postprocess(result=result, **kwargs)
    return output
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
class LetterBox:
    """Aspect-ratio-preserving resize onto a fixed-size padded canvas.

    The source image is uniformly scaled to fit inside ``target_size`` and
    centred on a canvas filled with ``fill_value`` (classic letterboxing).
    """

    def __init__(self, target_size, fill_value=0):
        # target_size is (height, width); fill_value is the padding colour.
        self.target_size = target_size
        self.fill_value = fill_value

    def calc_params(self, ishape):
        """Compute the letterbox geometry for an input of shape ``ishape``.

        Returns ``(paddings, scale, (new_w, new_h))`` where ``paddings`` is
        ``(left, right, top, bottom)`` on the target canvas and ``scale`` is
        the uniform resize factor applied to the source image.
        """
        src_h, src_w = ishape[:2]
        dst_h, dst_w = self.target_size

        # Largest uniform scale that keeps the image inside the target.
        scale = min(dst_w / src_w, dst_h / src_h)
        new_w = round(src_w * scale)
        new_h = round(src_h * scale)

        # Split the leftover space evenly, giving any odd pixel to the
        # right/bottom side.
        extra_w = dst_w - new_w
        extra_h = dst_h - new_h
        left = extra_w // 2
        top = extra_h // 2
        paddings = (left, extra_w - left, top, extra_h - top)

        return paddings, scale, (new_w, new_h)

    def resize_image(self, image):
        """Return the letterboxed copy of ``image`` at ``target_size``."""
        paddings, _, (new_w, new_h) = self.calc_params(image.shape)
        dst_h, dst_w = self.target_size

        canvas = np.full(
            (dst_h, dst_w, image.shape[2]),
            self.fill_value,
            dtype=image.dtype,
        )

        left, top = paddings[0], paddings[2]
        canvas[top : top + new_h, left : left + new_w, :] = cv2.resize(
            image, (new_w, new_h), interpolation=cv2.INTER_LINEAR
        )

        return canvas
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
class BoxCrop:
    """Crop a (possibly enlarged) bounding box from an image and resize it
    to ``target_size``, letterbox-padding with ``fill_value`` on the sides
    where the box ran past the image border.
    """

    def __init__(self, target_size, padding_scale=1.0, fill_value=0):
        # target_size is (height, width); padding_scale grows the box around
        # its centre before cropping; fill_value is the padding colour.
        self.target_size = target_size
        self.padding_scale = padding_scale
        self.fill_value = fill_value

    def calc_params(self, ishape, bbox):
        """Compute crop/resize geometry for ``bbox`` inside shape ``ishape``.

        Returns ``(paddings, scale, new_box, (new_w, new_h))`` where
        ``paddings`` is ``(left, right, top, bottom)`` on the target canvas,
        ``scale`` maps cropped pixels to target pixels, ``new_box`` is the
        clipped crop rectangle in image coordinates and ``(new_w, new_h)``
        is the resized crop size placed inside the target.
        """
        x0, y0, x1, y1 = bbox[0], bbox[1], bbox[2], bbox[3]
        target_h, target_w = self.target_size

        # Box centre in the original image.
        cx = (x0 + x1) / 2.0
        cy = (y0 + y1) / 2.0

        # Grow the box by padding_scale around its centre.
        grown_w = (x1 - x0) * self.padding_scale
        grown_h = (y1 - y0) * self.padding_scale

        # Expand one dimension so the box matches the target aspect ratio.
        target_aspect = target_w / target_h
        if grown_w / grown_h > target_aspect:
            box_w = grown_w
            box_h = grown_w / target_aspect
        else:
            box_h = grown_h
            box_w = grown_h * target_aspect

        # Re-centre the aspect-corrected box and round it outwards.
        start_x = int(math.floor(cx - box_w / 2.0))
        start_y = int(math.floor(cy - box_h / 2.0))
        end_x = int(math.ceil(cx + box_w / 2.0))
        end_y = int(math.ceil(cy + box_h / 2.0))

        # Clip the crop rectangle to the image bounds.
        new_box = [
            max(0, start_x),
            max(0, start_y),
            min(ishape[1] - 1, end_x),
            min(ishape[0] - 1, end_y),
        ]

        # Uniform scale from the clipped crop onto the target canvas.
        crop_w = new_box[2] - new_box[0]
        crop_h = new_box[3] - new_box[1]
        scale = min(target_w / crop_w, target_h / crop_h)
        new_w = round(crop_w * scale)
        new_h = round(crop_h * scale)

        # Distribute leftover target space: pile all padding on the side
        # where the (unclipped) box ran past the image edge, otherwise split
        # evenly (small remainders can come from bbox rounding).
        pad_left = pad_right = pad_top = pad_bottom = 0
        pad_w = target_w - new_w
        pad_h = target_h - new_h
        if pad_w > 0:
            if start_x < 0:
                pad_left = pad_w
            elif end_x > ishape[1]:
                pad_right = pad_w
            else:
                pad_left = pad_w // 2
                pad_right = pad_w - pad_left
        if pad_h > 0:
            if start_y < 0:
                pad_top = pad_h
            elif end_y > ishape[0]:
                pad_bottom = pad_h
            else:
                pad_top = pad_h // 2
                pad_bottom = pad_h - pad_top
        paddings = (pad_left, pad_right, pad_top, pad_bottom)

        return paddings, scale, new_box, (new_w, new_h)

    def crop_resize_box(self, image, bbox):
        """Return the ``target_size`` canvas holding the resized crop of ``bbox``."""
        paddings, _, new_box, (new_w, new_h) = self.calc_params(image.shape, bbox)

        crop = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]

        target_h, target_w = self.target_size
        canvas = np.full(
            (target_h, target_w, image.shape[2]),
            self.fill_value,
            dtype=image.dtype,
        )

        left, top = paddings[0], paddings[2]
        canvas[top : top + new_h, left : left + new_w, :] = cv2.resize(
            crop, (new_w, new_h), interpolation=cv2.INTER_LINEAR
        )

        return canvas
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
class RTMDet(BaseModel):
|
||||
def __init__(
|
||||
self,
|
||||
@ -119,17 +279,20 @@ class RTMDet(BaseModel):
|
||||
warmup: int = 30,
|
||||
):
|
||||
super(RTMDet, self).__init__(model_path, warmup)
|
||||
self.target_size = (320, 320)
|
||||
self.conf_threshold = conf_threshold
|
||||
self.letterbox = LetterBox(self.target_size, fill_value=114)
|
||||
|
||||
def preprocess(self, image: np.ndarray):
    """Letterbox ``image`` to the detector's input size and wrap it as a
    single-element batch cast to the session's expected input dtype.

    Returns a one-element list holding the batched tensor.
    """
    resized = self.letterbox.resize_image(image)
    batch = np.asarray(resized).astype(self.input_types[0], copy=False)
    batch = batch[np.newaxis, ...]
    return [batch]
|
||||
|
||||
def postprocess(self, tensor: List[np.ndarray]):
|
||||
boxes = np.squeeze(tensor[1], axis=0)
|
||||
classes = np.squeeze(tensor[0], axis=0)
|
||||
def postprocess(self, result: List[np.ndarray], image: np.ndarray):
|
||||
boxes = np.squeeze(result[0], axis=0)
|
||||
classes = np.squeeze(result[1], axis=0)
|
||||
|
||||
human_class = classes[:] == 0
|
||||
boxes = boxes[human_class]
|
||||
@ -137,6 +300,35 @@ class RTMDet(BaseModel):
|
||||
keep = boxes[:, 4] > self.conf_threshold
|
||||
boxes = boxes[keep]
|
||||
|
||||
paddings, scale, _ = self.letterbox.calc_params(image.shape)
|
||||
|
||||
boxes[:, 0] -= paddings[0]
|
||||
boxes[:, 2] -= paddings[0]
|
||||
boxes[:, 1] -= paddings[2]
|
||||
boxes[:, 3] -= paddings[2]
|
||||
|
||||
boxes = np.maximum(boxes, 0)
|
||||
|
||||
th, tw = self.target_size
|
||||
pad_w = paddings[0] + paddings[1]
|
||||
pad_h = paddings[2] + paddings[3]
|
||||
max_w = tw - pad_w - 1
|
||||
max_h = th - pad_h - 1
|
||||
b0 = boxes[:, 0]
|
||||
b1 = boxes[:, 1]
|
||||
b2 = boxes[:, 2]
|
||||
b3 = boxes[:, 3]
|
||||
b0 = np.minimum(b0, max_w)
|
||||
b1 = np.minimum(b1, max_h)
|
||||
b2 = np.minimum(b2, max_w)
|
||||
b3 = np.minimum(b3, max_h)
|
||||
boxes[:, 0] = b0
|
||||
boxes[:, 1] = b1
|
||||
boxes[:, 2] = b2
|
||||
boxes[:, 3] = b3
|
||||
|
||||
boxes[:, 0:4] /= scale
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
@ -146,7 +338,8 @@ class RTMDet(BaseModel):
|
||||
class RTMPose(BaseModel):
|
||||
def __init__(self, model_path: str, warmup: int = 30):
    """Load the RTMPose ONNX model and set up its input-crop helper.

    ``model_path`` is the ONNX file to load; ``warmup`` is the number of
    warm-up inference passes forwarded to the base class.
    """
    super().__init__(model_path, warmup)
    self.bbox = None
    # Pose head expects (h, w) = (384, 288) person crops, padded with black.
    self.target_size = (384, 288)
    self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
    """Crop ``bbox`` from ``image``, resize it to the model's input size and
    wrap it as a single-element batch in the session's input dtype.

    Returns a one-element list holding the batched crop tensor.
    """
    region = self.boxcrop.crop_resize_box(image, bbox)
    batch = np.asarray(region).astype(self.input_types[0], copy=False)
    batch = batch[np.newaxis, ...]
    return [batch]
|
||||
|
||||
def postprocess(self, tensor: List[np.ndarray], **kwargs):
|
||||
scores = np.clip(tensor[0][0], 0, 1)
|
||||
kp = np.concatenate([tensor[1][0], np.expand_dims(scores, axis=-1)], axis=-1)
|
||||
def postprocess(
    self, result: List[np.ndarray], image: np.ndarray, bbox: np.ndarray
):
    """Map raw pose-head outputs back into original-image coordinates.

    ``result[0]`` holds keypoint (x, y) coordinates in crop space and
    ``result[1]`` their confidences. Returns an (N, 3) array of
    ``(x, y, score)`` rows, with scores clipped to [0, 1] and coordinates
    clamped to the image bounds.
    """
    conf = np.clip(result[1][0], 0, 1)
    kp = np.concatenate([result[0][0], conf[..., np.newaxis]], axis=-1)

    # Undo the letterbox padding and resize applied by BoxCrop, then shift
    # back to the crop's position inside the full image.
    paddings, scale, crop_box, _ = self.boxcrop.calc_params(image.shape, bbox)
    kp[:, 0] = (kp[:, 0] - paddings[0]) / scale + crop_box[0]
    kp[:, 1] = (kp[:, 1] - paddings[2]) / scale + crop_box[1]

    # Clamp keypoints to valid pixel coordinates.
    kp[:, 0] = np.clip(kp[:, 0], 0, image.shape[1] - 1)
    kp[:, 1] = np.clip(kp[:, 1], 0, image.shape[0] - 1)

    return kp
|
||||
|
||||
|
||||
@ -184,10 +398,10 @@ class TopDown:
|
||||
self.pose_model = RTMPose(pose_model_path, warmup)
|
||||
|
||||
def predict(self, image):
|
||||
boxes = self.det_model(image)
|
||||
boxes = self.det_model(image=image)
|
||||
results = []
|
||||
for i in range(boxes.shape[0]):
|
||||
kp = self.pose_model(image, bbox=boxes[i])
|
||||
kp = self.pose_model(image=image, bbox=boxes[i])
|
||||
results.append(kp)
|
||||
return results
|
||||
|
||||
|
||||
Reference in New Issue
Block a user