Some mixed improvements.

This commit is contained in:
Daniel
2024-12-06 13:58:34 +01:00
parent 7a253cd615
commit 108937d96c
2 changed files with 201 additions and 191 deletions

View File

@ -37,14 +37,16 @@ class BaseModel(ABC):
self.input_types = []
for i in range(len(input_types)):
input_type = input_types[i]
if input_type == "tensor(float16)":
if input_type == "tensor(float32)":
itype = np.float32
elif input_type == "tensor(float16)":
itype = np.float16
elif input_type == "tensor(uint8)":
itype = np.uint8
elif input_type == "tensor(int32)":
itype = np.int32
elif input_type == "tensor(uint8)":
itype = np.uint8
else:
itype = np.float32
raise ValueError("Undefined input type:", input_type)
self.input_types.append(itype)
if warmup > 0:
@ -59,6 +61,8 @@ class BaseModel(ABC):
pass
def warmup(self, epoch: int):
np.random.seed(42)
print("Running warmup for '{}' ...".format(self.__class__.__name__))
for _ in tqdm(range(epoch)):
inputs = {}
@ -139,20 +143,30 @@ class LetterBox:
def resize_image(self, image):
    """Resize ``image`` and pad it with a constant border.

    The padding amounts and the resized dimensions come from
    ``self.calc_params(image.shape)``. The padding tuple is unpacked as
    (left, right, top, bottom) and the new size as (width, height).

    Args:
        image: Input image array; only its ``shape`` is passed to
            ``calc_params``, the pixel data goes to ``cv2.resize``.

    Returns:
        The resized image, surrounded by a ``self.fill_value`` border when
        any padding is non-zero; otherwise the resized image itself.
    """
    paddings, _, new_size = self.calc_params(image.shape)

    # Resize the image
    new_w, new_h = new_size
    resized_img = cv2.resize(
        image,
        (new_w, new_h),
        interpolation=cv2.INTER_LINEAR,
    )

    # Optionally pad the image; skip the extra copy when no border is needed
    pad_left, pad_right, pad_top, pad_bottom = paddings
    if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
        return resized_img

    # NOTE(review): the three-element fill colour presumes a 3-channel
    # image -- confirm against callers.
    return cv2.copyMakeBorder(
        resized_img,
        pad_top,
        pad_bottom,
        pad_left,
        pad_right,
        borderType=cv2.BORDER_CONSTANT,
        value=[self.fill_value, self.fill_value, self.fill_value],
    )
# ==================================================================================================
@ -211,6 +225,7 @@ class BoxCrop:
new_end_y = min(ishape[0] - 1, end_y)
new_box = [new_start_x, new_start_y, new_end_x, new_end_y]
# Calculate resized crop size
bbox_w = new_box[2] - new_box[0]
bbox_h = new_box[3] - new_box[1]
scale = min(target_w / bbox_w, target_h / bbox_h)
@ -250,22 +265,33 @@ class BoxCrop:
def crop_resize_box(self, image, bbox):
    """Crop ``bbox`` out of ``image``, resize the crop and pad it.

    ``self.calc_params(image.shape, bbox)`` supplies the padding amounts
    (unpacked as left, right, top, bottom), the box used for slicing
    (indexed as [x0, y0, x1, y1]) and the resized dimensions
    (width, height).

    Args:
        image: Input image array, indexed as ``image[y, x]``.
        bbox: Bounding box forwarded unchanged to ``calc_params``.

    Returns:
        The resized crop, surrounded by a ``self.fill_value`` border when
        any padding is non-zero; otherwise the resized crop itself.
    """
    paddings, _, new_box, new_size = self.calc_params(image.shape, bbox)

    # Extract the bounding box
    cropped_img = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]

    # Resize the image
    new_w, new_h = new_size
    resized_img = cv2.resize(
        cropped_img,
        (new_w, new_h),
        interpolation=cv2.INTER_LINEAR,
    )

    # Optionally pad the image; skip the extra copy when no border is needed
    pad_left, pad_right, pad_top, pad_bottom = paddings
    if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0:
        return resized_img

    # NOTE(review): the three-element fill colour presumes a 3-channel
    # image -- confirm against callers.
    return cv2.copyMakeBorder(
        resized_img,
        pad_top,
        pad_bottom,
        pad_left,
        pad_right,
        borderType=cv2.BORDER_CONSTANT,
        value=[self.fill_value, self.fill_value, self.fill_value],
    )
# ==================================================================================================
@ -308,27 +334,17 @@ class RTMDet(BaseModel):
boxes[:, 3] -= paddings[2]
boxes = np.maximum(boxes, 0)
th, tw = self.target_size
pad_w = paddings[0] + paddings[1]
pad_h = paddings[2] + paddings[3]
max_w = tw - pad_w - 1
max_h = th - pad_h - 1
b0 = boxes[:, 0]
b1 = boxes[:, 1]
b2 = boxes[:, 2]
b3 = boxes[:, 3]
b0 = np.minimum(b0, max_w)
b1 = np.minimum(b1, max_h)
b2 = np.minimum(b2, max_w)
b3 = np.minimum(b3, max_h)
boxes[:, 0] = b0
boxes[:, 1] = b1
boxes[:, 2] = b2
boxes[:, 3] = b3
boxes[:, 0] = np.minimum(boxes[:, 0], max_w)
boxes[:, 1] = np.minimum(boxes[:, 1], max_h)
boxes[:, 2] = np.minimum(boxes[:, 2], max_w)
boxes[:, 3] = np.minimum(boxes[:, 3], max_h)
boxes[:, 0:4] /= scale
return boxes
@ -342,8 +358,6 @@ class RTMPose(BaseModel):
self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
tensor = np.expand_dims(tensor, axis=0)
bbox = np.asarray(bbox)[0:4]
bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
bbox = bbox.round().astype(np.int32)
@ -368,12 +382,8 @@ class RTMPose(BaseModel):
kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
max_w = image.shape[1] - 1
max_h = image.shape[0] - 1
b0 = kp[:, 0]
b1 = kp[:, 1]
b0 = np.minimum(b0, max_w)
b1 = np.minimum(b1, max_h)
kp[:, 0] = b0
kp[:, 1] = b1
kp[:, 0] = np.minimum(kp[:, 0], max_w)
kp[:, 1] = np.minimum(kp[:, 1], max_h)
return kp