diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py
index 4eb822b..f80ea77 100644
--- a/extras/mmdeploy/add_extra_steps.py
+++ b/extras/mmdeploy/add_extra_steps.py
@@ -54,6 +54,7 @@ def add_steps_to_onnx(model_path):
         inputs=[input_name],
         outputs=[casted_output],
         to=cast_type,
+        name="Cast_Input",
     )
 
     # Node to transpose
@@ -118,6 +119,32 @@ def add_steps_to_onnx(model_path):
     # Set input image type to int8
     model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
 
+    # Cast all outputs to fp32 to avoid half precision issues in cpp code
+    for output in graph.output:
+        orig_output_name = output.name
+        internal_output_name = orig_output_name + "_internal"
+
+        # Rename the output tensor
+        for node in model.graph.node:
+            for idx, name in enumerate(node.output):
+                if name == orig_output_name:
+                    node.output[idx] = internal_output_name
+
+        # Insert a Cast node that casts the internal output to fp32
+        cast_fp32_name = orig_output_name
+        cast_node_output = helper.make_node(
+            "Cast",
+            inputs=[internal_output_name],
+            outputs=[cast_fp32_name],
+            to=1,
+            name="Cast_Output_" + orig_output_name,
+        )
+        # Append the cast node to the graph
+        graph.node.append(cast_node_output)
+
+        # Update the output's data type info
+        output.type.tensor_type.elem_type = TensorProto.FLOAT
+
     path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path)
     path = path.replace(".onnx", "_extra-steps.onnx")
     onnx.save(model, path)
diff --git a/scripts/test_triangulate.py b/scripts/test_triangulate.py
index 481e4b0..5c25d44 100644
--- a/scripts/test_triangulate.py
+++ b/scripts/test_triangulate.py
@@ -253,11 +253,11 @@ def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
         new_body = body[:17]
 
         if whole_body["foots"]:
-            new_body.extend(body[17:22])
+            new_body.extend(body[17:23])
         if whole_body["face"]:
-            new_body.extend(body[22:90])
+            new_body.extend(body[23:91])
         if whole_body["hands"]:
-            new_body.extend(body[90:])
+            new_body.extend(body[91:])
         body = new_body
 
         hlid = joint_names.index("hip_left")
diff --git a/scripts/utils_2d_pose.py b/scripts/utils_2d_pose.py
index 0117870..63eb89e 100644
--- a/scripts/utils_2d_pose.py
+++ b/scripts/utils_2d_pose.py
@@ -189,9 +189,15 @@ class BoxCrop:
         self.fill_value = fill_value
 
     def calc_params(self, ishape, bbox):
-        start_x, start_y, end_x, end_y = bbox[0], bbox[1], bbox[2], bbox[3]
+        img_h, img_w = ishape[:2]
         target_h, target_w = self.target_size
 
+        # Round the bounding box coordinates
+        start_x = math.floor(bbox[0])
+        start_y = math.floor(bbox[1])
+        end_x = math.ceil(bbox[2])
+        end_y = math.ceil(bbox[3])
+
         # Calculate original bounding box center
         center_x = (start_x + end_x) / 2.0
         center_y = (start_y + end_y) / 2.0
@@ -231,8 +237,8 @@ class BoxCrop:
         # Define the new box coordinates
         new_start_x = max(0, start_x)
         new_start_y = max(0, start_y)
-        new_end_x = min(ishape[1] - 1, end_x)
-        new_end_y = min(ishape[0] - 1, end_y)
+        new_end_x = min(img_w - 1, end_x)
+        new_end_y = min(img_h - 1, end_y)
         new_box = [new_start_x, new_start_y, new_end_x, new_end_y]
 
         # Calculate resized crop size
@@ -344,7 +350,6 @@ class RTMDet(BaseModel):
             return np.array([])
 
         # Drop boxes with too small area
-        boxes = boxes.astype(np.float32)
         areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
         keep = areas >= self.min_area
         boxes = boxes[keep]
@@ -386,10 +391,7 @@ class RTMPose(BaseModel):
     def preprocess(self, image: np.ndarray, bboxes: np.ndarray):
         cutouts = []
         for i in range(len(bboxes)):
-            bbox = np.asarray(bboxes[i])[0:4]
-            bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
-            bbox = bbox.round().astype(np.int32)
-            region = self.boxcrop.crop_resize_box(image, bbox)
+            region = self.boxcrop.crop_resize_box(image, bboxes[i])
             tensor = np.asarray(region).astype(self.input_types[0], copy=False)
             cutouts.append(tensor)
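
# Illustrative sketch (not part of the patch): the output-cast pattern added
# in add_extra_steps.py, applied to a hypothetical toy fp16 model. The model,
# tensor names, and shapes below are assumptions for demonstration only.
import onnx
from onnx import TensorProto, helper

# Toy model with a single fp16 output: Y = Identity(X).
X = helper.make_tensor_value_info("X", TensorProto.FLOAT16, [1, 3])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT16, [1, 3])
identity = helper.make_node("Identity", inputs=["X"], outputs=["Y"])
model = helper.make_model(helper.make_graph([identity], "demo", [X], [Y]))

# Same steps as the patch: rename the producer's output tensor, append a
# Cast node so the graph output keeps its original name but becomes fp32,
# then update the declared output dtype.
for output in model.graph.output:
    orig_name = output.name
    internal_name = orig_name + "_internal"
    for node in model.graph.node:
        for idx, name in enumerate(node.output):
            if name == orig_name:
                node.output[idx] = internal_name
    model.graph.node.append(
        helper.make_node(
            "Cast",
            inputs=[internal_name],
            outputs=[orig_name],
            to=TensorProto.FLOAT,  # same enum value as to=1 in the patch
            name="Cast_Output_" + orig_name,
        )
    )
    output.type.tensor_type.elem_type = TensorProto.FLOAT

onnx.checker.check_model(model)  # output "Y" is now declared float32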