Some further speedups.
This commit is contained in:
@ -1,6 +1,6 @@
|
||||
import numpy as np
|
||||
import onnx
|
||||
from onnx import helper, numpy_helper, TensorProto
|
||||
from onnx import TensorProto, helper, numpy_helper
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
@ -45,10 +45,8 @@ def add_steps_to_onnx(model_path):
|
||||
# Define layer names, assuming the first input is the image tensor
|
||||
input_name = graph.input[0].name
|
||||
|
||||
# Set input type to always be float32
|
||||
graph.input[0].type.tensor_type.elem_type = TensorProto.FLOAT
|
||||
|
||||
# Create a node to cast the float32 input if needed
|
||||
# Cast to internal type
|
||||
# This has to be the first node, because TensorRT does not support uint8 layers
|
||||
cast_type = 10 if use_fp16 else 1
|
||||
casted_output = "casted_output"
|
||||
cast_node = helper.make_node(
|
||||
@ -58,11 +56,21 @@ def add_steps_to_onnx(model_path):
|
||||
to=cast_type,
|
||||
)
|
||||
|
||||
# Node to transpose
|
||||
transpose_output = "transpose_output"
|
||||
transpose_node = helper.make_node(
|
||||
"Transpose",
|
||||
inputs=[casted_output],
|
||||
outputs=[transpose_output],
|
||||
perm=[0, 3, 1, 2],
|
||||
name="Transpose",
|
||||
)
|
||||
|
||||
# Node to add mean
|
||||
mean_added_output = "mean_added_output"
|
||||
mean_add_node = helper.make_node(
|
||||
"Add",
|
||||
inputs=[casted_output, "norm_mean"],
|
||||
inputs=[transpose_output, "norm_mean"],
|
||||
outputs=[mean_added_output],
|
||||
name="Mean_Addition",
|
||||
)
|
||||
@ -84,13 +92,26 @@ def add_steps_to_onnx(model_path):
|
||||
|
||||
# Add the new nodes to the graph
|
||||
graph.node.insert(0, cast_node)
|
||||
graph.node.insert(1, mean_add_node)
|
||||
graph.node.insert(2, std_mul_node)
|
||||
graph.node.insert(1, transpose_node)
|
||||
graph.node.insert(2, mean_add_node)
|
||||
graph.node.insert(3, std_mul_node)
|
||||
|
||||
path = model_path.replace(".onnx", "_with-norm.onnx")
|
||||
# Transpose the input shape
|
||||
input_shape = graph.input[0].type.tensor_type.shape.dim
|
||||
dims = [dim.dim_value for dim in input_shape]
|
||||
for i, j in enumerate([0, 3, 1, 2]):
|
||||
input_shape[j].dim_value = dims[i]
|
||||
|
||||
# Set input type to uint8
|
||||
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
|
||||
|
||||
path = model_path.replace(".onnx", "_extra-steps.onnx")
|
||||
onnx.save(model, path)
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
def main():
|
||||
add_steps_to_onnx(pose_model_path)
|
||||
add_steps_to_onnx(det_model_path)
|
||||
|
||||
320
media/RESULTS.md
320
media/RESULTS.md
@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets.
|
||||
|
||||
```json
|
||||
{
|
||||
"avg_time_2d": 0.019079747846571064,
|
||||
"avg_time_3d": 0.0003823995590209961,
|
||||
"avg_fps": 51.381791492991674
|
||||
"avg_time_2d": 0.016608773651769607,
|
||||
"avg_time_3d": 0.00034795211533368645,
|
||||
"avg_fps": 58.97364937870487
|
||||
}
|
||||
{
|
||||
"person_nums": {
|
||||
@ -28,247 +28,247 @@ Results of the model in various experiments on different datasets.
|
||||
"mpjpe": {
|
||||
"count": 600,
|
||||
"mean": 0.067471,
|
||||
"median": 0.059656,
|
||||
"std": 0.027837,
|
||||
"sem": 0.001137,
|
||||
"min": 0.04161,
|
||||
"max": 0.191019,
|
||||
"median": 0.0592,
|
||||
"std": 0.02795,
|
||||
"sem": 0.001142,
|
||||
"min": 0.042592,
|
||||
"max": 0.189987,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.045,
|
||||
"recall-0.1": 0.93,
|
||||
"recall-0.05": 0.048333,
|
||||
"recall-0.1": 0.925,
|
||||
"recall-0.15": 0.95,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600,
|
||||
"ap-0.025": 0.0,
|
||||
"ap-0.05": 0.005008,
|
||||
"ap-0.1": 0.890421,
|
||||
"ap-0.15": 0.915856,
|
||||
"ap-0.05": 0.004097,
|
||||
"ap-0.1": 0.885305,
|
||||
"ap-0.15": 0.915769,
|
||||
"ap-0.25": 1.0,
|
||||
"ap-0.5": 1.0
|
||||
},
|
||||
"nose": {
|
||||
"count": 600,
|
||||
"mean": 0.116291,
|
||||
"median": 0.101326,
|
||||
"std": 0.04236,
|
||||
"sem": 0.001731,
|
||||
"min": 0.021073,
|
||||
"max": 0.288626,
|
||||
"recall-0.025": 0.001667,
|
||||
"recall-0.05": 0.006667,
|
||||
"recall-0.1": 0.491667,
|
||||
"recall-0.15": 0.821667,
|
||||
"recall-0.25": 0.99,
|
||||
"mean": 0.115621,
|
||||
"median": 0.100161,
|
||||
"std": 0.041657,
|
||||
"sem": 0.001702,
|
||||
"min": 0.031411,
|
||||
"max": 0.276464,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.01,
|
||||
"recall-0.1": 0.498333,
|
||||
"recall-0.15": 0.826667,
|
||||
"recall-0.25": 0.993333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"shoulder_left": {
|
||||
"count": 600,
|
||||
"mean": 0.033847,
|
||||
"median": 0.025987,
|
||||
"std": 0.031824,
|
||||
"sem": 0.0013,
|
||||
"min": 0.002728,
|
||||
"max": 0.181894,
|
||||
"recall-0.025": 0.466667,
|
||||
"recall-0.05": 0.866667,
|
||||
"recall-0.1": 0.948333,
|
||||
"recall-0.15": 0.968333,
|
||||
"mean": 0.033598,
|
||||
"median": 0.025444,
|
||||
"std": 0.032078,
|
||||
"sem": 0.001311,
|
||||
"min": 0.001187,
|
||||
"max": 0.181528,
|
||||
"recall-0.025": 0.486667,
|
||||
"recall-0.05": 0.865,
|
||||
"recall-0.1": 0.946667,
|
||||
"recall-0.15": 0.965,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"shoulder_right": {
|
||||
"count": 600,
|
||||
"mean": 0.04973,
|
||||
"median": 0.034604,
|
||||
"std": 0.044052,
|
||||
"sem": 0.0018,
|
||||
"min": 0.00395,
|
||||
"max": 0.256048,
|
||||
"recall-0.025": 0.211667,
|
||||
"recall-0.05": 0.753333,
|
||||
"recall-0.1": 0.906667,
|
||||
"recall-0.15": 0.936667,
|
||||
"mean": 0.049243,
|
||||
"median": 0.033956,
|
||||
"std": 0.042808,
|
||||
"sem": 0.001749,
|
||||
"min": 0.004642,
|
||||
"max": 0.255344,
|
||||
"recall-0.025": 0.218333,
|
||||
"recall-0.05": 0.748333,
|
||||
"recall-0.1": 0.901667,
|
||||
"recall-0.15": 0.941667,
|
||||
"recall-0.25": 0.998333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"elbow_left": {
|
||||
"count": 600,
|
||||
"mean": 0.042919,
|
||||
"median": 0.035054,
|
||||
"std": 0.034438,
|
||||
"sem": 0.001407,
|
||||
"min": 0.00136,
|
||||
"max": 0.198368,
|
||||
"recall-0.025": 0.246667,
|
||||
"recall-0.05": 0.825,
|
||||
"recall-0.1": 0.948333,
|
||||
"recall-0.15": 0.955,
|
||||
"mean": 0.043499,
|
||||
"median": 0.035409,
|
||||
"std": 0.034789,
|
||||
"sem": 0.001421,
|
||||
"min": 0.002463,
|
||||
"max": 0.200682,
|
||||
"recall-0.025": 0.243333,
|
||||
"recall-0.05": 0.8,
|
||||
"recall-0.1": 0.945,
|
||||
"recall-0.15": 0.953333,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"elbow_right": {
|
||||
"count": 600,
|
||||
"mean": 0.043901,
|
||||
"median": 0.033077,
|
||||
"std": 0.036324,
|
||||
"sem": 0.001484,
|
||||
"min": 0.006465,
|
||||
"max": 0.274304,
|
||||
"recall-0.025": 0.241667,
|
||||
"recall-0.05": 0.81,
|
||||
"recall-0.1": 0.925,
|
||||
"recall-0.15": 0.943333,
|
||||
"recall-0.25": 0.998333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"wrist_left": {
|
||||
"count": 600,
|
||||
"mean": 0.043044,
|
||||
"median": 0.026871,
|
||||
"std": 0.04257,
|
||||
"sem": 0.001739,
|
||||
"min": 0.000214,
|
||||
"max": 0.202968,
|
||||
"recall-0.025": 0.466667,
|
||||
"recall-0.05": 0.731667,
|
||||
"recall-0.1": 0.903333,
|
||||
"mean": 0.043289,
|
||||
"median": 0.032684,
|
||||
"std": 0.035003,
|
||||
"sem": 0.00143,
|
||||
"min": 0.007037,
|
||||
"max": 0.202309,
|
||||
"recall-0.025": 0.255,
|
||||
"recall-0.05": 0.805,
|
||||
"recall-0.1": 0.931667,
|
||||
"recall-0.15": 0.941667,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"wrist_left": {
|
||||
"count": 600,
|
||||
"mean": 0.043376,
|
||||
"median": 0.027016,
|
||||
"std": 0.044176,
|
||||
"sem": 0.001805,
|
||||
"min": 0.000972,
|
||||
"max": 0.340542,
|
||||
"recall-0.025": 0.466667,
|
||||
"recall-0.05": 0.728333,
|
||||
"recall-0.1": 0.905,
|
||||
"recall-0.15": 0.941667,
|
||||
"recall-0.25": 0.998333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"wrist_right": {
|
||||
"count": 600,
|
||||
"mean": 0.044102,
|
||||
"median": 0.026291,
|
||||
"std": 0.050657,
|
||||
"sem": 0.00207,
|
||||
"min": 0.003409,
|
||||
"max": 0.460683,
|
||||
"recall-0.025": 0.466667,
|
||||
"mean": 0.044908,
|
||||
"median": 0.027102,
|
||||
"std": 0.052541,
|
||||
"sem": 0.002147,
|
||||
"min": 0.001728,
|
||||
"max": 0.485231,
|
||||
"recall-0.025": 0.448333,
|
||||
"recall-0.05": 0.776667,
|
||||
"recall-0.1": 0.895,
|
||||
"recall-0.15": 0.915,
|
||||
"recall-0.25": 0.996667,
|
||||
"recall-0.1": 0.893333,
|
||||
"recall-0.15": 0.911667,
|
||||
"recall-0.25": 0.995,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"hip_left": {
|
||||
"count": 600,
|
||||
"mean": 0.090158,
|
||||
"median": 0.08564,
|
||||
"std": 0.032791,
|
||||
"sem": 0.00134,
|
||||
"min": 0.014562,
|
||||
"max": 0.236403,
|
||||
"recall-0.025": 0.001667,
|
||||
"recall-0.05": 0.036667,
|
||||
"recall-0.1": 0.82,
|
||||
"recall-0.15": 0.945,
|
||||
"mean": 0.089001,
|
||||
"median": 0.085342,
|
||||
"std": 0.032716,
|
||||
"sem": 0.001337,
|
||||
"min": 0.007027,
|
||||
"max": 0.235465,
|
||||
"recall-0.025": 0.008333,
|
||||
"recall-0.05": 0.031667,
|
||||
"recall-0.1": 0.815,
|
||||
"recall-0.15": 0.948333,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"hip_right": {
|
||||
"count": 600,
|
||||
"mean": 0.11407,
|
||||
"median": 0.114481,
|
||||
"std": 0.02606,
|
||||
"sem": 0.001065,
|
||||
"min": 0.045083,
|
||||
"max": 0.233275,
|
||||
"mean": 0.113299,
|
||||
"median": 0.113584,
|
||||
"std": 0.026162,
|
||||
"sem": 0.001069,
|
||||
"min": 0.04703,
|
||||
"max": 0.230987,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.001667,
|
||||
"recall-0.1": 0.251667,
|
||||
"recall-0.15": 0.948333,
|
||||
"recall-0.1": 0.261667,
|
||||
"recall-0.15": 0.946667,
|
||||
"recall-0.25": 1.0,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"knee_left": {
|
||||
"count": 600,
|
||||
"mean": 0.061433,
|
||||
"median": 0.045616,
|
||||
"std": 0.060807,
|
||||
"sem": 0.002484,
|
||||
"min": 0.018825,
|
||||
"max": 0.425003,
|
||||
"recall-0.025": 0.058333,
|
||||
"recall-0.05": 0.593333,
|
||||
"recall-0.1": 0.915,
|
||||
"recall-0.15": 0.923333,
|
||||
"recall-0.25": 0.98,
|
||||
"mean": 0.062069,
|
||||
"median": 0.044729,
|
||||
"std": 0.06187,
|
||||
"sem": 0.002528,
|
||||
"min": 0.017903,
|
||||
"max": 0.431859,
|
||||
"recall-0.025": 0.06,
|
||||
"recall-0.05": 0.591667,
|
||||
"recall-0.1": 0.913333,
|
||||
"recall-0.15": 0.92,
|
||||
"recall-0.25": 0.978333,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"knee_right": {
|
||||
"count": 600,
|
||||
"mean": 0.050724,
|
||||
"median": 0.042002,
|
||||
"std": 0.036168,
|
||||
"sem": 0.001478,
|
||||
"min": 0.016654,
|
||||
"max": 0.257622,
|
||||
"recall-0.025": 0.038333,
|
||||
"recall-0.05": 0.76,
|
||||
"mean": 0.050915,
|
||||
"median": 0.04249,
|
||||
"std": 0.036278,
|
||||
"sem": 0.001482,
|
||||
"min": 0.015193,
|
||||
"max": 0.263834,
|
||||
"recall-0.025": 0.033333,
|
||||
"recall-0.05": 0.766667,
|
||||
"recall-0.1": 0.941667,
|
||||
"recall-0.15": 0.945,
|
||||
"recall-0.25": 0.998333,
|
||||
"recall-0.25": 0.996667,
|
||||
"recall-0.5": 1.0,
|
||||
"num_labels": 600
|
||||
},
|
||||
"ankle_left": {
|
||||
"count": 599,
|
||||
"mean": 0.09891,
|
||||
"median": 0.086872,
|
||||
"std": 0.051463,
|
||||
"sem": 0.002104,
|
||||
"min": 0.035674,
|
||||
"max": 0.490677,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.003333,
|
||||
"recall-0.1": 0.831667,
|
||||
"recall-0.15": 0.936667,
|
||||
"recall-0.25": 0.98,
|
||||
"recall-0.5": 0.998333,
|
||||
"num_labels": 600
|
||||
},
|
||||
"ankle_right": {
|
||||
"count": 598,
|
||||
"mean": 0.085207,
|
||||
"median": 0.069747,
|
||||
"std": 0.054773,
|
||||
"sem": 0.002242,
|
||||
"min": 0.030417,
|
||||
"max": 0.440549,
|
||||
"mean": 0.098393,
|
||||
"median": 0.086077,
|
||||
"std": 0.050788,
|
||||
"sem": 0.002079,
|
||||
"min": 0.036989,
|
||||
"max": 0.49288,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.021667,
|
||||
"recall-0.1": 0.88,
|
||||
"recall-0.15": 0.906667,
|
||||
"recall-0.05": 0.005,
|
||||
"recall-0.1": 0.83,
|
||||
"recall-0.15": 0.936667,
|
||||
"recall-0.25": 0.978333,
|
||||
"recall-0.5": 0.996667,
|
||||
"num_labels": 600
|
||||
},
|
||||
"ankle_right": {
|
||||
"count": 597,
|
||||
"mean": 0.085279,
|
||||
"median": 0.069562,
|
||||
"std": 0.05552,
|
||||
"sem": 0.002274,
|
||||
"min": 0.031135,
|
||||
"max": 0.445133,
|
||||
"recall-0.025": 0.0,
|
||||
"recall-0.05": 0.015,
|
||||
"recall-0.1": 0.878333,
|
||||
"recall-0.15": 0.901667,
|
||||
"recall-0.25": 0.973333,
|
||||
"recall-0.5": 0.995,
|
||||
"num_labels": 600
|
||||
},
|
||||
"joint_recalls": {
|
||||
"num_labels": 7800,
|
||||
"recall-0.025": 0.16897,
|
||||
"recall-0.05": 0.47551,
|
||||
"recall-0.1": 0.81936,
|
||||
"recall-0.15": 0.92923,
|
||||
"recall-0.25": 0.99333,
|
||||
"recall-0.5": 0.99949
|
||||
"recall-0.025": 0.17013,
|
||||
"recall-0.05": 0.47244,
|
||||
"recall-0.1": 0.81949,
|
||||
"recall-0.15": 0.92897,
|
||||
"recall-0.25": 0.99244,
|
||||
"recall-0.5": 0.99936
|
||||
}
|
||||
}
|
||||
{
|
||||
"total_parts": 8400,
|
||||
"correct_parts": 8091,
|
||||
"pcp": 0.963214
|
||||
"correct_parts": 8089,
|
||||
"pcp": 0.962976
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@ -220,7 +220,7 @@ def update_sample(sample, new_dir=""):
|
||||
def load_image(path: str):
|
||||
image = cv2.imread(path, 3)
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
image = np.array(image, dtype=np.float32)
|
||||
image = np.asarray(image, dtype=np.uint8)
|
||||
return image
|
||||
|
||||
|
||||
|
||||
@ -35,6 +35,8 @@ class BaseModel(ABC):
|
||||
input_type = self.session.get_inputs()[0].type
|
||||
if input_type == "tensor(float16)":
|
||||
self.input_type = np.float16
|
||||
elif input_type == "tensor(uint8)":
|
||||
self.input_type = np.uint8
|
||||
else:
|
||||
self.input_type = np.float32
|
||||
|
||||
@ -152,12 +154,12 @@ class RTMDet(BaseModel):
|
||||
return boxes[keep_indices]
|
||||
|
||||
def preprocess(self, image: np.ndarray):
|
||||
th, tw = self.input_shape[2:]
|
||||
th, tw = self.input_shape[1:3]
|
||||
image, self.dx, self.dy, self.scale = self.letterbox(
|
||||
image, (tw, th), fill_value=114
|
||||
)
|
||||
tensor = np.asarray(image).astype(self.input_type, copy=False)[..., ::-1]
|
||||
tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
|
||||
tensor = np.expand_dims(tensor, axis=0)
|
||||
return tensor
|
||||
|
||||
def postprocess(self, tensor: List[np.ndarray]):
|
||||
@ -274,10 +276,10 @@ class RTMPose(BaseModel):
|
||||
return extracted_region, new_box, scale
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
|
||||
th, tw = self.input_shape[2:]
|
||||
th, tw = self.input_shape[1:3]
|
||||
region, self.bbox, _ = self.region_of_interest_warped(image, bbox, (tw, th))
|
||||
tensor = np.asarray(region).astype(self.input_type, copy=False)
|
||||
tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2))
|
||||
tensor = np.expand_dims(tensor, axis=0)
|
||||
return tensor
|
||||
|
||||
def postprocess(self, tensor: List[np.ndarray], **kwargs):
|
||||
@ -285,7 +287,7 @@ class RTMPose(BaseModel):
|
||||
kp = np.concatenate([tensor[0][0], np.expand_dims(scores, axis=-1)], axis=-1)
|
||||
|
||||
# See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample()
|
||||
th, tw = self.input_shape[2:]
|
||||
th, tw = self.input_shape[1:3]
|
||||
bw, bh = [self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]]
|
||||
kp[:, :2] /= np.array([tw, th])
|
||||
kp[:, :2] *= np.array([bw, bh])
|
||||
@ -331,10 +333,10 @@ def load_model():
|
||||
print("Loading onnx model ...")
|
||||
|
||||
model = TopDown(
|
||||
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_with-norm.onnx",
|
||||
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_with-norm.onnx",
|
||||
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_with-norm.onnx",
|
||||
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_with-norm.onnx",
|
||||
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_extra-steps.onnx",
|
||||
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_320x320_fp16_extra-steps.onnx",
|
||||
# "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_extra-steps.onnx",
|
||||
"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_384x288_fp16_extra-steps.onnx",
|
||||
conf_threshold=0.3,
|
||||
iou_threshold=0.3,
|
||||
warmup=30,
|
||||
|
||||
Reference in New Issue
Block a user