Moved image cropping out of the graph again.
This commit is contained in:
@ -97,77 +97,13 @@ def add_steps_to_onnx(model_path):
|
|||||||
for i, j in enumerate([0, 3, 1, 2]):
|
for i, j in enumerate([0, 3, 1, 2]):
|
||||||
input_shape[j].dim_value = dims[i]
|
input_shape[j].dim_value = dims[i]
|
||||||
|
|
||||||
if "det" in model_path:
|
# Rename the input tensor
|
||||||
# Add preprocess model to main network
|
|
||||||
pp1_model = onnx.load(base_path + "det_preprocess.onnx")
|
|
||||||
model = compose.add_prefix(model, prefix="main_")
|
|
||||||
pp1_model = compose.add_prefix(pp1_model, prefix="preprocess_")
|
|
||||||
model = compose.merge_models(
|
|
||||||
pp1_model,
|
|
||||||
model,
|
|
||||||
io_map=[(pp1_model.graph.output[0].name, model.graph.input[0].name)],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add postprocess model
|
|
||||||
pp2_model = onnx.load(base_path + "det_postprocess.onnx")
|
|
||||||
pp2_model = compose.add_prefix(pp2_model, prefix="postprocess_")
|
|
||||||
model = compose.merge_models(
|
|
||||||
model,
|
|
||||||
pp2_model,
|
|
||||||
io_map=[
|
|
||||||
(model.graph.output[0].name, pp2_model.graph.input[1].name),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update nodes from postprocess model to use the input of the main network
|
|
||||||
pp2_input_image_name = pp2_model.graph.input[0].name
|
|
||||||
main_input_image_name = model.graph.input[0].name
|
main_input_image_name = model.graph.input[0].name
|
||||||
for node in model.graph.node:
|
for node in model.graph.node:
|
||||||
for idx, name in enumerate(node.input):
|
for idx, name in enumerate(node.input):
|
||||||
if name == pp2_input_image_name:
|
if name == main_input_image_name:
|
||||||
node.input[idx] = main_input_image_name
|
node.input[idx] = "image_input"
|
||||||
model.graph.input.pop(1)
|
model.graph.input[0].name = "image_input"
|
||||||
|
|
||||||
if "pose" in model_path:
|
|
||||||
# Add preprocess model to main network
|
|
||||||
pp1_model = onnx.load(base_path + "pose_preprocess.onnx")
|
|
||||||
model = compose.add_prefix(model, prefix="main_")
|
|
||||||
pp1_model = compose.add_prefix(pp1_model, prefix="preprocess_")
|
|
||||||
model = compose.merge_models(
|
|
||||||
pp1_model,
|
|
||||||
model,
|
|
||||||
io_map=[
|
|
||||||
(pp1_model.graph.output[0].name, model.graph.input[0].name),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add postprocess model
|
|
||||||
pp2_model = onnx.load(base_path + "pose_postprocess.onnx")
|
|
||||||
pp2_model = compose.add_prefix(pp2_model, prefix="postprocess_")
|
|
||||||
model = compose.merge_models(
|
|
||||||
model,
|
|
||||||
pp2_model,
|
|
||||||
io_map=[
|
|
||||||
(model.graph.output[0].name, pp2_model.graph.input[2].name),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update nodes from postprocess model to use the input of the main network
|
|
||||||
pp2_input_image_name = pp2_model.graph.input[0].name
|
|
||||||
pp2_input_bbox_name = pp2_model.graph.input[1].name
|
|
||||||
main_input_image_name = model.graph.input[0].name
|
|
||||||
main_input_bbox_name = model.graph.input[1].name
|
|
||||||
for node in model.graph.node:
|
|
||||||
for idx, name in enumerate(node.input):
|
|
||||||
if name == pp2_input_image_name:
|
|
||||||
node.input[idx] = main_input_image_name
|
|
||||||
if name == pp2_input_bbox_name:
|
|
||||||
node.input[idx] = main_input_bbox_name
|
|
||||||
model.graph.input.pop(2)
|
|
||||||
model.graph.input.pop(2)
|
|
||||||
|
|
||||||
# Set input box type to int32
|
|
||||||
model.graph.input[1].type.tensor_type.elem_type = TensorProto.INT32
|
|
||||||
|
|
||||||
# Set input image type to uint8
|
# Set input image type to uint8
|
||||||
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
|
model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8
|
||||||
|
|||||||
324
media/RESULTS.md
324
media/RESULTS.md
@ -6,269 +6,269 @@ Results of the model in various experiments on different datasets.
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"avg_time_2d": 0.01303539154893261,
|
"avg_time_2d": 0.01109659348504018,
|
||||||
"avg_time_3d": 0.00036579309883764233,
|
"avg_time_3d": 0.00034234281313621394,
|
||||||
"avg_fps": 74.62026875112002
|
"avg_fps": 87.4207158719313
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
"person_nums": {
|
"person_nums": {
|
||||||
"total_frames": 600,
|
"total_frames": 600,
|
||||||
"total_labels": 600,
|
"total_labels": 600,
|
||||||
"total_preds": 600,
|
"total_preds": 601,
|
||||||
"considered_empty": 0,
|
"considered_empty": 0,
|
||||||
"valid_preds": 600,
|
"valid_preds": 600,
|
||||||
"invalid_preds": 0,
|
"invalid_preds": 1,
|
||||||
"missing": 0,
|
"missing": 0,
|
||||||
"invalid_fraction": 0.0,
|
"invalid_fraction": 0.00166,
|
||||||
"precision": 1.0,
|
"precision": 0.99834,
|
||||||
"recall": 1.0,
|
"recall": 1.0,
|
||||||
"f1": 1.0,
|
"f1": 0.99917,
|
||||||
"non_empty": 600
|
"non_empty": 601
|
||||||
},
|
},
|
||||||
"mpjpe": {
|
"mpjpe": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.06664,
|
"mean": 0.06621,
|
||||||
"median": 0.05883,
|
"median": 0.058297,
|
||||||
"std": 0.027642,
|
"std": 0.027913,
|
||||||
"sem": 0.001129,
|
"sem": 0.00114,
|
||||||
"min": 0.037832,
|
"min": 0.04047,
|
||||||
"max": 0.189745,
|
"max": 0.189061,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.1,
|
"recall-0.05": 0.098333,
|
||||||
"recall-0.1": 0.941667,
|
"recall-0.1": 0.941667,
|
||||||
"recall-0.15": 0.95,
|
"recall-0.15": 0.95,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600,
|
"num_labels": 600,
|
||||||
"ap-0.025": 0.0,
|
"ap-0.025": 0.0,
|
||||||
"ap-0.05": 0.018725,
|
"ap-0.05": 0.018429,
|
||||||
"ap-0.1": 0.902023,
|
"ap-0.1": 0.901756,
|
||||||
"ap-0.15": 0.914628,
|
"ap-0.15": 0.913878,
|
||||||
"ap-0.25": 1.0,
|
"ap-0.25": 1.0,
|
||||||
"ap-0.5": 1.0
|
"ap-0.5": 1.0
|
||||||
},
|
},
|
||||||
"nose": {
|
"nose": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.114935,
|
"mean": 0.113174,
|
||||||
"median": 0.099561,
|
"median": 0.098547,
|
||||||
"std": 0.042845,
|
"std": 0.041425,
|
||||||
"sem": 0.001751,
|
"sem": 0.001693,
|
||||||
"min": 0.029831,
|
"min": 0.029421,
|
||||||
"max": 0.268342,
|
"max": 0.27266,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.015,
|
"recall-0.05": 0.01,
|
||||||
"recall-0.1": 0.506667,
|
"recall-0.1": 0.515,
|
||||||
"recall-0.15": 0.803333,
|
"recall-0.15": 0.81,
|
||||||
"recall-0.25": 0.995,
|
"recall-0.25": 0.991667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"shoulder_left": {
|
"shoulder_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.036888,
|
"mean": 0.034727,
|
||||||
"median": 0.028719,
|
"median": 0.026049,
|
||||||
"std": 0.031747,
|
"std": 0.031822,
|
||||||
"sem": 0.001297,
|
"sem": 0.0013,
|
||||||
"min": 0.004721,
|
"min": 0.002176,
|
||||||
"max": 0.182985,
|
"max": 0.183422,
|
||||||
"recall-0.025": 0.401667,
|
"recall-0.025": 0.471667,
|
||||||
"recall-0.05": 0.833333,
|
"recall-0.05": 0.855,
|
||||||
"recall-0.1": 0.948333,
|
"recall-0.1": 0.95,
|
||||||
"recall-0.15": 0.963333,
|
"recall-0.15": 0.965,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"shoulder_right": {
|
"shoulder_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.050032,
|
"mean": 0.04794,
|
||||||
"median": 0.036552,
|
"median": 0.034508,
|
||||||
"std": 0.040712,
|
"std": 0.039316,
|
||||||
"sem": 0.001663,
|
"sem": 0.001606,
|
||||||
"min": 0.006749,
|
"min": 0.004604,
|
||||||
"max": 0.239156,
|
"max": 0.218143,
|
||||||
"recall-0.025": 0.201667,
|
"recall-0.025": 0.211667,
|
||||||
"recall-0.05": 0.708333,
|
"recall-0.05": 0.76,
|
||||||
"recall-0.1": 0.915,
|
"recall-0.1": 0.918333,
|
||||||
"recall-0.15": 0.945,
|
"recall-0.15": 0.946667,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"elbow_left": {
|
"elbow_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.045586,
|
"mean": 0.044638,
|
||||||
"median": 0.037313,
|
"median": 0.036326,
|
||||||
"std": 0.034633,
|
"std": 0.034761,
|
||||||
"sem": 0.001415,
|
"sem": 0.00142,
|
||||||
"min": 0.003768,
|
"min": 0.003696,
|
||||||
"max": 0.200457,
|
"max": 0.196813,
|
||||||
"recall-0.025": 0.216667,
|
"recall-0.025": 0.226667,
|
||||||
"recall-0.05": 0.746667,
|
"recall-0.05": 0.778333,
|
||||||
"recall-0.1": 0.946667,
|
"recall-0.1": 0.941667,
|
||||||
"recall-0.15": 0.955,
|
"recall-0.15": 0.953333,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"elbow_right": {
|
"elbow_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.04539,
|
"mean": 0.044037,
|
||||||
"median": 0.035591,
|
"median": 0.033739,
|
||||||
"std": 0.036356,
|
"std": 0.036263,
|
||||||
"sem": 0.001485,
|
"sem": 0.001482,
|
||||||
"min": 0.007803,
|
"min": 0.007995,
|
||||||
"max": 0.281955,
|
"max": 0.351118,
|
||||||
"recall-0.025": 0.245,
|
"recall-0.025": 0.251667,
|
||||||
"recall-0.05": 0.773333,
|
"recall-0.05": 0.788333,
|
||||||
"recall-0.1": 0.923333,
|
"recall-0.1": 0.931667,
|
||||||
"recall-0.15": 0.941667,
|
"recall-0.15": 0.945,
|
||||||
"recall-0.25": 0.998333,
|
"recall-0.25": 0.998333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"wrist_left": {
|
"wrist_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.046389,
|
"mean": 0.043333,
|
||||||
"median": 0.029742,
|
"median": 0.027284,
|
||||||
"std": 0.04752,
|
"std": 0.044655,
|
||||||
"sem": 0.001942,
|
"sem": 0.001825,
|
||||||
"min": 0.00236,
|
"min": 0.002741,
|
||||||
"max": 0.287479,
|
"max": 0.185438,
|
||||||
"recall-0.025": 0.426667,
|
"recall-0.025": 0.458333,
|
||||||
"recall-0.05": 0.728333,
|
"recall-0.05": 0.745,
|
||||||
"recall-0.1": 0.888333,
|
"recall-0.1": 0.891667,
|
||||||
"recall-0.15": 0.91,
|
"recall-0.15": 0.923333,
|
||||||
"recall-0.25": 0.996667,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"wrist_right": {
|
"wrist_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.046403,
|
"mean": 0.047488,
|
||||||
"median": 0.028916,
|
"median": 0.027367,
|
||||||
"std": 0.046566,
|
"std": 0.053442,
|
||||||
"sem": 0.001903,
|
"sem": 0.002184,
|
||||||
"min": 0.002735,
|
"min": 0.001357,
|
||||||
"max": 0.236808,
|
"max": 0.465438,
|
||||||
"recall-0.025": 0.428333,
|
"recall-0.025": 0.446667,
|
||||||
"recall-0.05": 0.731667,
|
"recall-0.05": 0.738333,
|
||||||
"recall-0.1": 0.87,
|
"recall-0.1": 0.868333,
|
||||||
"recall-0.15": 0.926667,
|
"recall-0.15": 0.898333,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 0.998333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"hip_left": {
|
"hip_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.079732,
|
"mean": 0.084262,
|
||||||
"median": 0.072175,
|
"median": 0.078071,
|
||||||
"std": 0.034532,
|
"std": 0.032944,
|
||||||
"sem": 0.001411,
|
"sem": 0.001346,
|
||||||
"min": 0.013963,
|
"min": 0.022541,
|
||||||
"max": 0.24229,
|
"max": 0.239428,
|
||||||
"recall-0.025": 0.013333,
|
"recall-0.025": 0.003333,
|
||||||
"recall-0.05": 0.081667,
|
"recall-0.05": 0.055,
|
||||||
"recall-0.1": 0.875,
|
"recall-0.1": 0.851667,
|
||||||
"recall-0.15": 0.945,
|
"recall-0.15": 0.951667,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"hip_right": {
|
"hip_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.101424,
|
"mean": 0.106676,
|
||||||
"median": 0.099206,
|
"median": 0.103778,
|
||||||
"std": 0.02636,
|
"std": 0.025796,
|
||||||
"sem": 0.001077,
|
"sem": 0.001054,
|
||||||
"min": 0.032964,
|
"min": 0.042573,
|
||||||
"max": 0.226018,
|
"max": 0.242475,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.008333,
|
"recall-0.05": 0.003333,
|
||||||
"recall-0.1": 0.52,
|
"recall-0.1": 0.421667,
|
||||||
"recall-0.15": 0.946667,
|
"recall-0.15": 0.948333,
|
||||||
"recall-0.25": 1.0,
|
"recall-0.25": 1.0,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"knee_left": {
|
"knee_left": {
|
||||||
"count": 600,
|
"count": 598,
|
||||||
"mean": 0.06299,
|
"mean": 0.062386,
|
||||||
"median": 0.047078,
|
"median": 0.046647,
|
||||||
"std": 0.055676,
|
"std": 0.055624,
|
||||||
"sem": 0.002275,
|
"sem": 0.002277,
|
||||||
"min": 0.013748,
|
"min": 0.012414,
|
||||||
"max": 0.412425,
|
"max": 0.399633,
|
||||||
"recall-0.025": 0.03,
|
"recall-0.025": 0.045,
|
||||||
"recall-0.05": 0.548333,
|
"recall-0.05": 0.555,
|
||||||
"recall-0.1": 0.89,
|
"recall-0.1": 0.885,
|
||||||
"recall-0.15": 0.926667,
|
"recall-0.15": 0.925,
|
||||||
"recall-0.25": 0.983333,
|
"recall-0.25": 0.978333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 0.996667,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"knee_right": {
|
"knee_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.053303,
|
"mean": 0.050939,
|
||||||
"median": 0.039785,
|
"median": 0.041387,
|
||||||
"std": 0.048089,
|
"std": 0.037661,
|
||||||
"sem": 0.001965,
|
"sem": 0.001539,
|
||||||
"min": 0.009094,
|
"min": 0.006788,
|
||||||
"max": 0.470447,
|
"max": 0.268559,
|
||||||
"recall-0.025": 0.06,
|
"recall-0.025": 0.045,
|
||||||
"recall-0.05": 0.736667,
|
"recall-0.05": 0.73,
|
||||||
"recall-0.1": 0.923333,
|
"recall-0.1": 0.941667,
|
||||||
"recall-0.15": 0.926667,
|
"recall-0.15": 0.943333,
|
||||||
"recall-0.25": 0.988333,
|
"recall-0.25": 0.996667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"ankle_left": {
|
"ankle_left": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.097848,
|
"mean": 0.096519,
|
||||||
"median": 0.087393,
|
"median": 0.085325,
|
||||||
"std": 0.039465,
|
"std": 0.043518,
|
||||||
"sem": 0.001613,
|
"sem": 0.001778,
|
||||||
"min": 0.049149,
|
"min": 0.049769,
|
||||||
"max": 0.49791,
|
"max": 0.494823,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.005,
|
"recall-0.05": 0.001667,
|
||||||
"recall-0.1": 0.805,
|
"recall-0.1": 0.828333,
|
||||||
"recall-0.15": 0.923333,
|
"recall-0.15": 0.935,
|
||||||
"recall-0.25": 0.99,
|
"recall-0.25": 0.988333,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"ankle_right": {
|
"ankle_right": {
|
||||||
"count": 600,
|
"count": 600,
|
||||||
"mean": 0.085394,
|
"mean": 0.082453,
|
||||||
"median": 0.070638,
|
"median": 0.068627,
|
||||||
"std": 0.050932,
|
"std": 0.050525,
|
||||||
"sem": 0.002081,
|
"sem": 0.002064,
|
||||||
"min": 0.027674,
|
"min": 0.026098,
|
||||||
"max": 0.441898,
|
"max": 0.482397,
|
||||||
"recall-0.025": 0.0,
|
"recall-0.025": 0.0,
|
||||||
"recall-0.05": 0.023333,
|
"recall-0.05": 0.035,
|
||||||
"recall-0.1": 0.876667,
|
"recall-0.1": 0.896667,
|
||||||
"recall-0.15": 0.9,
|
"recall-0.15": 0.915,
|
||||||
"recall-0.25": 0.983333,
|
"recall-0.25": 0.981667,
|
||||||
"recall-0.5": 1.0,
|
"recall-0.5": 1.0,
|
||||||
"num_labels": 600
|
"num_labels": 600
|
||||||
},
|
},
|
||||||
"joint_recalls": {
|
"joint_recalls": {
|
||||||
"num_labels": 7800,
|
"num_labels": 7800,
|
||||||
"recall-0.025": 0.15538,
|
"recall-0.025": 0.1659,
|
||||||
"recall-0.05": 0.45603,
|
"recall-0.05": 0.46526,
|
||||||
"recall-0.1": 0.83705,
|
"recall-0.1": 0.83359,
|
||||||
"recall-0.15": 0.92372,
|
"recall-0.15": 0.92705,
|
||||||
"recall-0.25": 0.99449,
|
"recall-0.25": 0.99436,
|
||||||
"recall-0.5": 1.0
|
"recall-0.5": 0.99974
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
"total_parts": 8400,
|
"total_parts": 8400,
|
||||||
"correct_parts": 8090,
|
"correct_parts": 8113,
|
||||||
"pcp": 0.963095
|
"pcp": 0.965833
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,8 @@
|
|||||||
|
import math
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import onnxruntime as ort
|
import onnxruntime as ort
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
@ -49,11 +51,11 @@ class BaseModel(ABC):
|
|||||||
self.warmup(warmup)
|
self.warmup(warmup)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def preprocess(self, image: np.ndarray, *args, **kwargs):
|
def preprocess(self, **kwargs):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def postprocess(self, tensor: List[np.ndarray], *args, **kwargs):
|
def postprocess(self, **kwargs):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def warmup(self, epoch: int):
|
def warmup(self, epoch: int):
|
||||||
@ -97,20 +99,178 @@ class BaseModel(ABC):
|
|||||||
|
|
||||||
self.session.run(None, inputs)
|
self.session.run(None, inputs)
|
||||||
|
|
||||||
def __call__(self, image: np.ndarray, *args, **kwargs):
|
def __call__(self, **kwargs):
|
||||||
tensor = self.preprocess(image, *args, **kwargs)
|
tensor = self.preprocess(**kwargs)
|
||||||
inputs = {}
|
inputs = {}
|
||||||
for i in range(len(self.input_names)):
|
for i in range(len(self.input_names)):
|
||||||
iname = self.input_names[i]
|
iname = self.input_names[i]
|
||||||
inputs[iname] = tensor[i]
|
inputs[iname] = tensor[i]
|
||||||
result = self.session.run(None, inputs)
|
result = self.session.run(None, inputs)
|
||||||
output = self.postprocess(result, *args, **kwargs)
|
output = self.postprocess(result=result, **kwargs)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
# ==================================================================================================
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class LetterBox:
    """Aspect-preserving resize of an image onto a fixed-size, constant-filled canvas.

    ``target_size`` is unpacked as ``(height, width)``; ``fill_value`` is the
    value written to every canvas pixel not covered by the resized image.
    """

    def __init__(self, target_size, fill_value=0):
        self.target_size = target_size
        self.fill_value = fill_value

    def calc_params(self, ishape):
        """Compute the letterbox geometry for an image of shape ``ishape``.

        Only the first two entries of ``ishape`` (height, width) are read.

        Returns:
            A tuple ``(paddings, scale, (new_w, new_h))`` where ``paddings`` is
            ``(left, right, top, bottom)`` in canvas pixels, ``scale`` is the
            uniform resize factor, and ``(new_w, new_h)`` is the resized image
            size before padding.
        """
        src_h, src_w = ishape[:2]
        dst_h, dst_w = self.target_size

        # The smaller of the two ratios guarantees the image fits on both axes.
        scale = min(dst_w / src_w, dst_h / src_h)
        fit_w, fit_h = round(src_w * scale), round(src_h * scale)

        # Split the leftover space; an odd remainder goes to the right/bottom.
        left = (dst_w - fit_w) // 2
        top = (dst_h - fit_h) // 2
        right = dst_w - fit_w - left
        bottom = dst_h - fit_h - top

        return (left, right, top, bottom), scale, (fit_w, fit_h)

    def resize_image(self, image):
        """Return ``image`` resized and pasted onto a canvas of ``target_size``.

        The canvas keeps the dtype and channel count (``image.shape[2]``) of
        the input, so ``image`` must be a 3-dimensional array.
        """
        (left, _, top, _), _, (fit_w, fit_h) = self.calc_params(image.shape)

        dst_h, dst_w = self.target_size
        canvas_shape = (dst_h, dst_w, image.shape[2])
        canvas = np.full(canvas_shape, self.fill_value, dtype=image.dtype)

        resized = cv2.resize(image, (fit_w, fit_h), interpolation=cv2.INTER_LINEAR)
        canvas[top : top + fit_h, left : left + fit_w, :] = resized

        return canvas
|
||||||
|
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class BoxCrop:
    """Crop an enlarged, aspect-corrected bounding box and fit it onto a canvas.

    ``target_size`` is unpacked as ``(height, width)``. ``padding_scale``
    multiplies the box width and height around the box center before the
    aspect ratio is matched to the target. ``fill_value`` is written to every
    canvas pixel not covered by the resized crop.
    """

    def __init__(self, target_size, padding_scale=1.0, fill_value=0):
        self.target_size = target_size
        self.padding_scale = padding_scale
        self.fill_value = fill_value

    @staticmethod
    def _split_padding(pad, start, end, limit):
        """Distribute ``pad`` pixels before/after the crop along one axis.

        If the unclamped box started below 0, all padding goes before the
        crop; if it ended beyond ``limit``, all padding goes after it;
        otherwise the padding is centered (odd remainder goes after).
        """
        if pad <= 0:
            return 0, 0
        if start < 0:
            return pad, 0
        # NOTE(review): the crop itself is clamped to ``limit - 1`` while this
        # test uses ``limit``; kept exactly as in the original logic.
        if end > limit:
            return 0, pad
        # Can be caused by bbox rounding
        before = pad // 2
        return before, pad - before

    def calc_params(self, ishape, bbox):
        """Compute crop box, resize factor and padding for ``bbox``.

        Args:
            ishape: Image shape; only ``ishape[0]`` (height) and ``ishape[1]``
                (width) are read.
            bbox: Indexable with ``[x_start, y_start, x_end, y_end]`` in the
                first four positions.

        Returns:
            ``(paddings, scale, new_box, (new_w, new_h))`` where ``paddings``
            is ``(left, right, top, bottom)`` on the target canvas, ``scale``
            is the uniform resize factor, ``new_box`` is the clamped integer
            crop box ``[x0, y0, x1, y1]`` and ``(new_w, new_h)`` is the size
            of the resized crop.

        Degenerate boxes (zero width/height, or lying on/past the image
        border) are widened to a one-pixel crop instead of raising
        ``ZeroDivisionError`` or producing a negative crop size. Results for
        boxes that already produced a positive-size crop are unchanged.
        """
        start_x, start_y, end_x, end_y = bbox[0], bbox[1], bbox[2], bbox[3]
        target_h, target_w = self.target_size

        # Calculate original bounding box center
        center_x = (start_x + end_x) / 2.0
        center_y = (start_y + end_y) / 2.0

        # Scale the bounding box by the padding_scale
        scaled_w = (end_x - start_x) * self.padding_scale
        scaled_h = (end_y - start_y) * self.padding_scale

        # Calculate the aspect ratios; a zero-height box is treated as
        # infinitely wide (if it has width) rather than dividing by zero.
        if scaled_h:
            bbox_aspect = scaled_w / scaled_h
        else:
            bbox_aspect = math.inf if scaled_w > 0 else -math.inf
        target_aspect = target_w / target_h

        # Adjust the scaled bounding box to match the target aspect ratio
        if bbox_aspect > target_aspect:
            adjusted_w = scaled_w
            adjusted_h = scaled_w / target_aspect
        else:
            adjusted_w = scaled_h * target_aspect
            adjusted_h = scaled_h

        # Round the box coordinates outwards
        start_x = int(math.floor(center_x - adjusted_w / 2.0))
        start_y = int(math.floor(center_y - adjusted_h / 2.0))
        end_x = int(math.ceil(center_x + adjusted_w / 2.0))
        end_y = int(math.ceil(center_y + adjusted_h / 2.0))

        # Clamp to the image and guarantee a crop of at least one pixel
        max_x = ishape[1] - 1
        max_y = ishape[0] - 1
        new_start_x = max(0, min(start_x, max_x))
        new_start_y = max(0, min(start_y, max_y))
        new_end_x = max(min(max_x, end_x), new_start_x + 1)
        new_end_y = max(min(max_y, end_y), new_start_y + 1)
        new_box = [new_start_x, new_start_y, new_end_x, new_end_y]

        bbox_w = new_end_x - new_start_x
        bbox_h = new_end_y - new_start_y
        scale = min(target_w / bbox_w, target_h / bbox_h)
        # Never request a zero-sized resize for extremely thin crops
        new_w = max(1, round(bbox_w * scale))
        new_h = max(1, round(bbox_h * scale))

        # Calculate paddings
        pad_left, pad_right = self._split_padding(
            target_w - new_w, start_x, end_x, ishape[1]
        )
        pad_top, pad_bottom = self._split_padding(
            target_h - new_h, start_y, end_y, ishape[0]
        )
        paddings = (pad_left, pad_right, pad_top, pad_bottom)

        return paddings, scale, new_box, (new_w, new_h)

    def crop_resize_box(self, image, bbox):
        """Return the crop for ``bbox`` resized onto a ``target_size`` canvas.

        The canvas keeps the dtype and channel count (``image.shape[2]``) of
        the input, so ``image`` must be a 3-dimensional array.
        """
        paddings, _, new_box, new_size = self.calc_params(image.shape, bbox)

        image = image[new_box[1] : new_box[3], new_box[0] : new_box[2]]

        th, tw = self.target_size
        canvas = np.full(
            (th, tw, image.shape[2]),
            self.fill_value,
            dtype=image.dtype,
        )

        nw, nh = new_size
        dx, dy = paddings[0], paddings[2]
        canvas[dy : dy + nh, dx : dx + nw, :] = cv2.resize(
            image, (nw, nh), interpolation=cv2.INTER_LINEAR
        )

        return canvas
|
||||||
|
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
class RTMDet(BaseModel):
|
class RTMDet(BaseModel):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -119,17 +279,20 @@ class RTMDet(BaseModel):
|
|||||||
warmup: int = 30,
|
warmup: int = 30,
|
||||||
):
|
):
|
||||||
super(RTMDet, self).__init__(model_path, warmup)
|
super(RTMDet, self).__init__(model_path, warmup)
|
||||||
|
self.target_size = (320, 320)
|
||||||
self.conf_threshold = conf_threshold
|
self.conf_threshold = conf_threshold
|
||||||
|
self.letterbox = LetterBox(self.target_size, fill_value=114)
|
||||||
|
|
||||||
def preprocess(self, image: np.ndarray):
|
def preprocess(self, image: np.ndarray):
|
||||||
|
image = self.letterbox.resize_image(image)
|
||||||
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
||||||
tensor = np.expand_dims(tensor, axis=0)
|
tensor = np.expand_dims(tensor, axis=0)
|
||||||
tensor = [tensor]
|
tensor = [tensor]
|
||||||
return tensor
|
return tensor
|
||||||
|
|
||||||
def postprocess(self, tensor: List[np.ndarray]):
|
def postprocess(self, result: List[np.ndarray], image: np.ndarray):
|
||||||
boxes = np.squeeze(tensor[1], axis=0)
|
boxes = np.squeeze(result[0], axis=0)
|
||||||
classes = np.squeeze(tensor[0], axis=0)
|
classes = np.squeeze(result[1], axis=0)
|
||||||
|
|
||||||
human_class = classes[:] == 0
|
human_class = classes[:] == 0
|
||||||
boxes = boxes[human_class]
|
boxes = boxes[human_class]
|
||||||
@ -137,6 +300,35 @@ class RTMDet(BaseModel):
|
|||||||
keep = boxes[:, 4] > self.conf_threshold
|
keep = boxes[:, 4] > self.conf_threshold
|
||||||
boxes = boxes[keep]
|
boxes = boxes[keep]
|
||||||
|
|
||||||
|
paddings, scale, _ = self.letterbox.calc_params(image.shape)
|
||||||
|
|
||||||
|
boxes[:, 0] -= paddings[0]
|
||||||
|
boxes[:, 2] -= paddings[0]
|
||||||
|
boxes[:, 1] -= paddings[2]
|
||||||
|
boxes[:, 3] -= paddings[2]
|
||||||
|
|
||||||
|
boxes = np.maximum(boxes, 0)
|
||||||
|
|
||||||
|
th, tw = self.target_size
|
||||||
|
pad_w = paddings[0] + paddings[1]
|
||||||
|
pad_h = paddings[2] + paddings[3]
|
||||||
|
max_w = tw - pad_w - 1
|
||||||
|
max_h = th - pad_h - 1
|
||||||
|
b0 = boxes[:, 0]
|
||||||
|
b1 = boxes[:, 1]
|
||||||
|
b2 = boxes[:, 2]
|
||||||
|
b3 = boxes[:, 3]
|
||||||
|
b0 = np.minimum(b0, max_w)
|
||||||
|
b1 = np.minimum(b1, max_h)
|
||||||
|
b2 = np.minimum(b2, max_w)
|
||||||
|
b3 = np.minimum(b3, max_h)
|
||||||
|
boxes[:, 0] = b0
|
||||||
|
boxes[:, 1] = b1
|
||||||
|
boxes[:, 2] = b2
|
||||||
|
boxes[:, 3] = b3
|
||||||
|
|
||||||
|
boxes[:, 0:4] /= scale
|
||||||
|
|
||||||
return boxes
|
return boxes
|
||||||
|
|
||||||
|
|
||||||
@ -146,7 +338,8 @@ class RTMDet(BaseModel):
|
|||||||
class RTMPose(BaseModel):
|
class RTMPose(BaseModel):
|
||||||
def __init__(self, model_path: str, warmup: int = 30):
|
def __init__(self, model_path: str, warmup: int = 30):
|
||||||
super(RTMPose, self).__init__(model_path, warmup)
|
super(RTMPose, self).__init__(model_path, warmup)
|
||||||
self.bbox = None
|
self.target_size = (384, 288)
|
||||||
|
self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0)
|
||||||
|
|
||||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
|
def preprocess(self, image: np.ndarray, bbox: np.ndarray):
|
||||||
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
tensor = np.asarray(image).astype(self.input_types[0], copy=False)
|
||||||
@ -154,13 +347,34 @@ class RTMPose(BaseModel):
|
|||||||
bbox = np.asarray(bbox)[0:4]
|
bbox = np.asarray(bbox)[0:4]
|
||||||
bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
|
bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8])
|
||||||
bbox = bbox.round().astype(np.int32)
|
bbox = bbox.round().astype(np.int32)
|
||||||
bbox = np.expand_dims(bbox, axis=0)
|
region = self.boxcrop.crop_resize_box(image, bbox)
|
||||||
tensor = [tensor, bbox]
|
tensor = np.asarray(region).astype(self.input_types[0], copy=False)
|
||||||
|
tensor = np.expand_dims(tensor, axis=0)
|
||||||
|
tensor = [tensor]
|
||||||
return tensor
|
return tensor
|
||||||
|
|
||||||
def postprocess(self, tensor: List[np.ndarray], **kwargs):
|
def postprocess(
|
||||||
scores = np.clip(tensor[0][0], 0, 1)
|
self, result: List[np.ndarray], image: np.ndarray, bbox: np.ndarray
|
||||||
kp = np.concatenate([tensor[1][0], np.expand_dims(scores, axis=-1)], axis=-1)
|
):
|
||||||
|
scores = np.clip(result[1][0], 0, 1)
|
||||||
|
kp = np.concatenate([result[0][0], np.expand_dims(scores, axis=-1)], axis=-1)
|
||||||
|
|
||||||
|
paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bbox)
|
||||||
|
kp[:, 0] -= paddings[0]
|
||||||
|
kp[:, 1] -= paddings[2]
|
||||||
|
kp[:, 0:2] /= scale
|
||||||
|
kp[:, 0] += bbox[0]
|
||||||
|
kp[:, 1] += bbox[1]
|
||||||
|
kp[:, 0:2] = np.maximum(kp[:, 0:2], 0)
|
||||||
|
max_w = image.shape[1] - 1
|
||||||
|
max_h = image.shape[0] - 1
|
||||||
|
b0 = kp[:, 0]
|
||||||
|
b1 = kp[:, 1]
|
||||||
|
b0 = np.minimum(b0, max_w)
|
||||||
|
b1 = np.minimum(b1, max_h)
|
||||||
|
kp[:, 0] = b0
|
||||||
|
kp[:, 1] = b1
|
||||||
|
|
||||||
return kp
|
return kp
|
||||||
|
|
||||||
|
|
||||||
@ -184,10 +398,10 @@ class TopDown:
|
|||||||
self.pose_model = RTMPose(pose_model_path, warmup)
|
self.pose_model = RTMPose(pose_model_path, warmup)
|
||||||
|
|
||||||
def predict(self, image):
|
def predict(self, image):
|
||||||
boxes = self.det_model(image)
|
boxes = self.det_model(image=image)
|
||||||
results = []
|
results = []
|
||||||
for i in range(boxes.shape[0]):
|
for i in range(boxes.shape[0]):
|
||||||
kp = self.pose_model(image, bbox=boxes[i])
|
kp = self.pose_model(image=image, bbox=boxes[i])
|
||||||
results.append(kp)
|
results.append(kp)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user