# RapidPoseTriangulation/scripts/test_skelda_dataset.py

import json
import os
import numpy as np
import utils_pipeline
from skelda import evals
from skelda.writers import json_writer
# ==================================================================================================
whole_body = {
"foots": False,
"face": False,
"hands": False,
}
dataset_use = "human36m"
# dataset_use = "panoptic"
# dataset_use = "mvor"
# dataset_use = "shelf"
# dataset_use = "campus"
# dataset_use = "ikeaasm"
# dataset_use = "chi3d"
# dataset_use = "tsinghua"
# dataset_use = "human36m_wb"
# dataset_use = "egohumans_tagging"
# dataset_use = "egohumans_legoassemble"
# dataset_use = "egohumans_fencing"
# dataset_use = "egohumans_basketball"
# dataset_use = "egohumans_volleyball"
# dataset_use = "egohumans_badminton"
# dataset_use = "egohumans_tennis"
# Describes the minimum area, as a fraction of the image size, for a 2D bounding box to be considered
# If the persons are small in the image, use a lower value
default_min_bbox_area = 0.1 * 0.1
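# For example, the default 0.1 * 0.1 requires a box spanning at least 10% of the image
# width times 10% of the image height, i.e. 1% of the image area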
# Describes how confident a 2D bounding box needs to be to be considered
# If the persons are small in the image or hard to recognize, use a lower value
default_min_bbox_score = 0.3
# Describes how well two 2D poses need to match each other to create a valid triangulation
# If the quality of the 2D detections is poor, use a lower value
default_min_match_score = 0.94
# Describes the minimum number of camera pairs that need to detect the same person
# If the number of cameras is high, and the views are not occluded, use a higher value
default_min_group_size = 1
# Batch poses per image for faster processing
# If most images contain only a single person, disable this, since batching is then slightly slower
default_batch_poses = True
# Approach speed from EN ISO 13855 (2000 mm/s hand speed),
# with an additional factor to compensate for noise-based jumps
default_max_movement_speed = 2.0 * 1.5
# The size of an A4 sheet of paper (~0.3 m), which is assumed to fit between two persons,
# plus the distance a person can move between two frames (here at 10 fps)
default_max_track_distance = 0.3 + default_max_movement_speed / 10
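# Worked example with the defaults: 2.0 * 1.5 = 3.0 m/s maximum speed, so at 10 fps
# max_track_distance = 0.3 + 3.0 / 10 = 0.6 m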
datasets = {
    "human36m": {
        "path": "/datasets/human36m/skelda/pose_test.json",
        "take_interval": 5,
        "fps": 50,
        "min_match_score": 0.95,
        "min_group_size": 1,
        "min_bbox_score": 0.4,
        "min_bbox_area": 0.1 * 0.1,
        "batch_poses": False,
        "max_movement_speed": 2.0 * 1.5,
        "max_track_distance": 0.3 + default_max_movement_speed / (50 / 5),
    },
    "panoptic": {
        "path": "/datasets/panoptic/skelda/test.json",
        "cams": ["00_03", "00_06", "00_12", "00_13", "00_23"],
        # "cams": ["00_03", "00_06", "00_12"],
        # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10"],
        # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
        # "cams": [],
        "take_interval": 3,
        "fps": 30,
        "min_match_score": 0.95,
        "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
        "min_group_size": 1,
        # "min_group_size": 1,
        # "min_group_size": 1,
        # "min_group_size": 2,
        # "min_group_size": 11,
        "min_bbox_area": 0.05 * 0.05,
        "max_track_distance": 0.3 + default_max_movement_speed / (30 / 3),
    },
    "mvor": {
        "path": "/datasets/mvor/skelda/all.json",
        "take_interval": 1,
        "fps": -1,
        "min_match_score": 0.81,
        "min_bbox_score": 0.25,
    },
    "campus": {
        "path": "/datasets/campus/skelda/test.json",
        "fps": 25,
        "take_interval": 1,
        "min_match_score": 0.91,
        "min_bbox_score": 0.5,
        "max_track_distance": 0.3 + default_max_movement_speed / 25,
    },
    "shelf": {
        "path": "/datasets/shelf/skelda/test.json",
        "take_interval": 1,
        "fps": 25,
        "min_match_score": 0.95,
        "min_group_size": 2,
        "max_track_distance": 0.3 + default_max_movement_speed / 25,
    },
    "ikeaasm": {
        "path": "/datasets/ikeaasm/skelda/test.json",
        "take_interval": 2,
        "fps": -1,
        "min_match_score": 0.81,
        "min_bbox_score": 0.20,
    },
    "chi3d": {
        "path": "/datasets/chi3d/skelda/all.json",
        "take_interval": 5,
        "fps": 50,
        "max_track_distance": 0.3 + default_max_movement_speed / (50 / 5),
    },
    "tsinghua": {
        "path": "/datasets/tsinghua/skelda/test.json",
        "take_interval": 3,
        "fps": 30,
        "min_match_score": 0.95,
        "min_group_size": 2,
        "max_track_distance": 0.3 + default_max_movement_speed / (30 / 3),
    },
    "human36m_wb": {
        "path": "/datasets/human36m/skelda/wb/test.json",
        "take_interval": 100,
        "fps": -1,
        "min_bbox_score": 0.4,
        "batch_poses": False,
    },
    "egohumans_tagging": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "tagging",
        "min_match_score": 0.89,
        "min_group_size": 1,
        "min_bbox_score": 0.2,
        "min_bbox_area": 0.05 * 0.05,
        "max_movement_speed": 4.0 * 1.5,
        "max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
    },
    "egohumans_legoassemble": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "legoassemble",
        "min_group_size": 2,
        "max_track_distance": 0.3 + default_max_movement_speed / (20 / 2),
    },
    "egohumans_fencing": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "fencing",
        "min_group_size": 7,
        "min_bbox_score": 0.5,
        "min_bbox_area": 0.05 * 0.05,
        "max_track_distance": 0.3 + default_max_movement_speed / (20 / 2),
    },
    "egohumans_basketball": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "basketball",
        "min_group_size": 4,
        "min_bbox_score": 0.25,
        "min_bbox_area": 0.025 * 0.025,
        "max_movement_speed": 4.0 * 1.5,
        "max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
    },
    "egohumans_volleyball": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "volleyball",
        "min_match_score": 0.95,
        "min_group_size": 7,
        "min_bbox_score": 0.25,
        "min_bbox_area": 0.05 * 0.05,
        "max_movement_speed": 4.0 * 1.5,
        "max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
    },
    "egohumans_badminton": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "badminton",
        "min_group_size": 7,
        "min_bbox_score": 0.25,
        "min_bbox_area": 0.05 * 0.05,
        "max_movement_speed": 4.0 * 1.5,
        "max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
    },
    "egohumans_tennis": {
        "path": "/datasets/egohumans/skelda/all.json",
        "take_interval": 2,
        "fps": 20,
        "subset": "tennis",
        "min_group_size": 11,
        "min_bbox_area": 0.025 * 0.025,
        "max_movement_speed": 4.0 * 1.5,
        "max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
    },
}
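# Note: the per-dataset max_track_distance values divide by the effective frame rate
# fps / take_interval, e.g. human36m: 0.3 + 3.0 / (50 / 5) = 0.6 m; the egohumans
# subsets with 4.0 * 1.5 = 6.0 m/s allow 0.3 + 6.0 / (20 / 2) = 0.9 m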
joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d)
eval_joints = [
"head",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
]
if dataset_use in ("human36m", "panoptic"):
    eval_joints[eval_joints.index("head")] = "nose"
if dataset_use == "human36m_wb":
    if utils_pipeline.use_whole_body(whole_body):
        eval_joints = list(joint_names_2d)
    else:
        eval_joints[eval_joints.index("head")] = "nose"
# output_dir = "/RapidPoseTriangulation/data/testoutput/"
output_dir = ""
# pred_export_path = f"/datasets/predictions/{dataset_use}/RapidPoseTriangulation.json"
pred_export_path = ""
# ==================================================================================================
def load_labels(dataset: dict):
    """Load labels by dataset description"""

    if "panoptic" in dataset:
        labels = utils_pipeline.load_json(dataset["panoptic"]["path"])
        labels = [lb for i, lb in enumerate(labels) if i % 1500 < 90]
        # Filter by maximum number of persons
        labels = [lb for lb in labels if len(lb["bodies3D"]) <= 10]
        # Filter scenes
        if "use_scenes" in dataset["panoptic"]:
            labels = [
                lb for lb in labels if lb["scene"] in dataset["panoptic"]["use_scenes"]
            ]
        # Filter cameras
        if "cameras_depth" not in labels[0] and len(dataset["panoptic"]["cams"]) > 0:
            for label in labels:
                for i, cam in reversed(list(enumerate(label["cameras"]))):
                    if cam["name"] not in dataset["panoptic"]["cams"]:
                        label["cameras"].pop(i)
                        label["imgpaths"].pop(i)

    elif "human36m" in dataset:
        labels = utils_pipeline.load_json(dataset["human36m"]["path"])
        labels = [lb for lb in labels if lb["subject"] == "S9"]
        labels = [lb for i, lb in enumerate(labels) if i % 4000 < 150]

    elif "mvor" in dataset:
        labels = utils_pipeline.load_json(dataset["mvor"]["path"])
        # Rename keys
        for label in labels:
            label["cameras_color"] = label["cameras"]
            label["imgpaths_color"] = label["imgpaths"]

    elif "ikeaasm" in dataset:
        labels = utils_pipeline.load_json(dataset["ikeaasm"]["path"])
        cams0 = str(labels[0]["cameras"])
        labels = [lb for lb in labels if str(lb["cameras"]) == cams0]

    elif "shelf" in dataset:
        labels = utils_pipeline.load_json(dataset["shelf"]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]

    elif "campus" in dataset:
        labels = utils_pipeline.load_json(dataset["campus"]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]

    elif "tsinghua" in dataset:
        labels = utils_pipeline.load_json(dataset["tsinghua"]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]
        labels = [lb for lb in labels if lb["seq"] == "seq_1"]
        labels = [lb for i, lb in enumerate(labels) if i % 300 < 90]
        for label in labels:
            label["bodyids"] = list(range(len(label["bodies3D"])))

    elif "chi3d" in dataset:
        labels = utils_pipeline.load_json(dataset["chi3d"]["path"])
        labels = [lb for lb in labels if lb["setup"] == "s03"]
        labels = [lb for i, lb in enumerate(labels) if i % 2000 < 150]

    elif "human36m_wb" in dataset:
        labels = utils_pipeline.load_json(dataset["human36m_wb"]["path"])

    elif any("egohumans" in key for key in dataset):
        labels = utils_pipeline.load_json(dataset[dataset_use]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]
        labels = [lb for lb in labels if dataset[dataset_use]["subset"] in lb["seq"]]
        if dataset[dataset_use]["subset"] in ["volleyball", "tennis"]:
            labels = [lb for i, lb in enumerate(labels) if i % 150 < 60]

    else:
        raise ValueError("Dataset not available")

    # Optionally drop samples to speed up train/eval
    if "take_interval" in dataset:
        take_interval = dataset["take_interval"]
        if take_interval > 1:
            labels = [lb for i, lb in enumerate(labels) if i % take_interval == 0]

    # Add default values
    for label in labels:
        if "scene" not in label:
            label["scene"] = "default"
        for cam in label["cameras"]:
            if "type" not in cam:
                cam["type"] = "pinhole"

    return labels
# ==================================================================================================
def main():
    global joint_names_3d, eval_joints

    print("Loading dataset ...")
    labels = load_labels(
        {
            dataset_use: datasets[dataset_use],
            "take_interval": datasets[dataset_use]["take_interval"],
        }
    )

    # Print a dataset sample for debugging
    print("Amount of samples:", len(labels))
    print(labels[0])

    # Save dataset
    tmp_export_dir = "/tmp/rpt/"
    for label in labels:
        if "splits" in label:
            label.pop("splits")
    json_writer.save_dataset(labels, tmp_export_dir)
    # Load dataset specific parameters
    min_match_score = datasets[dataset_use].get(
        "min_match_score", default_min_match_score
    )
    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
    batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
    max_movement_speed = datasets[dataset_use].get(
        "max_movement_speed", default_max_movement_speed
    )
    max_track_distance = datasets[dataset_use].get(
        "max_track_distance", default_max_track_distance
    )
    # Save config
    config_path = tmp_export_dir + "config.json"
    config = {
        "min_match_score": min_match_score,
        "min_group_size": min_group_size,
        "min_bbox_score": min_bbox_score,
        "min_bbox_area": min_bbox_area,
        "batch_poses": batch_poses,
        "max_movement_speed": max_movement_speed,
        "max_track_distance": max_track_distance,
        "whole_body": whole_body,
        "take_interval": datasets[dataset_use]["take_interval"],
        "fps": datasets[dataset_use]["fps"],
    }
    utils_pipeline.save_json(config, config_path)
    # Call the CPP binary
    os.system("/RapidPoseTriangulation/scripts/test_skelda_dataset.bin")
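    # The binary picks up the dataset and config exported to /tmp/rpt/ above and
    # writes its outputs to /tmp/rpt/results.json, which is loaded below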
    # Load the results
    print("Loading exports ...")
    res_path = tmp_export_dir + "results.json"
    results = utils_pipeline.load_json(res_path)
    all_poses_3d = results["all_poses_3d"]
    all_poses_2d = results["all_poses_2d"]
    all_ids = results["all_ids"]
    joint_names_3d = results["joint_names_3d"]
    # # Visualize labels and predictions
    # from skelda import utils_view
    # for i in range(0, len(labels), 1):
    #     posesL = []
    #     posesR = []
    #     jnames = []
    #     for j in labels[i]["joints"]:
    #         if "->" in j:
    #             jnames.append(j.split("->")[-1])
    #         else:
    #             jnames.append(j)
    #     for j in range(len(labels[i]["bodies3D"])):
    #         pose = []
    #         for k in range(len(eval_joints)):
    #             n = eval_joints[k]
    #             pose.append(labels[i]["bodies3D"][j][jnames.index(n)])
    #         posesL.append(pose)
    #     for j in range(len(all_poses_3d[i])):
    #         pose = []
    #         for k in range(len(eval_joints)):
    #             n = eval_joints[k]
    #             pose.append(all_poses_3d[i][j][joint_names_3d.index(n)])
    #         posesR.append(pose)
    #     poses_3d = posesL + posesR
    #     sample = labels[i]
    #     sample["bodies3D"] = np.array(poses_3d).round(3).tolist()
    #     sample["joints"] = eval_joints
    #     sample["num_persons"] = len(poses_3d)
    #     print(sample)
    #     utils_view.draw_sample_3d(sample)
    #     utils_view.draw_many_images(
    #         sample["imgpaths"],
    #         sample["cameras"],
    #         [],
    #         all_poses_2d[i],
    #         joint_names_3d,
    #         "2D detections",
    #     )
    #     utils_view.show_plots()
    if pred_export_path != "":
        # Export predictions
        print("\nExporting predictions ...")
        all_poses_3d = [np.array(poses).round(3).tolist() for poses in all_poses_3d]
        data = {
            "poses3D": all_poses_3d,
            "ids": all_ids,
            "joint_names": joint_names_3d,
        }
        os.makedirs(os.path.dirname(pred_export_path), exist_ok=True)
        with open(pred_export_path, "w") as file:
            json.dump(data, file, indent=0)
    # Run evaluation
    _ = evals.mpjpe.run_eval(
        labels,
        all_poses_3d,
        all_ids,
        joint_names_net=joint_names_3d,
        joint_names_use=eval_joints,
        save_error_imgs=output_dir,
        debug_2D_preds=all_poses_2d,
    )
    _ = evals.pcp.run_eval(
        labels,
        all_poses_3d,
        all_ids,
        joint_names_net=joint_names_3d,
        joint_names_use=eval_joints,
        replace_head_with_nose=True,
    )
    if dataset_use == "shelf":
        # Also run old-style evaluation for shelf dataset
        odir = os.path.join(output_dir, "pcp/") if output_dir != "" else ""
        _ = evals.campus_shelf.run_eval(
            labels,
            all_poses_3d,
            all_ids,
            joint_names_net=joint_names_3d,
            save_error_imgs=odir,
            debug_2D_preds=all_poses_2d,
        )
# ==================================================================================================
if __name__ == "__main__":
    main()