Updated dataset configs and results.

Daniel
2024-12-16 14:43:54 +01:00
parent 17ff41010a
commit 21e93661f5
2 changed files with 3230 additions and 3172 deletions

File diff suppressed because it is too large


@@ -17,8 +17,8 @@ import rpt
 # ==================================================================================================
-# dataset_use = "panoptic"
 dataset_use = "human36m"
+# dataset_use = "panoptic"
 # dataset_use = "mvor"
 # dataset_use = "shelf"
 # dataset_use = "campus"
@@ -26,8 +26,43 @@ dataset_use = "human36m"
 # dataset_use = "chi3d"
 # dataset_use = "tsinghua"
 # dataset_use = "human36m_wb"
-# dataset_use = "egohumans"
+# dataset_use = "egohumans_tagging"
+# dataset_use = "egohumans_legoassemble"
+# dataset_use = "egohumans_fencing"
+# dataset_use = "egohumans_basketball"
+# dataset_use = "egohumans_volleyball"
+# dataset_use = "egohumans_badminton"
+# dataset_use = "egohumans_tennis"
+# dataset_use = "ntu"
+# dataset_use = "koarob"
+
+# Describes the minimum area, as a fraction of the image size, for a 2D bounding box to be considered
+# If the persons are small in the image, use a lower value
+default_min_bbox_area = 0.1 * 0.1
+
+# Describes how confident a 2D bounding box must be to be considered
+# If the persons are small in the image, or poorly recognizable, use a lower value
+default_min_bbox_score = 0.3
+
+# Describes how well two 2D poses must match each other to create a valid triangulation
+# If the quality of the 2D detections is poor, use a lower value
+default_min_match_score = 0.94
+
+# Describes the minimum number of camera pairs that must detect the same person
+# If the number of cameras is high, and the views are not occluded, use a higher value
+default_min_group_size = 1
+
 datasets = {
+    "human36m": {
+        "path": "/datasets/human36m/skelda/pose_test.json",
+        "take_interval": 5,
+        "min_match_score": 0.94,
+        "min_group_size": 1,
+        "min_bbox_score": 0.4,
+        "min_bbox_area": 0.1 * 0.1,
+    },
     "panoptic": {
         "path": "/datasets/panoptic/skelda/test.json",
         "cams": ["00_03", "00_06", "00_12", "00_13", "00_23"],
@@ -35,27 +70,33 @@ datasets = {
         # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
         "take_interval": 3,
         "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
-    },
-    "human36m": {
-        "path": "/datasets/human36m/skelda/pose_test.json",
-        "take_interval": 5,
+        "min_group_size": 1,
+        # "min_group_size": 4,
+        "min_bbox_area": 0.05 * 0.05,
     },
     "mvor": {
         "path": "/datasets/mvor/skelda/all.json",
         "take_interval": 1,
         "with_depth": False,
+        "min_match_score": 0.85,
+        "min_bbox_score": 0.25,
     },
     "campus": {
         "path": "/datasets/campus/skelda/test.json",
         "take_interval": 1,
+        "min_bbox_score": 0.5,
     },
     "shelf": {
         "path": "/datasets/shelf/skelda/test.json",
         "take_interval": 1,
+        "min_match_score": 0.96,
+        "min_group_size": 2,
     },
     "ikeaasm": {
         "path": "/datasets/ikeaasm/skelda/test.json",
         "take_interval": 2,
+        "min_match_score": 0.92,
+        "min_bbox_score": 0.20,
     },
     "chi3d": {
         "path": "/datasets/chi3d/skelda/all.json",
@@ -64,21 +105,65 @@ datasets = {
     "tsinghua": {
         "path": "/datasets/tsinghua/skelda/test.json",
         "take_interval": 3,
+        "min_group_size": 2,
     },
     "human36m_wb": {
         "path": "/datasets/human36m/skelda/wb/test.json",
         "take_interval": 100,
+        "min_bbox_score": 0.4,
     },
-    "egohumans": {
+    "egohumans_tagging": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "tagging",
+        "min_group_size": 2,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_legoassemble": {
         "path": "/datasets/egohumans/skelda/all.json",
         "take_interval": 2,
-        # "subset": "tagging",
         "subset": "legoassemble",
-        # "subset": "fencing",
-        # "subset": "basketball",
-        # "subset": "volleyball",
-        # "subset": "badminton",
-        # "subset": "tennis",
+        "min_group_size": 2,
+    },
+    "egohumans_fencing": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "fencing",
+        "min_group_size": 7,
+        "min_bbox_score": 0.5,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_basketball": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "basketball",
+        "min_group_size": 7,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.025 * 0.025,
+    },
+    "egohumans_volleyball": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "volleyball",
+        "min_group_size": 11,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_badminton": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "badminton",
+        "min_group_size": 7,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_tennis": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "tennis",
+        "min_group_size": 11,
+        "min_bbox_area": 0.025 * 0.025,
     },
 }
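The seven egohumans entries differ only in subset and thresholds, so they could equally be generated from one table of overrides. A sketch of that alternative, illustrative only, with the override values copied from the hunk above:

    # Sketch: build the per-subset egohumans entries from a single override table.
    datasets = {}  # stands in for the config dict defined at the top of the file

    egohumans_overrides = {
        "tagging": {"min_group_size": 2, "min_bbox_score": 0.25, "min_bbox_area": 0.05 * 0.05},
        "legoassemble": {"min_group_size": 2},
        "fencing": {"min_group_size": 7, "min_bbox_score": 0.5, "min_bbox_area": 0.05 * 0.05},
        "basketball": {"min_group_size": 7, "min_bbox_score": 0.25, "min_bbox_area": 0.025 * 0.025},
        "volleyball": {"min_group_size": 11, "min_bbox_score": 0.25, "min_bbox_area": 0.05 * 0.05},
        "badminton": {"min_group_size": 7, "min_bbox_score": 0.25, "min_bbox_area": 0.05 * 0.05},
        "tennis": {"min_group_size": 11, "min_bbox_area": 0.025 * 0.025},
    }
    datasets.update(
        {
            f"egohumans_{subset}": {
                "path": "/datasets/egohumans/skelda/all.json",
                "take_interval": 2,
                "subset": subset,
                **overrides,
            }
            for subset, overrides in egohumans_overrides.items()
        }
    )

Spelling the entries out literally, as the commit does, trades repetition for at-a-glance visibility of each subset's tuning.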
@@ -102,8 +187,10 @@ eval_joints = [
 if dataset_use in ["human36m", "panoptic"]:
     eval_joints[eval_joints.index("head")] = "nose"
 if dataset_use.endswith("_wb"):
-    # eval_joints[eval_joints.index("head")] = "nose"
-    eval_joints = list(joint_names_2d)
+    if any((test_triangulate.whole_body.values())):
+        eval_joints = list(joint_names_2d)
+    else:
+        eval_joints[eval_joints.index("head")] = "nose"
 
 # output_dir = "/RapidPoseTriangulation/data/testoutput/"
 output_dir = ""
@@ -191,11 +278,11 @@ def load_labels(dataset: dict):
     elif "human36m_wb" in dataset:
         labels = load_json(dataset["human36m_wb"]["path"])
-    elif "egohumans" in dataset:
-        labels = load_json(dataset["egohumans"]["path"])
+    elif any(("egohumans" in key for key in dataset)):
+        labels = load_json(dataset[dataset_use]["path"])
         labels = [lb for lb in labels if "test" in lb["splits"]]
-        labels = [lb for lb in labels if dataset["egohumans"]["subset"] in lb["seq"]]
-        if dataset["egohumans"]["subset"] in ["volleyball", "tennis"]:
+        labels = [lb for lb in labels if dataset[dataset_use]["subset"] in lb["seq"]]
+        if dataset[dataset_use]["subset"] in ["volleyball", "tennis"]:
             labels = [lb for i, lb in enumerate(labels) if i % 150 < 60]
     else:
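The i % 150 < 60 filter keeps the first 60 of every 150 consecutive labels, i.e. 40% of the volleyball and tennis frames. A quick sketch to make the selection pattern concrete (assuming sequential frame order, as the code does):

    # Sketch: the i % 150 < 60 filter keeps indices 0-59, 150-209, 300-359, ...
    labels = list(range(450))  # stand-in for 450 sequential label entries
    kept = [lb for i, lb in enumerate(labels) if i % 150 < 60]
    assert len(kept) == 180  # 60 out of every 150 frames, i.e. 40%
    assert kept[:3] == [0, 1, 2] and kept[60] == 150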
@ -216,11 +303,20 @@ def load_labels(dataset: dict):
def main(): def main():
global joint_names_3d, eval_joints global joint_names_3d, eval_joints
# Load dataset specific parameters
min_match_score = datasets[dataset_use].get(
"min_match_score", default_min_match_score
)
min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
# Load 2D pose model
whole_body = test_triangulate.whole_body whole_body = test_triangulate.whole_body
if any((whole_body[k] for k in whole_body)): if any((whole_body[k] for k in whole_body)):
kpt_model = utils_2d_pose.load_wb_model() kpt_model = utils_2d_pose.load_wb_model()
else: else:
kpt_model = utils_2d_pose.load_model() kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area)
# Manually set matplotlib backend # Manually set matplotlib backend
try: try:
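load_model now receives the bounding-box thresholds, so detection filtering can happen inside the 2D stage. What that filtering typically looks like, sketched with a hypothetical helper (utils_2d_pose's actual internals are not shown in this diff and may differ):

    # Sketch: filter 2D detections by confidence and normalized box area before
    # pose estimation. Hypothetical helper, not the library's real code.
    def filter_boxes(boxes, min_bbox_score, min_bbox_area, img_w, img_h):
        kept = []
        for (x0, y0, x1, y1, score) in boxes:
            area_frac = ((x1 - x0) * (y1 - y0)) / (img_w * img_h)
            # Drop low-confidence boxes and boxes covering too little of the image
            if score >= min_bbox_score and area_frac >= min_bbox_area:
                kept.append((x0, y0, x1, y1, score))
        return kept

    boxes = [(10, 10, 200, 400, 0.9), (5, 5, 20, 30, 0.9), (0, 0, 300, 500, 0.2)]
    print(filter_boxes(boxes, min_bbox_score=0.3, min_bbox_area=0.1 * 0.1, img_w=640, img_h=480))
    # -> only the first box survives: the second is too small, the third too uncertain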
@@ -239,50 +335,12 @@ def main():
     # Print a dataset sample for debugging
     print(labels[0])
 
-    minscores = {
-        # Describes how good two 2D poses need to match each other to create a valid triangulation
-        # If the quality of the 2D detections is poor, use a lower value
-        "panoptic": 0.94,
-        "human36m": 0.94,
-        "mvor": 0.86,
-        "campus": 0.96,
-        "shelf": 0.96,
-        "ikeaasm": 0.89,
-        "chi3d": 0.94,
-        "tsinghua": 0.96,
-        "egohumans": 0.95,
-        "human36m_wb": 0.94,
-    }
-    minscore = minscores.get(dataset_use, 0.95)
-
-    min_group_sizes = {
-        # Describes the minimum number of camera pairs that need to detect the same person
-        # If the number of cameras is high, and the views are not occluded, use a higher value
-        "panoptic": 1,
-        "shelf": 2,
-        "chi3d": 1,
-        "tsinghua": 2,
-        "egohumans": 4,
-    }
-    min_group_size = min_group_sizes.get(dataset_use, 1)
-    if dataset_use == "panoptic" and len(datasets["panoptic"]["cams"]) == 10:
-        min_group_size = 4
-    if dataset_use == "egohumans" and (
-        "lego" in labels[0]["seq"] or "tagging" in labels[0]["seq"]
-    ):
-        min_group_size = 2
-    if dataset_use == "egohumans" and (
-        "volleyball" in labels[0]["seq"] or "badminton" in labels[0]["seq"]
-    ):
-        min_group_size = 7
-    if dataset_use == "egohumans" and "tennis" in labels[0]["seq"]:
-        min_group_size = 11
-
     print("\nRunning predictions ...")
     all_poses = []
     all_ids = []
     times = []
     triangulator = rpt.Triangulator(
-        min_match_score=minscore, min_group_size=min_group_size
+        min_match_score=min_match_score, min_group_size=min_group_size
    )
     old_scene = ""
     old_index = -1
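The config-driven values do not all match the deleted tables (e.g. mvor 0.86 -> 0.85, ikeaasm 0.89 -> 0.92, and campus now falls back to the 0.94 default instead of 0.96), so resolving the effective thresholds explicitly is a useful check. A sketch using the datasets dict and default_* constants defined at the top of the file:

    # Sketch: print the effective thresholds per dataset under the new scheme.
    for name in ["mvor", "campus", "shelf", "ikeaasm"]:
        cfg = datasets[name]
        print(
            name,
            cfg.get("min_match_score", default_min_match_score),
            cfg.get("min_group_size", default_min_group_size),
        )
    # mvor 0.85 1
    # campus 0.94 1
    # shelf 0.96 2
    # ikeaasm 0.92 1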