Updated dataset configs and results.

media/RESULTS.md: 6226 changed lines (diff suppressed because it is too large).
@@ -17,8 +17,8 @@ import rpt

 # ==================================================================================================

-# dataset_use = "panoptic"
 dataset_use = "human36m"
+# dataset_use = "panoptic"
 # dataset_use = "mvor"
 # dataset_use = "shelf"
 # dataset_use = "campus"
@@ -26,8 +26,43 @@ dataset_use = "human36m"
 # dataset_use = "chi3d"
 # dataset_use = "tsinghua"
 # dataset_use = "human36m_wb"
 # dataset_use = "egohumans"
+# dataset_use = "egohumans_tagging"
+# dataset_use = "egohumans_legoassemble"
+# dataset_use = "egohumans_fencing"
+# dataset_use = "egohumans_basketball"
+# dataset_use = "egohumans_volleyball"
+# dataset_use = "egohumans_badminton"
+# dataset_use = "egohumans_tennis"
 # dataset_use = "ntu"
 # dataset_use = "koarob"

+# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered
+# If the persons are small in the image, use a lower value
+default_min_bbox_area = 0.1 * 0.1
+
+# Describes how confident a 2D bounding box needs to be to be considered
+# If the persons are small in the image, or poorly recognizable, use a lower value
+default_min_bbox_score = 0.3
+
+# Describes how good two 2D poses need to match each other to create a valid triangulation
+# If the quality of the 2D detections is poor, use a lower value
+default_min_match_score = 0.94
+
+# Describes the minimum number of camera pairs that need to detect the same person
+# If the number of cameras is high, and the views are not occluded, use a higher value
+default_min_group_size = 1
+

 datasets = {
+    "human36m": {
+        "path": "/datasets/human36m/skelda/pose_test.json",
+        "take_interval": 5,
+        "min_match_score": 0.94,
+        "min_group_size": 1,
+        "min_bbox_score": 0.4,
+        "min_bbox_area": 0.1 * 0.1,
+    },
     "panoptic": {
         "path": "/datasets/panoptic/skelda/test.json",
         "cams": ["00_03", "00_06", "00_12", "00_13", "00_23"],
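The four module-level defaults introduced above act as fallbacks: each entry in `datasets` may override them key by key, and `main()` (further down in this diff) resolves them with `dict.get`. A minimal sketch of that pattern, using a trimmed-down config for illustration rather than the full dict from this commit:

# Minimal sketch of the fallback pattern used in main(); the two-entry
# "datasets" dict here is illustrative, not the full config.
default_min_match_score = 0.94
default_min_group_size = 1

datasets = {
    "mvor": {"min_match_score": 0.85},  # overrides the default
    "campus": {},                       # inherits every default
}

dataset_use = "mvor"
min_match_score = datasets[dataset_use].get("min_match_score", default_min_match_score)
min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
print(min_match_score, min_group_size)  # -> 0.85 1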
@@ -35,27 +70,33 @@ datasets = {
         # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
         "take_interval": 3,
         "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
     },
-    "human36m": {
-        "path": "/datasets/human36m/skelda/pose_test.json",
-        "take_interval": 5,
-        "min_group_size": 1,
-        # "min_group_size": 4,
-        "min_bbox_area": 0.05 * 0.05,
-    },
     "mvor": {
         "path": "/datasets/mvor/skelda/all.json",
         "take_interval": 1,
         "with_depth": False,
+        "min_match_score": 0.85,
+        "min_bbox_score": 0.25,
     },
     "campus": {
         "path": "/datasets/campus/skelda/test.json",
         "take_interval": 1,
+        "min_bbox_score": 0.5,
     },
     "shelf": {
         "path": "/datasets/shelf/skelda/test.json",
         "take_interval": 1,
+        "min_match_score": 0.96,
+        "min_group_size": 2,
     },
     "ikeaasm": {
         "path": "/datasets/ikeaasm/skelda/test.json",
         "take_interval": 2,
+        "min_match_score": 0.92,
+        "min_bbox_score": 0.20,
     },
     "chi3d": {
         "path": "/datasets/chi3d/skelda/all.json",
@@ -64,21 +105,65 @@ datasets = {
     "tsinghua": {
         "path": "/datasets/tsinghua/skelda/test.json",
         "take_interval": 3,
+        "min_group_size": 2,
     },
     "human36m_wb": {
         "path": "/datasets/human36m/skelda/wb/test.json",
         "take_interval": 100,
+        "min_bbox_score": 0.4,
     },
-    "egohumans": {
+    "egohumans_tagging": {
         "path": "/datasets/egohumans/skelda/all.json",
         "take_interval": 2,
-        # "subset": "tagging",
+        "subset": "tagging",
+        "min_group_size": 2,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_legoassemble": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
         "subset": "legoassemble",
-        # "subset": "fencing",
-        # "subset": "basketball",
-        # "subset": "volleyball",
-        # "subset": "badminton",
-        # "subset": "tennis",
+        "min_group_size": 2,
     },
+    "egohumans_fencing": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "fencing",
+        "min_group_size": 7,
+        "min_bbox_score": 0.5,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_basketball": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "basketball",
+        "min_group_size": 7,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.025 * 0.025,
+    },
+    "egohumans_volleyball": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "volleyball",
+        "min_group_size": 11,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_badminton": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "badminton",
+        "min_group_size": 7,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_tennis": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "tennis",
+        "min_group_size": 11,
+        "min_bbox_area": 0.025 * 0.025,
+    },
 }
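Every per-subset EgoHumans entry points at the same skelda export and differs only in its "subset" string and thresholds. An illustrative consistency check for that invariant (not part of the commit; the two-entry dict stands in for the full config):

# Illustrative sanity check: all per-subset EgoHumans entries share one
# skelda export and must name their subset so load_labels() can filter
# sequences by it.
datasets = {
    "egohumans_tagging": {"path": "/datasets/egohumans/skelda/all.json", "subset": "tagging"},
    "egohumans_tennis": {"path": "/datasets/egohumans/skelda/all.json", "subset": "tennis"},
}

for name, cfg in datasets.items():
    if name.startswith("egohumans_"):
        assert cfg["path"] == "/datasets/egohumans/skelda/all.json"
        assert cfg["subset"] == name.split("_", 1)[1]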
@@ -102,8 +187,10 @@ eval_joints = [
 if dataset_use in ["human36m", "panoptic"]:
     eval_joints[eval_joints.index("head")] = "nose"
 if dataset_use.endswith("_wb"):
-    # eval_joints[eval_joints.index("head")] = "nose"
-    eval_joints = list(joint_names_2d)
+    if any((test_triangulate.whole_body.values())):
+        eval_joints = list(joint_names_2d)
+    else:
+        eval_joints[eval_joints.index("head")] = "nose"

 # output_dir = "/RapidPoseTriangulation/data/testoutput/"
 output_dir = ""
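The new branch selects the evaluation joints from the whole-body flags instead of a commented-out line. A minimal sketch of the same selection logic; the joint list and flag dict below are placeholders, since the real joint_names_2d and whole_body come from the surrounding module:

# Sketch of the eval-joint selection above; the joint names and flags are
# hypothetical stand-ins for the module-level values.
joint_names_2d = ["nose", "left_wrist", "right_wrist", "left_hand_root"]  # placeholder
whole_body = {"hands": True, "face": False}  # placeholder flags

eval_joints = ["head", "left_wrist", "right_wrist"]
if any(whole_body.values()):
    eval_joints = list(joint_names_2d)               # evaluate all whole-body joints
else:
    eval_joints[eval_joints.index("head")] = "nose"  # body-only: map head -> nose
print(eval_joints)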
@@ -191,11 +278,11 @@ def load_labels(dataset: dict):
     elif "human36m_wb" in dataset:
         labels = load_json(dataset["human36m_wb"]["path"])

-    elif "egohumans" in dataset:
-        labels = load_json(dataset["egohumans"]["path"])
+    elif any(("egohumans" in key for key in dataset)):
+        labels = load_json(dataset[dataset_use]["path"])
         labels = [lb for lb in labels if "test" in lb["splits"]]
-        labels = [lb for lb in labels if dataset["egohumans"]["subset"] in lb["seq"]]
-        if dataset["egohumans"]["subset"] in ["volleyball", "tennis"]:
+        labels = [lb for lb in labels if dataset[dataset_use]["subset"] in lb["seq"]]
+        if dataset[dataset_use]["subset"] in ["volleyball", "tennis"]:
             labels = [lb for i, lb in enumerate(labels) if i % 150 < 60]

     else:
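For the long volleyball and tennis sequences, the `i % 150 < 60` filter keeps the first 60 labels of every 150-label window, i.e. 40% of the frames. A small worked check:

# Worked example of the "i % 150 < 60" subsampling above: out of each
# window of 150 consecutive labels, only the first 60 survive.
labels = list(range(450))  # stand-in for 450 loaded labels
kept = [lb for i, lb in enumerate(labels) if i % 150 < 60]
assert len(kept) == 180                 # 3 windows * 60 = 40% of the input
assert kept[:3] == [0, 1, 2]            # window 1 starts at index 0
assert kept[60:63] == [150, 151, 152]   # window 2 starts at index 150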
@@ -216,11 +303,20 @@ def main():
     global joint_names_3d, eval_joints

+    # Load dataset specific parameters
+    min_match_score = datasets[dataset_use].get(
+        "min_match_score", default_min_match_score
+    )
+    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
+    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
+    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
+
     # Load 2D pose model
     whole_body = test_triangulate.whole_body
     if any((whole_body[k] for k in whole_body)):
         kpt_model = utils_2d_pose.load_wb_model()
     else:
-        kpt_model = utils_2d_pose.load_model()
+        kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area)

     # Manually set matplotlib backend
     try:
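With the `.get` fallbacks, every threshold resolves to the per-dataset override when present and to the module-level default otherwise. A sketch of the full resolution for the "mvor" entry, using the values from this commit's config:

# Resolution example for "mvor": overridden keys win, everything else
# falls back to the module-level defaults.
entry = {"min_match_score": 0.85, "min_bbox_score": 0.25}
defaults = {
    "min_match_score": 0.94,
    "min_group_size": 1,
    "min_bbox_score": 0.3,
    "min_bbox_area": 0.1 * 0.1,
}
resolved = {k: entry.get(k, v) for k, v in defaults.items()}
assert resolved["min_match_score"] == 0.85  # overridden by the entry
assert resolved["min_group_size"] == 1      # falls back to the default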
@@ -239,50 +335,12 @@ def main():
     # Print a dataset sample for debugging
     print(labels[0])

-    minscores = {
-        # Describes how good two 2D poses need to match each other to create a valid triangulation
-        # If the quality of the 2D detections is poor, use a lower value
-        "panoptic": 0.94,
-        "human36m": 0.94,
-        "mvor": 0.86,
-        "campus": 0.96,
-        "shelf": 0.96,
-        "ikeaasm": 0.89,
-        "chi3d": 0.94,
-        "tsinghua": 0.96,
-        "egohumans": 0.95,
-        "human36m_wb": 0.94,
-    }
-    minscore = minscores.get(dataset_use, 0.95)
-    min_group_sizes = {
-        # Describes the minimum number of camera pairs that need to detect the same person
-        # If the number of cameras is high, and the views are not occluded, use a higher value
-        "panoptic": 1,
-        "shelf": 2,
-        "chi3d": 1,
-        "tsinghua": 2,
-        "egohumans": 4,
-    }
-    min_group_size = min_group_sizes.get(dataset_use, 1)
-    if dataset_use == "panoptic" and len(datasets["panoptic"]["cams"]) == 10:
-        min_group_size = 4
-    if dataset_use == "egohumans" and (
-        "lego" in labels[0]["seq"] or "tagging" in labels[0]["seq"]
-    ):
-        min_group_size = 2
-    if dataset_use == "egohumans" and (
-        "volleyball" in labels[0]["seq"] or "badminton" in labels[0]["seq"]
-    ):
-        min_group_size = 7
-    if dataset_use == "egohumans" and "tennis" in labels[0]["seq"]:
-        min_group_size = 11
-
     print("\nRunning predictions ...")
     all_poses = []
     all_ids = []
     times = []
     triangulator = rpt.Triangulator(
-        min_match_score=minscore, min_group_size=min_group_size
+        min_match_score=min_match_score, min_group_size=min_group_size
    )
     old_scene = ""
     old_index = -1
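The removed hard-coded tables are now expressed per dataset. For the entries that keep an explicit value, cross-checking the old minscores against the new per-dataset "min_match_score" fields shows two deliberate retunes (mvor and ikeaasm) rather than a pure refactor; an illustrative check using values from both halves of this diff:

# Illustrative cross-check of the removed minscores table vs. the new
# per-dataset "min_match_score" entries; the two mismatches are value
# changes, not refactoring slips.
old_minscores = {"mvor": 0.86, "shelf": 0.96, "ikeaasm": 0.89, "human36m": 0.94}
new_config = {
    "mvor": {"min_match_score": 0.85},
    "shelf": {"min_match_score": 0.96},
    "ikeaasm": {"min_match_score": 0.92},
    "human36m": {"min_match_score": 0.94},
}
changed = {
    k: (old_minscores[k], cfg["min_match_score"])
    for k, cfg in new_config.items()
    if cfg["min_match_score"] != old_minscores[k]
}
print(changed)  # {'mvor': (0.86, 0.85), 'ikeaasm': (0.89, 0.92)}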