Updated dataset configs and results.

media/RESULTS.md: 6226 changed lines (diff suppressed because it is too large).
@@ -17,8 +17,8 @@ import rpt

 # ==================================================================================================

-# dataset_use = "panoptic"
 dataset_use = "human36m"
+# dataset_use = "panoptic"
 # dataset_use = "mvor"
 # dataset_use = "shelf"
 # dataset_use = "campus"
@@ -26,8 +26,43 @@ dataset_use = "human36m"
 # dataset_use = "chi3d"
 # dataset_use = "tsinghua"
 # dataset_use = "human36m_wb"
 # dataset_use = "egohumans"
+# dataset_use = "egohumans_tagging"
+# dataset_use = "egohumans_legoassemble"
+# dataset_use = "egohumans_fencing"
+# dataset_use = "egohumans_basketball"
+# dataset_use = "egohumans_volleyball"
+# dataset_use = "egohumans_badminton"
+# dataset_use = "egohumans_tennis"
 # dataset_use = "ntu"
 # dataset_use = "koarob"

+# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered
+# If the persons are small in the image, use a lower value
+default_min_bbox_area = 0.1 * 0.1
+
+# Describes how confident a 2D bounding box needs to be to be considered
+# If the persons are small in the image, or poorly recognizable, use a lower value
+default_min_bbox_score = 0.3
+
+# Describes how good two 2D poses need to match each other to create a valid triangulation
+# If the quality of the 2D detections is poor, use a lower value
+default_min_match_score = 0.94
+
+# Describes the minimum number of camera pairs that need to detect the same person
+# If the number of cameras is high, and the views are not occluded, use a higher value
+default_min_group_size = 1
+

 datasets = {
+    "human36m": {
+        "path": "/datasets/human36m/skelda/pose_test.json",
+        "take_interval": 5,
+        "min_match_score": 0.94,
+        "min_group_size": 1,
+        "min_bbox_score": 0.4,
+        "min_bbox_area": 0.1 * 0.1,
+    },
     "panoptic": {
         "path": "/datasets/panoptic/skelda/test.json",
         "cams": ["00_03", "00_06", "00_12", "00_13", "00_23"],
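The four module-level defaults introduced above act as fallbacks: each entry in `datasets` may override them key by key, and `main()` (further down in this diff) resolves them with `dict.get`. A minimal sketch of that pattern, using a trimmed-down config for illustration rather than the full dict from this commit:

# Minimal sketch of the fallback pattern used in main(); the two-entry
# "datasets" dict here is illustrative, not the full config.
default_min_match_score = 0.94
default_min_group_size = 1

datasets = {
    "mvor": {"min_match_score": 0.85},  # overrides the default
    "campus": {},                       # inherits every default
}

dataset_use = "mvor"
min_match_score = datasets[dataset_use].get("min_match_score", default_min_match_score)
min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
print(min_match_score, min_group_size)  # -> 0.85 1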
@@ -35,27 +70,33 @@ datasets = {
         # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
         "take_interval": 3,
         "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
     },
-    "human36m": {
-        "path": "/datasets/human36m/skelda/pose_test.json",
-        "take_interval": 5,
-        "min_group_size": 1,
-        # "min_group_size": 4,
-        "min_bbox_area": 0.05 * 0.05,
-    },
     "mvor": {
         "path": "/datasets/mvor/skelda/all.json",
         "take_interval": 1,
         "with_depth": False,
+        "min_match_score": 0.85,
+        "min_bbox_score": 0.25,
     },
     "campus": {
         "path": "/datasets/campus/skelda/test.json",
         "take_interval": 1,
+        "min_bbox_score": 0.5,
     },
     "shelf": {
         "path": "/datasets/shelf/skelda/test.json",
         "take_interval": 1,
+        "min_match_score": 0.96,
+        "min_group_size": 2,
     },
     "ikeaasm": {
         "path": "/datasets/ikeaasm/skelda/test.json",
         "take_interval": 2,
+        "min_match_score": 0.92,
+        "min_bbox_score": 0.20,
     },
     "chi3d": {
         "path": "/datasets/chi3d/skelda/all.json",
@@ -64,21 +105,65 @@ datasets = {
     "tsinghua": {
         "path": "/datasets/tsinghua/skelda/test.json",
         "take_interval": 3,
+        "min_group_size": 2,
     },
     "human36m_wb": {
         "path": "/datasets/human36m/skelda/wb/test.json",
         "take_interval": 100,
+        "min_bbox_score": 0.4,
     },
-    "egohumans": {
+    "egohumans_tagging": {
         "path": "/datasets/egohumans/skelda/all.json",
         "take_interval": 2,
-        # "subset": "tagging",
+        "subset": "tagging",
+        "min_group_size": 2,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_legoassemble": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
         "subset": "legoassemble",
-        # "subset": "fencing",
-        # "subset": "basketball",
-        # "subset": "volleyball",
-        # "subset": "badminton",
-        # "subset": "tennis",
+        "min_group_size": 2,
     },
+    "egohumans_fencing": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "fencing",
+        "min_group_size": 7,
+        "min_bbox_score": 0.5,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_basketball": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "basketball",
+        "min_group_size": 7,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.025 * 0.025,
+    },
+    "egohumans_volleyball": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "volleyball",
+        "min_group_size": 11,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_badminton": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "badminton",
+        "min_group_size": 7,
+        "min_bbox_score": 0.25,
+        "min_bbox_area": 0.05 * 0.05,
+    },
+    "egohumans_tennis": {
+        "path": "/datasets/egohumans/skelda/all.json",
+        "take_interval": 2,
+        "subset": "tennis",
+        "min_group_size": 11,
+        "min_bbox_area": 0.025 * 0.025,
+    },
 }
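Every per-subset EgoHumans entry points at the same skelda export and differs only in its "subset" string and thresholds. An illustrative consistency check for that invariant (not part of the commit; the two-entry dict stands in for the full config):

# Illustrative sanity check: all per-subset EgoHumans entries share one
# skelda export and must name their subset so load_labels() can filter
# sequences by it.
datasets = {
    "egohumans_tagging": {"path": "/datasets/egohumans/skelda/all.json", "subset": "tagging"},
    "egohumans_tennis": {"path": "/datasets/egohumans/skelda/all.json", "subset": "tennis"},
}

for name, cfg in datasets.items():
    if name.startswith("egohumans_"):
        assert cfg["path"] == "/datasets/egohumans/skelda/all.json"
        assert cfg["subset"] == name.split("_", 1)[1]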
@@ -102,8 +187,10 @@ eval_joints = [
 if dataset_use in ["human36m", "panoptic"]:
     eval_joints[eval_joints.index("head")] = "nose"
 if dataset_use.endswith("_wb"):
-    # eval_joints[eval_joints.index("head")] = "nose"
-    eval_joints = list(joint_names_2d)
+    if any((test_triangulate.whole_body.values())):
+        eval_joints = list(joint_names_2d)
+    else:
+        eval_joints[eval_joints.index("head")] = "nose"

 # output_dir = "/RapidPoseTriangulation/data/testoutput/"
 output_dir = ""
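The new branch selects the evaluation joints from the whole-body flags instead of a commented-out line. A minimal sketch of the same selection logic; the joint list and flag dict below are placeholders, since the real joint_names_2d and whole_body come from the surrounding module:

# Sketch of the eval-joint selection above; the joint names and flags are
# hypothetical stand-ins for the module-level values.
joint_names_2d = ["nose", "left_wrist", "right_wrist", "left_hand_root"]  # placeholder
whole_body = {"hands": True, "face": False}  # placeholder flags

eval_joints = ["head", "left_wrist", "right_wrist"]
if any(whole_body.values()):
    eval_joints = list(joint_names_2d)               # evaluate all whole-body joints
else:
    eval_joints[eval_joints.index("head")] = "nose"  # body-only: map head -> nose
print(eval_joints)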
@@ -191,11 +278,11 @@ def load_labels(dataset: dict):
     elif "human36m_wb" in dataset:
         labels = load_json(dataset["human36m_wb"]["path"])

-    elif "egohumans" in dataset:
-        labels = load_json(dataset["egohumans"]["path"])
+    elif any(("egohumans" in key for key in dataset)):
+        labels = load_json(dataset[dataset_use]["path"])
         labels = [lb for lb in labels if "test" in lb["splits"]]
-        labels = [lb for lb in labels if dataset["egohumans"]["subset"] in lb["seq"]]
-        if dataset["egohumans"]["subset"] in ["volleyball", "tennis"]:
+        labels = [lb for lb in labels if dataset[dataset_use]["subset"] in lb["seq"]]
+        if dataset[dataset_use]["subset"] in ["volleyball", "tennis"]:
             labels = [lb for i, lb in enumerate(labels) if i % 150 < 60]

     else:
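For the long volleyball and tennis sequences, the `i % 150 < 60` filter keeps the first 60 labels of every 150-label window, i.e. 40% of the frames. A small worked check:

# Worked example of the "i % 150 < 60" subsampling above: out of each
# window of 150 consecutive labels, only the first 60 survive.
labels = list(range(450))  # stand-in for 450 loaded labels
kept = [lb for i, lb in enumerate(labels) if i % 150 < 60]
assert len(kept) == 180                 # 3 windows * 60 = 40% of the input
assert kept[:3] == [0, 1, 2]            # window 1 starts at index 0
assert kept[60:63] == [150, 151, 152]   # window 2 starts at index 150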
@@ -216,11 +303,20 @@ def main():
     global joint_names_3d, eval_joints

+    # Load dataset specific parameters
+    min_match_score = datasets[dataset_use].get(
+        "min_match_score", default_min_match_score
+    )
+    min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size)
+    min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
+    min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
+
     # Load 2D pose model
     whole_body = test_triangulate.whole_body
     if any((whole_body[k] for k in whole_body)):
         kpt_model = utils_2d_pose.load_wb_model()
     else:
-        kpt_model = utils_2d_pose.load_model()
+        kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area)

     # Manually set matplotlib backend
     try:
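With the `.get` fallbacks, every threshold resolves to the per-dataset override when present and to the module-level default otherwise. A sketch of the full resolution for the "mvor" entry, using the values from this commit's config:

# Resolution example for "mvor": overridden keys win, everything else
# falls back to the module-level defaults.
entry = {"min_match_score": 0.85, "min_bbox_score": 0.25}
defaults = {
    "min_match_score": 0.94,
    "min_group_size": 1,
    "min_bbox_score": 0.3,
    "min_bbox_area": 0.1 * 0.1,
}
resolved = {k: entry.get(k, v) for k, v in defaults.items()}
assert resolved["min_match_score"] == 0.85  # overridden by the entry
assert resolved["min_group_size"] == 1      # falls back to the default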
@@ -239,50 +335,12 @@ def main():
     # Print a dataset sample for debugging
     print(labels[0])

-    minscores = {
-        # Describes how good two 2D poses need to match each other to create a valid triangulation
-        # If the quality of the 2D detections is poor, use a lower value
-        "panoptic": 0.94,
-        "human36m": 0.94,
-        "mvor": 0.86,
-        "campus": 0.96,
-        "shelf": 0.96,
-        "ikeaasm": 0.89,
-        "chi3d": 0.94,
-        "tsinghua": 0.96,
-        "egohumans": 0.95,
-        "human36m_wb": 0.94,
-    }
-    minscore = minscores.get(dataset_use, 0.95)
-    min_group_sizes = {
-        # Describes the minimum number of camera pairs that need to detect the same person
-        # If the number of cameras is high, and the views are not occluded, use a higher value
-        "panoptic": 1,
-        "shelf": 2,
-        "chi3d": 1,
-        "tsinghua": 2,
-        "egohumans": 4,
-    }
-    min_group_size = min_group_sizes.get(dataset_use, 1)
-    if dataset_use == "panoptic" and len(datasets["panoptic"]["cams"]) == 10:
-        min_group_size = 4
-    if dataset_use == "egohumans" and (
-        "lego" in labels[0]["seq"] or "tagging" in labels[0]["seq"]
-    ):
-        min_group_size = 2
-    if dataset_use == "egohumans" and (
-        "volleyball" in labels[0]["seq"] or "badminton" in labels[0]["seq"]
-    ):
-        min_group_size = 7
-    if dataset_use == "egohumans" and "tennis" in labels[0]["seq"]:
-        min_group_size = 11
-
     print("\nRunning predictions ...")
     all_poses = []
     all_ids = []
     times = []
     triangulator = rpt.Triangulator(
-        min_match_score=minscore, min_group_size=min_group_size
+        min_match_score=min_match_score, min_group_size=min_group_size
    )
     old_scene = ""
     old_index = -1
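The removed hard-coded tables are now expressed per dataset. For the entries that keep an explicit value, cross-checking the old minscores against the new per-dataset "min_match_score" fields shows two deliberate retunes (mvor and ikeaasm) rather than a pure refactor; an illustrative check using values from both halves of this diff:

# Illustrative cross-check of the removed minscores table vs. the new
# per-dataset "min_match_score" entries; the two mismatches are value
# changes, not refactoring slips.
old_minscores = {"mvor": 0.86, "shelf": 0.96, "ikeaasm": 0.89, "human36m": 0.94}
new_config = {
    "mvor": {"min_match_score": 0.85},
    "shelf": {"min_match_score": 0.96},
    "ikeaasm": {"min_match_score": 0.92},
    "human36m": {"min_match_score": 0.94},
}
changed = {
    k: (old_minscores[k], cfg["min_match_score"])
    for k, cfg in new_config.items()
    if cfg["min_match_score"] != old_minscores[k]
}
print(changed)  # {'mvor': (0.86, 0.85), 'ikeaasm': (0.89, 0.92)}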