Using bayer encoding for images.

2025-01-13 16:50:08 +01:00
parent 53543368c4
commit 2f52521b9a
3 changed files with 2657 additions and 2613 deletions
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
--- a/scripts/test_skelda_dataset.py
+++ b/scripts/test_skelda_dataset.py
@ -61,7 +61,7 @@ datasets = {
    "human36m": {
        "path": "/datasets/human36m/skelda/pose_test.json",
        "take_interval": 5,
-        "min_match_score": 0.94,
+        "min_match_score": 0.95,
        "min_group_size": 1,
        "min_bbox_score": 0.4,
        "min_bbox_area": 0.1 * 0.1,
@ -73,6 +73,7 @@ datasets = {
        # "cams": ["00_03", "00_06", "00_12"],
        # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
        "take_interval": 3,
+        "min_match_score": 0.95,
        "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
        "min_group_size": 1,
        # "min_group_size": 4,
@ -88,6 +89,7 @@ datasets = {
    "campus": {
        "path": "/datasets/campus/skelda/test.json",
        "take_interval": 1,
+        "min_match_score": 0.90,
        "min_bbox_score": 0.5,
    },
    "shelf": {
@ -109,6 +111,7 @@ datasets = {
    "tsinghua": {
        "path": "/datasets/tsinghua/skelda/test.json",
        "take_interval": 3,
+        "min_match_score": 0.95,
        "min_group_size": 2,
    },
    "human36m_wb": {
@ -122,7 +125,7 @@ datasets = {
        "take_interval": 2,
        "subset": "tagging",
        "min_group_size": 2,
-        "min_bbox_score": 0.25,
+        "min_bbox_score": 0.2,
        "min_bbox_area": 0.05 * 0.05,
    },
    "egohumans_legoassemble": {
@ -343,19 +346,32 @@ def main():
    # Print a dataset sample for debugging
    print(labels[0])

+    print("\nPrefetching images ...")
+    for label in tqdm.tqdm(labels):
+        # If the images are stored on an HDD, it sometimes takes a while to load them
+        # Prefetching them results in more stable timings of the following steps
+        # To prevent memory overflow, the code only loads the images, but does not store them
+        try:
+            for i in range(len(label["imgpaths"])):
+                imgpath = label["imgpaths"][i]
+                img = test_triangulate.load_image(imgpath)
+        except cv2.error:
+            print("One of the paths not found:", label["imgpaths"])
+            continue
+    time.sleep(3)
+
    print("\nCalculating 2D predictions ...")
    all_poses_2d = []
    times = []
    for label in tqdm.tqdm(labels):
        images_2d = []

+        start = time.time()
        try:
-            start = time.time()
            for i in range(len(label["imgpaths"])):
                imgpath = label["imgpaths"][i]
                img = test_triangulate.load_image(imgpath)
                images_2d.append(img)
-            time_imgs = time.time() - start
        except cv2.error:
            print("One of the paths not found:", label["imgpaths"])
            continue
@ -373,7 +389,16 @@ def main():
                    cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
                    images_2d[i] = cv2.resize(img, (1000, 1000))

+        # Convert image format to Bayer encoding to simulate real camera input
+        # This also resulted in notably better MPJPE results in most cases, presumably since the
+        # demosaicing algorithm from OpenCV is better than the default one from the cameras
+        for i in range(len(images_2d)):
+            images_2d[i] = test_triangulate.rgb2bayer(images_2d[i])
+        time_imgs = time.time() - start
+
        start = time.time()
+        for i in range(len(images_2d)):
+            images_2d[i] = test_triangulate.bayer2rgb(images_2d[i])
        poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
        poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
        time_2d = time.time() - start
--- a/scripts/test_triangulate.py
+++ b/scripts/test_triangulate.py
@ -227,6 +227,23 @@ def load_image(path: str):
 # ==================================================================================================


+def rgb2bayer(img):
+    bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
+    bayer[0::2, 0::2] = img[0::2, 0::2, 0]
+    bayer[0::2, 1::2] = img[0::2, 1::2, 1]
+    bayer[1::2, 0::2] = img[1::2, 0::2, 1]
+    bayer[1::2, 1::2] = img[1::2, 1::2, 2]
+    return bayer
+
+
+def bayer2rgb(bayer):
+    img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
+    return img
+
+
+# ==================================================================================================
+
+
 def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
    new_views = []
    for view in poses_2d:
@ -314,6 +331,8 @@ def main():
        for i in range(len(sample["cameras_color"])):
            imgpath = sample["imgpaths_color"][i]
            img = load_image(imgpath)
+            img = rgb2bayer(img)
+            img = bayer2rgb(img)
            images_2d.append(img)

        # Get 2D poses