Greatly improved triangulation.

2024-06-28 15:50:58 +02:00
parent 7df32cd182
commit 2702c7d762
4 changed files with 1440 additions and 1360 deletions
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
--- a/scripts/test_skelda_dataset.py
+++ b/scripts/test_skelda_dataset.py
@@ -357,7 +357,9 @@ def main():
            poses3D = np.zeros([1, len(joint_names_3d), 4])
            poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
        else:
-            poses3D = triangulate_poses.get_3d_pose(poses_2d, label["cameras"], joint_names_2d)
+            poses3D = triangulate_poses.get_3d_pose(
                poses_2d, label["cameras"], roomparams, joint_names_2d
            )
            poses2D = []
            for cam in label["cameras"]:
                poses_2d, _ = utils_pose.project_poses(poses3D, cam)
--- a/scripts/test_triangulate.py
+++ b/scripts/test_triangulate.py
@@ -439,7 +439,9 @@ def main():
            poses3D = np.zeros([1, len(joint_names_3d), 4])
            poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
        else:
-            poses3D = triangulate_poses.get_3d_pose(poses_2d, camparams, joint_names_2d)
+            poses3D = triangulate_poses.get_3d_pose(
                poses_2d, camparams, roomparams, joint_names_2d
            )
            poses2D = []
            for cam in camparams:
                poses_2d, _ = utils_pose.project_poses(poses3D, cam)
--- a/scripts/triangulate_poses.py
+++ b/scripts/triangulate_poses.py
@@ -1,4 +1,6 @@
 import copy
 import math
 import time
 import cv2
 import numpy as np
@@ -8,8 +10,6 @@ from skelda import utils_pose
 # ==================================================================================================
 core_joints = [
    "shoulder_middle",
    "hip_middle",
    "shoulder_left",
    "shoulder_right",
    "hip_left",
@@ -64,7 +64,7 @@ def get_camera_P(cam):
 # ==================================================================================================
-def calc_pair_score(pair, poses_2d, camparams, joint_names_2d):
+def calc_pair_score(pair, poses_2d, camparams, roomparams, joint_names_2d, use_joints):
    """Triangulates a pair of persons and scores them based on the reprojection error"""
    cam1 = camparams[pair[0][0]]
@@ -73,45 +73,77 @@ def calc_pair_score(pair, poses_2d, camparams, joint_names_2d):
    pose2 = np.array(poses_2d[pair[0][1]][pair[0][3]])
    # Select core joints
-    jids = [joint_names_2d.index(j) for j in core_joints]
+    jids = [joint_names_2d.index(j) for j in use_joints]
    pose1 = pose1[jids]
    pose2 = pose2[jids]
-    poses_3d, score = calc_pose_scored(pose1, pose2, cam1, cam2)
+    poses_3d, score = calc_pose_scored(pose1, pose2, cam1, cam2, roomparams)
    return poses_3d, score
 # ==================================================================================================
-def calc_pose_scored(pose1, pose2, cam1, cam2):
+def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
    """Triangulates a pair of persons and scores them based on the reprojection error"""
    # Mask out invisible joints
    mask1a = pose1[:, 2] >= 0.1
    mask2a = pose2[:, 2] >= 0.1
    mask = mask1a & mask2a
    # If no joints are visible return a low score
    if np.sum(mask) == 0:
        pose3d = np.zeros([len(pose1), 4])
        score = 0.0
        return pose3d, score
    # Triangulate points
-    points1 = pose1[:, 0:2].T
+    points1 = pose1[mask, 0:2].T
-    points2 = pose2[:, 0:2].T
+    points2 = pose2[mask, 0:2].T
    P1 = get_camera_P(cam1)
    P2 = get_camera_P(cam2)
    points3d = cv2.triangulatePoints(P1, P2, points1, points2)
    points3d = points3d / points3d[3, :]
    points3d = points3d[0:3, :].T
    pose3d = np.zeros([len(pose1), 4])
    pose3d[mask] = np.concatenate([points3d, np.ones([points3d.shape[0], 1])], axis=-1)
    # If the triangulated points are outside the room drop it
    mean = np.mean(pose3d[mask][:, 0:3], axis=0)
    mins = np.min(pose3d[mask][:, 0:3], axis=0)
    maxs = np.max(pose3d[mask][:, 0:3], axis=0)
    rsize = np.array(roomparams["room_size"]) / 2
    rcent = np.array(roomparams["room_center"])
    center_outside = np.any((mean > rsize + rcent) | (mean < -rsize + rcent))
    limb_outside = np.any((maxs > rsize + rcent + 0.1) | (mins < -rsize + rcent - 0.1))
    if center_outside or limb_outside:
        pose3d[:, 3] = 0.001
        score = 0.001
        return pose3d, score
    # Calculate reprojection error
-    poses_3d = np.expand_dims(points3d, axis=0)
+    poses_3d = np.expand_dims(pose3d, axis=0)
    poses_3d = np.concatenate([poses_3d, np.ones([1, poses_3d.shape[1], 1])], axis=-1)
    repro1, _ = utils_pose.project_poses(poses_3d, cam1)
    repro2, _ = utils_pose.project_poses(poses_3d, cam2)
    repro1 = repro1[0]
    repro2 = repro2[0]
    mask1 = pose1[:, 2] > 0.1
    mask2 = pose2[:, 2] > 0.1
    mask = mask1 & mask2
    error1 = np.linalg.norm(pose1[mask, 0:2] - repro1[mask, 0:2], axis=1)
    error2 = np.linalg.norm(pose2[mask, 0:2] - repro2[mask, 0:2], axis=1)
    # Set errors of invisible reprojections to a high value
    penalty = (cam1["width"] + cam1["height"]) / 2
    mask1b = (repro1[:, 2] < 0.1)[mask]
    mask2b = (repro2[:, 2] < 0.1)[mask]
    error1[mask1b] = penalty
    error2[mask2b] = penalty
    # Convert errors to a score
-    error1 = error1 / ((cam1["width"] + cam1["height"]) / 2)
+    scale = (cam1["width"] + cam1["height"]) / 2
-    error2 = error2 / ((cam2["width"] + cam2["height"]) / 2)
+    error1 = error1.clip(0, scale / 4)
    error2 = error2.clip(0, scale / 4)
    error1 = error1 / scale
    error2 = error2 / scale
    error = (error1 + error2) / 2
    scores = 1.0 / (1.0 + error * 10)
    score = np.mean(scores)
@@ -119,9 +151,9 @@ def calc_pose_scored(pose1, pose2, cam1, cam2):
    # Add score to 3D pose
    full_scores = np.zeros([poses_3d.shape[1], 1])
    full_scores[mask] = np.expand_dims(scores, axis=-1)
-    pose_3d = np.concatenate([points3d, full_scores], axis=-1)
+    pose3d[:, 3] = full_scores[:, 0]
-    return pose_3d, score
+    return pose3d, score
 # ==================================================================================================
@@ -131,12 +163,13 @@ def calc_grouping(all_pairs):
    """Groups pairs that share a person"""
    # Calculate the pose center for each pair
    min_score = 0.9
    for i in range(len(all_pairs)):
        pair = all_pairs[i]
        pose_3d = pair[2][0]
-        mask = pose_3d[:, 2] > 0.1
+        mask = pose_3d[:, 3] > min_score
        center = np.mean(pose_3d[mask, 0:3], axis=0)
-        all_pairs[i] = all_pairs[i] + (center,)
+        all_pairs[i] = all_pairs[i] + [center]
    groups = []
    for i in range(len(all_pairs)):
@@ -144,33 +177,43 @@ def calc_grouping(all_pairs):
        # Create new group if non exists
        if len(groups) == 0:
-            groups.append([pair])
+            groups.append([pair[4], pair[2][0], [pair]])
            continue
-        # Check if the pair belongs to an existing group
+        # Check if the pair matches to an existing group
-        matched = False
+        max_center_dist = 0.9
        max_joint_avg_dist = 0.3
        best_dist = math.inf
        best_group = -1
        for j in range(len(groups)):
-            g0 = groups[j][0]
+            g0 = groups[j]
-            center0 = g0[3]
+            center0 = g0[0]
-            center1 = pair[3]
+            center1 = pair[4]
-            if np.linalg.norm(center0 - center1) < 0.5:
+            if np.linalg.norm(center0 - center1) < max_center_dist:
-                pose0 = g0[2][0]
+                pose0 = g0[1]
                pose1 = pair[2][0]
                # Calculate the distance between the two poses
-                mask0 = pose0[:, 3] > 0.1
+                mask0 = pose0[:, 3] > min_score
-                mask1 = pose1[:, 3] > 0.1
+                mask1 = pose1[:, 3] > min_score
                mask = mask0 & mask1
                dists = np.linalg.norm(pose0[mask, 0:3] - pose1[mask, 0:3], axis=1)
                dist = np.mean(dists)
-                if dist < 0.3:
+                if dist < max_joint_avg_dist:
-                    groups[j].append(pair)
+                    if dist < best_dist:
-                    matched = True
+                        best_dist = dist
-                    break
+                        best_group = j
-
+        if best_group >= 0:
-        # Create new group if no match was found
+            # Add pair to existing group and update the mean positions
-        if not matched:
+            group = groups[best_group]
-            groups.append([pair])
+            new_center = (group[0] * len(group[1]) + pair[4]) / (len(group[1]) + 1)
            new_pose = (group[1] * len(group[1]) + pair[2][0]) / (len(group[1]) + 1)
            group[2].append(pair)
            group[0] = new_center
            group[1] = new_pose
        else:
            # Create new group if no match was found
            groups.append([pair[4], pair[2][0], [pair]])
    return groups
@@ -178,30 +221,51 @@ def calc_grouping(all_pairs):
 # ==================================================================================================
-def merge_group(group, poses_2d, camparams):
+def merge_group(poses_3d: np.ndarray):
    """Merges a group of poses into a single pose"""
-    # Calculate full 3D poses
+    # Merge poses to create initial pose
-    poses_3d = []
+    # Use only those triangulations with a high score
-    for pair in group:
+    min_score = 0.9
-        cam1 = camparams[pair[0][0]]
+    mask = poses_3d[:, :, 3:4] > min_score
-        cam2 = camparams[pair[0][1]]
+    sum_poses = np.sum(poses_3d * mask, axis=0)
-        pose1 = np.array(poses_2d[pair[0][0]][pair[0][2]])
+    sum_mask = np.sum(mask, axis=0)
-        pose2 = np.array(poses_2d[pair[0][1]][pair[0][3]])
+    initial_pose_3d = np.divide(
        sum_poses, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_poses)
    )
-        pose_3d, _ = calc_pose_scored(pose1, pose2, cam1, cam2)
+    # Drop outliers that are far away from the other proposals
-        poses_3d.append(pose_3d)
+    max_dist = 0.3
    distances = np.linalg.norm(
        poses_3d[:, :, :3] - initial_pose_3d[np.newaxis, :, :3], axis=2
    )
    dist_mask = distances <= max_dist
    mask = mask & np.expand_dims(dist_mask, axis=-1)
-    # Merge poses
+    # Select the best-k proposals for each joint that are closest to the initial pose
-    pose_3d = np.mean(poses_3d, axis=0)
+    keep_best = 3
    sorted_indices = np.argsort(distances, axis=0)
    best_k_mask = np.zeros_like(mask, dtype=bool)
    num_joints = poses_3d.shape[1]
    for i in range(num_joints):
        valid_indices = sorted_indices[:, i][mask[sorted_indices[:, i], i, 0]]
        best_k_mask[valid_indices[:keep_best], i, 0] = True
    mask = mask & best_k_mask
-    return pose_3d
+    # Final pose computation with combined masks
    sum_poses = np.sum(poses_3d * mask, axis=0)
    sum_mask = np.sum(mask, axis=0)
    final_pose_3d = np.divide(
        sum_poses, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_poses)
    )
    return final_pose_3d
 # ==================================================================================================
-def get_3d_pose(poses_2d, camparams, joint_names_2d):
+def get_3d_pose(poses_2d, camparams, roomparams, joint_names_2d):
    # Undistort 2D points
    for i, cam in enumerate(camparams):
@@ -220,13 +284,15 @@ def get_3d_pose(poses_2d, camparams, joint_names_2d):
                for l in range(len(poses2)):
                    pid1 = sum(num_persons[:i]) + k
                    pid2 = sum(num_persons[:j]) + l
-                    all_pairs.append(((i, j, k, l), (pid1, pid2)))
+                    all_pairs.append([(i, j, k, l), (pid1, pid2)])
    # Calculate pair scores
    for i in range(len(all_pairs)):
        pair = all_pairs[i]
-        pose_3d, score = calc_pair_score(pair, poses_2d, camparams, joint_names_2d)
+        pose_3d, score = calc_pair_score(
-        all_pairs[i] = all_pairs[i] + ((pose_3d, score),)
+            pair, poses_2d, camparams, roomparams, joint_names_2d, core_joints
        )
        all_pairs[i].append((pose_3d, score))
        # import draw_utils
        # poses3D = np.array([pose_3d])
@@ -239,13 +305,25 @@ def get_3d_pose(poses_2d, camparams, joint_names_2d):
    min_score = 0.9
    all_pairs = [p for p in all_pairs if p[2][1] > min_score]
    # Calculate full 3D poses
    poses_3d = []
    for pair in all_pairs:
        cam1 = camparams[pair[0][0]]
        cam2 = camparams[pair[0][1]]
        pose1 = np.array(poses_2d[pair[0][0]][pair[0][2]])
        pose2 = np.array(poses_2d[pair[0][1]][pair[0][3]])
        pose_3d, _ = calc_pose_scored(pose1, pose2, cam1, cam2, roomparams)
        pair.append(pose_3d)
    # Group pairs that share a person
    groups = calc_grouping(all_pairs)
    # Merge groups
    poses_3d = []
    for group in groups:
-        pose_3d = merge_group(group, poses_2d, camparams)
+        poses = np.array([p[3] for p in group[2]])
        pose_3d = merge_group(poses)
        poses_3d.append(pose_3d)
    if len(poses_3d) > 0: