Various performance improvements.

2024-07-02 11:29:25 +02:00
parent a311af38e9
commit 089fb0b41d
8 changed files with 1248 additions and 1197 deletions
--- a/media/2d-k.png
+++ b/media/2d-k.png
--- a/media/2d-p.png
+++ b/media/2d-p.png
--- a/media/3d-p.png
+++ b/media/3d-p.png
--- a/media/RESULTS.md
+++ b/media/RESULTS.md
--- a/scripts/test_skelda_dataset.py
+++ b/scripts/test_skelda_dataset.py
@ -352,13 +352,24 @@ def main():
        time_2d = time.time() - start
        print("2D time:", time_2d)
        minscores = {
            # Choose this depending on the fraction of invalid/missing persons
            # A higher value reduces the number of proposals
            "panoptic": 0.95,
            "human36m": 0.96,
            "mvor": 0.87,
            "campus": 0.96,
            "shelf": 0.96,
        }
        minscore = minscores.get(dataset_use, 0.95)
        start = time.time()
        if sum(np.sum(p) for p in poses_2d) == 0:
            poses3D = np.zeros([1, len(joint_names_3d), 4])
            poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
        else:
            poses3D = triangulate_poses.get_3d_pose(
-                poses_2d, label["cameras"], roomparams, joint_names_2d
+                poses_2d, label["cameras"], roomparams, joint_names_2d, minscore
            )
            poses2D = []
            for cam in label["cameras"]:
--- a/scripts/test_triangulate.py
+++ b/scripts/test_triangulate.py
@ -288,17 +288,27 @@ def filter_poses(poses3D, poses2D, roomparams, joint_names, drop_few_limbs=True)
                drop.append(i)
                continue
-        # Drop persons with too small average limb length
+        # Drop persons with too small or too large average limb length
        total_length = 0
        total_limbs = 0
        for limb in main_limbs:
            start_idx = joint_names.index(limb[0])
            end_idx = joint_names.index(limb[1])
            if pose[start_idx, -1] < 0.1 or pose[end_idx, -1] < 0.1:
                continue
            limb_length = np.linalg.norm(pose[end_idx, :3] - pose[start_idx, :3])
            total_length += limb_length
-        average_length = total_length / len(main_limbs)
+            total_limbs += 1
        if total_limbs == 0:
            drop.append(i)
            continue
        average_length = total_length / total_limbs
        if average_length < 0.1:
            drop.append(i)
            continue
        if average_length > 0.5:
            drop.append(i)
            continue
    new_poses3D = []
    new_poses2D = [[] for _ in range(len(poses2D))]
--- a/scripts/triangulate_poses.py
+++ b/scripts/triangulate_poses.py
@ -28,16 +28,17 @@ core_joints = [
 def undistort_points(points: np.ndarray, caminfo: dict):
-    K = np.array(caminfo["K"], dtype=np.float32)
+    """Undistorts 2D pixel coordinates"""
-    DC = np.array(caminfo["DC"][0:5], dtype=np.float32)
+
-    w = caminfo["width"]
+    K = np.asarray(caminfo["K"], dtype=np.float32)
-    h = caminfo["height"]
+    DC = np.asarray(caminfo["DC"][0:5], dtype=np.float32)
    # Undistort camera matrix
    w = caminfo["width"]
    h = caminfo["height"]
    newK, _ = cv2.getOptimalNewCameraMatrix(K, DC, (w, h), 1, (w, h))
-    caminfo = copy.deepcopy(caminfo)
+    caminfo["K"] = newK
-    caminfo["K"] = newK.tolist()
+    caminfo["DC"] = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    caminfo["DC"] = [0.0, 0.0, 0.0, 0.0, 0.0]
    # Undistort points
    pshape = points.shape
@ -45,7 +46,7 @@ def undistort_points(points: np.ndarray, caminfo: dict):
    points = cv2.undistortPoints(points, K, DC, P=newK)
    points = points.reshape(pshape)
-    return points, caminfo
+    return points
 # ==================================================================================================
@ -54,10 +55,11 @@ def undistort_points(points: np.ndarray, caminfo: dict):
 def get_camera_P(cam):
    """Calculate opencv-style projection matrix"""
-    R = np.array(cam["R"])
+    R = np.asarray(cam["R"])
-    T = np.array(cam["T"])
+    T = np.asarray(cam["T"])
    K = np.asarray(cam["K"])
    Tr = R @ (T * -1)
-    P = cam["K"] @ np.hstack([R, Tr])
+    P = K @ np.hstack([R, Tr])
    return P
@ -69,8 +71,8 @@ def calc_pair_score(pair, poses_2d, camparams, roomparams, joint_names_2d, use_j
    cam1 = camparams[pair[0][0]]
    cam2 = camparams[pair[0][1]]
-    pose1 = np.array(poses_2d[pair[0][0]][pair[0][2]])
+    pose1 = poses_2d[pair[0][0]][pair[0][2]]
-    pose2 = np.array(poses_2d[pair[0][1]][pair[0][3]])
+    pose2 = poses_2d[pair[0][1]][pair[0][3]]
    # Select core joints
    jids = [joint_names_2d.index(j) for j in use_joints]
@ -88,12 +90,13 @@ def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
    """Triangulates a pair of persons and scores them based on the reprojection error"""
    # Mask out invisible joints
-    mask1a = pose1[:, 2] >= 0.1
+    min_score = 0.1
-    mask2a = pose2[:, 2] >= 0.1
+    mask1a = pose1[:, 2] >= min_score
    mask2a = pose2[:, 2] >= min_score
    mask = mask1a & mask2a
-    # If no joints are visible return a low score
+    # If too few joints are visible return a low score
-    if np.sum(mask) == 0:
+    if np.sum(mask) < 3:
        pose3d = np.zeros([len(pose1), 4])
        score = 0.0
        return pose3d, score
@ -104,8 +107,7 @@ def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
    P1 = get_camera_P(cam1)
    P2 = get_camera_P(cam2)
    points3d = cv2.triangulatePoints(P1, P2, points1, points2)
-    points3d = points3d / points3d[3, :]
+    points3d = (points3d / points3d[3, :])[0:3, :].T
    points3d = points3d[0:3, :].T
    pose3d = np.zeros([len(pose1), 4])
    pose3d[mask] = np.concatenate([points3d, np.ones([points3d.shape[0], 1])], axis=-1)
@ -115,8 +117,11 @@ def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
    maxs = np.max(pose3d[mask][:, 0:3], axis=0)
    rsize = np.array(roomparams["room_size"]) / 2
    rcent = np.array(roomparams["room_center"])
    wdist = 0.1
    center_outside = np.any((mean > rsize + rcent) | (mean < -rsize + rcent))
-    limb_outside = np.any((maxs > rsize + rcent + 0.1) | (mins < -rsize + rcent - 0.1))
+    limb_outside = np.any(
        (maxs > rsize + rcent + wdist) | (mins < -rsize + rcent - wdist)
    )
    if center_outside or limb_outside:
        pose3d[:, 3] = 0.001
        score = 0.001
@ -124,29 +129,33 @@ def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
    # Calculate reprojection error
    poses_3d = np.expand_dims(pose3d, axis=0)
-    repro1, _ = utils_pose.project_poses(poses_3d, cam1)
+    repro1, dists1 = utils_pose.project_poses(poses_3d, cam1, calc_dists=True)
-    repro2, _ = utils_pose.project_poses(poses_3d, cam2)
+    repro2, dists2 = utils_pose.project_poses(poses_3d, cam2, calc_dists=True)
-    repro1 = repro1[0]
+    error1 = np.linalg.norm(pose1[mask, 0:2] - repro1[0, mask, 0:2], axis=1)
-    repro2 = repro2[0]
+    error2 = np.linalg.norm(pose2[mask, 0:2] - repro2[0, mask, 0:2], axis=1)
    error1 = np.linalg.norm(pose1[mask, 0:2] - repro1[mask, 0:2], axis=1)
    error2 = np.linalg.norm(pose2[mask, 0:2] - repro2[mask, 0:2], axis=1)
    # Set errors of invisible reprojections to a high value
    penalty = (cam1["width"] + cam1["height"]) / 2
-    mask1b = (repro1[:, 2] < 0.1)[mask]
+    mask1b = (repro1[0, :, 2] < min_score)[mask]
-    mask2b = (repro2[:, 2] < 0.1)[mask]
+    mask2b = (repro2[0, :, 2] < min_score)[mask]
    error1[mask1b] = penalty
    error2[mask2b] = penalty
    # Convert errors to a score
-    scale = (cam1["width"] + cam1["height"]) / 2
+    # Scale by image size and distance to the camera
-    error1 = error1.clip(0, scale / 4)
+    iscale = (cam1["width"] + cam1["height"]) / 2
-    error2 = error2.clip(0, scale / 4)
+    error1 = error1.clip(0, iscale / 4) / iscale
-    error1 = error1 / scale
+    error2 = error2.clip(0, iscale / 4) / iscale
-    error2 = error2 / scale
+    dscale1 = np.sqrt(np.mean(dists1[0, mask]) / 3.5)
    dscale2 = np.sqrt(np.mean(dists2[0, mask]) / 3.5)
    error1 = error1 * dscale1
    error2 = error2 * dscale2
    error = (error1 + error2) / 2
    scores = 1.0 / (1.0 + error * 10)
-    score = np.mean(scores)
+
    # Drop lowest scores
    drop_k = math.floor(len(pose1) * 0.2)
    score = np.mean(np.sort(scores, axis=-1)[drop_k:])
    # Add score to 3D pose
    full_scores = np.zeros([poses_3d.shape[1], 1])
@ -159,11 +168,10 @@ def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
 # ==================================================================================================
-def calc_grouping(all_pairs):
+def calc_grouping(all_pairs, min_score: float):
    """Groups pairs that share a person"""
    # Calculate the pose center for each pair
    min_score = 0.9
    for i in range(len(all_pairs)):
        pair = all_pairs[i]
        pose_3d = pair[2][0]
@ -177,7 +185,7 @@ def calc_grouping(all_pairs):
        # Create new group if none exists
        if len(groups) == 0:
-            groups.append([pair[4], pair[2][0], [pair]])
+            groups.append([pair[3], pair[2][0], [pair]])
            continue
        # Check if the pair matches to an existing group
@ -188,7 +196,7 @@ def calc_grouping(all_pairs):
        for j in range(len(groups)):
            g0 = groups[j]
            center0 = g0[0]
-            center1 = pair[4]
+            center1 = pair[3]
            if np.linalg.norm(center0 - center1) < max_center_dist:
                pose0 = g0[1]
                pose1 = pair[2][0]
@ -206,14 +214,14 @@ def calc_grouping(all_pairs):
        if best_group >= 0:
            # Add pair to existing group and update the mean positions
            group = groups[best_group]
-            new_center = (group[0] * len(group[1]) + pair[4]) / (len(group[1]) + 1)
+            new_center = (group[0] * len(group[1]) + pair[3]) / (len(group[1]) + 1)
            new_pose = (group[1] * len(group[1]) + pair[2][0]) / (len(group[1]) + 1)
            group[2].append(pair)
            group[0] = new_center
            group[1] = new_pose
        else:
            # Create new group if no match was found
-            groups.append([pair[4], pair[2][0], [pair]])
+            groups.append([pair[3], pair[2][0], [pair]])
    return groups
@ -221,26 +229,38 @@ def calc_grouping(all_pairs):
 # ==================================================================================================
-def merge_group(poses_3d: np.ndarray):
+def merge_group(poses_3d: np.ndarray, min_score: float):
    """Merges a group of poses into a single pose"""
    # Merge poses to create initial pose
    # Use only those triangulations with a high score
-    min_score = 0.9
+    imask = poses_3d[:, :, 3:4] > min_score
-    mask = poses_3d[:, :, 3:4] > min_score
+    sum_poses = np.sum(poses_3d * imask, axis=0)
-    sum_poses = np.sum(poses_3d * mask, axis=0)
+    sum_mask = np.sum(imask, axis=0)
    sum_mask = np.sum(mask, axis=0)
    initial_pose_3d = np.divide(
        sum_poses, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_poses)
    )
    # Use center as default if the initial pose is empty
    jmask = initial_pose_3d[:, 3] > 0.0
    sum_joints = np.sum(initial_pose_3d[jmask, 0:3], axis=0)
    sum_mask = np.sum(jmask)
    center = np.divide(
        sum_joints, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_joints)
    )
    initial_pose_3d[~jmask, 0:3] = center
    # Drop joints with low scores
    offset = 0.1
    mask = poses_3d[:, :, 3:4] > (min_score - offset)
    # Drop outliers that are far away from the other proposals
-    max_dist = 0.3
+    max_dist = 1.2
    distances = np.linalg.norm(
        poses_3d[:, :, :3] - initial_pose_3d[np.newaxis, :, :3], axis=2
    )
-    dist_mask = distances <= max_dist
+    dmask = distances <= max_dist
-    mask = mask & np.expand_dims(dist_mask, axis=-1)
+    mask = mask & np.expand_dims(dmask, axis=-1)
    # Select the best-k proposals for each joint that are closest to the initial pose
    keep_best = 3
@ -265,13 +285,24 @@ def merge_group(poses_3d: np.ndarray):
 # ==================================================================================================
-def get_3d_pose(poses_2d, camparams, roomparams, joint_names_2d):
+def get_3d_pose(poses_2d, camparams, roomparams, joint_names_2d, min_score=0.95):
    """Triangulates 3D poses from 2D poses of multiple views"""
    # Convert poses and camparams to numpy arrays
    camparams = copy.deepcopy(camparams)
    for i in range(len(camparams)):
        poses_2d[i] = np.asarray(poses_2d[i])
        camparams[i]["K"] = np.array(camparams[i]["K"])
        camparams[i]["R"] = np.array(camparams[i]["R"])
        camparams[i]["T"] = np.array(camparams[i]["T"])
        camparams[i]["DC"] = np.array(camparams[i]["DC"][0:5])
    # Undistort 2D points
-    for i, cam in enumerate(camparams):
+    for i in range(len(camparams)):
-        poses = np.array(poses_2d[i])
+        poses = poses_2d[i]
-        poses[:, :, 0:2], camparams[i] = undistort_points(poses[:, :, 0:2], cam)
+        cam = camparams[i]
-        poses_2d[i] = poses.tolist()
+        poses[:, :, 0:2] = undistort_points(poses[:, :, 0:2], cam)
        poses_2d[i] = poses
    # Create pairs of persons
    num_persons = [len(p) for p in poses_2d]
@ -302,28 +333,27 @@ def get_3d_pose(poses_2d, camparams, roomparams, joint_names_2d):
        # draw_utils.utils_view.show_plots()
    # Drop pairs with low scores
    min_score = 0.9
    all_pairs = [p for p in all_pairs if p[2][1] > min_score]
    # Group pairs that share a person
    groups = calc_grouping(all_pairs, min_score)
    # Calculate full 3D poses
    poses_3d = []
    for pair in all_pairs:
        cam1 = camparams[pair[0][0]]
        cam2 = camparams[pair[0][1]]
-        pose1 = np.array(poses_2d[pair[0][0]][pair[0][2]])
+        pose1 = poses_2d[pair[0][0]][pair[0][2]]
-        pose2 = np.array(poses_2d[pair[0][1]][pair[0][3]])
+        pose2 = poses_2d[pair[0][1]][pair[0][3]]
        pose_3d, _ = calc_pose_scored(pose1, pose2, cam1, cam2, roomparams)
        pair.append(pose_3d)
    # Group pairs that share a person
    groups = calc_grouping(all_pairs)
    # Merge groups
    poses_3d = []
    for group in groups:
-        poses = np.array([p[3] for p in group[2]])
+        poses = np.array([p[4] for p in group[2]])
-        pose_3d = merge_group(poses)
+        pose_3d = merge_group(poses, min_score)
        poses_3d.append(pose_3d)
    if len(poses_3d) > 0:
--- a/2
+++ b/2