Files
RapidPoseTriangulation/scripts/triangulate_poses.py
2024-07-02 11:29:25 +02:00

364 lines
12 KiB
Python

import copy
import math
import time
import cv2
import numpy as np
from skelda import utils_pose
# ==================================================================================================
# Joint names handed to calc_pair_score when scoring person pairs.
# Order is body part first, then side: shoulder_left, shoulder_right, hip_left, ...
core_joints = [
    f"{part}_{side}"
    for part in ("shoulder", "hip", "elbow", "knee", "wrist", "ankle")
    for side in ("left", "right")
]
# ==================================================================================================
def undistort_points(points: np.ndarray, caminfo: dict):
    """Undistorts 2D pixel coordinates.

    Note that ``caminfo`` is modified in place: its "K" entry is replaced by
    the new camera matrix computed here and its "DC" entry is reset to five
    zeros, so that later projections with this camera match the undistorted
    points. This also happens when ``points`` is empty.

    Args:
        points: Array whose last axis holds (x, y) pixel coordinates.
        caminfo: Camera dict with the keys "K", "DC", "width" and "height".

    Returns:
        The undistorted points in the same shape as the input.
    """

    K = np.asarray(caminfo["K"], dtype=np.float32)
    DC = np.asarray(caminfo["DC"][0:5], dtype=np.float32)

    # Undistort camera matrix
    w = caminfo["width"]
    h = caminfo["height"]
    newK, _ = cv2.getOptimalNewCameraMatrix(K, DC, (w, h), 1, (w, h))
    caminfo["K"] = newK
    caminfo["DC"] = np.array([0.0, 0.0, 0.0, 0.0, 0.0])

    # Nothing to undistort; skip the OpenCV call, which is not expected to
    # accept an empty point set
    points = np.asarray(points)
    if points.size == 0:
        return points

    # Undistort points
    pshape = points.shape
    points = np.reshape(points, (-1, 1, 2))
    points = cv2.undistortPoints(points, K, DC, P=newK)
    points = points.reshape(pshape)

    return points
# ==================================================================================================
def get_camera_P(cam):
    """Build the opencv-style projection matrix ``K @ [R | R @ (-T)]``.

    Args:
        cam: Camera dict with the keys "K", "R" and "T". "T" has to be a
            column vector so it can be stacked next to "R".

    Returns:
        The product of the intrinsics with the horizontally stacked
        rotation and translation.
    """

    intrinsics = np.asarray(cam["K"])
    rotation = np.asarray(cam["R"])
    position = np.asarray(cam["T"])

    # "T" is negated before being rotated to obtain the translation column
    translation = rotation @ np.multiply(position, -1)
    extrinsics = np.concatenate([rotation, translation], axis=1)

    return intrinsics @ extrinsics
# ==================================================================================================
def calc_pair_score(pair, poses_2d, camparams, roomparams, joint_names_2d, use_joints):
    """Triangulate one person pair on a subset of joints and score it.

    Args:
        pair: List whose first element holds
            (camera index 1, camera index 2, person index 1, person index 2).
        poses_2d: Per-camera arrays of 2D poses, indexed as [camera][person].
        camparams: Per-camera parameter dicts, indexed like ``poses_2d``.
        roomparams: Room description forwarded to ``calc_pose_scored``.
        joint_names_2d: Joint names in the order of the joint axis of the poses.
        use_joints: Names of the joints that are used for the triangulation.

    Returns:
        Tuple of the triangulated 3D pose and its score, as returned by
        ``calc_pose_scored``.
    """

    ids = pair[0]
    view_a, view_b = ids[0], ids[1]
    person_a, person_b = ids[2], ids[3]

    # Reduce both poses to the requested joints, in the order of use_joints
    joint_ids = [joint_names_2d.index(name) for name in use_joints]
    sub_pose_a = poses_2d[view_a][person_a][joint_ids]
    sub_pose_b = poses_2d[view_b][person_b][joint_ids]

    pose_3d, score = calc_pose_scored(
        sub_pose_a, sub_pose_b, camparams[view_a], camparams[view_b], roomparams
    )
    return pose_3d, score
# ==================================================================================================
def calc_pose_scored(pose1, pose2, cam1, cam2, roomparams):
    """Triangulates a pair of persons and scores them based on the reprojection error.

    Args:
        pose1: 2D pose from the first view; one row per joint with the
            columns (x, y, score).
        pose2: 2D pose from the second view, with the same joint order.
        cam1: Camera dict of the first view. "width" and "height" are read
            here, the projection matrix is built by ``get_camera_P``.
        cam2: Camera dict of the second view.
        roomparams: Dict with the keys "room_size" and "room_center".

    Returns:
        Tuple ``(pose3d, score)``. ``pose3d`` has one row per input joint with
        the columns (x, y, z, score). Three outcomes are possible:
        - fewer than 3 joints visible in both views: all zeros, score 0.0
        - triangulation outside of the room: all joint scores and the
          score are 0.001
        - otherwise: per-joint scores in (0, 1] for the commonly visible
          joints, and the mean of the kept joint scores as score
    """

    num_joints = len(pose1)

    # Mask out invisible joints
    min_score = 0.1
    mask1a = pose1[:, 2] >= min_score
    mask2a = pose2[:, 2] >= min_score
    mask = mask1a & mask2a

    # If too few joints are visible return a low score
    if np.sum(mask) < 3:
        pose3d = np.zeros([num_joints, 4])
        score = 0.0
        return pose3d, score

    # Triangulate points
    points1 = pose1[mask, 0:2].T
    points2 = pose2[mask, 0:2].T
    P1 = get_camera_P(cam1)
    P2 = get_camera_P(cam2)
    points3d = cv2.triangulatePoints(P1, P2, points1, points2)
    # Convert from homogeneous to euclidean coordinates
    points3d = (points3d / points3d[3, :])[0:3, :].T
    pose3d = np.zeros([num_joints, 4])
    pose3d[mask] = np.concatenate([points3d, np.ones([points3d.shape[0], 1])], axis=-1)

    # If the triangulated points are outside the room drop it
    mean = np.mean(pose3d[mask][:, 0:3], axis=0)
    mins = np.min(pose3d[mask][:, 0:3], axis=0)
    maxs = np.max(pose3d[mask][:, 0:3], axis=0)
    rsize = np.array(roomparams["room_size"]) / 2
    rcent = np.array(roomparams["room_center"])
    # Single joints may exceed the room bounds by this margin
    wdist = 0.1
    center_outside = np.any((mean > rsize + rcent) | (mean < -rsize + rcent))
    limb_outside = np.any(
        (maxs > rsize + rcent + wdist) | (mins < -rsize + rcent - wdist)
    )
    if center_outside or limb_outside:
        pose3d[:, 3] = 0.001
        score = 0.001
        return pose3d, score

    # Calculate reprojection error
    poses_3d = np.expand_dims(pose3d, axis=0)
    repro1, dists1 = utils_pose.project_poses(poses_3d, cam1, calc_dists=True)
    repro2, dists2 = utils_pose.project_poses(poses_3d, cam2, calc_dists=True)
    error1 = np.linalg.norm(pose1[mask, 0:2] - repro1[0, mask, 0:2], axis=1)
    error2 = np.linalg.norm(pose2[mask, 0:2] - repro2[0, mask, 0:2], axis=1)

    # NOTE(review): the image size of cam1 is used for both views; confirm
    # that this is intended for setups with mixed resolutions
    iscale = (cam1["width"] + cam1["height"]) / 2

    # Set errors of invisible reprojections to a high value
    # (presumably project_poses lowers the score column of joints it could
    # not project into the image - verify against skelda)
    penalty = iscale
    mask1b = (repro1[0, :, 2] < min_score)[mask]
    mask2b = (repro2[0, :, 2] < min_score)[mask]
    error1[mask1b] = penalty
    error2[mask2b] = penalty

    # Convert errors to a score
    # Scale by image size and distance to the camera
    error1 = error1.clip(0, iscale / 4) / iscale
    error2 = error2.clip(0, iscale / 4) / iscale
    dscale1 = np.sqrt(np.mean(dists1[0, mask]) / 3.5)
    dscale2 = np.sqrt(np.mean(dists2[0, mask]) / 3.5)
    error1 = error1 * dscale1
    error2 = error2 * dscale2
    error = (error1 + error2) / 2
    scores = 1.0 / (1.0 + error * 10)

    # Drop lowest scores
    # The drop count is derived from the total number of joints, but only the
    # commonly visible joints have a score. Always keep at least one of them,
    # otherwise the mean of an empty slice would turn the score into NaN.
    drop_k = min(math.floor(num_joints * 0.2), len(scores) - 1)
    score = np.mean(np.sort(scores, axis=-1)[drop_k:])

    # Add score to 3D pose
    full_scores = np.zeros([poses_3d.shape[1], 1])
    full_scores[mask] = np.expand_dims(scores, axis=-1)
    pose3d[:, 3] = full_scores[:, 0]

    return pose3d, score
# ==================================================================================================
def calc_grouping(all_pairs, min_score: float):
    """Groups pairs that share a person.

    Pairs are assigned greedily in list order: each pair joins the existing
    group with the smallest average joint distance, as long as the group is
    close enough, otherwise it opens a new group.

    Args:
        all_pairs: List of pairs, each a list whose element 2 is a tuple
            ``(pose_3d, score)`` with ``pose_3d`` holding one row per joint
            and the columns (x, y, z, score). The list is modified in place:
            every entry is replaced by a new list that has the pose center
            appended as element 3.
        min_score: Only joints with a score above this value are used for
            the centers and for the distance between two poses.

    Returns:
        List of groups, each a list ``[center, pose_3d, pairs]`` holding the
        mean center, the mean pose and the member pairs of the group.
    """

    max_center_dist = 0.9
    max_joint_avg_dist = 0.3

    # Calculate the pose center for each pair
    for i, pair in enumerate(all_pairs):
        pose_3d = pair[2][0]
        mask = pose_3d[:, 3] > min_score
        center = np.mean(pose_3d[mask, 0:3], axis=0)
        all_pairs[i] = pair + [center]

    groups = []
    for pair in all_pairs:
        center1 = pair[3]
        pose1 = pair[2][0]
        mask1 = pose1[:, 3] > min_score

        # Check if the pair matches to an existing group
        best_dist = math.inf
        best_group = None
        for group in groups:
            center0 = group[0]
            pose0 = group[1]
            if not np.linalg.norm(center0 - center1) < max_center_dist:
                continue

            # Calculate the distance between the two poses
            mask = (pose0[:, 3] > min_score) & mask1
            dists = np.linalg.norm(pose0[mask, 0:3] - pose1[mask, 0:3], axis=1)
            dist = np.mean(dists)

            if dist < max_joint_avg_dist and dist < best_dist:
                best_dist = dist
                best_group = group

        if best_group is None:
            # Create new group if no match was found
            groups.append([center1, pose1, [pair]])
            continue

        # Add pair to existing group and update the mean positions.
        # The running mean is weighted by the number of pairs already in the
        # group, not by the number of joints of the pose.
        num_members = len(best_group[2])
        best_group[0] = (best_group[0] * num_members + center1) / (num_members + 1)
        best_group[1] = (best_group[1] * num_members + pose1) / (num_members + 1)
        best_group[2].append(pair)

    return groups
# ==================================================================================================
def _masked_mean(poses_3d, keep):
    """Average the kept proposals per joint; joints without any stay zero."""

    total = np.sum(poses_3d * keep, axis=0)
    count = np.sum(keep, axis=0)
    return np.divide(total, count, where=(count > 0), out=np.zeros_like(total))


def merge_group(poses_3d: np.ndarray, min_score: float):
    """Fuse all pose proposals of one group into a single pose.

    Args:
        poses_3d: Proposals indexed as [proposal, joint, (x, y, z, score)].
        min_score: Joint score a proposal has to exceed to contribute to the
            initial estimate. The final selection accepts scores that are up
            to 0.1 lower.

    Returns:
        Array indexed as [joint, (x, y, z, score)]. Joints without any
        accepted proposal are all zeros.
    """

    joint_scores = poses_3d[:, :, 3:4]

    # First estimate: average of the confident proposals of each joint
    initial = _masked_mean(poses_3d, joint_scores > min_score)

    # Joints without an estimate are placed at the center of the other joints
    has_estimate = initial[:, 3] > 0.0
    joint_sum = np.sum(initial[has_estimate, 0:3], axis=0)
    joint_count = np.sum(has_estimate)
    fallback = np.divide(
        joint_sum, joint_count, where=(joint_count > 0), out=np.zeros_like(joint_sum)
    )
    initial[~has_estimate, 0:3] = fallback

    # Accept proposals with a slightly relaxed score threshold, as long as
    # they are not too far away from the first estimate
    score_margin = 0.1
    max_dist = 1.2
    distances = np.linalg.norm(
        poses_3d[:, :, :3] - initial[np.newaxis, :, :3], axis=2
    )
    accepted = joint_scores > (min_score - score_margin)
    accepted = accepted & (distances <= max_dist)[..., np.newaxis]

    # Per joint, keep only the accepted proposals closest to the first estimate
    keep_best = 3
    ranking = np.argsort(distances, axis=0)
    closest = np.zeros(accepted.shape, dtype=bool)
    for joint, ranked in enumerate(ranking.T):
        candidates = ranked[accepted[ranked, joint, 0]]
        closest[candidates[:keep_best], joint, 0] = True

    # Final pose: average of the selected proposals
    return _masked_mean(poses_3d, accepted & closest)
# ==================================================================================================
def get_3d_pose(poses_2d, camparams, roomparams, joint_names_2d, min_score=0.95):
    """Triangulates 3D poses from 2D poses of multiple views.

    Every person of every view is paired with every person of the other
    views. The pairs are triangulated and scored on the core joints, pairs
    with a low score are dropped, the remaining ones are grouped by person
    and each group is merged into one full 3D pose.

    Neither ``poses_2d`` nor ``camparams`` are modified; the undistortion
    works on internal copies.

    Args:
        poses_2d: One entry per camera with the 2D poses of this view,
            indexed as [person, joint, (x, y, score)]. Entries of cameras
            without detections may be empty.
        camparams: One dict per camera with the keys "K", "R", "T", "DC",
            "width" and "height".
        roomparams: Dict with the keys "room_size" and "room_center".
        joint_names_2d: Joint names in the order of the joint axis.
        min_score: Threshold a pair score has to exceed; it is also passed
            on to the grouping and merging steps.

    Returns:
        Array indexed as [person, joint, (x, y, z, score)]. If no person
        could be triangulated, a single pose of zeros is returned.
    """

    # Convert poses and camparams to numpy arrays
    # Copies are used, because the undistortion below modifies both in place
    camparams = copy.deepcopy(camparams)
    for cam in camparams:
        cam["K"] = np.array(cam["K"])
        cam["R"] = np.array(cam["R"])
        cam["T"] = np.array(cam["T"])
        cam["DC"] = np.array(cam["DC"][0:5])
    poses_2d = [np.array(poses) for poses in poses_2d]

    # Undistort 2D points
    for poses, cam in zip(poses_2d, camparams):
        if poses.size == 0:
            # No detections in this view, so its camera is never used below
            continue
        poses[:, :, 0:2] = undistort_points(poses[:, :, 0:2], cam)

    # Create pairs of persons
    # The second tuple holds person ids that are unique across all views
    num_persons = [len(poses) for poses in poses_2d]
    offsets = [0]
    for count in num_persons:
        offsets.append(offsets[-1] + count)
    all_pairs = []
    for i in range(len(poses_2d)):
        for j in range(i + 1, len(poses_2d)):
            for k in range(num_persons[i]):
                for m in range(num_persons[j]):
                    pids = (offsets[i] + k, offsets[j] + m)
                    all_pairs.append([(i, j, k, m), pids])

    # Calculate pair scores
    for pair in all_pairs:
        pose_3d, score = calc_pair_score(
            pair, poses_2d, camparams, roomparams, joint_names_2d, core_joints
        )
        pair.append((pose_3d, score))

    # Drop pairs with low scores
    all_pairs = [p for p in all_pairs if p[2][1] > min_score]
    if not all_pairs:
        return np.zeros([1, len(joint_names_2d), 4])

    # Group pairs that share a person
    groups = calc_grouping(all_pairs, min_score)

    # Calculate full 3D poses and merge groups
    poses_3d = []
    for group in groups:
        proposals = []
        for pair in group[2]:
            cam_id1, cam_id2, person_id1, person_id2 = pair[0]
            pose_3d, _ = calc_pose_scored(
                poses_2d[cam_id1][person_id1],
                poses_2d[cam_id2][person_id2],
                camparams[cam_id1],
                camparams[cam_id2],
                roomparams,
            )
            proposals.append(pose_3d)
        poses_3d.append(merge_group(np.array(proposals), min_score))

    if poses_3d:
        poses3D = np.array(poses_3d)
    else:
        poses3D = np.zeros([1, len(joint_names_2d), 4])

    return poses3D