feat: implement geometry-first auto-align heuristic

This commit is contained in:
2026-02-07 16:54:21 +00:00
parent 18e814217a
commit 15989195f1
3 changed files with 194 additions and 7 deletions
+72 -3
View File
@@ -137,6 +137,71 @@ def apply_alignment_to_pose(T: Mat44, R_align: Mat33) -> Mat44:
return (T_align @ T).astype(np.float64)
def estimate_up_vector_from_cameras(camera_poses: list[Mat44]) -> Vec3:
"""
Estimate the 'up' vector of the scene based on camera positions.
Assumes cameras are arranged roughly in a horizontal ring (coplanar).
The normal of the plane fitting the camera centers is used as the up vector.
The sign is disambiguated using the average camera 'up' vector (-Y in OpenCV).
Args:
camera_poses: List of (4, 4) camera-to-world transformation matrices.
Returns:
(3,) normalized up vector.
"""
if not camera_poses:
raise ValueError("No camera poses provided.")
# Extract camera centers (translations)
centers = np.array([T[:3, 3] for T in camera_poses])
# Calculate average camera 'up' vector (assuming OpenCV convention: Y is down, so up is -Y)
# T[:3, 1] is the Y axis direction in world frame
# We want the vector pointing UP in world coordinates.
# In OpenCV camera frame, Y is down. So -Y is up.
# The world-frame representation of the camera's -Y axis is -R[:, 1]
# T[:3, 1] is the second column of the rotation matrix (Y axis).
avg_cam_up = np.mean([-T[:3, 1] for T in camera_poses], axis=0)
norm = np.linalg.norm(avg_cam_up)
if norm > 1e-6:
avg_cam_up /= norm
else:
avg_cam_up = np.array([0.0, 1.0, 0.0]) # Fallback
# If fewer than 3 cameras, we can't reliably fit a plane.
# Fallback to average camera up vector.
if len(camera_poses) < 3:
logger.debug("Fewer than 3 cameras; using average camera -Y as up vector.")
return avg_cam_up
# Fit plane to camera centers using SVD
centroid = np.mean(centers, axis=0)
centered = centers - centroid
# Check if points are collinear or coincident (rank check)
# If they are collinear, plane is undefined.
if np.linalg.matrix_rank(centered) < 2:
logger.debug(
"Camera centers are collinear; using average camera -Y as up vector."
)
return avg_cam_up
try:
u, s, vh = np.linalg.svd(centered)
# The normal is the singular vector corresponding to the smallest singular value
normal = vh[2, :]
except np.linalg.LinAlgError:
logger.warning("SVD failed; using average camera -Y as up vector.")
return avg_cam_up
# Disambiguate sign: choose the normal that aligns best with average camera up
if np.dot(normal, avg_cam_up) < 0:
normal = -normal
return normal
def get_face_normal_from_geometry(
face_name: str,
marker_geometry: dict[int, np.ndarray],
@@ -223,9 +288,13 @@ def detect_ground_face(
# Iterate faces in mapping
for face_name, face_marker_ids in face_marker_map.items():
# Consider only faces with any visible marker ID
if not any(mid in visible_marker_ids for mid in face_marker_ids):
continue
# We check ALL faces for which we have geometry, regardless of visibility.
# This allows detecting the ground face even if it's occluded,
# provided we have geometry for it (e.g. from a loaded model or previous detections).
# However, get_face_normal_from_geometry requires marker_geometry to contain the markers.
# If marker_geometry only contains *visible* markers (which is typical if passed from detection),
# then we are limited to visible faces.
# But if marker_geometry is the full loaded geometry, we can check all faces.
normal = get_face_normal_from_geometry(
face_name, marker_geometry, face_marker_map=face_marker_map