import numpy as np from loguru import logger from jaxtyping import Float from typing import TYPE_CHECKING # Type aliases for shape-aware annotations if TYPE_CHECKING: Vec3 = Float[np.ndarray, "3"] Mat33 = Float[np.ndarray, "3 3"] Mat44 = Float[np.ndarray, "4 4"] CornersNC = Float[np.ndarray, "N 3"] else: Vec3 = np.ndarray Mat33 = np.ndarray Mat44 = np.ndarray CornersNC = np.ndarray def compute_face_normal(corners: CornersNC) -> Vec3: """ Compute the normal vector of a face defined by its corners. Assumes corners are in order (e.g., clockwise or counter-clockwise). Args: corners: (N, 3) array of corner coordinates. Returns: (3,) normalized normal vector. """ if corners.ndim != 2 or corners.shape[1] != 3: raise ValueError(f"Expected (N, 3) array, got {corners.shape}") if corners.shape[0] < 3: raise ValueError("At least 3 corners are required to compute a normal.") # Use the cross product of two edges # Stable edge definition for quad faces consistent with plan/repo convention: # normal from (corners[1]-corners[0]) x (corners[3]-corners[0]) when N>=4, # fallback to index 2 if N==3. v1 = corners[1] - corners[0] if corners.shape[0] >= 4: v2 = corners[3] - corners[0] else: v2 = corners[2] - corners[0] normal = np.cross(v1, v2) norm = np.linalg.norm(normal) if norm < 1e-10: raise ValueError("Corners are collinear or degenerate; cannot compute normal.") return (normal / norm).astype(np.float64) def rotation_align_vectors(from_vec: Vec3, to_vec: Vec3) -> Mat33: """ Compute the 3x3 rotation matrix that aligns from_vec to to_vec. Args: from_vec: (3,) source vector. to_vec: (3,) target vector. Returns: (3, 3) rotation matrix. """ if from_vec.shape != (3,) or to_vec.shape != (3,): raise ValueError( f"Expected (3,) vectors, got {from_vec.shape} and {to_vec.shape}" ) norm_from = np.linalg.norm(from_vec) norm_to = np.linalg.norm(to_vec) if norm_from < 1e-10 or norm_to < 1e-10: raise ValueError("Cannot align zero-norm vectors.") # Normalize inputs a = from_vec / norm_from b = to_vec / norm_to v = np.cross(a, b) c = np.dot(a, b) s = np.linalg.norm(v) # Handle parallel case if s < 1e-10: if c > 0: return np.eye(3, dtype=np.float64) else: # Anti-parallel case: 180 degree rotation around an orthogonal axis # Find an orthogonal axis if abs(a[0]) < 0.9: ortho = np.array([1.0, 0.0, 0.0]) else: ortho = np.array([0.0, 1.0, 0.0]) axis = np.cross(a, ortho) axis /= np.linalg.norm(axis) # Rodrigues formula for 180 degrees K = np.array( [ [0.0, -axis[2], axis[1]], [axis[2], 0.0, -axis[0]], [-axis[1], axis[0], 0.0], ] ) return (np.eye(3) + 2 * (K @ K)).astype(np.float64) # General case using Rodrigues' rotation formula # R = I + [v]_x + [v]_x^2 * (1-c)/s^2 vx = np.array([[0.0, -v[2], v[1]], [v[2], 0.0, -v[0]], [-v[1], v[0], 0.0]]) R = np.eye(3) + vx + (vx @ vx) * ((1 - c) / (s**2)) return R.astype(np.float64) def apply_alignment_to_pose(T: Mat44, R_align: Mat33) -> Mat44: """ Apply an alignment rotation to a 4x4 pose matrix. The alignment is applied in the global frame (pre-multiplication of rotation). Args: T: (4, 4) homogeneous transformation matrix. R_align: (3, 3) alignment rotation matrix. Returns: (4, 4) aligned transformation matrix. """ if T.shape != (4, 4): raise ValueError(f"Expected 4x4 matrix, got {T.shape}") if R_align.shape != (3, 3): raise ValueError(f"Expected 3x3 matrix, got {R_align.shape}") T_align = np.eye(4, dtype=np.float64) T_align[:3, :3] = R_align return (T_align @ T).astype(np.float64) def estimate_up_vector_from_cameras(camera_poses: list[Mat44]) -> Vec3: """ Estimate the 'up' vector of the scene based on camera positions. Assumes cameras are arranged roughly in a horizontal ring (coplanar). The normal of the plane fitting the camera centers is used as the up vector. The sign is disambiguated using the average camera 'up' vector (-Y in OpenCV). Args: camera_poses: List of (4, 4) camera-to-world transformation matrices. Returns: (3,) normalized up vector. """ if not camera_poses: raise ValueError("No camera poses provided.") # Extract camera centers (translations) centers = np.array([T[:3, 3] for T in camera_poses]) # Calculate average camera 'up' vector (assuming OpenCV convention: Y is down, so up is -Y) # T[:3, 1] is the Y axis direction in world frame # We want the vector pointing UP in world coordinates. # In OpenCV camera frame, Y is down. So -Y is up. # The world-frame representation of the camera's -Y axis is -R[:, 1] # T[:3, 1] is the second column of the rotation matrix (Y axis). avg_cam_up = np.mean([-T[:3, 1] for T in camera_poses], axis=0) norm = np.linalg.norm(avg_cam_up) if norm > 1e-6: avg_cam_up /= norm else: avg_cam_up = np.array([0.0, 1.0, 0.0]) # Fallback # If fewer than 3 cameras, we can't reliably fit a plane. # Fallback to average camera up vector. if len(camera_poses) < 3: logger.debug("Fewer than 3 cameras; using average camera -Y as up vector.") return avg_cam_up # Fit plane to camera centers using SVD centroid = np.mean(centers, axis=0) centered = centers - centroid # Check if points are collinear or coincident (rank check) # If they are collinear, plane is undefined. if np.linalg.matrix_rank(centered) < 2: logger.debug( "Camera centers are collinear; using average camera -Y as up vector." ) return avg_cam_up try: u, s, vh = np.linalg.svd(centered) # The normal is the singular vector corresponding to the smallest singular value normal = vh[2, :] except np.linalg.LinAlgError: logger.warning("SVD failed; using average camera -Y as up vector.") return avg_cam_up # Disambiguate sign: choose the normal that aligns best with average camera up if np.dot(normal, avg_cam_up) < 0: normal = -normal return normal def get_face_normal_from_geometry( face_name: str, marker_geometry: dict[int, np.ndarray], face_marker_map: dict[str, list[int]] | None = None, ) -> Vec3 | None: """ Compute the average normal vector for a face based on available marker geometry. Args: face_name: Name of the face (key in face_marker_map). marker_geometry: Dictionary mapping marker IDs to their (4, 3) corner coordinates. face_marker_map: Dictionary mapping face names to marker IDs. Returns: (3,) normalized average normal vector, or None if no markers are available. """ if face_marker_map is None: return None if face_name not in face_marker_map: return None marker_ids = face_marker_map[face_name] normals = [] for mid in marker_ids: if mid in marker_geometry: try: n = compute_face_normal(marker_geometry[mid]) normals.append(n) except ValueError: continue if not normals: return None avg_normal = np.mean(normals, axis=0) norm = np.linalg.norm(avg_normal) if norm < 1e-10: return None return (avg_normal / norm).astype(np.float64) def detect_ground_face( visible_marker_ids: set[int], marker_geometry: dict[int, np.ndarray], camera_up_vector: Vec3 = np.array([0, -1, 0]), face_marker_map: dict[str, list[int]] | None = None, ) -> tuple[str, Vec3] | None: """ Detect which face of the object is most likely the ground face. The ground face is the one whose normal is most aligned with the camera's up vector. Args: visible_marker_ids: Set of marker IDs currently visible. marker_geometry: Dictionary mapping marker IDs to their (4, 3) corner coordinates. camera_up_vector: (3,) vector representing the 'up' direction in camera frame. face_marker_map: Dictionary mapping face names to marker IDs. Returns: Tuple of (best_face_name, best_face_normal) or None if no face is detected. """ if not visible_marker_ids: return None if face_marker_map is None: return None if camera_up_vector.shape != (3,): raise ValueError( f"Expected (3,) camera_up_vector, got {camera_up_vector.shape}" ) up_norm = np.linalg.norm(camera_up_vector) if up_norm < 1e-10: raise ValueError("camera_up_vector cannot be zero-norm.") up_vec = camera_up_vector / up_norm best_face = None best_normal = None best_score = -np.inf # Iterate faces in mapping for face_name, face_marker_ids in face_marker_map.items(): # We check ALL faces for which we have geometry, regardless of visibility. # This allows detecting the ground face even if it's occluded, # provided we have geometry for it (e.g. from a loaded model or previous detections). # However, get_face_normal_from_geometry requires marker_geometry to contain the markers. # If marker_geometry only contains *visible* markers (which is typical if passed from detection), # then we are limited to visible faces. # But if marker_geometry is the full loaded geometry, we can check all faces. normal = get_face_normal_from_geometry( face_name, marker_geometry, face_marker_map=face_marker_map ) if normal is None: continue # Score by dot(normal, normalized camera_up_vector) score = np.dot(normal, up_vec) logger.debug(f"Face '{face_name}' considered. Score: {score:.4f}") if score > best_score: best_score = score best_face = face_name best_normal = normal if best_face is not None and best_normal is not None: logger.debug( f"Best ground face detected: '{best_face}' with score {best_score:.4f}" ) return best_face, best_normal return None