import cv2
import numpy as np
from typing import Optional, Tuple


def rvec_tvec_to_matrix(rvec: np.ndarray, tvec: np.ndarray) -> np.ndarray:
    """
    Converts rotation vector and translation vector to a 4x4 homogeneous
    transformation matrix.

    For solvePnP, rvec and tvec typically represent the transformation from
    object space to camera space (T_camera_object).

    Args:
        rvec: Rotation vector of shape (3,), (3, 1), or (1, 3).
        tvec: Translation vector of shape (3,), (3, 1), or (1, 3).

    Returns:
        np.ndarray: 4x4 homogeneous transformation matrix.

    Raises:
        ValueError: If inputs have incorrect shapes.
    """
    # Flatten so (3,), (3, 1) and (1, 3) are all accepted uniformly.
    rvec = np.asarray(rvec, dtype=np.float64).flatten()
    tvec = np.asarray(tvec, dtype=np.float64).flatten()
    if rvec.shape != (3,) or tvec.shape != (3,):
        raise ValueError(
            f"rvec and tvec must have 3 elements. Got rvec {rvec.shape} and tvec {tvec.shape}"
        )
    # Rodrigues converts the axis-angle vector to a 3x3 rotation matrix.
    R, _ = cv2.Rodrigues(rvec)
    T = np.eye(4, dtype=np.float64)
    T[:3, :3] = R
    T[:3, 3] = tvec
    return T


def matrix_to_rvec_tvec(T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Converts a 4x4 homogeneous transformation matrix to rotation and
    translation vectors.

    Args:
        T: 4x4 homogeneous transformation matrix.

    Returns:
        Tuple[np.ndarray, np.ndarray]: (rvec, tvec) both of shape (3,).

    Raises:
        ValueError: If T is not a 4x4 matrix.
    """
    T = np.asarray(T, dtype=np.float64)
    if T.shape != (4, 4):
        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
    R = T[:3, :3]
    tvec = T[:3, 3]
    # Rodrigues maps the 3x3 rotation matrix back to an axis-angle vector.
    rvec, _ = cv2.Rodrigues(R)
    return rvec.flatten(), tvec.flatten()


def invert_transform(T: np.ndarray) -> np.ndarray:
    """
    Inverts a 4x4 homogeneous transformation matrix.

    Uses the property that the inverse of [R | t; 0 | 1] is
    [R^T | -R^T * t; 0 | 1], which is cheaper and numerically better
    behaved than a general matrix inverse.

    Args:
        T: 4x4 homogeneous transformation matrix.

    Returns:
        np.ndarray: Inverted 4x4 transformation matrix.

    Raises:
        ValueError: If T is not a 4x4 matrix.
    """
    T = np.asarray(T, dtype=np.float64)
    if T.shape != (4, 4):
        # Single-line message, consistent with matrix_to_rvec_tvec.
        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
    R = T[:3, :3]
    t = T[:3, 3]
    T_inv = np.eye(4, dtype=np.float64)
    R_inv = R.T  # Rotation matrices are orthonormal: inverse == transpose.
    T_inv[:3, :3] = R_inv
    T_inv[:3, 3] = -R_inv @ t
    return T_inv


def compose_transforms(T1: np.ndarray, T2: np.ndarray) -> np.ndarray:
    """
    Multiplies two 4x4 homogeneous transformation matrices.

    Args:
        T1: First 4x4 transformation matrix.
        T2: Second 4x4 transformation matrix.

    Returns:
        np.ndarray: Result of T1 @ T2.
    """
    return np.asarray(T1, dtype=np.float64) @ np.asarray(T2, dtype=np.float64)


def compute_reprojection_error(
    obj_pts: np.ndarray,
    img_pts: np.ndarray,
    rvec: np.ndarray,
    tvec: np.ndarray,
    K: np.ndarray,
    dist: Optional[np.ndarray] = None,
) -> float:
    """
    Computes the mean Euclidean pixel error using cv2.projectPoints.

    Args:
        obj_pts: Object points in 3D (N, 3).
        img_pts: Observed image points (N, 2).
        rvec: Rotation vector.
        tvec: Translation vector.
        K: 3x3 Camera intrinsic matrix.
        dist: Optional lens distortion coefficients.

    Returns:
        float: Mean Euclidean reprojection error.

    Raises:
        ValueError: If point counts do not match.
    """
    obj_pts = np.asarray(obj_pts, dtype=np.float64)
    img_pts = np.asarray(img_pts, dtype=np.float64)
    if len(obj_pts) != len(img_pts):
        raise ValueError(
            f"Number of object points ({len(obj_pts)}) and image points ({len(img_pts)}) must match."
        )
    if len(obj_pts) == 0:
        # No correspondences means no error by definition; avoids a
        # degenerate cv2.projectPoints call and a mean over an empty array.
        return 0.0
    projected_pts, _ = cv2.projectPoints(obj_pts, rvec, tvec, K, dist)
    # projectPoints returns shape (N, 1, 2); flatten both to (N, 2) before diffing.
    projected_pts = projected_pts.reshape(-1, 2)
    img_pts = img_pts.reshape(-1, 2)
    errors = np.linalg.norm(projected_pts - img_pts, axis=1)
    return float(np.mean(errors))