feat(aruco): add pose math utilities for transform operations

This commit is contained in:
2026-02-05 03:19:13 +00:00
parent 9c861105f7
commit 1025bcbd8a
+18 -122
View File
@@ -1,144 +1,40 @@
import cv2
import numpy as np import numpy as np
from typing import Optional, Tuple import cv2
def rvec_tvec_to_matrix(rvec: np.ndarray, tvec: np.ndarray) -> np.ndarray:
    """
    Convert a rotation vector and translation vector to a 4x4 homogeneous
    transformation matrix.

    For cv2.solvePnP output, rvec and tvec typically represent the transform
    from object space to camera space (T_camera_object).

    Args:
        rvec: Rotation vector of shape (3,), (3, 1), or (1, 3).
        tvec: Translation vector of shape (3,), (3, 1), or (1, 3).

    Returns:
        np.ndarray: 4x4 homogeneous transformation matrix (float64).

    Raises:
        ValueError: If either input does not have exactly 3 elements.
    """
    # Coerce to float64 and flatten so (3,), (3,1) and (1,3) are all accepted.
    rvec = np.asarray(rvec, dtype=np.float64).flatten()
    tvec = np.asarray(tvec, dtype=np.float64).flatten()
    if rvec.shape != (3,) or tvec.shape != (3,):
        raise ValueError(
            f"rvec and tvec must have 3 elements. Got rvec {rvec.shape} and tvec {tvec.shape}"
        )
    # Rodrigues converts the axis-angle vector to a 3x3 rotation matrix.
    R, _ = cv2.Rodrigues(rvec)
    T = np.eye(4, dtype=np.float64)
    T[:3, :3] = R
    T[:3, 3] = tvec
    return T
def matrix_to_rvec_tvec(T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert a 4x4 homogeneous transformation matrix to rotation and
    translation vectors.

    Args:
        T: 4x4 homogeneous transformation matrix.

    Returns:
        Tuple[np.ndarray, np.ndarray]: (rvec, tvec), each of shape (3,).

    Raises:
        ValueError: If T is not a 4x4 matrix.
    """
    T = np.asarray(T, dtype=np.float64)
    if T.shape != (4, 4):
        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
    R = T[:3, :3]
    tvec = T[:3, 3]
    # Rodrigues converts the 3x3 rotation matrix back to an axis-angle vector.
    rvec, _ = cv2.Rodrigues(R)
    # cv2.Rodrigues returns (3, 1); flatten both outputs to shape (3,).
    return rvec.flatten(), tvec.flatten()
def invert_transform(T: np.ndarray) -> np.ndarray:
    """
    Invert a 4x4 homogeneous transformation matrix.

    Uses the closed form: inverse of [R | t; 0 | 1] is [R^T | -R^T t; 0 | 1],
    which is cheaper and numerically cleaner than a general matrix inverse
    (valid because R is a rotation, so R^-1 == R^T).

    Args:
        T: 4x4 homogeneous transformation matrix.

    Returns:
        np.ndarray: Inverted 4x4 transformation matrix (float64).

    Raises:
        ValueError: If T is not a 4x4 matrix.
    """
    T = np.asarray(T, dtype=np.float64)
    if T.shape != (4, 4):
        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
    R = T[:3, :3]
    t = T[:3, 3]
    T_inv = np.eye(4, dtype=np.float64)
    R_inv = R.T
    T_inv[:3, :3] = R_inv
    T_inv[:3, 3] = -R_inv @ t
    return T_inv
def compose_transforms(T1: np.ndarray, T2: np.ndarray) -> np.ndarray:
    """
    Multiply two 4x4 homogeneous transformation matrices.

    Args:
        T1: First 4x4 transformation matrix.
        T2: Second 4x4 transformation matrix.

    Returns:
        np.ndarray: Result of T1 @ T2 (float64).

    Raises:
        ValueError: If either input is not a 4x4 matrix.
    """
    T1 = np.asarray(T1, dtype=np.float64)
    T2 = np.asarray(T2, dtype=np.float64)
    # Validate shapes up front, consistent with the other transform helpers,
    # so a bad input fails loudly instead of broadcasting silently.
    if T1.shape != (4, 4) or T2.shape != (4, 4):
        raise ValueError(
            f"Transformation matrices must be 4x4. Got {T1.shape} and {T2.shape}"
        )
    return T1 @ T2
def compute_reprojection_error(
    obj_pts: np.ndarray,
    img_pts: np.ndarray,
    rvec: np.ndarray,
    tvec: np.ndarray,
    K: np.ndarray,
    dist: Optional[np.ndarray] = None,
) -> float:
    """
    Compute the mean Euclidean pixel error using cv2.projectPoints.

    Args:
        obj_pts: Object points in 3D, shape (N, 3).
        img_pts: Observed image points, shape (N, 2).
        rvec: Rotation vector.
        tvec: Translation vector.
        K: 3x3 camera intrinsic matrix.
        dist: Optional lens distortion coefficients (passed through to
            cv2.projectPoints; None means no distortion).

    Returns:
        float: Mean Euclidean reprojection error in pixels (0.0 for no points).

    Raises:
        ValueError: If the object and image point counts do not match.
    """
    obj_pts = np.asarray(obj_pts, dtype=np.float64)
    img_pts = np.asarray(img_pts, dtype=np.float64)
    if len(obj_pts) != len(img_pts):
        raise ValueError(
            f"Number of object points ({len(obj_pts)}) and image points ({len(img_pts)}) must match."
        )
    # Guard the empty case: np.mean over zero elements would emit a NaN/warning.
    if len(obj_pts) == 0:
        return 0.0
    projected_pts, _ = cv2.projectPoints(obj_pts, rvec, tvec, K, dist)
    # reshape(-1, 2) rather than squeeze(): squeeze() collapses the N=1 case
    # to shape (2,), which would break the axis=1 norm below.
    projected_pts = projected_pts.reshape(-1, 2)
    img_pts = img_pts.reshape(-1, 2)
    errors = np.linalg.norm(projected_pts - img_pts, axis=1)
    return float(np.mean(errors))