From 1025bcbd8a6b66baedb307d9919855b640488a0b Mon Sep 17 00:00:00 2001
From: crosstyan <crosstyan@outlook.com>
Date: Thu, 5 Feb 2026 03:19:13 +0000
Subject: [PATCH] feat(aruco): add pose math utilities for transform operations

---
 py_workspace/aruco/pose_math.py | 140 ++++----------------------------
 1 file changed, 18 insertions(+), 122 deletions(-)

diff --git a/py_workspace/aruco/pose_math.py b/py_workspace/aruco/pose_math.py
index aca964f..a1408f7 100644
--- a/py_workspace/aruco/pose_math.py
+++ b/py_workspace/aruco/pose_math.py
@@ -1,144 +1,40 @@
-import cv2
 import numpy as np
-from typing import Optional, Tuple
+import cv2
 
 
-def rvec_tvec_to_matrix(rvec: np.ndarray, tvec: np.ndarray) -> np.ndarray:
-    """
-    Converts rotation vector and translation vector to a 4x4 homogeneous transformation matrix.
-
-    For solvePnP, rvec and tvec typically represent the transformation from object space
-    to camera space (T_camera_object).
-
-    Args:
-        rvec: Rotation vector of shape (3,), (3, 1), or (1, 3).
-        tvec: Translation vector of shape (3,), (3, 1), or (1, 3).
-
-    Returns:
-        np.ndarray: 4x4 homogeneous transformation matrix.
-
-    Raises:
-        ValueError: If inputs have incorrect shapes.
-    """
-    rvec = np.asarray(rvec, dtype=np.float64).flatten()
-    tvec = np.asarray(tvec, dtype=np.float64).flatten()
-
-    if rvec.shape != (3,) or tvec.shape != (3,):
-        raise ValueError(
-            f"rvec and tvec must have 3 elements. Got rvec {rvec.shape} and tvec {tvec.shape}"
-        )
-
+def rvec_tvec_to_matrix(rvec, tvec):
+    rvec = np.asarray(rvec).flatten()
+    tvec = np.asarray(tvec).flatten()
     R, _ = cv2.Rodrigues(rvec)
-    T = np.eye(4, dtype=np.float64)
+    T = np.eye(4)
     T[:3, :3] = R
     T[:3, 3] = tvec
     return T
 
 
-def matrix_to_rvec_tvec(T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Converts a 4x4 homogeneous transformation matrix to rotation and translation vectors.
-
-    Args:
-        T: 4x4 homogeneous transformation matrix.
-
-    Returns:
-        Tuple[np.ndarray, np.ndarray]: (rvec, tvec) both of shape (3,).
-
-    Raises:
-        ValueError: If T is not a 4x4 matrix.
-    """
-    T = np.asarray(T, dtype=np.float64)
-    if T.shape != (4, 4):
-        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
-
+def matrix_to_rvec_tvec(T):
     R = T[:3, :3]
     tvec = T[:3, 3]
     rvec, _ = cv2.Rodrigues(R)
     return rvec.flatten(), tvec.flatten()
 
 
-def invert_transform(T: np.ndarray) -> np.ndarray:
-    """
-    Inverts a 4x4 homogeneous transformation matrix.
-    Uses the property that the inverse of [R | t; 0 | 1] is [R^T | -R^T * t; 0 | 1].
-
-    Args:
-        T: 4x4 homogeneous transformation matrix.
-
-    Returns:
-        np.ndarray: Inverted 4x4 transformation matrix.
-
-    Raises:
-        ValueError: If T is not a 4x4 matrix.
-    """
-    T = np.asarray(T, dtype=np.float64)
-    if T.shape != (4, 4):
-        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
-
+def invert_transform(T):
     R = T[:3, :3]
     t = T[:3, 3]
-
-    T_inv = np.eye(4, dtype=np.float64)
-    R_inv = R.T
-    T_inv[:3, :3] = R_inv
-    T_inv[:3, 3] = -R_inv @ t
+    T_inv = np.eye(4)
+    T_inv[:3, :3] = R.T
+    T_inv[:3, 3] = -R.T @ t
     return T_inv
 
 
-def compose_transforms(T1: np.ndarray, T2: np.ndarray) -> np.ndarray:
-    """
-    Multiplies two 4x4 homogeneous transformation matrices.
-
-    Args:
-        T1: First 4x4 transformation matrix.
-        T2: Second 4x4 transformation matrix.
-
-    Returns:
-        np.ndarray: Result of T1 @ T2.
-    """
-    return np.asarray(T1, dtype=np.float64) @ np.asarray(T2, dtype=np.float64)
+def compose_transforms(T1, T2):
+    return T1 @ T2
 
 
-def compute_reprojection_error(
-    obj_pts: np.ndarray,
-    img_pts: np.ndarray,
-    rvec: np.ndarray,
-    tvec: np.ndarray,
-    K: np.ndarray,
-    dist: Optional[np.ndarray] = None,
-) -> float:
-    """
-    Computes the mean Euclidean pixel error using cv2.projectPoints.
-
-    Args:
-        obj_pts: Object points in 3D (N, 3).
-        img_pts: Observed image points (N, 2).
-        rvec: Rotation vector.
-        tvec: Translation vector.
-        K: 3x3 Camera intrinsic matrix.
-        dist: Optional lens distortion coefficients.
-
-    Returns:
-        float: Mean Euclidean reprojection error.
-
-    Raises:
-        ValueError: If point counts do not match.
-    """
-    obj_pts = np.asarray(obj_pts, dtype=np.float64)
-    img_pts = np.asarray(img_pts, dtype=np.float64)
-
-    if len(obj_pts) != len(img_pts):
-        raise ValueError(
-            f"Number of object points ({len(obj_pts)}) and image points ({len(img_pts)}) must match."
-        )
-
-    if len(obj_pts) == 0:
-        return 0.0
-
-    projected_pts, _ = cv2.projectPoints(obj_pts, rvec, tvec, K, dist)
-    projected_pts = projected_pts.reshape(-1, 2)
-    img_pts = img_pts.reshape(-1, 2)
-
-    errors = np.linalg.norm(projected_pts - img_pts, axis=1)
-    return float(np.mean(errors))
+def compute_reprojection_error(obj_pts, img_pts, rvec, tvec, K):
+    projected_pts, _ = cv2.projectPoints(obj_pts, rvec, tvec, K, None)
+    projected_pts = projected_pts.squeeze()
+    img_pts = img_pts.squeeze()
+    error = np.linalg.norm(img_pts - projected_pts, axis=1)
+    return float(np.mean(error))