feat(aruco): add pose math utilities for transform operations

This commit is contained in:
2026-02-05 03:19:13 +00:00
parent 9c861105f7
commit 1025bcbd8a
+18 -122
View File
@@ -1,144 +1,40 @@
import cv2
import numpy as np import numpy as np
from typing import Optional, Tuple import cv2
def rvec_tvec_to_matrix(rvec: np.ndarray, tvec: np.ndarray) -> np.ndarray:
    """
    Convert a rotation vector and translation vector to a 4x4 homogeneous
    transformation matrix.

    For cv2.solvePnP output, rvec and tvec typically represent the transform
    from object space to camera space (T_camera_object).

    Args:
        rvec: Rotation vector of shape (3,), (3, 1), or (1, 3).
        tvec: Translation vector of shape (3,), (3, 1), or (1, 3).

    Returns:
        np.ndarray: 4x4 homogeneous transformation matrix (float64).

    Raises:
        ValueError: If either input does not have exactly 3 elements.
    """
    # Coerce to float64 and flatten so (3,), (3,1) and (1,3) are all accepted.
    rvec = np.asarray(rvec, dtype=np.float64).flatten()
    tvec = np.asarray(tvec, dtype=np.float64).flatten()
    if rvec.shape != (3,) or tvec.shape != (3,):
        raise ValueError(
            f"rvec and tvec must have 3 elements. Got rvec {rvec.shape} and tvec {tvec.shape}"
        )
    # Rodrigues converts the axis-angle vector to a 3x3 rotation matrix.
    R, _ = cv2.Rodrigues(rvec)
    T = np.eye(4, dtype=np.float64)
    T[:3, :3] = R
    T[:3, 3] = tvec
    return T
def matrix_to_rvec_tvec(T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert a 4x4 homogeneous transformation matrix to rotation and
    translation vectors.

    Args:
        T: 4x4 homogeneous transformation matrix.

    Returns:
        Tuple[np.ndarray, np.ndarray]: (rvec, tvec), each of shape (3,).

    Raises:
        ValueError: If T is not a 4x4 matrix.
    """
    T = np.asarray(T, dtype=np.float64)
    if T.shape != (4, 4):
        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
    R = T[:3, :3]
    tvec = T[:3, 3]
    # Rodrigues converts the 3x3 rotation matrix back to an axis-angle vector.
    rvec, _ = cv2.Rodrigues(R)
    # cv2.Rodrigues returns (3, 1); flatten both outputs to shape (3,).
    return rvec.flatten(), tvec.flatten()
def invert_transform(T: np.ndarray) -> np.ndarray:
    """
    Invert a 4x4 homogeneous transformation matrix.

    Uses the closed form: inverse of [R | t; 0 | 1] is [R^T | -R^T t; 0 | 1],
    which is cheaper and numerically cleaner than a general matrix inverse
    (valid because R is a rotation, so R^-1 == R^T).

    Args:
        T: 4x4 homogeneous transformation matrix.

    Returns:
        np.ndarray: Inverted 4x4 transformation matrix (float64).

    Raises:
        ValueError: If T is not a 4x4 matrix.
    """
    T = np.asarray(T, dtype=np.float64)
    if T.shape != (4, 4):
        raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
    R = T[:3, :3]
    t = T[:3, 3]
    T_inv = np.eye(4, dtype=np.float64)
    R_inv = R.T
    T_inv[:3, :3] = R_inv
    T_inv[:3, 3] = -R_inv @ t
    return T_inv
def compose_transforms(T1: np.ndarray, T2: np.ndarray) -> np.ndarray:
    """
    Multiply two 4x4 homogeneous transformation matrices.

    Args:
        T1: First 4x4 transformation matrix.
        T2: Second 4x4 transformation matrix.

    Returns:
        np.ndarray: Result of T1 @ T2 (float64).

    Raises:
        ValueError: If either input is not a 4x4 matrix.
    """
    T1 = np.asarray(T1, dtype=np.float64)
    T2 = np.asarray(T2, dtype=np.float64)
    # Validate shapes up front, consistent with the other transform helpers,
    # so a bad input fails loudly instead of broadcasting silently.
    if T1.shape != (4, 4) or T2.shape != (4, 4):
        raise ValueError(
            f"Transformation matrices must be 4x4. Got {T1.shape} and {T2.shape}"
        )
    return T1 @ T2
def compute_reprojection_error(
    obj_pts: np.ndarray,
    img_pts: np.ndarray,
    rvec: np.ndarray,
    tvec: np.ndarray,
    K: np.ndarray,
    dist: Optional[np.ndarray] = None,
) -> float:
    """
    Compute the mean Euclidean pixel error using cv2.projectPoints.

    Args:
        obj_pts: Object points in 3D, shape (N, 3).
        img_pts: Observed image points, shape (N, 2).
        rvec: Rotation vector.
        tvec: Translation vector.
        K: 3x3 camera intrinsic matrix.
        dist: Optional lens distortion coefficients (passed through to
            cv2.projectPoints; None means no distortion).

    Returns:
        float: Mean Euclidean reprojection error in pixels (0.0 for no points).

    Raises:
        ValueError: If the object and image point counts do not match.
    """
    obj_pts = np.asarray(obj_pts, dtype=np.float64)
    img_pts = np.asarray(img_pts, dtype=np.float64)
    if len(obj_pts) != len(img_pts):
        raise ValueError(
            f"Number of object points ({len(obj_pts)}) and image points ({len(img_pts)}) must match."
        )
    # Guard the empty case: np.mean over zero elements would emit a NaN/warning.
    if len(obj_pts) == 0:
        return 0.0
    projected_pts, _ = cv2.projectPoints(obj_pts, rvec, tvec, K, dist)
    # reshape(-1, 2) rather than squeeze(): squeeze() collapses the N=1 case
    # to shape (2,), which would break the axis=1 norm below.
    projected_pts = projected_pts.reshape(-1, 2)
    img_pts = img_pts.reshape(-1, 2)
    errors = np.linalg.norm(projected_pts - img_pts, axis=1)
    return float(np.mean(errors))