43e736faac
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
145 lines
4.0 KiB
Python
145 lines
4.0 KiB
Python
import cv2
|
|
import numpy as np
|
|
from typing import Optional, Tuple
|
|
|
|
|
|
def rvec_tvec_to_matrix(rvec: np.ndarray, tvec: np.ndarray) -> np.ndarray:
|
|
"""
|
|
Converts rotation vector and translation vector to a 4x4 homogeneous transformation matrix.
|
|
|
|
For solvePnP, rvec and tvec typically represent the transformation from object space
|
|
to camera space (T_camera_object).
|
|
|
|
Args:
|
|
rvec: Rotation vector of shape (3,), (3, 1), or (1, 3).
|
|
tvec: Translation vector of shape (3,), (3, 1), or (1, 3).
|
|
|
|
Returns:
|
|
np.ndarray: 4x4 homogeneous transformation matrix.
|
|
|
|
Raises:
|
|
ValueError: If inputs have incorrect shapes.
|
|
"""
|
|
rvec = np.asarray(rvec, dtype=np.float64).flatten()
|
|
tvec = np.asarray(tvec, dtype=np.float64).flatten()
|
|
|
|
if rvec.shape != (3,) or tvec.shape != (3,):
|
|
raise ValueError(
|
|
f"rvec and tvec must have 3 elements. Got rvec {rvec.shape} and tvec {tvec.shape}"
|
|
)
|
|
|
|
R, _ = cv2.Rodrigues(rvec)
|
|
T = np.eye(4, dtype=np.float64)
|
|
T[:3, :3] = R
|
|
T[:3, 3] = tvec
|
|
return T
|
|
|
|
|
|
def matrix_to_rvec_tvec(T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
|
"""
|
|
Converts a 4x4 homogeneous transformation matrix to rotation and translation vectors.
|
|
|
|
Args:
|
|
T: 4x4 homogeneous transformation matrix.
|
|
|
|
Returns:
|
|
Tuple[np.ndarray, np.ndarray]: (rvec, tvec) both of shape (3,).
|
|
|
|
Raises:
|
|
ValueError: If T is not a 4x4 matrix.
|
|
"""
|
|
T = np.asarray(T, dtype=np.float64)
|
|
if T.shape != (4, 4):
|
|
raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
|
|
|
|
R = T[:3, :3]
|
|
tvec = T[:3, 3]
|
|
rvec, _ = cv2.Rodrigues(R)
|
|
return rvec.flatten(), tvec.flatten()
|
|
|
|
|
|
def invert_transform(T: np.ndarray) -> np.ndarray:
|
|
"""
|
|
Inverts a 4x4 homogeneous transformation matrix.
|
|
Uses the property that the inverse of [R | t; 0 | 1] is [R^T | -R^T * t; 0 | 1].
|
|
|
|
Args:
|
|
T: 4x4 homogeneous transformation matrix.
|
|
|
|
Returns:
|
|
np.ndarray: Inverted 4x4 transformation matrix.
|
|
|
|
Raises:
|
|
ValueError: If T is not a 4x4 matrix.
|
|
"""
|
|
T = np.asarray(T, dtype=np.float64)
|
|
if T.shape != (4, 4):
|
|
raise ValueError(f"Transformation matrix must be 4x4. Got {T.shape}")
|
|
|
|
R = T[:3, :3]
|
|
t = T[:3, 3]
|
|
|
|
T_inv = np.eye(4, dtype=np.float64)
|
|
R_inv = R.T
|
|
T_inv[:3, :3] = R_inv
|
|
T_inv[:3, 3] = -R_inv @ t
|
|
return T_inv
|
|
|
|
|
|
def compose_transforms(T1: np.ndarray, T2: np.ndarray) -> np.ndarray:
|
|
"""
|
|
Multiplies two 4x4 homogeneous transformation matrices.
|
|
|
|
Args:
|
|
T1: First 4x4 transformation matrix.
|
|
T2: Second 4x4 transformation matrix.
|
|
|
|
Returns:
|
|
np.ndarray: Result of T1 @ T2.
|
|
"""
|
|
return np.asarray(T1, dtype=np.float64) @ np.asarray(T2, dtype=np.float64)
|
|
|
|
|
|
def compute_reprojection_error(
|
|
obj_pts: np.ndarray,
|
|
img_pts: np.ndarray,
|
|
rvec: np.ndarray,
|
|
tvec: np.ndarray,
|
|
K: np.ndarray,
|
|
dist: Optional[np.ndarray] = None,
|
|
) -> float:
|
|
"""
|
|
Computes the mean Euclidean pixel error using cv2.projectPoints.
|
|
|
|
Args:
|
|
obj_pts: Object points in 3D (N, 3).
|
|
img_pts: Observed image points (N, 2).
|
|
rvec: Rotation vector.
|
|
tvec: Translation vector.
|
|
K: 3x3 Camera intrinsic matrix.
|
|
dist: Optional lens distortion coefficients.
|
|
|
|
Returns:
|
|
float: Mean Euclidean reprojection error.
|
|
|
|
Raises:
|
|
ValueError: If point counts do not match.
|
|
"""
|
|
obj_pts = np.asarray(obj_pts, dtype=np.float64)
|
|
img_pts = np.asarray(img_pts, dtype=np.float64)
|
|
|
|
if len(obj_pts) != len(img_pts):
|
|
raise ValueError(
|
|
f"Number of object points ({len(obj_pts)}) and image points ({len(img_pts)}) must match."
|
|
)
|
|
|
|
if len(obj_pts) == 0:
|
|
return 0.0
|
|
|
|
projected_pts, _ = cv2.projectPoints(obj_pts, rvec, tvec, K, dist)
|
|
projected_pts = projected_pts.reshape(-1, 2)
|
|
img_pts = img_pts.reshape(-1, 2)
|
|
|
|
errors = np.linalg.norm(projected_pts - img_pts, axis=1)
|
|
return float(np.mean(errors))
|