#!/usr/bin/env python3
"""
Compare two camera pose sets from different world frames using rigid alignment.
Assumes both pose sets are in world_from_cam convention.
"""
import json
import sys
from pathlib import Path
from typing import Final

import click
import numpy as np
import plotly.graph_objects as go


def parse_pose(pose_str: str, context: str = "") -> np.ndarray:
    """Parse 16 whitespace-separated floats into a validated 4x4 pose matrix.

    Args:
        pose_str: Row-major 4x4 matrix as 16 whitespace-separated values.
        context: Human-readable context prefixed to any error message.

    Returns:
        A (4, 4) float ndarray representing a rigid transformation.

    Raises:
        ValueError: If the value count is wrong, the last row is not
            [0, 0, 0, 1], the rotation block is not orthonormal, or its
            determinant is not ~1 (reflection / scaling).
    """
    vals = [float(x) for x in pose_str.split()]
    if len(vals) != 16:
        raise ValueError(f"[{context}] Expected 16 values for pose, got {len(vals)}")
    pose = np.array(vals).reshape((4, 4))

    # Validate transformation matrix properties
    # 1. Last row check [0, 0, 0, 1]
    last_row = pose[3, :]
    expected_last_row = np.array([0, 0, 0, 1], dtype=float)
    if not np.allclose(last_row, expected_last_row, atol=1e-5):
        raise ValueError(
            f"[{context}] Invalid last row in transformation matrix: {last_row}. "
            f"Expected [0, 0, 0, 1]"
        )

    # 2. Rotation block orthonormality
    R = pose[:3, :3]
    # R @ R.T approx I
    identity_check = R @ R.T
    if not np.allclose(identity_check, np.eye(3), atol=1e-3):
        raise ValueError(
            f"[{context}] Rotation block is not orthonormal (R @ R.T != I)."
        )

    # 3. Determinant check det(R) approx 1
    det = np.linalg.det(R)
    if not np.allclose(det, 1.0, atol=1e-3):
        raise ValueError(
            f"[{context}] Rotation block determinant is {det:.6f}, expected 1.0 (improper rotation or scaling)."
        )
    return pose


def load_poses_from_json(path: str) -> dict[str, np.ndarray]:
    """
    Heuristically load poses from a JSON file.

    Supports:
    1) flat: {"serial": {"pose": "..."}}
    2) nested Fusion: {"serial": {"FusionConfiguration": {"pose": "..."}}}

    Returns:
        Mapping of serial (str) -> validated 4x4 pose matrix.

    Raises:
        click.UsageError: If no parsable poses are found in the file.
        ValueError: Propagated from parse_pose on malformed matrices.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    poses: dict[str, np.ndarray] = {}
    for serial, entry in data.items():
        # Skip non-dict entries (e.g. metadata fields at top level).
        if not isinstance(entry, dict):
            continue
        context = f"File: {path}, Serial: {serial}"
        # Check nested FusionConfiguration first
        if "FusionConfiguration" in entry and isinstance(
            entry["FusionConfiguration"], dict
        ):
            if "pose" in entry["FusionConfiguration"]:
                poses[str(serial)] = parse_pose(
                    entry["FusionConfiguration"]["pose"], context=context
                )
        # Then check flat
        elif "pose" in entry:
            poses[str(serial)] = parse_pose(entry["pose"], context=context)

    if not poses:
        raise click.UsageError(
            f"No parsable poses found in {path}.\n"
            "Expected formats:\n"
            '  1) Flat: {"serial": {"pose": "..."}}\n'
            '  2) Nested: {"serial": {"FusionConfiguration": {"pose": "..."}}}'
        )
    return poses


def serialize_pose(pose: np.ndarray) -> str:
    """Serialize a 4x4 pose to the 16-value string format read by parse_pose."""
    return " ".join(f"{x:.6f}" for x in pose.flatten())


def rigid_transform_3d(A: np.ndarray, B: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Find rigid alignment (R, t) such that R*A + t approx B.
    A, B are (N, 3) arrays of points.
    Uses Kabsch algorithm.

    Returns:
        Tuple of (R, t): a (3, 3) rotation matrix and a (3,) translation.
    """
    assert A.shape == B.shape

    centroid_A = np.mean(A, axis=0)
    centroid_B = np.mean(B, axis=0)
    AA = A - centroid_A
    BB = B - centroid_B

    # Cross-covariance; SVD gives the optimal rotation (Kabsch).
    H = AA.T @ BB
    U, S, Vt = np.linalg.svd(H)
    R_mat = Vt.T @ U.T

    # Correct for reflection: force a proper rotation (det = +1).
    if np.linalg.det(R_mat) < 0:
        Vt[2, :] *= -1
        R_mat = Vt.T @ U.T

    t = centroid_B - R_mat @ centroid_A
    return R_mat, t


def get_camera_center(pose: np.ndarray) -> np.ndarray:
    """Return the camera center (translation column) of a world_from_cam pose."""
    return pose[:3, 3]


def get_camera_up(pose: np.ndarray) -> np.ndarray:
    """Return the camera's world-space up vector."""
    # In CV convention, Y is down, so -Y is up.
    # R is [x_axis, y_axis, z_axis]
    return -pose[:3, 1]


def rotation_error_deg(R1: np.ndarray, R2: np.ndarray) -> float:
    """Return the geodesic angle (degrees) between two rotation matrices."""
    R_rel = R1.T @ R2
    # trace(R) = 1 + 2*cos(theta) for a rotation by angle theta.
    cos_theta = (np.trace(R_rel) - 1.0) / 2.0
    # Clamp against floating-point drift outside [-1, 1] before arccos.
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))


def angle_between_vectors_deg(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the unsigned angle (degrees) between two nonzero 3D vectors."""
    v1_u = v1 / np.linalg.norm(v1)
    v2_u = v2 / np.linalg.norm(v2)
    cos_theta = np.dot(v1_u, v2_u)
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))


def add_camera_trace(
    fig: go.Figure,
    pose: np.ndarray,
    label: str,
    scale: float = 0.2,
    frustum_scale: float = 0.5,
    fov_deg: float = 60.0,
    color: str = "blue",
):
    """
    Adds a camera frustum and axes to the Plotly figure.

    Args:
        fig: Target Plotly figure (mutated in place).
        pose: 4x4 world_from_cam pose.
        label: Legend/hover label for this camera.
        scale: Length of the drawn local axis lines.
        frustum_scale: Depth of the frustum (distance to image plane).
        fov_deg: Horizontal field of view used to size the frustum.
        color: Line color for the frustum edges.
    """
    R = pose[:3, :3]
    center = pose[:3, 3]

    # OpenCV convention: X right, Y down, Z forward
    x_axis_local = np.array([1, 0, 0])
    y_axis_local = np.array([0, 1, 0])
    z_axis_local = np.array([0, 0, 1])

    # Transform local axes to world
    x_axis_world = R @ x_axis_local
    y_axis_world = R @ y_axis_local
    z_axis_world = R @ z_axis_local

    # Frustum points in local coordinates
    fov_rad = np.radians(fov_deg)
    w = frustum_scale * np.tan(fov_rad / 2.0)
    h = w * 0.75  # 4:3 aspect ratio assumption
    pts_local = np.array(
        [
            [0, 0, 0],  # Center
            [-w, -h, frustum_scale],  # Top-Left
            [w, -h, frustum_scale],  # Top-Right
            [w, h, frustum_scale],  # Bottom-Right
            [-w, h, frustum_scale],  # Bottom-Left
        ]
    )

    # Transform frustum to world
    pts_world = (R @ pts_local.T).T + center

    # Create lines for frustum; None entries break the polyline between segments.
    x_lines, y_lines, z_lines = [], [], []

    def add_line(i, j):
        x_lines.extend([pts_world[i, 0], pts_world[j, 0], None])
        y_lines.extend([pts_world[i, 1], pts_world[j, 1], None])
        z_lines.extend([pts_world[i, 2], pts_world[j, 2], None])

    # Four rays from the center to the image-plane corners...
    for i in range(1, 5):
        add_line(0, i)
    # ...plus the image-plane rectangle.
    add_line(1, 2)
    add_line(2, 3)
    add_line(3, 4)
    add_line(4, 1)

    fig.add_trace(
        go.Scatter3d(
            x=x_lines,
            y=y_lines,
            z=z_lines,
            mode="lines",
            line=dict(color=color, width=2),
            name=f"{label} Frustum",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Add center point with label
    fig.add_trace(
        go.Scatter3d(
            x=[center[0]],
            y=[center[1]],
            z=[center[2]],
            mode="markers+text",
            marker=dict(size=4, color="black"),
            text=[label],
            textposition="top center",
            name=label,
            showlegend=True,
        )
    )

    # Add axes (RGB = XYZ)
    for axis_world, axis_color in zip(
        [x_axis_world, y_axis_world, z_axis_world], ["red", "green", "blue"]
    ):
        end = center + axis_world * scale
        fig.add_trace(
            go.Scatter3d(
                x=[center[0], end[0]],
                y=[center[1], end[1]],
                z=[center[2], end[2]],
                mode="lines",
                line=dict(color=axis_color, width=3),
                showlegend=False,
                hoverinfo="skip",
            )
        )


@click.command()
@click.option(
    "--pose-a-json",
    type=click.Path(exists=True),
    required=True,
    help="Pose set A. Supports flat {'serial': {'pose': '...'}} or nested FusionConfiguration format.",
)
@click.option(
    "--pose-b-json",
    type=click.Path(exists=True),
    required=True,
    help="Pose set B. Supports flat {'serial': {'pose': '...'}} or nested FusionConfiguration format.",
)
@click.option(
    "--report-json",
    type=click.Path(),
    required=True,
    help="Output path for comparison report",
)
@click.option(
    "--aligned-pose-b-json",
    type=click.Path(),
    help="Output path for aligned pose B set",
)
@click.option(
    "--plot-output",
    type=click.Path(),
    help="Output path for visualization (HTML or PNG)",
)
@click.option(
    "--show-plot",
    is_flag=True,
    default=False,
    help="Show the plot interactively",
)
@click.option(
    "--frustum-scale",
    type=float,
    default=0.3,
    help="Scale of the camera frustum",
)
@click.option(
    "--axis-scale",
    type=float,
    default=0.1,
    help="Scale of the camera axes",
)
def main(
    pose_a_json: str,
    pose_b_json: str,
    report_json: str,
    aligned_pose_b_json: str | None,
    plot_output: str | None,
    show_plot: bool,
    frustum_scale: float,
    axis_scale: float,
):
    """
    Compare two camera pose sets from different world frames using rigid alignment.
    Both are treated as T_world_from_cam.

    Supports symmetric, heuristic input parsing for both A and B:
    1) flat: {"serial": {"pose": "..."}}
    2) nested Fusion: {"serial": {"FusionConfiguration": {"pose": "..."}}}
    """
    poses_a = load_poses_from_json(pose_a_json)
    poses_b = load_poses_from_json(pose_b_json)

    # Kabsch needs at least 3 correspondences for a well-determined rotation.
    shared_serials = sorted(set(poses_a.keys()) & set(poses_b.keys()))
    if len(shared_serials) < 3:
        click.echo(
            f"Error: Found only {len(shared_serials)} shared serials ({shared_serials}). Need at least 3.",
            err=True,
        )
        sys.exit(1)

    pts_b = np.array([get_camera_center(poses_b[s]) for s in shared_serials])
    pts_a = np.array([get_camera_center(poses_a[s]) for s in shared_serials])

    # Align B to A: R_align * pts_b + t_align approx pts_a
    R_align, t_align = rigid_transform_3d(pts_b, pts_a)
    T_align = np.eye(4)
    T_align[:3, :3] = R_align
    T_align[:3, 3] = t_align

    per_cam_results = []
    pos_errors = []
    rot_errors = []
    up_errors = []
    for s in shared_serials:
        T_b = poses_b[s]
        T_a = poses_a[s]
        # T_world_a_from_cam = T_world_a_from_world_b * T_world_b_from_cam
        T_b_aligned = T_align @ T_b

        pos_err = np.linalg.norm(
            get_camera_center(T_b_aligned) - get_camera_center(T_a)
        )
        rot_err = rotation_error_deg(T_b_aligned[:3, :3], T_a[:3, :3])
        up_b = get_camera_up(T_b_aligned)
        up_a = get_camera_up(T_a)
        up_err = angle_between_vectors_deg(up_b, up_a)

        per_cam_results.append(
            {
                "serial": s,
                "position_error_m": float(pos_err),
                "rotation_error_deg": float(rot_err),
                "up_consistency_error_deg": float(up_err),
            }
        )
        pos_errors.append(pos_err)
        rot_errors.append(rot_err)
        up_errors.append(up_err)

    report = {
        "shared_serials": shared_serials,
        "alignment": {
            "R_align": R_align.tolist(),
            "t_align": t_align.tolist(),
            "T_align": T_align.tolist(),
        },
        "per_camera": per_cam_results,
        "summary": {
            "mean_position_error_m": float(np.mean(pos_errors)),
            "max_position_error_m": float(np.max(pos_errors)),
            "mean_rotation_error_deg": float(np.mean(rot_errors)),
            "max_rotation_error_deg": float(np.max(rot_errors)),
            "mean_up_consistency_error_deg": float(np.mean(up_errors)),
            "max_up_consistency_error_deg": float(np.max(up_errors)),
        },
    }

    Path(report_json).parent.mkdir(parents=True, exist_ok=True)
    with open(report_json, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=4)
    click.echo(f"Report written to {report_json}")

    if aligned_pose_b_json:
        # Re-express every B pose (not just the shared ones) in A's world frame.
        aligned_data = {}
        for s, T_b in poses_b.items():
            T_b_aligned = T_align @ T_b
            aligned_data[s] = {"pose": serialize_pose(T_b_aligned)}
        Path(aligned_pose_b_json).parent.mkdir(parents=True, exist_ok=True)
        with open(aligned_pose_b_json, "w", encoding="utf-8") as f:
            json.dump(aligned_data, f, indent=4)
        click.echo(f"Aligned pose B set written to {aligned_pose_b_json}")

    if plot_output or show_plot:
        fig = go.Figure()

        # Debug toggles for the static scene decorations.
        show_axis: Final[bool] = True
        if show_axis:
            # World basis vectors at the origin, colored RGB = XYZ.
            for i, color in enumerate(["red", "green", "blue"]):
                axis = np.eye(3)[:, i]
                fig.add_trace(
                    go.Scatter3d(
                        x=[0, axis[0] * axis_scale],
                        y=[0, axis[1] * axis_scale],
                        z=[0, axis[2] * axis_scale],
                        mode="lines",
                        line=dict(color=color, width=4),
                        name=f"World {'XYZ'[i]}",
                        showlegend=True,
                    )
                )

        show_ground: Final[bool] = False
        if show_ground:
            ground_size = 5.0
            half_size = ground_size / 2.0
            x_grid = np.linspace(-half_size, half_size, 2)
            z_grid = np.linspace(-half_size, half_size, 2)
            x_mesh, z_mesh = np.meshgrid(x_grid, z_grid)
            y_mesh = np.zeros_like(x_mesh)
            fig.add_trace(
                go.Surface(
                    x=x_mesh,
                    y=y_mesh,
                    z=z_mesh,
                    showscale=False,
                    opacity=0.1,
                    colorscale=[[0, "gray"], [1, "gray"]],
                    name="Ground Plane",
                    hoverinfo="skip",
                )
            )

        for s in sorted(poses_a.keys()):
            add_camera_trace(
                fig,
                poses_a[s],
                f"a_{s}",
                scale=axis_scale,
                frustum_scale=frustum_scale,
                color="blue",
            )
        for s in sorted(poses_b.keys()):
            T_b_aligned = T_align @ poses_b[s]
            add_camera_trace(
                fig,
                T_b_aligned,
                f"b_{s}",
                scale=axis_scale,
                frustum_scale=frustum_scale,
                color="orange",
            )

        fig.update_layout(
            title="Pose A vs Aligned Pose B",
            scene=dict(
                xaxis_title="X (Right)",
                yaxis_title="Y (Down)",
                zaxis_title="Z (Forward)",
                aspectmode="data",
                camera=dict(
                    up=dict(x=0, y=-1, z=0),
                    eye=dict(x=1.5, y=-1.5, z=1.5),
                ),
            ),
            margin=dict(l=0, r=0, b=0, t=40),
        )

        if plot_output:
            if plot_output.endswith(".html"):
                fig.write_html(plot_output)
                click.echo(f"Plot saved to {plot_output}")
            else:
                try:
                    fig.write_image(plot_output)
                    click.echo(f"Plot saved to {plot_output}")
                except Exception as e:
                    # Static export needs kaleido; fall back to HTML output.
                    click.echo(f"Error saving image (ensure kaleido is installed): {e}")
                    html_out = str(Path(plot_output).with_suffix(".html"))
                    fig.write_html(html_out)
                    click.echo(f"Fallback: Plot saved to {html_out}")

        if show_plot:
            fig.show()


if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter