refactor: things
This commit is contained in:
@@ -0,0 +1,502 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare two camera pose sets from different world frames using rigid alignment.
|
||||
Assumes both pose sets are in world_from_cam convention.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
import click
|
||||
import numpy as np
|
||||
import plotly.graph_objects as go
|
||||
|
||||
|
||||
def parse_pose(pose_str: str, context: str = "") -> np.ndarray:
    """
    Parse 16 whitespace-separated floats into a validated 4x4 rigid transform.

    Raises ValueError (prefixed with *context*) when the value count is wrong,
    the last row is not [0, 0, 0, 1], or the 3x3 block is not a proper rotation.
    """
    values = [float(token) for token in pose_str.split()]
    if len(values) != 16:
        raise ValueError(f"[{context}] Expected 16 values for pose, got {len(values)}")
    pose = np.array(values).reshape((4, 4))

    # Homogeneous-transform sanity: bottom row must be [0, 0, 0, 1].
    bottom = pose[3, :]
    if not np.allclose(bottom, np.array([0, 0, 0, 1], dtype=float), atol=1e-5):
        raise ValueError(
            f"[{context}] Invalid last row in transformation matrix: {bottom}. "
            f"Expected [0, 0, 0, 1]"
        )

    # Rotation block must be orthonormal: R @ R.T == I within tolerance.
    rotation = pose[:3, :3]
    if not np.allclose(rotation @ rotation.T, np.eye(3), atol=1e-3):
        raise ValueError(
            f"[{context}] Rotation block is not orthonormal (R @ R.T != I)."
        )

    # det(R) == +1 rules out reflections and scaling.
    det = np.linalg.det(rotation)
    if not np.allclose(det, 1.0, atol=1e-3):
        raise ValueError(
            f"[{context}] Rotation block determinant is {det:.6f}, expected 1.0 (improper rotation or scaling)."
        )

    return pose
|
||||
|
||||
|
||||
def load_poses_from_json(path: str) -> dict[str, np.ndarray]:
    """
    Heuristically load camera poses from a JSON file.

    Two layouts are recognized, checked in this order per entry:
      1) nested Fusion: {"serial": {"FusionConfiguration": {"pose": "..."}}}
      2) flat: {"serial": {"pose": "..."}}

    Raises click.UsageError when no entry yields a parsable pose.
    """
    with open(path, "r") as f:
        data = json.load(f)

    poses: dict[str, np.ndarray] = {}
    for serial, entry in data.items():
        # Non-dict entries carry no pose information; skip them.
        if not isinstance(entry, dict):
            continue

        context = f"File: {path}, Serial: {serial}"

        # A dict-valued FusionConfiguration takes precedence over a flat
        # "pose" key — even when it lacks a "pose" of its own.
        fusion = entry.get("FusionConfiguration")
        if isinstance(fusion, dict):
            if "pose" in fusion:
                poses[str(serial)] = parse_pose(fusion["pose"], context=context)
        elif "pose" in entry:
            poses[str(serial)] = parse_pose(entry["pose"], context=context)

    if not poses:
        raise click.UsageError(
            f"No parsable poses found in {path}.\n"
            "Expected formats:\n"
            '  1) Flat: {"serial": {"pose": "..."}}\n'
            '  2) Nested: {"serial": {"FusionConfiguration": {"pose": "..."}}}'
        )
    return poses
|
||||
|
||||
|
||||
def serialize_pose(pose: np.ndarray) -> str:
    """Flatten a pose row-major into a space-separated string, 6 decimals per value."""
    return " ".join(format(value, ".6f") for value in pose.flatten())
|
||||
|
||||
|
||||
def rigid_transform_3d(A: np.ndarray, B: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Compute the rigid alignment (R, t) with R @ a + t ~= b for matched rows.

    A and B are corresponding (N, 3) point sets. Uses the Kabsch algorithm:
    SVD of the cross-covariance of the centered clouds.
    """
    assert A.shape == B.shape
    mean_a = A.mean(axis=0)
    mean_b = B.mean(axis=0)

    # Cross-covariance of the centered point clouds.
    centered_a = A - mean_a
    centered_b = B - mean_b
    cov = centered_a.T @ centered_b

    U, _, Vt = np.linalg.svd(cov)
    rotation = Vt.T @ U.T

    # det < 0 means SVD returned a reflection; flip the last row of Vt
    # (smallest singular value) to recover a proper rotation.
    if np.linalg.det(rotation) < 0:
        Vt[2, :] *= -1
        rotation = Vt.T @ U.T

    translation = mean_b - rotation @ mean_a
    return rotation, translation
|
||||
|
||||
|
||||
def get_camera_center(pose: np.ndarray) -> np.ndarray:
    """Return the camera center, i.e. the translation column of a world_from_cam pose."""
    translation = pose[:3, 3]
    return translation
|
||||
|
||||
|
||||
def get_camera_up(pose: np.ndarray) -> np.ndarray:
    """
    World-space "up" direction of the camera.

    OpenCV convention puts +Y down in the image, so up is the negated
    second column (Y axis) of the rotation block.
    """
    return -1.0 * pose[:3, 1]
|
||||
|
||||
|
||||
def rotation_error_deg(R1: np.ndarray, R2: np.ndarray) -> float:
    """Geodesic angle in degrees between two 3x3 rotation matrices."""
    relative = R1.T @ R2
    # trace(R) = 1 + 2*cos(theta); clip guards arccos against float noise.
    cos_angle = np.clip((np.trace(relative) - 1.0) / 2.0, -1.0, 1.0)
    return np.degrees(np.arccos(cos_angle))
|
||||
|
||||
|
||||
def angle_between_vectors_deg(v1: np.ndarray, v2: np.ndarray) -> float:
    """Unsigned angle in degrees between two 3D vectors (magnitude-independent)."""
    unit_a = v1 / np.linalg.norm(v1)
    unit_b = v2 / np.linalg.norm(v2)
    # Clip guards arccos against tiny float overshoot past +/-1.
    cos_angle = np.clip(np.dot(unit_a, unit_b), -1.0, 1.0)
    return np.degrees(np.arccos(cos_angle))
|
||||
|
||||
|
||||
def add_camera_trace(
    fig: go.Figure,
    pose: np.ndarray,
    label: str,
    scale: float = 0.2,
    frustum_scale: float = 0.5,
    fov_deg: float = 60.0,
    color: str = "blue",
):
    """
    Draw one camera on *fig*: a wireframe frustum, a labeled center marker,
    and RGB segments for the camera's local X/Y/Z axes.

    *pose* is world_from_cam in OpenCV convention (X right, Y down, Z forward).
    """
    rotation = pose[:3, :3]
    center = pose[:3, 3]

    # World-space camera axes: rotation applied to the local unit vectors.
    axes_world = [
        rotation @ np.array(unit)
        for unit in ([1, 0, 0], [0, 1, 0], [0, 0, 1])
    ]

    # Image-plane half extents at depth `frustum_scale` (4:3 aspect assumed).
    half_w = frustum_scale * np.tan(np.radians(fov_deg) / 2.0)
    half_h = half_w * 0.75

    corners_local = np.array(
        [
            [0, 0, 0],                          # apex at the camera center
            [-half_w, -half_h, frustum_scale],  # top-left
            [half_w, -half_h, frustum_scale],   # top-right
            [half_w, half_h, frustum_scale],    # bottom-right
            [-half_w, half_h, frustum_scale],   # bottom-left
        ]
    )
    # (R @ P.T).T == P @ R.T — transform all corners to world coordinates.
    corners_world = corners_local @ rotation.T + center

    # Four apex-to-corner rays, then the image-plane rectangle.
    edges = [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4), (4, 1)]
    xs: list = []
    ys: list = []
    zs: list = []
    for i, j in edges:
        xs += [corners_world[i, 0], corners_world[j, 0], None]
        ys += [corners_world[i, 1], corners_world[j, 1], None]
        zs += [corners_world[i, 2], corners_world[j, 2], None]

    fig.add_trace(
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode="lines",
            line=dict(color=color, width=2),
            name=f"{label} Frustum",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Labeled marker at the optical center.
    fig.add_trace(
        go.Scatter3d(
            x=[center[0]],
            y=[center[1]],
            z=[center[2]],
            mode="markers+text",
            marker=dict(size=4, color="black"),
            text=[label],
            textposition="top center",
            name=label,
            showlegend=True,
        )
    )

    # Local axes rendered as RGB = XYZ segments of length `scale`.
    for direction, axis_color in zip(axes_world, ["red", "green", "blue"]):
        tip = center + direction * scale
        fig.add_trace(
            go.Scatter3d(
                x=[center[0], tip[0]],
                y=[center[1], tip[1]],
                z=[center[2], tip[2]],
                mode="lines",
                line=dict(color=axis_color, width=3),
                showlegend=False,
                hoverinfo="skip",
            )
        )
|
||||
|
||||
|
||||
@click.command()
@click.option(
    "--pose-a-json",
    type=click.Path(exists=True),
    required=True,
    help="Pose set A. Supports flat {'serial': {'pose': '...'}} or nested FusionConfiguration format.",
)
@click.option(
    "--pose-b-json",
    type=click.Path(exists=True),
    required=True,
    help="Pose set B. Supports flat {'serial': {'pose': '...'}} or nested FusionConfiguration format.",
)
@click.option(
    "--report-json",
    type=click.Path(),
    required=True,
    help="Output path for comparison report",
)
@click.option(
    "--aligned-pose-b-json",
    type=click.Path(),
    help="Output path for aligned pose B set",
)
@click.option(
    "--plot-output",
    type=click.Path(),
    help="Output path for visualization (HTML or PNG)",
)
@click.option(
    "--show-plot",
    is_flag=True,
    default=False,
    help="Show the plot interactively",
)
@click.option(
    "--frustum-scale",
    type=float,
    default=0.3,
    help="Scale of the camera frustum",
)
@click.option(
    "--axis-scale",
    type=float,
    default=0.1,
    help="Scale of the camera axes",
)
def main(
    pose_a_json: str,
    pose_b_json: str,
    report_json: str,
    aligned_pose_b_json: str | None,
    plot_output: str | None,
    show_plot: bool,
    frustum_scale: float,
    axis_scale: float,
):
    """
    Compare two camera pose sets from different world frames using rigid alignment.
    Both are treated as T_world_from_cam.

    Supports symmetric, heuristic input parsing for both A and B:
    1) flat: {"serial": {"pose": "..."}}
    2) nested Fusion: {"serial": {"FusionConfiguration": {"pose": "..."}}}
    """
    poses_a = load_poses_from_json(pose_a_json)
    poses_b = load_poses_from_json(pose_b_json)

    # Serials are the join key; rigid alignment needs >= 3 correspondences.
    # (sorted() already returns a list — no need for an intermediate list().)
    shared_serials = sorted(set(poses_a) & set(poses_b))
    if len(shared_serials) < 3:
        click.echo(
            f"Error: Found only {len(shared_serials)} shared serials ({shared_serials}). Need at least 3.",
            err=True,
        )
        sys.exit(1)

    pts_b = np.array([get_camera_center(poses_b[s]) for s in shared_serials])
    pts_a = np.array([get_camera_center(poses_a[s]) for s in shared_serials])

    # Align B to A: R_align @ pts_b + t_align approx pts_a
    R_align, t_align = rigid_transform_3d(pts_b, pts_a)

    # Pack (R, t) into a homogeneous 4x4 so it composes with the pose matrices.
    T_align = np.eye(4)
    T_align[:3, :3] = R_align
    T_align[:3, 3] = t_align

    per_cam_results = []
    pos_errors = []
    rot_errors = []
    up_errors = []

    for s in shared_serials:
        T_b = poses_b[s]
        T_a = poses_a[s]

        # T_world_a_from_cam = T_world_a_from_world_b * T_world_b_from_cam
        T_b_aligned = T_align @ T_b

        pos_err = np.linalg.norm(
            get_camera_center(T_b_aligned) - get_camera_center(T_a)
        )
        rot_err = rotation_error_deg(T_b_aligned[:3, :3], T_a[:3, :3])

        # Up-vector agreement is a gravity-direction sanity check on top of
        # the full rotation error.
        up_b = get_camera_up(T_b_aligned)
        up_a = get_camera_up(T_a)
        up_err = angle_between_vectors_deg(up_b, up_a)

        per_cam_results.append(
            {
                "serial": s,
                "position_error_m": float(pos_err),
                "rotation_error_deg": float(rot_err),
                "up_consistency_error_deg": float(up_err),
            }
        )

        pos_errors.append(pos_err)
        rot_errors.append(rot_err)
        up_errors.append(up_err)

    report = {
        "shared_serials": shared_serials,
        "alignment": {
            "R_align": R_align.tolist(),
            "t_align": t_align.tolist(),
            "T_align": T_align.tolist(),
        },
        "per_camera": per_cam_results,
        "summary": {
            "mean_position_error_m": float(np.mean(pos_errors)),
            "max_position_error_m": float(np.max(pos_errors)),
            "mean_rotation_error_deg": float(np.mean(rot_errors)),
            "max_rotation_error_deg": float(np.max(rot_errors)),
            "mean_up_consistency_error_deg": float(np.mean(up_errors)),
            "max_up_consistency_error_deg": float(np.max(up_errors)),
        },
    }

    Path(report_json).parent.mkdir(parents=True, exist_ok=True)
    with open(report_json, "w") as f:
        json.dump(report, f, indent=4)
    click.echo(f"Report written to {report_json}")

    if aligned_pose_b_json:
        # All B poses (not just the shared ones) re-expressed in A's world frame.
        aligned_data = {}
        for s, T_b in poses_b.items():
            T_b_aligned = T_align @ T_b
            aligned_data[s] = {"pose": serialize_pose(T_b_aligned)}

        Path(aligned_pose_b_json).parent.mkdir(parents=True, exist_ok=True)
        with open(aligned_pose_b_json, "w") as f:
            json.dump(aligned_data, f, indent=4)
        click.echo(f"Aligned pose B set written to {aligned_pose_b_json}")

    if plot_output or show_plot:
        fig = go.Figure()

        # Debug toggle: draw the world-frame axes at the origin.
        show_axis: Final[bool] = True
        if show_axis:
            for axis, color in zip(
                [np.eye(3)[:, 0], np.eye(3)[:, 1], np.eye(3)[:, 2]],
                ["red", "green", "blue"],
            ):
                fig.add_trace(
                    go.Scatter3d(
                        x=[0, axis[0] * axis_scale],
                        y=[0, axis[1] * axis_scale],
                        z=[0, axis[2] * axis_scale],
                        mode="lines",
                        line=dict(color=color, width=4),
                        name=f"World {'XYZ'[np.argmax(axis)]}",
                        showlegend=True,
                    )
                )

        # Debug toggle: translucent ground plane at y=0 (off by default).
        show_ground: Final[bool] = False
        if show_ground:
            ground_size = 5.0
            half_size = ground_size / 2.0
            x_grid = np.linspace(-half_size, half_size, 2)
            z_grid = np.linspace(-half_size, half_size, 2)
            x_mesh, z_mesh = np.meshgrid(x_grid, z_grid)
            y_mesh = np.zeros_like(x_mesh)
            fig.add_trace(
                go.Surface(
                    x=x_mesh,
                    y=y_mesh,
                    z=z_mesh,
                    showscale=False,
                    opacity=0.1,
                    colorscale=[[0, "gray"], [1, "gray"]],
                    name="Ground Plane",
                    hoverinfo="skip",
                )
            )

        # Set A in blue, aligned set B in orange.
        for s in sorted(poses_a.keys()):
            add_camera_trace(
                fig,
                poses_a[s],
                f"a_{s}",
                scale=axis_scale,
                frustum_scale=frustum_scale,
                color="blue",
            )

        for s in sorted(poses_b.keys()):
            T_b_aligned = T_align @ poses_b[s]
            add_camera_trace(
                fig,
                T_b_aligned,
                f"b_{s}",
                scale=axis_scale,
                frustum_scale=frustum_scale,
                color="orange",
            )

        fig.update_layout(
            title="Pose A vs Aligned Pose B",
            scene=dict(
                xaxis_title="X (Right)",
                yaxis_title="Y (Down)",
                zaxis_title="Z (Forward)",
                aspectmode="data",
                camera=dict(
                    up=dict(x=0, y=-1, z=0),
                    eye=dict(x=1.5, y=-1.5, z=1.5),
                ),
            ),
            margin=dict(l=0, r=0, b=0, t=40),
        )

        if plot_output:
            if plot_output.endswith(".html"):
                fig.write_html(plot_output)
                click.echo(f"Plot saved to {plot_output}")
            else:
                try:
                    fig.write_image(plot_output)
                    click.echo(f"Plot saved to {plot_output}")
                except Exception as e:
                    click.echo(f"Error saving image (ensure kaleido is installed): {e}")
                    # We are in the non-.html branch, so the previous
                    # `if not plot_output.endswith(".html")` guard here was
                    # always true — fall back to HTML unconditionally.
                    html_out = str(Path(plot_output).with_suffix(".html"))
                    fig.write_html(html_out)
                    click.echo(f"Fallback: Plot saved to {html_out}")

        if show_plot:
            fig.show()
|
||||
|
||||
|
||||
# CLI entry point: click parses sys.argv itself, so the bare main() call is
# correct despite the function's declared parameters (hence the pylint disable).
if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter
|
||||
Reference in New Issue
Block a user