From d6c7829b1e8891c521c8595e695660e035757d73 Mon Sep 17 00:00:00 2001
From: crosstyan <crosstyan@outlook.com>
Date: Sun, 8 Feb 2026 08:07:33 +0000
Subject: [PATCH] feat: add pose-set comparison visualization and clarify
 conventions

---
 py_workspace/compare_pose_sets.py             | 309 +++++++++++++++---
 .../docs/visualization-conventions.md         |  30 ++
 2 files changed, 298 insertions(+), 41 deletions(-)

diff --git a/py_workspace/compare_pose_sets.py b/py_workspace/compare_pose_sets.py
index 78ce5e0..47e09b9 100644
--- a/py_workspace/compare_pose_sets.py
+++ b/py_workspace/compare_pose_sets.py
@@ -10,6 +10,7 @@ from pathlib import Path
 
 import click
 import numpy as np
+import plotly.graph_objects as go
 
 
 def parse_pose(pose_str: str) -> np.ndarray:
@@ -73,18 +74,122 @@ def angle_between_vectors_deg(v1: np.ndarray, v2: np.ndarray) -> float:
     return np.degrees(np.arccos(cos_theta))
 
 
+def add_camera_trace(
+    fig: go.Figure,
+    pose: np.ndarray,
+    label: str,
+    scale: float = 0.2,
+    frustum_scale: float = 0.5,
+    fov_deg: float = 60.0,
+    color: str = "blue",
+):
+    """
+    Draw one camera on ``fig`` (mutated in place): wireframe frustum, labelled center marker, RGB=XYZ axes.
+    """
+    R = pose[:3, :3]
+    center = pose[:3, 3]
+
+    # Camera-local basis vectors, OpenCV convention: X right, Y down, Z forward
+    x_axis_local = np.array([1, 0, 0])
+    y_axis_local = np.array([0, 1, 0])
+    z_axis_local = np.array([0, 0, 1])
+
+    # Rotate local axes into the world frame (directions only, so no translation is added)
+    x_axis_world = R @ x_axis_local
+    y_axis_world = R @ y_axis_local
+    z_axis_world = R @ z_axis_local
+
+    # Frustum in local coordinates: w/h are half-extents of the rectangle at depth frustum_scale
+    fov_rad = np.radians(fov_deg)
+    w = frustum_scale * np.tan(fov_rad / 2.0)
+    h = w * 0.75  # 4:3 aspect ratio assumption
+
+    pts_local = np.array(
+        [
+            [0, 0, 0],  # Center
+            [-w, -h, frustum_scale],  # Top-Left
+            [w, -h, frustum_scale],  # Top-Right
+            [w, h, frustum_scale],  # Bottom-Right
+            [-w, h, frustum_scale],  # Bottom-Left
+        ]
+    )
+
+    # Transform frustum to world: rotate every point by R, then translate by the camera center
+    pts_world = (R @ pts_local.T).T + center
+
+    # Collect segment endpoints for one trace; the None after each pair separates the segments
+    x_lines, y_lines, z_lines = [], [], []
+
+    def add_line(i, j):
+        x_lines.extend([pts_world[i, 0], pts_world[j, 0], None])
+        y_lines.extend([pts_world[i, 1], pts_world[j, 1], None])
+        z_lines.extend([pts_world[i, 2], pts_world[j, 2], None])
+
+    for i in range(1, 5):
+        add_line(0, i)
+    add_line(1, 2)
+    add_line(2, 3)
+    add_line(3, 4)
+    add_line(4, 1)
+
+    fig.add_trace(
+        go.Scatter3d(
+            x=x_lines,
+            y=y_lines,
+            z=z_lines,
+            mode="lines",
+            line=dict(color=color, width=2),
+            name=f"{label} Frustum",
+            showlegend=False,
+            hoverinfo="skip",
+        )
+    )
+
+    # Add center point with label (always black; this is the only trace of the camera shown in the legend)
+    fig.add_trace(
+        go.Scatter3d(
+            x=[center[0]],
+            y=[center[1]],
+            z=[center[2]],
+            mode="markers+text",
+            marker=dict(size=4, color="black"),
+            text=[label],
+            textposition="top center",
+            name=label,
+            showlegend=True,
+        )
+    )
+
+    # Add axes (RGB = XYZ), each drawn from the center along the rotated axis scaled by `scale`
+    for axis_world, axis_color in zip(
+        [x_axis_world, y_axis_world, z_axis_world], ["red", "green", "blue"]
+    ):
+        end = center + axis_world * scale
+        fig.add_trace(
+            go.Scatter3d(
+                x=[center[0], end[0]],
+                y=[center[1], end[1]],
+                z=[center[2], end[2]],
+                mode="lines",
+                line=dict(color=axis_color, width=3),
+                showlegend=False,
+                hoverinfo="skip",
+            )
+        )
+
+
 @click.command()
 @click.option(
-    "--calibration-json",
+    "--pose-a-json",
     type=click.Path(exists=True),
     required=True,
-    help="Calibration output format (serial -> {pose: '...'})",
+    help="Pose set A (serial -> {pose: '...'})",
 )
 @click.option(
-    "--inside-network-json",
+    "--pose-b-json",
     type=click.Path(exists=True),
     required=True,
-    help="inside_network.json nested format",
+    help="Pose set B (serial -> {pose: '...'} or inside_network format)",
 )
 @click.option(
     "--report-json",
@@ -93,38 +198,66 @@ def angle_between_vectors_deg(v1: np.ndarray, v2: np.ndarray) -> float:
     help="Output path for comparison report",
 )
 @click.option(
-    "--aligned-inside-json",
+    "--aligned-pose-b-json",
     type=click.Path(),
-    help="Output path for aligned inside poses",
+    help="Output path for aligned pose B set",
+)
+@click.option(
+    "--plot-output",
+    type=click.Path(),
+    help="Output path for visualization (HTML or PNG)",
+)
+@click.option(
+    "--show-plot",
+    is_flag=True,
+    help="Show the plot interactively",
+)
+@click.option(
+    "--frustum-scale",
+    type=float,
+    default=0.3,
+    help="Scale of the camera frustum",
+)
+@click.option(
+    "--axis-scale",
+    type=float,
+    default=0.1,
+    help="Scale of the camera axes",
 )
 def main(
-    calibration_json: str,
-    inside_network_json: str,
+    pose_a_json: str,
+    pose_b_json: str,
     report_json: str,
-    aligned_inside_json: str | None,
+    aligned_pose_b_json: str | None,
+    plot_output: str | None,
+    show_plot: bool,
+    frustum_scale: float,
+    axis_scale: float,
 ):
     """
     Compare two camera pose sets from different world frames using rigid alignment.
     Both are treated as T_world_from_cam.
     """
-    with open(calibration_json, "r") as f:
-        calib_data = json.load(f)
+    with open(pose_a_json, "r") as f:
+        data_a = json.load(f)
 
-    with open(inside_network_json, "r") as f:
-        inside_data = json.load(f)
+    with open(pose_b_json, "r") as f:
+        data_b = json.load(f)
 
-    calib_poses: dict[str, np.ndarray] = {}
-    for serial, data in calib_data.items():
+    poses_a: dict[str, np.ndarray] = {}
+    for serial, data in data_a.items():
         if "pose" in data:
-            calib_poses[str(serial)] = parse_pose(data["pose"])
+            poses_a[str(serial)] = parse_pose(data["pose"])
 
-    inside_poses: dict[str, np.ndarray] = {}
-    for serial, data in inside_data.items():
-        # inside_network.json has FusionConfiguration nested
+    poses_b: dict[str, np.ndarray] = {}
+    for serial, data in data_b.items():
+        # Support both standard and inside_network.json nested format
         if "FusionConfiguration" in data and "pose" in data["FusionConfiguration"]:
-            inside_poses[str(serial)] = parse_pose(data["FusionConfiguration"]["pose"])
+            poses_b[str(serial)] = parse_pose(data["FusionConfiguration"]["pose"])
+        elif "pose" in data:
+            poses_b[str(serial)] = parse_pose(data["pose"])
 
-    shared_serials = sorted(list(set(calib_poses.keys()) & set(inside_poses.keys())))
+    shared_serials = sorted(list(set(poses_a.keys()) & set(poses_b.keys())))
     if len(shared_serials) < 3:
         click.echo(
             f"Error: Found only {len(shared_serials)} shared serials ({shared_serials}). Need at least 3.",
@@ -132,11 +265,11 @@ def main(
         )
         sys.exit(1)
 
-    pts_inside = np.array([get_camera_center(inside_poses[s]) for s in shared_serials])
-    pts_calib = np.array([get_camera_center(calib_poses[s]) for s in shared_serials])
+    pts_b = np.array([get_camera_center(poses_b[s]) for s in shared_serials])
+    pts_a = np.array([get_camera_center(poses_a[s]) for s in shared_serials])
 
-    # Align inside to calib: R_align * pts_inside + t_align approx pts_calib
-    R_align, t_align = rigid_transform_3d(pts_inside, pts_calib)
+    # Align B to A: R_align * pts_b + t_align approx pts_a
+    R_align, t_align = rigid_transform_3d(pts_b, pts_a)
 
     T_align = np.eye(4)
     T_align[:3, :3] = R_align
@@ -148,21 +281,21 @@ def main(
     up_errors = []
 
     for s in shared_serials:
-        T_inside = inside_poses[s]
-        T_calib = calib_poses[s]
+        T_b = poses_b[s]
+        T_a = poses_a[s]
 
-        # T_world_calib_from_cam = T_world_calib_from_world_inside * T_world_inside_from_cam
-        T_inside_aligned = T_align @ T_inside
+        # T_world_a_from_cam = T_world_a_from_world_b * T_world_b_from_cam
+        T_b_aligned = T_align @ T_b
 
         pos_err = np.linalg.norm(
-            get_camera_center(T_inside_aligned) - get_camera_center(T_calib)
+            get_camera_center(T_b_aligned) - get_camera_center(T_a)
         )
 
-        rot_err = rotation_error_deg(T_inside_aligned[:3, :3], T_calib[:3, :3])
+        rot_err = rotation_error_deg(T_b_aligned[:3, :3], T_a[:3, :3])
 
-        up_inside = get_camera_up(T_inside_aligned)
-        up_calib = get_camera_up(T_calib)
-        up_err = angle_between_vectors_deg(up_inside, up_calib)
+        up_b = get_camera_up(T_b_aligned)
+        up_a = get_camera_up(T_a)
+        up_err = angle_between_vectors_deg(up_b, up_a)
 
         per_cam_results.append(
             {
@@ -200,16 +333,110 @@ def main(
         json.dump(report, f, indent=4)
     click.echo(f"Report written to {report_json}")
 
-    if aligned_inside_json:
+    if aligned_pose_b_json:
         aligned_data = {}
-        for s, T_inside in inside_poses.items():
-            T_inside_aligned = T_align @ T_inside
-            aligned_data[s] = {"pose": serialize_pose(T_inside_aligned)}
+        for s, T_b in poses_b.items():
+            T_b_aligned = T_align @ T_b
+            aligned_data[s] = {"pose": serialize_pose(T_b_aligned)}
 
-        Path(aligned_inside_json).parent.mkdir(parents=True, exist_ok=True)
-        with open(aligned_inside_json, "w") as f:
+        Path(aligned_pose_b_json).parent.mkdir(parents=True, exist_ok=True)
+        with open(aligned_pose_b_json, "w") as f:
             json.dump(aligned_data, f, indent=4)
-        click.echo(f"Aligned inside poses written to {aligned_inside_json}")
+        click.echo(f"Aligned pose B set written to {aligned_pose_b_json}")
+
+    if plot_output or show_plot:
+        fig = go.Figure()
+
+        for axis, color in zip(
+            [np.eye(3)[:, 0], np.eye(3)[:, 1], np.eye(3)[:, 2]],
+            ["red", "green", "blue"],
+        ):
+            fig.add_trace(
+                go.Scatter3d(
+                    x=[0, axis[0] * axis_scale * 2],
+                    y=[0, axis[1] * axis_scale * 2],
+                    z=[0, axis[2] * axis_scale * 2],
+                    mode="lines",
+                    line=dict(color=color, width=4),
+                    name=f"World {'XYZ'[np.argmax(axis)]}",
+                    showlegend=True,
+                )
+            )
+
+        show_ground = False
+        if show_ground:
+            ground_size = 5.0
+            half_size = ground_size / 2.0
+            x_grid = np.linspace(-half_size, half_size, 2)
+            z_grid = np.linspace(-half_size, half_size, 2)
+            x_mesh, z_mesh = np.meshgrid(x_grid, z_grid)
+            y_mesh = np.zeros_like(x_mesh)
+            fig.add_trace(
+                go.Surface(
+                    x=x_mesh,
+                    y=y_mesh,
+                    z=z_mesh,
+                    showscale=False,
+                    opacity=0.1,
+                    colorscale=[[0, "gray"], [1, "gray"]],
+                    name="Ground Plane",
+                    hoverinfo="skip",
+                )
+            )
+
+        for s in sorted(poses_a.keys()):
+            add_camera_trace(
+                fig,
+                poses_a[s],
+                f"a_{s}",
+                scale=axis_scale,
+                frustum_scale=frustum_scale,
+                color="blue",
+            )
+
+        for s in sorted(poses_b.keys()):
+            T_b_aligned = T_align @ poses_b[s]
+            add_camera_trace(
+                fig,
+                T_b_aligned,
+                f"b_{s}",
+                scale=axis_scale,
+                frustum_scale=frustum_scale,
+                color="orange",
+            )
+
+        fig.update_layout(
+            title="Pose A vs Aligned Pose B",
+            scene=dict(
+                xaxis_title="X (Right)",
+                yaxis_title="Y (Down)",
+                zaxis_title="Z (Forward)",
+                aspectmode="data",
+                camera=dict(
+                    up=dict(x=0, y=-1, z=0),
+                    eye=dict(x=1.5, y=-1.5, z=1.5),
+                ),
+            ),
+            margin=dict(l=0, r=0, b=0, t=40),
+        )
+
+        if plot_output:
+            if plot_output.endswith(".html"):
+                fig.write_html(plot_output)
+                click.echo(f"Plot saved to {plot_output}")
+            else:
+                try:
+                    fig.write_image(plot_output)
+                    click.echo(f"Plot saved to {plot_output}")
+                except Exception as e:
+                    click.echo(f"Error saving image (ensure kaleido is installed): {e}")
+                    if not plot_output.endswith(".html"):
+                        html_out = str(Path(plot_output).with_suffix(".html"))
+                        fig.write_html(html_out)
+                        click.echo(f"Fallback: Plot saved to {html_out}")
+
+        if show_plot:
+            fig.show()
 
 
 if __name__ == "__main__":
diff --git a/py_workspace/docs/visualization-conventions.md b/py_workspace/docs/visualization-conventions.md
index 1dbbb75..5c879b9 100644
--- a/py_workspace/docs/visualization-conventions.md
+++ b/py_workspace/docs/visualization-conventions.md
@@ -322,6 +322,36 @@ system's origin.
 
 ---
 
+## Methodology: Comparing Different World Frames
+
+Since `inside_network.json` (Fusion) and `calibrate_extrinsics.py` (ArUco) express poses
+in different world frames (different origin and axis orientation), comparing raw
+coordinates is meaningless. We validated consistency using **rigid SE(3) alignment**:
+
+1.  **Match Serials**: Identify cameras present in both JSON files.
+2.  **Extract Centers**: Extract the translation column `t` from `T_world_from_cam` for
+    each camera.
+    *   **Crucial**: Both systems use `T_world_from_cam`. It is **not** `cam_from_world`.
+3.  **Compute Alignment**: Solve for the rigid transform `(R_align, t_align)` that
+    minimizes the sum of squared distances between matched camera centers (Kabsch algorithm).
+    *   Scale is fixed at 1.0 (both systems use meters).
+4.  **Apply & Compare**:
+    *   Transform Fusion points: `P_aligned = R_align * P_fusion + t_align`.
+    *   **Position Residual**: `|| P_aruco - P_aligned ||`.
+    *   **Orientation Check**: Apply `R_align` to Fusion rotation matrices and compare
+        column vectors (Right/Down/Forward) with ArUco rotations.
+5.  **Up-Vector Verification**:
+    *   Fusion uses Y-Up (gravity). ArUco uses Y-Down (image).
+    *   After alignment, the transformed Fusion Y-axis should be approximately
+        anti-parallel to the ArUco Y-axis (or parallel, depending on the specific
+        alignment solution found); in either case both must be collinear with gravity.
+
+**Result**: The overlay images in `output/` were generated using this aligned frame.
+The low position residuals (< 2 cm) confirm that the internal calibration is consistent,
+even though the absolute world coordinates differ.
+
+---
+
 ## Appendix: Stale README References
 
 The following lines in `py_workspace/README.md` reference removed flags and should be