feat: add --origin-axes-scale to visualize_extrinsics.py

2026-02-08 05:39:07 +00:00
parent 79f2ab04dc
commit 351c6bfa67
2 changed files with 42 additions and 198 deletions
@@ -52,3 +52,5 @@
 - Added pooled-vs-single RMSE A/B gate in postprocess; pooled path now falls back when pooled RMSE is worse (fallback_reason: worse_verify_rmse).
 - Re-ran full E2E (pool1_full3 vs pool5_full3): pooled is improved-or-equal on 4/4 cameras (2 improved, 2 equal), satisfying majority criterion.
 - Verified type checker clean in active scope after basedpyright excludes for non-primary directories.
+
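+- Added `--origin-axes-scale` to `visualize_extrinsics.py` so the world-origin triad can be scaled independently of the per-camera axes (`--scale`); when the option is omitted, the triad falls back to `--scale`. This keeps the world orientation readable without enlarging the camera axes, and keeps the camera axes readable without enlarging the origin triad.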
@@ -10,7 +10,7 @@ from typing import Any, Dict, Optional, List, Tuple
 import configparser
 from pathlib import Path
 import re
-import sys
+

 RESOLUTION_MAP = {
    "FHD1200": "FHD1200",
@@ -33,21 +33,17 @@ def parse_pose(pose_str: str) -> np.ndarray:
        raise ValueError(f"Failed to parse pose string: {e}")


-def world_to_plot(points: np.ndarray, basis: str) -> np.ndarray:
+def world_to_plot(points: np.ndarray) -> np.ndarray:
    """
-    Transforms world-space points to plot-space based on the selected basis.
+    Transforms world-space points to plot-space.
+    Currently a no-op as 'cv' basis is the only supported convention.

    Args:
        points: (N, 3) array of points in world coordinates.
-        basis: 'cv' (no change) or 'opengl' (flip Y and Z).

    Returns:
-        (N, 3) array of transformed points.
+        (N, 3) array of points.
    """
-    if basis == "opengl":
-        # Global transform: diag(1, -1, -1)
-        # This flips World Y and World Z for the entire scene
-        return points * np.array([1, -1, -1])
    return points


@@ -168,8 +164,6 @@ def add_camera_trace(
    pose: np.ndarray,
    label: str,
    scale: float = 0.2,
-    convention: str = "world_from_cam",
-    world_basis: str = "cv",
    frustum_scale: float = 0.5,
    fov_deg: float = 60.0,
    intrinsics: Optional[Dict[str, float]] = None,
@@ -181,18 +175,10 @@ def add_camera_trace(
    R = pose[:3, :3]
    t = pose[:3, 3]

-    if convention == "cam_from_world":
-        # DEPRECATED: calibrate_extrinsics.py outputs world_from_cam.
-        # This path is kept for legacy compatibility but should be avoided for new calibrations.
-        # Camera center in world coordinates: C = -R^T * t
-        center = -R.T @ t
-        # Camera orientation in world coordinates: R_world_from_cam = R^T
-        R_world = R.T
-    else:
-        # world_from_cam (Standard convention for calibrate_extrinsics.py)
-        # calibrate_extrinsics.py inverts the solvePnP result before saving.
-        center = t
-        R_world = R
+    # world_from_cam (Standard convention for calibrate_extrinsics.py)
+    # calibrate_extrinsics.py inverts the solvePnP result before saving.
+    center = t
+    R_world = R

    # OpenCV convention: X right, Y down, Z forward
    x_axis_local = np.array([1, 0, 0])
@@ -212,18 +198,17 @@ def add_camera_trace(

    # --- Apply Global Basis Transform ---
    # Transform everything from World Space -> Plot Space
-    center_plot = world_to_plot(center[None, :], world_basis)[0]
+    center_plot = world_to_plot(center[None, :])[0]

-    # For axes, we need to transform the end points
    x_end_world = center + x_axis_world * scale
    y_end_world = center + y_axis_world * scale
    z_end_world = center + z_axis_world * scale

-    x_end_plot = world_to_plot(x_end_world[None, :], world_basis)[0]
-    y_end_plot = world_to_plot(y_end_world[None, :], world_basis)[0]
-    z_end_plot = world_to_plot(z_end_world[None, :], world_basis)[0]
+    x_end_plot = world_to_plot(x_end_world[None, :])[0]
+    y_end_plot = world_to_plot(y_end_world[None, :])[0]
+    z_end_plot = world_to_plot(z_end_world[None, :])[0]

-    pts_plot = world_to_plot(pts_world, world_basis)
+    pts_plot = world_to_plot(pts_world)

    # Create lines for frustum
    # Edges: 0-1, 0-2, 0-3, 0-4 (pyramid sides)
@@ -314,113 +299,6 @@ def add_camera_trace(
    )


-def run_diagnostics(poses: Dict[str, np.ndarray], convention: str):
-    """
-    Runs numerical sanity checks on the poses.
-    """
-    print("\n--- Diagnostics ---")
-    print(f"Pose Convention: {convention}")
-    if convention == "cam_from_world":
-        print(
-            "  WARNING: 'cam_from_world' is deprecated. calibrate_extrinsics.py outputs 'world_from_cam'."
-        )
-    else:
-        print(
-            "  Note: Using 'world_from_cam' (matches calibrate_extrinsics.py output)."
-        )
-
-    centers = []
-    rotations = []
-    serials = []
-
-    for serial, pose in poses.items():
-        serials.append(serial)
-        R = pose[:3, :3]
-        t = pose[:3, 3]
-        if convention == "cam_from_world":
-            c = -R.T @ t
-            R_world = R.T
-        else:
-            c = t
-            R_world = R
-        centers.append(c)
-        rotations.append(R_world)
-
-    centers = np.array(centers)
-    rotations = np.array(rotations)
-
-    # 1. Orthonormality check
-    print("\n[Rotation Orthonormality]")
-    max_resid = 0.0
-    for i, R_mat in enumerate(rotations):
-        I_check = R_mat @ R_mat.T
-        resid = np.linalg.norm(I_check - np.eye(3))
-        det = np.linalg.det(R_mat)
-        max_resid = max(max_resid, resid)
-        if resid > 1e-3 or abs(det - 1.0) > 1e-3:
-            print(
-                f"  WARN: Camera {serials[i]} rotation invalid! Resid={resid:.6f}, Det={det:.6f}"
-            )
-    print(f"  Max orthonormality residual: {max_resid:.6e}")
-
-    # 2. Coplanarity of centers
-    if len(centers) >= 3:
-        print("\n[Center Coplanarity]")
-        # SVD of centered points
-        center_mean = np.mean(centers, axis=0)
-        centered = centers - center_mean
-        u, s, vh = np.linalg.svd(centered)
-        print(f"  Singular values: {s}")
-        # If planar, smallest singular value should be small
-        planarity_ratio = s[2] / (s[0] + 1e-9)
-        print(f"  Planarity ratio (s3/s1): {planarity_ratio:.4f}")
-        if planarity_ratio < 0.05:
-            print("  -> Centers appear roughly coplanar.")
-        else:
-            print("  -> Centers are NOT coplanar.")
-
-    # 3. Forward consistency (Z axis)
-    print("\n[Forward Axis Consistency]")
-    z_axes = rotations[:, :, 2]  # All Z axes
-    # Mean Z
-    mean_z = np.mean(z_axes, axis=0)
-    mean_z /= np.linalg.norm(mean_z)
-    # Dot products
-    dots = z_axes @ mean_z
-    min_dot = np.min(dots)
-    print(f"  Mean forward direction: {mean_z}")
-    print(f"  Min alignment with mean: {min_dot:.4f}")
-    if min_dot < 0.8:
-        print("  WARN: Cameras pointing in significantly different directions.")
-
-    # 4. Up consistency (Y axis vs World -Y or +Y)
-    # Assuming Y-up world, check if camera -Y (OpenCV up is -Y usually? No, OpenCV Y is down)
-    # OpenCV: Y is down. So "Up" in camera frame is -Y.
-    # Let's check alignment of Camera Y with World Y.
-    print("\n[Up Axis Consistency]")
-    y_axes = rotations[:, :, 1]
-    # Check against World -Y (since camera Y is down)
-    world_up = np.array([0, 1, 0])
-    # If camera is upright, Camera Y (down) should be roughly World -Y (down)
-    # So dot(CamY, WorldY) should be roughly -1
-    y_dots = y_axes @ world_up
-    mean_y_dot = np.mean(y_dots)
-    print(f"  Mean alignment of Camera Y (down) with World Y (up): {mean_y_dot:.4f}")
-    if mean_y_dot < -0.8:
-        print("  -> Cameras appear upright (Camera Y points down).")
-    elif mean_y_dot > 0.8:
-        print("  -> Cameras appear upside-down (Camera Y points up).")
-    else:
-        print("  -> Cameras have mixed or horizontal orientation.")
-
-    # 5. Center spread
-    print("\n[Center Spread]")
-    spread = np.max(centers, axis=0) - np.min(centers, axis=0)
-    print(f"  Range X: {spread[0]:.3f} m")
-    print(f"  Range Y: {spread[1]:.3f} m")
-    print(f"  Range Z: {spread[2]:.3f} m")
-
-
@click.command()
@click.option("--input", "-i", required=True, help="Path to input JSON file.")
@click.option(
@@ -433,24 +311,6 @@ def run_diagnostics(poses: Dict[str, np.ndarray], convention: str):
    is_flag=True,
    help="Show a top-down bird-eye view (X-Z plane).",
 )
-@click.option(
-    "--pose-convention",
-    type=click.Choice(["world_from_cam", "cam_from_world"]),
-    default="world_from_cam",
-    help="Interpretation of the pose matrix in JSON. Defaults to 'world_from_cam' (matches calibrate_extrinsics.py). 'cam_from_world' is deprecated.",
-)
-@click.option(
-    "--world-basis",
-    type=click.Choice(["cv", "opengl"]),
-    default="cv",
-    help="Global world basis convention. 'cv' (default) is +Y down, +Z forward. 'opengl' flips Y and Z (diag(1,-1,-1)) for the entire scene.",
-)
-@click.option(
-    "--render-space",
-    type=click.Choice(["opencv", "opengl"]),
-    default=None,
-    help="DEPRECATED: Use --world-basis instead. 'opencv' maps to 'cv', 'opengl' maps to 'opengl'.",
-)
@click.option(
    "--frustum-scale", type=float, default=0.5, help="Scale of the camera frustum."
 )
@@ -477,14 +337,9 @@ def run_diagnostics(poses: Dict[str, np.ndarray], convention: str):
    default="left",
    help="Which eye's intrinsics to use from ZED config.",
 )
-@click.option(
-    "--diagnose",
-    is_flag=True,
-    help="Run numerical diagnostics on the poses.",
-)
@click.option(
    "--show-ground/--no-show-ground",
-    default=True,
+    default=False,
    help="Show a ground plane at Y=ground-y.",
 )
@click.option(
@@ -504,38 +359,30 @@ def run_diagnostics(poses: Dict[str, np.ndarray], convention: str):
    default=True,
    help="Show a world-origin axis triad (X:red, Y:green, Z:blue).",
 )
+@click.option(
+    "--origin-axes-scale",
+    type=float,
+    help="Scale of the world-origin axes triad. Defaults to --scale if not provided.",
+)
 def main(
    input: str,
    output: Optional[str],
    show: bool,
    scale: float,
    birdseye: bool,
-    pose_convention: str,
-    world_basis: str,
-    render_space: Optional[str],
    frustum_scale: float,
    fov: float,
    zed_configs: List[str],
    resolution: str,
    eye: str,
-    diagnose: bool,
    show_ground: bool,
    ground_y: float,
    ground_size: float,
    show_origin_axes: bool,
+    origin_axes_scale: Optional[float],
 ):
    """Visualize camera extrinsics from JSON using Plotly."""

-    # Handle deprecated argument
-    if render_space is not None:
-        print(
-            "WARNING: --render-space is deprecated. Please use --world-basis instead."
-        )
-        if render_space == "opencv":
-            world_basis = "cv"
-        elif render_space == "opengl":
-            world_basis = "opengl"
-
    try:
        with open(input, "r") as f:
            data = json.load(f)
@@ -557,9 +404,6 @@ def main(
        print("No valid camera poses found in the input file.")
        return

-    if diagnose:
-        run_diagnostics(poses, pose_convention)
-
    # Load ZED configs if provided
    zed_intrinsics = {}
    if zed_configs:
@@ -582,8 +426,6 @@ def main(
            pose,
            str(serial),
            scale=scale,
-            convention=pose_convention,
-            world_basis=world_basis,
            frustum_scale=frustum_scale,
            fov_deg=fov,
            intrinsics=cam_intrinsics,
@@ -591,7 +433,7 @@ def main(

    if show_origin_axes:
        origin = np.zeros(3)
-        axis_len = scale
+        axis_len = origin_axes_scale if origin_axes_scale is not None else scale

        # Define world axes points
        x_end = np.array([axis_len, 0, 0])
@@ -599,10 +441,10 @@ def main(
        z_end = np.array([0, 0, axis_len])

        # Transform to plot space
-        origin_plot = world_to_plot(origin[None, :], world_basis)[0]
-        x_end_plot = world_to_plot(x_end[None, :], world_basis)[0]
-        y_end_plot = world_to_plot(y_end[None, :], world_basis)[0]
-        z_end_plot = world_to_plot(z_end[None, :], world_basis)[0]
+        origin_plot = world_to_plot(origin[None, :])[0]
+        x_end_plot = world_to_plot(x_end[None, :])[0]
+        y_end_plot = world_to_plot(y_end[None, :])[0]
+        z_end_plot = world_to_plot(z_end[None, :])[0]

        fig.add_trace(
            go.Scatter3d(
@@ -658,7 +500,7 @@ def main(
        pts_ground = np.stack(
            [x_mesh.flatten(), y_mesh.flatten(), z_mesh.flatten()], axis=1
        )
-        pts_ground_plot = world_to_plot(pts_ground, world_basis)
+        pts_ground_plot = world_to_plot(pts_ground)

        # Reshape back
        x_mesh_plot = pts_ground_plot[:, 0].reshape(x_mesh.shape)
@@ -679,12 +521,18 @@ def main(
        )

    # Configure layout
+    # CV basis: +Y down, +Z forward
    scene_dict: Dict[str, Any] = dict(
-        xaxis_title="X (m)",
-        yaxis_title="Y (m)",
-        zaxis_title="Z (m)",
-        aspectmode="data",  # Important for correct proportions
-        camera=dict(up=dict(x=0, y=1, z=0)),  # Enforce Y-up convention
+        xaxis_title="X (Right)",
+        yaxis_title="Y (Down)",
+        zaxis_title="Z (Forward)",
+        aspectmode="data",
+        camera=dict(
+            up=dict(
+                x=0, y=-1, z=0
+            ),  # Plotly's default camera up vector is +Z; setting up to -Y renders world +Y pointing down on screen.
+            eye=dict(x=1.25, y=-1.25, z=1.25),
+        ),
    )

    if birdseye:
@@ -695,14 +543,8 @@ def main(
            eye=dict(x=0, y=2.5, z=0),
        )

-    render_desc = (
-        "World Basis: CV (+Y down, +Z fwd)"
-        if world_basis == "cv"
-        else "World Basis: OpenGL (+Y up, -Z fwd)"
-    )
-
    fig.update_layout(
-        title=f"Camera Extrinsics ({pose_convention})<br><sup>{render_desc}</sup>",
+        title="Camera Extrinsics<br><sup>World Basis: CV (+Y down, +Z fwd)</sup>",
        scene=scene_dict,
        margin=dict(l=0, r=0, b=0, t=60),
        legend=dict(x=0, y=1),
@@ -731,7 +573,7 @@ def main(

    if show:
        fig.show()
-    elif not output and not diagnose:
+    elif not output:
        print(
            "No output path specified and --show not passed. Plot not saved or shown."
        )