"""
Utility script to visualize camera extrinsics from a JSON file using Plotly.
"""

import configparser
import json
import re
import sys
from pathlib import Path
from typing import Any, Dict, Optional, List, Tuple

import click
import numpy as np
import plotly.graph_objects as go

# Resolution names accepted by --resolution. The value is the suffix used in
# the ZED config section name (e.g. "LEFT_CAM_FHD1200").
RESOLUTION_MAP = {
    "FHD1200": "FHD1200",
    "FHD": "FHD",
    "2K": "2K",
    "HD": "HD",
    "SVGA": "SVGA",
    "VGA": "VGA",
}


def parse_pose(pose_str: str) -> np.ndarray:
    """Parse a string of 16 whitespace-separated floats into a 4x4 matrix.

    Args:
        pose_str: 16 numbers separated by whitespace; they fill the matrix
            row by row (NumPy's default reshape order).

    Returns:
        A (4, 4) float array.

    Raises:
        ValueError: If the string does not contain exactly 16 parseable
            floats. The original error is chained as the cause.
    """
    try:
        vals = [float(x) for x in pose_str.split()]
        if len(vals) != 16:
            raise ValueError(f"Expected 16 values, got {len(vals)}")
        return np.array(vals).reshape((4, 4))
    except Exception as e:
        raise ValueError(f"Failed to parse pose string: {e}") from e


def world_to_plot(points: np.ndarray, basis: str) -> np.ndarray:
    """
    Transforms world-space points to plot-space based on the selected basis.

    Args:
        points: (N, 3) array of points in world coordinates.
        basis: 'cv' (no change) or 'opengl' (flip Y and Z).

    Returns:
        (N, 3) array of transformed points.
    """
    if basis == "opengl":
        # Global transform diag(1, -1, -1): flips world Y and world Z for the
        # entire scene.
        return points * np.array([1, -1, -1])
    return points


def load_zed_configs(
    paths: List[str], resolution: str, eye: str
) -> Dict[str, Dict[str, float]]:
    """
    Loads ZED intrinsics from config files.

    Args:
        paths: Config file paths and/or directories. Directories are searched
            for files matching ``SN*.conf``.
        resolution: Resolution name; mapped to the section suffix.
        eye: 'left' or 'right'; upper-cased to form the section prefix.

    Returns:
        A mapping from serial (string) to an intrinsics dict with keys
        ``fx``, ``fy``, ``cx``, ``cy``. When exactly one file was given and it
        contained the requested section, the result is ``{"all": intrinsics}``.
        Otherwise the mapping also carries a ``"default"`` entry holding the
        last successfully parsed intrinsics.
    """
    configs: Dict[str, Dict[str, float]] = {}
    eye_prefix = eye.upper()

    # Map resolution to section suffix; unknown names are upper-cased as-is.
    res_map = {
        "1200": "FHD1200",
        "fhd": "FHD",
        "2k": "2K",
        "hd": "HD",
        "svga": "SVGA",
        "vga": "VGA",
    }
    res_suffix = res_map.get(resolution.lower(), resolution.upper())
    section_name = f"{eye_prefix}_CAM_{res_suffix}"

    all_files: List[Path] = []
    for p in paths:
        path = Path(p)
        if path.is_dir():
            # Sorted so that the "default" entry does not depend on the
            # filesystem's directory listing order.
            all_files.extend(sorted(path.glob("SN*.conf")))
        else:
            all_files.append(path)

    for f in all_files:
        # Extract serial from a filename of the form SN<digits>.conf
        match = re.search(r"SN(\d+)", f.name)
        serial = match.group(1) if match else None

        parser = configparser.ConfigParser()
        try:
            # ConfigParser.read() silently skips files it cannot open and
            # returns the list of files it did read.
            if not parser.read(f):
                print(f"Warning: Could not read config {f}")
                continue
            if section_name in parser:
                sect = parser[section_name]
                intrinsics = {
                    "fx": float(sect.get("fx", 0)),
                    "fy": float(sect.get("fy", 0)),
                    "cx": float(sect.get("cx", 0)),
                    "cy": float(sect.get("cy", 0)),
                }
                if serial:
                    configs[serial] = intrinsics
                # Always store as default in case it's the only file
                configs["default"] = intrinsics
        except (configparser.Error, OSError, ValueError) as e:
            print(f"Warning: Failed to parse config {f}: {e}")

    # If only one config was provided, apply to all
    if len(all_files) == 1 and "default" in configs:
        return {"all": configs["default"]}
    return configs


def get_frustum_points(
    intrinsics: Optional[Dict[str, float]],
    frustum_scale: float,
    fov_deg: float,
) -> np.ndarray:
    """
    Returns 5 points in local camera coordinates: center + 4 corners of the
    far plane.

    Local coordinates: forward is +Z, right is +X, down is +Y (OpenCV
    convention).

    Args:
        intrinsics: Optional dict with ``fx``, ``fy``, ``cx``, ``cy``. Used
            only when all four are present and strictly positive; otherwise
            the FOV fallback is used (this also avoids dividing by a zero
            focal length that was defaulted from a missing config key).
        frustum_scale: Z distance of the far plane from the camera center.
        fov_deg: Horizontal field of view in degrees for the fallback, which
            assumes a 4:3 aspect ratio.

    Returns:
        (5, 3) array: center, top-left, top-right, bottom-right, bottom-left.
    """
    keys = ("fx", "fy", "cx", "cy")
    if intrinsics and all(intrinsics.get(k, 0) > 0 for k in keys):
        fx, fy = intrinsics["fx"], intrinsics["fy"]
        cx, cy = intrinsics["cx"], intrinsics["cy"]
        # The far plane sits at Z = frustum_scale, where
        #   x = (u - cx) * Z / fx,  y = (v - cy) * Z / fy.
        # Image dimensions are not available here, so the half extents are
        # approximated from the principal point (assumed near the center).
        w = (cx / fx) * frustum_scale
        h = (cy / fy) * frustum_scale
    else:
        fov_rad = np.radians(fov_deg)
        # Assuming horizontal FOV
        w = frustum_scale * np.tan(fov_rad / 2.0)
        h = w * 0.75  # 4:3 aspect ratio assumption

    # +Y is down in the OpenCV convention, so -h is the top edge.
    pts_local = np.array(
        [
            [0, 0, 0],  # Center
            [-w, -h, frustum_scale],  # Top-Left
            [w, -h, frustum_scale],  # Top-Right
            [w, h, frustum_scale],  # Bottom-Right
            [-w, h, frustum_scale],  # Bottom-Left
        ]
    )
    return pts_local


def add_camera_trace(
    fig: go.Figure,
    pose: np.ndarray,
    label: str,
    scale: float = 0.2,
    convention: str = "world_from_cam",
    world_basis: str = "cv",
    frustum_scale: float = 0.5,
    fov_deg: float = 60.0,
    intrinsics: Optional[Dict[str, float]] = None,
    color: str = "blue",
):
    """
    Adds a camera frustum and axes to the Plotly figure.

    Traces are added in this order: frustum wireframe, labelled center
    marker, then the X (red), Y (green) and Z (blue) camera axes.

    Args:
        fig: Figure to add traces to (modified in place).
        pose: 4x4 pose matrix; rotation is ``pose[:3, :3]``, translation is
            ``pose[:3, 3]``.
        label: Text shown at the camera center and in the legend.
        scale: Length of the drawn camera axes.
        convention: 'world_from_cam' (default) or the deprecated
            'cam_from_world', in which case the pose is inverted.
        world_basis: Passed to ``world_to_plot`` ('cv' or 'opengl').
        frustum_scale: Depth of the frustum far plane.
        fov_deg: Horizontal FOV used when no usable intrinsics are given.
        intrinsics: Optional ``fx``/``fy``/``cx``/``cy`` dict.
        color: Line color of the frustum wireframe.
    """
    R = pose[:3, :3]
    t = pose[:3, 3]

    if convention == "cam_from_world":
        # DEPRECATED: calibrate_extrinsics.py outputs world_from_cam.
        # This path is kept for legacy compatibility but should be avoided
        # for new calibrations.
        # Camera center in world coordinates: C = -R^T * t
        center = -R.T @ t
        # Camera orientation in world coordinates: R_world_from_cam = R^T
        R_world = R.T
    else:
        # world_from_cam (standard convention for calibrate_extrinsics.py,
        # which inverts the solvePnP result before saving).
        center = t
        R_world = R

    # Frustum points in local coordinates (OpenCV: +Z fwd, +X right, +Y down)
    pts_local = get_frustum_points(intrinsics, frustum_scale, fov_deg)

    # Transform frustum to world, then world -> plot space
    pts_world = (R_world @ pts_local.T).T + center
    pts_plot = world_to_plot(pts_world, world_basis)
    center_plot = world_to_plot(center[None, :], world_basis)[0]

    # Frustum edges: 0-i are the pyramid sides, the rest form the base.
    edges = [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4), (4, 1)]
    x_lines: List[Any] = []
    y_lines: List[Any] = []
    z_lines: List[Any] = []
    for i, j in edges:
        # A None entry breaks the polyline so each edge is drawn separately.
        x_lines.extend([pts_plot[i, 0], pts_plot[j, 0], None])
        y_lines.extend([pts_plot[i, 1], pts_plot[j, 1], None])
        z_lines.extend([pts_plot[i, 2], pts_plot[j, 2], None])

    # Add frustum trace
    fig.add_trace(
        go.Scatter3d(
            x=x_lines,
            y=y_lines,
            z=z_lines,
            mode="lines",
            line=dict(color=color, width=2),
            name=f"{label} Frustum",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Add center point with label
    fig.add_trace(
        go.Scatter3d(
            x=[center_plot[0]],
            y=[center_plot[1]],
            z=[center_plot[2]],
            mode="markers+text",
            marker=dict(size=4, color="black"),
            text=[label],
            textposition="top center",
            name=label,
            showlegend=True,
        )
    )

    # Add axes (RGB = XYZ). The columns of R_world are the camera's local
    # X/Y/Z unit axes expressed in world coordinates. The end points (not the
    # directions) are transformed to plot space.
    for axis_world, axis_color in zip(R_world.T, ("red", "green", "blue")):
        end_world = center + axis_world * scale
        end_plot = world_to_plot(end_world[None, :], world_basis)[0]
        fig.add_trace(
            go.Scatter3d(
                x=[center_plot[0], end_plot[0]],
                y=[center_plot[1], end_plot[1]],
                z=[center_plot[2], end_plot[2]],
                mode="lines",
                line=dict(color=axis_color, width=3),
                showlegend=False,
                hoverinfo="skip",
            )
        )


def run_diagnostics(poses: Dict[str, np.ndarray], convention: str):
    """
    Runs numerical sanity checks on the poses and prints the results.

    Checks: rotation orthonormality, coplanarity of camera centers (needs at
    least 3 cameras), forward (Z) axis consistency, camera Y alignment with
    world +Y, and the per-axis spread of camera centers.

    Args:
        poses: Mapping from camera serial to 4x4 pose matrix.
        convention: 'world_from_cam' or 'cam_from_world' (deprecated).
    """
    print("\n--- Diagnostics ---")
    print(f"Pose Convention: {convention}")
    if convention == "cam_from_world":
        print(
            " WARNING: 'cam_from_world' is deprecated. calibrate_extrinsics.py outputs 'world_from_cam'."
        )
    else:
        print(
            " Note: Using 'world_from_cam' (matches calibrate_extrinsics.py output)."
        )

    if not poses:
        # The array slicing below needs at least one pose.
        print(" No poses to diagnose.")
        return

    centers = []
    rotations = []
    serials = []
    for serial, pose in poses.items():
        serials.append(serial)
        R = pose[:3, :3]
        t = pose[:3, 3]
        if convention == "cam_from_world":
            c = -R.T @ t
            R_world = R.T
        else:
            c = t
            R_world = R
        centers.append(c)
        rotations.append(R_world)

    centers = np.array(centers)
    rotations = np.array(rotations)

    # 1. Orthonormality check
    print("\n[Rotation Orthonormality]")
    max_resid = 0.0
    for serial, R_mat in zip(serials, rotations):
        resid = np.linalg.norm(R_mat @ R_mat.T - np.eye(3))
        det = np.linalg.det(R_mat)
        max_resid = max(max_resid, resid)
        if resid > 1e-3 or abs(det - 1.0) > 1e-3:
            print(
                f" WARN: Camera {serial} rotation invalid! Resid={resid:.6f}, Det={det:.6f}"
            )
    print(f" Max orthonormality residual: {max_resid:.6e}")

    # 2. Coplanarity of centers
    if len(centers) >= 3:
        print("\n[Center Coplanarity]")
        # Singular values of the mean-centered points; only the values are
        # needed, so the U/V factors are not computed.
        centered = centers - np.mean(centers, axis=0)
        s = np.linalg.svd(centered, compute_uv=False)
        print(f" Singular values: {s}")
        # If planar, smallest singular value should be small
        planarity_ratio = s[2] / (s[0] + 1e-9)
        print(f" Planarity ratio (s3/s1): {planarity_ratio:.4f}")
        if planarity_ratio < 0.05:
            print(" -> Centers appear roughly coplanar.")
        else:
            print(" -> Centers are NOT coplanar.")

    # 3. Forward consistency (Z axis)
    print("\n[Forward Axis Consistency]")
    z_axes = rotations[:, :, 2]  # All Z axes
    mean_z = np.mean(z_axes, axis=0)
    mean_z_norm = np.linalg.norm(mean_z)
    if mean_z_norm < 1e-9:
        # Opposing cameras can cancel out; normalising would divide by zero.
        print(" WARN: Forward axes cancel out; mean direction is undefined.")
    else:
        mean_z /= mean_z_norm
        dots = z_axes @ mean_z
        min_dot = np.min(dots)
        print(f" Mean forward direction: {mean_z}")
        print(f" Min alignment with mean: {min_dot:.4f}")
        if min_dot < 0.8:
            print(" WARN: Cameras pointing in significantly different directions.")

    # 4. Up consistency (camera Y axis vs world +Y)
    # Camera Y points down in the OpenCV convention. Assuming a Y-up world,
    # an upright camera has dot(CamY, WorldY) close to -1.
    print("\n[Up Axis Consistency]")
    y_axes = rotations[:, :, 1]
    world_up = np.array([0, 1, 0])
    y_dots = y_axes @ world_up
    mean_y_dot = np.mean(y_dots)
    print(f" Mean alignment of Camera Y (down) with World Y (up): {mean_y_dot:.4f}")
    if mean_y_dot < -0.8:
        print(" -> Cameras appear upright (Camera Y points down).")
    elif mean_y_dot > 0.8:
        print(" -> Cameras appear upside-down (Camera Y points up).")
    else:
        print(" -> Cameras have mixed or horizontal orientation.")

    # 5. Center spread
    print("\n[Center Spread]")
    spread = np.max(centers, axis=0) - np.min(centers, axis=0)
    print(f" Range X: {spread[0]:.3f} m")
    print(f" Range Y: {spread[1]:.3f} m")
    print(f" Range Z: {spread[2]:.3f} m")


@click.command()
@click.option("--input", "-i", required=True, help="Path to input JSON file.")
@click.option(
    "--output", "-o", help="Path to save the output visualization (HTML or PNG)."
)
@click.option("--show", is_flag=True, help="Show the plot interactively.")
@click.option("--scale", type=float, default=0.2, help="Scale of the camera axes.")
@click.option(
    "--birdseye",
    is_flag=True,
    help="Show a top-down bird-eye view (X-Z plane).",
)
@click.option(
    "--pose-convention",
    type=click.Choice(["world_from_cam", "cam_from_world"]),
    default="world_from_cam",
    help="Interpretation of the pose matrix in JSON. Defaults to 'world_from_cam' (matches calibrate_extrinsics.py). 'cam_from_world' is deprecated.",
)
@click.option(
    "--world-basis",
    type=click.Choice(["cv", "opengl"]),
    default="cv",
    help="Global world basis convention. 'cv' (default) is +Y down, +Z forward. 'opengl' flips Y and Z (diag(1,-1,-1)) for the entire scene.",
)
@click.option(
    "--render-space",
    type=click.Choice(["opencv", "opengl"]),
    default=None,
    help="DEPRECATED: Use --world-basis instead. 'opencv' maps to 'cv', 'opengl' maps to 'opengl'.",
)
@click.option(
    "--frustum-scale", type=float, default=0.5, help="Scale of the camera frustum."
)
@click.option(
    "--fov",
    type=float,
    default=60.0,
    help="Horizontal FOV in degrees for frustum visualization.",
)
@click.option(
    "--zed-configs",
    multiple=True,
    help="Path to ZED config file(s) or directory containing SN*.conf files.",
)
@click.option(
    "--resolution",
    # click.Choice expects a sequence, so pass a list rather than a dict view.
    type=click.Choice(list(RESOLUTION_MAP)),
    default="FHD1200",
    help="Resolution suffix to use from ZED config.",
)
@click.option(
    "--eye",
    type=click.Choice(["left", "right"]),
    default="left",
    help="Which eye's intrinsics to use from ZED config.",
)
@click.option(
    "--diagnose",
    is_flag=True,
    help="Run numerical diagnostics on the poses.",
)
@click.option(
    "--show-ground/--no-show-ground",
    default=True,
    help="Show a ground plane at Y=ground-y.",
)
@click.option(
    "--ground-y",
    type=float,
    default=0.0,
    help="Y height of the ground plane.",
)
@click.option(
    "--ground-size",
    type=float,
    default=8.0,
    help="Size of the ground plane (side length in meters).",
)
@click.option(
    "--show-origin-axes/--no-show-origin-axes",
    default=True,
    help="Show a world-origin axis triad (X:red, Y:green, Z:blue).",
)
def main(
    input: str,
    output: Optional[str],
    show: bool,
    scale: float,
    birdseye: bool,
    pose_convention: str,
    world_basis: str,
    render_space: Optional[str],
    frustum_scale: float,
    fov: float,
    zed_configs: List[str],
    resolution: str,
    eye: str,
    diagnose: bool,
    show_ground: bool,
    ground_y: float,
    ground_size: float,
    show_origin_axes: bool,
):
    """Visualize camera extrinsics from JSON using Plotly."""
    # NOTE: the docstring above is what click prints for --help; keep
    # implementation notes in comments instead.

    # Handle deprecated argument: --render-space overrides --world-basis.
    if render_space is not None:
        print(
            "WARNING: --render-space is deprecated. Please use --world-basis instead."
        )
        if render_space == "opencv":
            world_basis = "cv"
        elif render_space == "opengl":
            world_basis = "opengl"

    try:
        with open(input, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Error reading input file: {e}")
        return

    if not isinstance(data, dict):
        # The loop below needs a {serial: {"pose": ...}} mapping.
        print("Error reading input file: top-level JSON value must be an object.")
        return

    # Parse poses; entries without a "pose" field are ignored silently.
    poses = {}
    for serial, cam_data in data.items():
        if not isinstance(cam_data, dict) or "pose" not in cam_data:
            continue
        try:
            poses[serial] = parse_pose(str(cam_data["pose"]))
        except ValueError as e:
            print(f"Warning: Skipping camera {serial} due to error: {e}")

    if not poses:
        print("No valid camera poses found in the input file.")
        return

    if diagnose:
        run_diagnostics(poses, pose_convention)

    # Load ZED configs if provided
    zed_intrinsics: Dict[str, Dict[str, float]] = {}
    if zed_configs:
        zed_intrinsics = load_zed_configs(list(zed_configs), resolution, eye)
        matched_count = sum(
            1
            for serial in poses
            if "all" in zed_intrinsics or serial in zed_intrinsics
        )
        print(
            f"ZED Configs: matched {matched_count}/{len(poses)} cameras (fallback: {len(poses) - matched_count})"
        )

    # Create Plotly figure
    fig = go.Figure()

    for serial, pose in poses.items():
        # A single shared config ("all") takes precedence over per-serial ones.
        cam_intrinsics = zed_intrinsics.get("all") or zed_intrinsics.get(str(serial))
        add_camera_trace(
            fig,
            pose,
            str(serial),
            scale=scale,
            convention=pose_convention,
            world_basis=world_basis,
            frustum_scale=frustum_scale,
            fov_deg=fov,
            intrinsics=cam_intrinsics,
        )

    if show_origin_axes:
        origin_plot = world_to_plot(np.zeros(3)[None, :], world_basis)[0]
        # World axis triad; end points are transformed to plot space.
        world_axes = (
            ("World X", "red", np.array([scale, 0, 0])),
            ("World Y", "green", np.array([0, scale, 0])),
            ("World Z", "blue", np.array([0, 0, scale])),
        )
        for axis_name, axis_color, axis_end in world_axes:
            end_plot = world_to_plot(axis_end[None, :], world_basis)[0]
            fig.add_trace(
                go.Scatter3d(
                    x=[origin_plot[0], end_plot[0]],
                    y=[origin_plot[1], end_plot[1]],
                    z=[origin_plot[2], end_plot[2]],
                    mode="lines",
                    line=dict(color=axis_color, width=4),
                    name=axis_name,
                    legendgroup="Origin",
                    showlegend=True,
                    hoverinfo="text",
                    text=axis_name,
                )
            )

    if show_ground:
        half_size = ground_size / 2.0
        # A 2x2 grid (four corners) is enough for a flat quad.
        x_grid = np.linspace(-half_size, half_size, 2)
        z_grid = np.linspace(-half_size, half_size, 2)
        x_mesh, z_mesh = np.meshgrid(x_grid, z_grid)
        y_mesh = np.full_like(x_mesh, ground_y)

        # Flatten for transformation
        pts_ground = np.stack(
            [x_mesh.flatten(), y_mesh.flatten(), z_mesh.flatten()], axis=1
        )
        pts_ground_plot = world_to_plot(pts_ground, world_basis)

        # Reshape back
        x_mesh_plot = pts_ground_plot[:, 0].reshape(x_mesh.shape)
        y_mesh_plot = pts_ground_plot[:, 1].reshape(y_mesh.shape)
        z_mesh_plot = pts_ground_plot[:, 2].reshape(z_mesh.shape)

        fig.add_trace(
            go.Surface(
                x=x_mesh_plot,
                y=y_mesh_plot,
                z=z_mesh_plot,
                showscale=False,
                opacity=0.15,
                colorscale=[[0, "gray"], [1, "gray"]],
                name="Ground Plane",
                hoverinfo="skip",
            )
        )

    # Configure layout
    scene_dict: Dict[str, Any] = dict(
        xaxis_title="X (m)",
        yaxis_title="Y (m)",
        zaxis_title="Z (m)",
        aspectmode="data",  # Important for correct proportions
        camera=dict(up=dict(x=0, y=1, z=0)),  # Enforce Y-up convention
    )

    if birdseye:
        # For birdseye, we force top-down view (looking down +Y towards X-Z plane)
        scene_dict["camera"] = dict(
            projection=dict(type="orthographic"),
            up=dict(x=0, y=0, z=1),  # World +Z is 'up' on screen
            eye=dict(x=0, y=2.5, z=0),
        )

    render_desc = (
        "World Basis: CV (+Y down, +Z fwd)"
        if world_basis == "cv"
        else "World Basis: OpenGL (+Y up, -Z fwd)"
    )

    fig.update_layout(
        # Plotly titles take HTML-style <br> for a line break; a raw newline
        # inside this single-quoted f-string would be a syntax error.
        title=f"Camera Extrinsics ({pose_convention})<br>{render_desc}",
        scene=scene_dict,
        margin=dict(l=0, r=0, b=0, t=60),
        legend=dict(x=0, y=1),
    )

    if output:
        # Match extensions case-insensitively so e.g. "plot.PNG" is written
        # as an image rather than "plot.PNG.html".
        output_lower = output.lower()
        if output_lower.endswith(".html"):
            fig.write_html(output)
            print(f"Saved interactive plot to {output}")
        elif output_lower.endswith((".png", ".jpg", ".jpeg")):
            try:
                # Requires kaleido
                fig.write_image(output)
                print(f"Saved static image to {output}")
            except Exception as e:
                print(f"Error saving image (ensure kaleido is installed): {e}")
        else:
            # Default to HTML if unknown extension
            out_path = output + ".html"
            fig.write_html(out_path)
            print(f"Saved interactive plot to {out_path}")

    if show:
        fig.show()
    elif not output and not diagnose:
        print(
            "No output path specified and --show not passed. Plot not saved or shown."
        )


if __name__ == "__main__":
    # pylint: disable=no-value-for-parameter
    main()