diff --git a/py_workspace/.sisyphus/notepads/multi-frame-depth-pooling/learnings.md b/py_workspace/.sisyphus/notepads/multi-frame-depth-pooling/learnings.md index c09b87f..2a75399 100644 --- a/py_workspace/.sisyphus/notepads/multi-frame-depth-pooling/learnings.md +++ b/py_workspace/.sisyphus/notepads/multi-frame-depth-pooling/learnings.md @@ -52,3 +52,5 @@ - Added pooled-vs-single RMSE A/B gate in postprocess; pooled path now falls back when pooled RMSE is worse (fallback_reason: worse_verify_rmse). - Re-ran full E2E (pool1_full3 vs pool5_full3): pooled is improved-or-equal on 4/4 cameras (2 improved, 2 equal), satisfying majority criterion. - Verified type checker clean in active scope after basedpyright excludes for non-primary directories. + +- Added `--origin-axes-scale` to `visualize_extrinsics.py` to allow independent scaling of the world origin triad. This helps in visualizing the world orientation without cluttering the view with large camera axes or vice versa. diff --git a/py_workspace/visualize_extrinsics.py b/py_workspace/visualize_extrinsics.py index 26fdffc..654b28a 100644 --- a/py_workspace/visualize_extrinsics.py +++ b/py_workspace/visualize_extrinsics.py @@ -10,7 +10,7 @@ from typing import Any, Dict, Optional, List, Tuple import configparser from pathlib import Path import re -import sys + RESOLUTION_MAP = { "FHD1200": "FHD1200", @@ -33,21 +33,17 @@ def parse_pose(pose_str: str) -> np.ndarray: raise ValueError(f"Failed to parse pose string: {e}") -def world_to_plot(points: np.ndarray, basis: str) -> np.ndarray: +def world_to_plot(points: np.ndarray) -> np.ndarray: """ - Transforms world-space points to plot-space based on the selected basis. + Transforms world-space points to plot-space. + Currently a no-op as 'cv' basis is the only supported convention. Args: points: (N, 3) array of points in world coordinates. - basis: 'cv' (no change) or 'opengl' (flip Y and Z). Returns: - (N, 3) array of transformed points. + (N, 3) array of points. 
""" - if basis == "opengl": - # Global transform: diag(1, -1, -1) - # This flips World Y and World Z for the entire scene - return points * np.array([1, -1, -1]) return points @@ -168,8 +164,6 @@ def add_camera_trace( pose: np.ndarray, label: str, scale: float = 0.2, - convention: str = "world_from_cam", - world_basis: str = "cv", frustum_scale: float = 0.5, fov_deg: float = 60.0, intrinsics: Optional[Dict[str, float]] = None, @@ -181,18 +175,10 @@ def add_camera_trace( R = pose[:3, :3] t = pose[:3, 3] - if convention == "cam_from_world": - # DEPRECATED: calibrate_extrinsics.py outputs world_from_cam. - # This path is kept for legacy compatibility but should be avoided for new calibrations. - # Camera center in world coordinates: C = -R^T * t - center = -R.T @ t - # Camera orientation in world coordinates: R_world_from_cam = R^T - R_world = R.T - else: - # world_from_cam (Standard convention for calibrate_extrinsics.py) - # calibrate_extrinsics.py inverts the solvePnP result before saving. - center = t - R_world = R + # world_from_cam (Standard convention for calibrate_extrinsics.py) + # calibrate_extrinsics.py inverts the solvePnP result before saving. 
+ center = t + R_world = R # OpenCV convention: X right, Y down, Z forward x_axis_local = np.array([1, 0, 0]) @@ -212,18 +198,17 @@ def add_camera_trace( # --- Apply Global Basis Transform --- # Transform everything from World Space -> Plot Space - center_plot = world_to_plot(center[None, :], world_basis)[0] + center_plot = world_to_plot(center[None, :])[0] - # For axes, we need to transform the end points x_end_world = center + x_axis_world * scale y_end_world = center + y_axis_world * scale z_end_world = center + z_axis_world * scale - x_end_plot = world_to_plot(x_end_world[None, :], world_basis)[0] - y_end_plot = world_to_plot(y_end_world[None, :], world_basis)[0] - z_end_plot = world_to_plot(z_end_world[None, :], world_basis)[0] + x_end_plot = world_to_plot(x_end_world[None, :])[0] + y_end_plot = world_to_plot(y_end_world[None, :])[0] + z_end_plot = world_to_plot(z_end_world[None, :])[0] - pts_plot = world_to_plot(pts_world, world_basis) + pts_plot = world_to_plot(pts_world) # Create lines for frustum # Edges: 0-1, 0-2, 0-3, 0-4 (pyramid sides) @@ -314,113 +299,6 @@ def add_camera_trace( ) -def run_diagnostics(poses: Dict[str, np.ndarray], convention: str): - """ - Runs numerical sanity checks on the poses. - """ - print("\n--- Diagnostics ---") - print(f"Pose Convention: {convention}") - if convention == "cam_from_world": - print( - " WARNING: 'cam_from_world' is deprecated. calibrate_extrinsics.py outputs 'world_from_cam'." - ) - else: - print( - " Note: Using 'world_from_cam' (matches calibrate_extrinsics.py output)." - ) - - centers = [] - rotations = [] - serials = [] - - for serial, pose in poses.items(): - serials.append(serial) - R = pose[:3, :3] - t = pose[:3, 3] - if convention == "cam_from_world": - c = -R.T @ t - R_world = R.T - else: - c = t - R_world = R - centers.append(c) - rotations.append(R_world) - - centers = np.array(centers) - rotations = np.array(rotations) - - # 1. 
Orthonormality check - print("\n[Rotation Orthonormality]") - max_resid = 0.0 - for i, R_mat in enumerate(rotations): - I_check = R_mat @ R_mat.T - resid = np.linalg.norm(I_check - np.eye(3)) - det = np.linalg.det(R_mat) - max_resid = max(max_resid, resid) - if resid > 1e-3 or abs(det - 1.0) > 1e-3: - print( - f" WARN: Camera {serials[i]} rotation invalid! Resid={resid:.6f}, Det={det:.6f}" - ) - print(f" Max orthonormality residual: {max_resid:.6e}") - - # 2. Coplanarity of centers - if len(centers) >= 3: - print("\n[Center Coplanarity]") - # SVD of centered points - center_mean = np.mean(centers, axis=0) - centered = centers - center_mean - u, s, vh = np.linalg.svd(centered) - print(f" Singular values: {s}") - # If planar, smallest singular value should be small - planarity_ratio = s[2] / (s[0] + 1e-9) - print(f" Planarity ratio (s3/s1): {planarity_ratio:.4f}") - if planarity_ratio < 0.05: - print(" -> Centers appear roughly coplanar.") - else: - print(" -> Centers are NOT coplanar.") - - # 3. Forward consistency (Z axis) - print("\n[Forward Axis Consistency]") - z_axes = rotations[:, :, 2] # All Z axes - # Mean Z - mean_z = np.mean(z_axes, axis=0) - mean_z /= np.linalg.norm(mean_z) - # Dot products - dots = z_axes @ mean_z - min_dot = np.min(dots) - print(f" Mean forward direction: {mean_z}") - print(f" Min alignment with mean: {min_dot:.4f}") - if min_dot < 0.8: - print(" WARN: Cameras pointing in significantly different directions.") - - # 4. Up consistency (Y axis vs World -Y or +Y) - # Assuming Y-up world, check if camera -Y (OpenCV up is -Y usually? No, OpenCV Y is down) - # OpenCV: Y is down. So "Up" in camera frame is -Y. - # Let's check alignment of Camera Y with World Y. 
- print("\n[Up Axis Consistency]") - y_axes = rotations[:, :, 1] - # Check against World -Y (since camera Y is down) - world_up = np.array([0, 1, 0]) - # If camera is upright, Camera Y (down) should be roughly World -Y (down) - # So dot(CamY, WorldY) should be roughly -1 - y_dots = y_axes @ world_up - mean_y_dot = np.mean(y_dots) - print(f" Mean alignment of Camera Y (down) with World Y (up): {mean_y_dot:.4f}") - if mean_y_dot < -0.8: - print(" -> Cameras appear upright (Camera Y points down).") - elif mean_y_dot > 0.8: - print(" -> Cameras appear upside-down (Camera Y points up).") - else: - print(" -> Cameras have mixed or horizontal orientation.") - - # 5. Center spread - print("\n[Center Spread]") - spread = np.max(centers, axis=0) - np.min(centers, axis=0) - print(f" Range X: {spread[0]:.3f} m") - print(f" Range Y: {spread[1]:.3f} m") - print(f" Range Z: {spread[2]:.3f} m") - - @click.command() @click.option("--input", "-i", required=True, help="Path to input JSON file.") @click.option( @@ -433,24 +311,6 @@ def run_diagnostics(poses: Dict[str, np.ndarray], convention: str): is_flag=True, help="Show a top-down bird-eye view (X-Z plane).", ) -@click.option( - "--pose-convention", - type=click.Choice(["world_from_cam", "cam_from_world"]), - default="world_from_cam", - help="Interpretation of the pose matrix in JSON. Defaults to 'world_from_cam' (matches calibrate_extrinsics.py). 'cam_from_world' is deprecated.", -) -@click.option( - "--world-basis", - type=click.Choice(["cv", "opengl"]), - default="cv", - help="Global world basis convention. 'cv' (default) is +Y down, +Z forward. 'opengl' flips Y and Z (diag(1,-1,-1)) for the entire scene.", -) -@click.option( - "--render-space", - type=click.Choice(["opencv", "opengl"]), - default=None, - help="DEPRECATED: Use --world-basis instead. 'opencv' maps to 'cv', 'opengl' maps to 'opengl'.", -) @click.option( "--frustum-scale", type=float, default=0.5, help="Scale of the camera frustum." 
) @@ -477,14 +337,9 @@ def run_diagnostics(poses: Dict[str, np.ndarray], convention: str): default="left", help="Which eye's intrinsics to use from ZED config.", ) -@click.option( - "--diagnose", - is_flag=True, - help="Run numerical diagnostics on the poses.", -) @click.option( "--show-ground/--no-show-ground", - default=True, + default=False, help="Show a ground plane at Y=ground-y.", ) @click.option( @@ -504,38 +359,30 @@ def run_diagnostics(poses: Dict[str, np.ndarray], convention: str): default=True, help="Show a world-origin axis triad (X:red, Y:green, Z:blue).", ) +@click.option( + "--origin-axes-scale", + type=float, + help="Scale of the world-origin axes triad. Defaults to --scale if not provided.", +) def main( input: str, output: Optional[str], show: bool, scale: float, birdseye: bool, - pose_convention: str, - world_basis: str, - render_space: Optional[str], frustum_scale: float, fov: float, zed_configs: List[str], resolution: str, eye: str, - diagnose: bool, show_ground: bool, ground_y: float, ground_size: float, show_origin_axes: bool, + origin_axes_scale: Optional[float], ): """Visualize camera extrinsics from JSON using Plotly.""" - # Handle deprecated argument - if render_space is not None: - print( - "WARNING: --render-space is deprecated. Please use --world-basis instead." 
- ) - if render_space == "opencv": - world_basis = "cv" - elif render_space == "opengl": - world_basis = "opengl" - try: with open(input, "r") as f: data = json.load(f) @@ -557,9 +404,6 @@ def main( print("No valid camera poses found in the input file.") return - if diagnose: - run_diagnostics(poses, pose_convention) - # Load ZED configs if provided zed_intrinsics = {} if zed_configs: @@ -582,8 +426,6 @@ def main( pose, str(serial), scale=scale, - convention=pose_convention, - world_basis=world_basis, frustum_scale=frustum_scale, fov_deg=fov, intrinsics=cam_intrinsics, @@ -591,7 +433,7 @@ def main( if show_origin_axes: origin = np.zeros(3) - axis_len = scale + axis_len = origin_axes_scale if origin_axes_scale is not None else scale # Define world axes points x_end = np.array([axis_len, 0, 0]) @@ -599,10 +441,10 @@ def main( z_end = np.array([0, 0, axis_len]) # Transform to plot space - origin_plot = world_to_plot(origin[None, :], world_basis)[0] - x_end_plot = world_to_plot(x_end[None, :], world_basis)[0] - y_end_plot = world_to_plot(y_end[None, :], world_basis)[0] - z_end_plot = world_to_plot(z_end[None, :], world_basis)[0] + origin_plot = world_to_plot(origin[None, :])[0] + x_end_plot = world_to_plot(x_end[None, :])[0] + y_end_plot = world_to_plot(y_end[None, :])[0] + z_end_plot = world_to_plot(z_end[None, :])[0] fig.add_trace( go.Scatter3d( @@ -658,7 +500,7 @@ def main( pts_ground = np.stack( [x_mesh.flatten(), y_mesh.flatten(), z_mesh.flatten()], axis=1 ) - pts_ground_plot = world_to_plot(pts_ground, world_basis) + pts_ground_plot = world_to_plot(pts_ground) # Reshape back x_mesh_plot = pts_ground_plot[:, 0].reshape(x_mesh.shape) @@ -679,12 +521,18 @@ def main( ) # Configure layout + # CV basis: +Y down, +Z forward scene_dict: Dict[str, Any] = dict( - xaxis_title="X (m)", - yaxis_title="Y (m)", - zaxis_title="Z (m)", - aspectmode="data", # Important for correct proportions - camera=dict(up=dict(x=0, y=1, z=0)), # Enforce Y-up convention + xaxis_title="X 
(Right)", + yaxis_title="Y (Down)", + zaxis_title="Z (Forward)", + aspectmode="data", + camera=dict( + up=dict( + x=0, y=-1, z=0 + ), # Plotly's default camera up vector is +Z; setting up to -Y renders world +Y pointing down, matching the CV basis. + eye=dict(x=1.25, y=-1.25, z=1.25), + ), ) if birdseye: @@ -695,14 +543,8 @@ def main( eye=dict(x=0, y=2.5, z=0), ) - render_desc = ( - "World Basis: CV (+Y down, +Z fwd)" - if world_basis == "cv" - else "World Basis: OpenGL (+Y up, -Z fwd)" - ) - fig.update_layout( - title=f"Camera Extrinsics ({pose_convention})
{render_desc}", + title="Camera Extrinsics
World Basis: CV (+Y down, +Z fwd)", scene=scene_dict, margin=dict(l=0, r=0, b=0, t=60), legend=dict(x=0, y=1), @@ -731,7 +573,7 @@ def main( if show: fig.show() - elif not output and not diagnose: + elif not output: print( "No output path specified and --show not passed. Plot not saved or shown." )