From dc2d34dd7cb4aff21a5847e9bf7a99914523ac81 Mon Sep 17 00:00:00 2001 From: crosstyan Date: Sun, 8 Feb 2026 07:38:23 +0000 Subject: [PATCH] docs: add visualization conventions and update visualizer defaults --- py_workspace/.beads/issues.jsonl | 4 +- .../notepads/auto-align-fix/decisions.md | 6 + .../notepads/auto-align-fix/learnings.md | 6 + .../ground-plane-analysis/decisions.md | 3 + .../ground-plane-analysis/learnings.md | 5 + .../visualization-conventions/learnings.md | 17 + py_workspace/README.md | 10 +- .../docs/visualization-conventions.md | 334 ++++++++++++++++++ py_workspace/visualize_extrinsics.py | 14 +- 9 files changed, 392 insertions(+), 7 deletions(-) create mode 100644 py_workspace/.sisyphus/notepads/auto-align-fix/decisions.md create mode 100644 py_workspace/.sisyphus/notepads/auto-align-fix/learnings.md create mode 100644 py_workspace/.sisyphus/notepads/ground-plane-analysis/decisions.md create mode 100644 py_workspace/.sisyphus/notepads/ground-plane-analysis/learnings.md create mode 100644 py_workspace/.sisyphus/notepads/visualization-conventions/learnings.md create mode 100644 py_workspace/docs/visualization-conventions.md diff --git a/py_workspace/.beads/issues.jsonl b/py_workspace/.beads/issues.jsonl index 8ad4c87..5164a43 100644 --- a/py_workspace/.beads/issues.jsonl +++ b/py_workspace/.beads/issues.jsonl @@ -5,7 +5,7 @@ {"id":"py_workspace-62y","title":"Fix depth pooling fallback threshold","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T08:12:12.046607198Z","created_by":"crosstyan","updated_at":"2026-02-07T08:13:12.98625698Z","closed_at":"2026-02-07T08:13:12.98625698Z","close_reason":"Updated fallback threshold to strict comparison"} {"id":"py_workspace-6m5","title":"Robust Optimizer 
Implementation","status":"closed","priority":0,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T05:22:45.183574374Z","created_by":"crosstyan","updated_at":"2026-02-07T05:22:53.151871639Z","closed_at":"2026-02-07T05:22:53.151871639Z","close_reason":"Implemented robust optimizer with least_squares and soft_l1 loss, updated tests"} {"id":"py_workspace-6sg","title":"Document marker parquet structure","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T02:48:08.95742431Z","created_by":"crosstyan","updated_at":"2026-02-07T02:49:35.897152691Z","closed_at":"2026-02-07T02:49:35.897152691Z","close_reason":"Documented parquet structure in aruco/markers/PARQUET_FORMAT.md"} -{"id":"py_workspace-7ul","title":"Implement global world-basis conversion for Plotly visualization","status":"open","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T17:30:41.94482545Z","created_by":"crosstyan","updated_at":"2026-02-07T17:30:41.94482545Z"} +{"id":"py_workspace-7ul","title":"Implement global world-basis conversion for Plotly visualization","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T17:30:41.94482545Z","created_by":"crosstyan","updated_at":"2026-02-07T17:38:39.56245337Z","closed_at":"2026-02-07T17:38:39.56245337Z","close_reason":"Implemented global world-basis conversion"} {"id":"py_workspace-98p","title":"Integrate multi-frame depth pooling into calibrate_extrinsics.py","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T07:59:35.333468652Z","created_by":"crosstyan","updated_at":"2026-02-07T08:06:37.662956356Z","closed_at":"2026-02-07T08:06:37.662956356Z","close_reason":"Implemented multi-frame depth pooling and verified with tests"} {"id":"py_workspace-a85","title":"Add CLI option for ArUco dictionary in 
calibrate_extrinsics.py","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-06T10:13:41.896728814Z","created_by":"crosstyan","updated_at":"2026-02-07T07:29:52.290976525Z","closed_at":"2026-02-07T07:29:52.290976525Z","close_reason":"Implemented multi-frame depth pooling in calibrate_extrinsics.py"} {"id":"py_workspace-afh","title":"Inspect tmp_visualizer.html camera layout","notes":"Inspected tmp_visualizer.html. cam_0 is at (0,0,0). cam_1 is at (1,0,0). cam_2 is at (0, 0.5, 1.0). Axes are RGB=XYZ. Layout matches expected synthetic geometry.","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T15:40:04.162565539Z","created_by":"crosstyan","updated_at":"2026-02-07T15:42:10.721124074Z","closed_at":"2026-02-07T15:42:10.721124074Z","close_reason":"Inspection complete. Layout matches synthetic input."} @@ -16,11 +16,13 @@ {"id":"py_workspace-kpa","title":"Unit Hardening (P0)","status":"closed","priority":0,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T05:01:46.342605011Z","created_by":"crosstyan","updated_at":"2026-02-07T05:01:51.303022101Z","closed_at":"2026-02-07T05:01:51.303022101Z","close_reason":"Implemented unit hardening in SVOReader: set coordinate_units=METER and guarded manual conversion in _retrieve_depth. 
Added depth sanity logs."} {"id":"py_workspace-kuy","title":"Move parquet documentation to docs/","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T02:52:12.609090777Z","created_by":"crosstyan","updated_at":"2026-02-07T02:52:43.088520272Z","closed_at":"2026-02-07T02:52:43.088520272Z","close_reason":"Moved parquet documentation to docs/marker-parquet-format.md"} {"id":"py_workspace-ld1","title":"Search for depth unit conversion and scaling patterns","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T04:53:53.211242053Z","created_by":"crosstyan","updated_at":"2026-02-07T04:54:56.840335809Z","closed_at":"2026-02-07T04:54:56.840335809Z","close_reason":"Exhaustive search completed. Identified manual scaling in svo_sync.py and SDK-level scaling in depth_sensing.py. Documented risks in learnings.md."} +{"id":"py_workspace-nlu","title":"Produce A/B visualization comparison for CV world basis","status":"open","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-08T03:50:56.386223999Z","created_by":"crosstyan","updated_at":"2026-02-08T03:50:56.386223999Z"} {"id":"py_workspace-nvw","title":"Update documentation for robust depth refinement","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T05:41:32.963615133Z","created_by":"crosstyan","updated_at":"2026-02-07T05:43:55.707975317Z","closed_at":"2026-02-07T05:43:55.707975317Z","close_reason":"Documentation updated with robust refinement details"} {"id":"py_workspace-q4w","title":"Add type hints and folder-aware --svo input in calibrate_extrinsics.py","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-06T10:01:13.943518267Z","created_by":"crosstyan","updated_at":"2026-02-06T10:03:09.855307397Z","closed_at":"2026-02-06T10:03:09.855307397Z","close_reason":"Implemented type 
hints and directory expansion for --svo"} {"id":"py_workspace-q8j","title":"Add script to visualize generated camera extrinsics","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T08:22:35.151648893Z","created_by":"crosstyan","updated_at":"2026-02-07T08:27:27.034717788Z","closed_at":"2026-02-07T08:27:27.034717788Z","close_reason":"Implemented visualize_extrinsics.py utility script and verified with example data."} {"id":"py_workspace-qf9","title":"Implement RMSE-based fallback for depth pooling","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T09:03:17.759148159Z","created_by":"crosstyan","updated_at":"2026-02-07T09:06:33.106901615Z","closed_at":"2026-02-07T09:06:33.106901615Z","close_reason":"Implemented RMSE-based fallback and verified with tests"} {"id":"py_workspace-t4e","title":"Add --min-markers CLI and rejection debug logs in calibrate_extrinsics","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-06T10:21:51.846079425Z","created_by":"crosstyan","updated_at":"2026-02-06T10:22:39.870440044Z","closed_at":"2026-02-06T10:22:39.870440044Z","close_reason":"Added --min-markers (default 1), rejection debug logs, and clarified accepted-pose summary label"} {"id":"py_workspace-th3","title":"Implement Best-Frame Selection for depth verification","status":"closed","priority":1,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T05:04:11.896109458Z","created_by":"crosstyan","updated_at":"2026-02-07T05:06:07.346747231Z","closed_at":"2026-02-07T05:06:07.346747231Z","close_reason":"Implemented best-frame selection with scoring logic and verified with tests."} +{"id":"py_workspace-tpz","title":"Refactor visualize_extrinsics.py to use true global basis 
conversion","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T17:41:09.345966612Z","created_by":"crosstyan","updated_at":"2026-02-07T17:43:35.501465973Z","closed_at":"2026-02-07T17:43:35.501465973Z","close_reason":"Refactored visualize_extrinsics.py to use true global basis conversion"} {"id":"py_workspace-wsk","title":"Fix basedpyright errors in tests and exclude ogl_viewer","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-07T08:54:16.6652971Z","created_by":"crosstyan","updated_at":"2026-02-07T08:58:49.256601506Z","closed_at":"2026-02-07T08:58:49.256601506Z","close_reason":"Fixed basedpyright errors"} {"id":"py_workspace-z3r","title":"Add debug logs for successful ArUco detection","status":"closed","priority":2,"issue_type":"task","owner":"crosstyan@outlook.com","created_at":"2026-02-06T10:17:30.195422209Z","created_by":"crosstyan","updated_at":"2026-02-06T10:18:35.263206185Z","closed_at":"2026-02-06T10:18:35.263206185Z","close_reason":"Added loguru debug logs for successful ArUco detections in calibrate_extrinsics loop"} diff --git a/py_workspace/.sisyphus/notepads/auto-align-fix/decisions.md b/py_workspace/.sisyphus/notepads/auto-align-fix/decisions.md new file mode 100644 index 0000000..8ad065e --- /dev/null +++ b/py_workspace/.sisyphus/notepads/auto-align-fix/decisions.md @@ -0,0 +1,6 @@ +## Semantic Priority in detect_ground_face +- Decision: Explicitly check for 'bottom' face in face_marker_map and return it immediately if any of its markers are visible. +- Rationale: For the standard_box_markers_600mm.parquet, the 'bottom' face is guaranteed to be the ground. Geometric heuristics might pick other faces (like 'front' or 'back') if they happen to align better with the camera's 'up' vector due to camera tilt or marker placement. +## Semantic Priority in detect_ground_face +- Decision: Explicitly check for 'bottom' face in face_marker_map and return it immediately if any of its markers are visible. 
+- Rationale: For the standard_box_markers_600mm.parquet, the 'bottom' face is guaranteed to be the ground. Geometric heuristics might pick other faces (like 'front' or 'back') if they happen to align better with the camera's 'up' vector due to camera tilt or marker placement. diff --git a/py_workspace/.sisyphus/notepads/auto-align-fix/learnings.md b/py_workspace/.sisyphus/notepads/auto-align-fix/learnings.md new file mode 100644 index 0000000..5b12b8b --- /dev/null +++ b/py_workspace/.sisyphus/notepads/auto-align-fix/learnings.md @@ -0,0 +1,6 @@ +## Ground Face Heuristic Priority +- Prioritizing semantic face names (specifically 'bottom') over purely geometric dot-product heuristics significantly improves robustness for marker parquets with named faces. +- Geometric heuristics can be noisy due to marker frame orientation or slight misalignments. +## Ground Face Heuristic Priority +- Prioritizing semantic face names (specifically 'bottom') over purely geometric dot-product heuristics significantly improves robustness for marker parquets with named faces. +- Geometric heuristics can be noisy due to marker frame orientation or slight misalignments. diff --git a/py_workspace/.sisyphus/notepads/ground-plane-analysis/decisions.md b/py_workspace/.sisyphus/notepads/ground-plane-analysis/decisions.md new file mode 100644 index 0000000..ba21e20 --- /dev/null +++ b/py_workspace/.sisyphus/notepads/ground-plane-analysis/decisions.md @@ -0,0 +1,3 @@ +## Decisions +- Use `--diagnose` in `visualize_extrinsics.py` to verify world-frame orientation. +- Prefer explicit `--ground-face` over heuristic detection to avoid 90-degree flips. 
diff --git a/py_workspace/.sisyphus/notepads/ground-plane-analysis/learnings.md b/py_workspace/.sisyphus/notepads/ground-plane-analysis/learnings.md new file mode 100644 index 0000000..959be6c --- /dev/null +++ b/py_workspace/.sisyphus/notepads/ground-plane-analysis/learnings.md @@ -0,0 +1,5 @@ +## Ground Plane Orientation Analysis +- `calibrate_extrinsics.py` uses `--auto-align` to rotate the world frame. +- Alignment maps a detected face normal to `[0, 1, 0]` (Y-up). +- Heuristic detection (`detect_ground_face`) depends on camera being roughly upright. +- `inside_network.json` uses a world frame where the ground is at Y=0 and cameras have specific offsets (e.g., -1.17m Y). diff --git a/py_workspace/.sisyphus/notepads/visualization-conventions/learnings.md b/py_workspace/.sisyphus/notepads/visualization-conventions/learnings.md new file mode 100644 index 0000000..e68e905 --- /dev/null +++ b/py_workspace/.sisyphus/notepads/visualization-conventions/learnings.md @@ -0,0 +1,17 @@ + +## 2026-02-08: Visualization Conventions Documentation + +### Key findings from codebase analysis: +- `visualize_extrinsics.py` went through 7+ iterations (commits `6113d0e` → `d07c244`) +- The core confusion was conflating Plotly view transforms with data-frame transforms +- `world_to_plot()` is now a no-op identity function — all data stays in OpenCV frame +- Plotly `camera.up = {y:-1}` is the correct way to render Y-down data without transforming it +- `autorange: "reversed"` was a red herring — it flips tick labels, not data +- `inside_network.json` uses a different world frame (Fusion/gravity-aligned) than calibrate_extrinsics.py (ArUco marker object frame) +- README.md has 3 stale references to removed flags: `--pose-convention`, `--world-basis`, `--diagnose` + +### Conventions confirmed: +- Poses are `world_from_cam` (solvePnP result is inverted before saving) +- RGB = XYZ color convention for axis triads +- All units are meters +- `--origin-axes-scale` controls origin triad 
independently from `--scale` diff --git a/py_workspace/README.md b/py_workspace/README.md index 4495b84..1540bad 100755 --- a/py_workspace/README.md +++ b/py_workspace/README.md @@ -58,6 +58,14 @@ uv run calibrate_extrinsics.py \ Visualize camera poses and frustums from a JSON extrinsics file using Plotly. +```bash +uv run visualize_extrinsics.py \ + --input output/e2e_refine_depth_smoke_rerun.json \ + --zed-configs ../zed_settings \ + --origin-axes-scale 2 \ + --output output/e2e_refine_depth_smoke_rerun.html +``` + **Basic 3D Visualization (Interactive HTML):** ```bash uv run visualize_extrinsics.py -i output/extrinsics.json --show @@ -102,7 +110,7 @@ uv run visualize_extrinsics.py -i output/extrinsics.json \ **Troubleshooting:** - **Cameras bunched up?** Check `--pose-convention`. `world_from_cam` is the standard convention for `calibrate_extrinsics.py` outputs. `cam_from_world` is deprecated. -- **Axes flipped?** Use `--world-basis opengl` to match C++ viewer conventions (X right, Y up, Z backward). Default is `cv` (X right, Y down, Z forward). +- **Axes flipped?** Use `--world-basis opengl` to match C++ viewer conventions (X right, Y up, Z backward). Default is `cv` (X right, Y down, Z forward). The Plotly scene axes and labels are explicitly aligned to the selected basis. - **Config not matching?** Ensure JSON keys match the serial numbers in `SN.conf` filenames. For full options: diff --git a/py_workspace/docs/visualization-conventions.md b/py_workspace/docs/visualization-conventions.md new file mode 100644 index 0000000..1dbbb75 --- /dev/null +++ b/py_workspace/docs/visualization-conventions.md @@ -0,0 +1,334 @@ +# Visualization Conventions & Coordinate Frame Reference + +> **Status**: Canonical reference as of 2026-02-08. +> **Applies to**: `visualize_extrinsics.py`, `calibrate_extrinsics.py`, and `inside_network.json`. 
+ +--- + +## Executive Summary + +The `visualize_extrinsics.py` script went through multiple iterations of coordinate-frame +switching (OpenCV ↔ OpenGL), Plotly camera/view hacks, and partial basis transforms that +created compounding confusion about whether the visualization was correct. The root cause +was **conflating Plotly's scene camera settings with actual data-frame transforms**: +adjusting `camera.up`, `autorange: "reversed"`, or eye position changes *how you look at +the data* but does **not** change the coordinate frame the data lives in. + +After several rounds of adding and removing `--world-basis`, `--render-space`, and +`--pose-convention` flags, the visualizer was simplified to a single convention: + +- **All data is in OpenCV convention** (+X right, +Y down, +Z forward). +- **No basis switching**. The `--world-basis` flag was removed. +- **Plotly's scene camera** is configured with `up = {x:0, y:-1, z:0}` so that the + OpenCV +Y-down axis renders as "down" on screen. + +The confusion was never a bug in the calibration math — it was a visualization-layer +problem caused by trying to make Plotly (which defaults to Y-up) display OpenCV data +(which is Y-down) without a clear separation between "data frame" and "view frame." + +--- + +## Ground Truth Conventions + +### 1. Calibration Output: `world_from_cam` + +`calibrate_extrinsics.py` stores poses as **T_world_from_cam** (4×4 homogeneous): + +``` +T_world_from_cam = invert_transform(T_cam_from_world) +``` + +- `solvePnP` returns `T_cam_from_world` (maps world points into camera frame). +- The script **inverts** this before saving to JSON. +- The translation column `T[:3, 3]` is the **camera center in world coordinates**. +- The rotation columns `T[:3, :3]` are the camera's local axes expressed in world frame. + +**JSON format** (16 floats, row-major 4×4): +```json +{ + "44289123": { + "pose": "0.878804 -0.039482 0.475548 -2.155006 0.070301 0.996409 ..." + } +} +``` + +### 2. 
Camera-Local Axes (OpenCV) + +Every camera's local frame follows the OpenCV pinhole convention: + +| Axis | Direction | Color in visualizer | +|------|-----------|-------------------| +| +X | Right | Red | +| +Y | Down | Green | +| +Z | Forward (into scene) | Blue | + +The frustum is drawn along the camera's local +Z axis. The four corners of the +frustum's far plane are at `(±w, ±h, frustum_scale)` in camera-local coordinates. + +### 3. Plotly Scene/Camera Interpretation Pitfalls + +Plotly's 3D scene has its own camera model that controls **how you view** the data: + +| Plotly setting | What it does | What it does NOT do | +|----------------|-------------|-------------------| +| `camera.up` | Sets which direction is "up" on screen | Does not transform data coordinates | +| `camera.eye` | Sets the viewpoint position | Does not change axis orientation | +| `yaxis.autorange = "reversed"` | Flips the Y axis tick direction | Does not negate Y data values | +| `aspectmode = "data"` | Preserves metric proportions | Does not imply any convention | + +**Critical insight**: Changing `camera.up` from `{y:1}` to `{y:-1}` makes the plot +*look* like Y-down is rendered correctly, but the underlying Plotly axis still runs +bottom-to-top by default. This is purely a view transform — the data coordinates are +unchanged. + +--- + +## Historical Confusion Timeline + +This section documents the sequence of changes that led to confusion, for future +reference. All commits are on `visualize_extrinsics.py`. + +### Phase 1: Initial Plotly Rewrite (`7b9782a`) +- Rewrote the visualizer from matplotlib to Plotly with a `--diagnose` mode. +- Used Plotly defaults (Y-up). OpenCV data (Y-down) appeared "upside down." +- Frustums pointed in the correct direction in data space but *looked* inverted. + +### Phase 2: Y-Up Enforcement (`a8d3751`) +- Attempted to fix by setting `camera.up = {y:1}` and using `autorange: "reversed"`. 
+- This made the view *look* correct for some angles but introduced axis-label confusion. +- The Y axis ticks ran in the opposite direction from the data, misleading users. + +### Phase 3: Render-Space Option (`ab88a24`) +- Added `--render-space` flag to switch between "cv" and "opengl" rendering. +- The OpenGL path applied a basis-change matrix `diag(1, -1, -1)` to all data. +- This actually transformed the data, not just the view — a correct approach but + introduced a second code path that was hard to validate. + +### Phase 4: Ground Plane & Origin Triad (`18e8142`, `57f0dff`) +- Added ground plane overlay and world-origin axis triad. +- These were drawn in the *data* frame, so they were correct in CV mode but + appeared wrong in OpenGL mode (the basis transform was applied inconsistently + to some elements but not others). + +### Phase 5: `--world-basis` with Global Transform (`79f2ab0`) +- Renamed `--render-space` to `--world-basis` with `cv` and `opengl` options. +- Introduced `world_to_plot()` as a central transform function. +- In `opengl` mode: `world_to_plot` applied `diag(1, -1, -1)` to all points. +- **Problem**: The Plotly `camera.up` and axis labels were not always updated + consistently with the basis choice, leading to "it looks right from one angle + but wrong from another" reports. + +### Phase 6: Restore After Removal (`6330e0e`) +- `--world-basis` was briefly removed, then restored due to user request. +- This back-and-forth left the README with stale documentation referencing both + the old and new interfaces. + +### Phase 7: Final Cleanup — CV Only (`d07c244`) +- **Removed `--world-basis` entirely.** +- `world_to_plot()` became a no-op (identity function). +- Plotly camera set to `up = {x:0, y:-1, z:0}` to render Y-down correctly. +- Axis labels explicitly set to `X (Right)`, `Y (Down)`, `Z (Forward)`. +- Added `--origin-axes-scale` for independent control of the origin triad size. 
+- Removed `--diagnose`, `--pose-convention`, and `--render-space` flags. + +**This is the current state.** + +--- + +## Peculiar Behaviors Catalog + +| # | Symptom | Root Cause | Fix / Explanation | +|---|---------|-----------|-------------------| +| 1 | Frustum appears to point in "-Z" direction | Plotly default camera has Y-up; OpenCV frustum points +Z which looks "backward" when viewed from a Y-up perspective | Set `camera.up = {y:-1}` (done in current code). The frustum is correct in data space. | +| 2 | Switching to `--world-basis opengl` makes some elements flip but not others | The `world_to_plot()` transform was applied to camera traces but not consistently to ground plane or origin triad | Removed `--world-basis`. Single convention eliminates partial-transform bugs. | +| 3 | `yaxis.autorange = "reversed"` makes ticks confusing | Plotly reverses the tick labels but the data coordinates stay the same. Users see "0 at top, -2 at bottom" which contradicts Y-down intuition. | Removed `autorange: reversed`. Use `camera.up = {y:-1}` instead, which rotates the view without mangling tick labels. | +| 4 | Camera positions don't match `inside_network.json` | `inside_network.json` stores poses in the ZED Fusion coordinate frame (gravity-aligned, Y-up). `calibrate_extrinsics.py` stores poses in the ArUco marker object's frame (Y-down if the marker board is horizontal). These are **different world frames**. | Not a bug. The two systems use different world origins and orientations. To compare, you must apply the alignment transform between the two frames. See FAQ below. | +| 5 | Origin triad too small or too large relative to cameras | Origin triad defaulted to `--scale` (camera axis size), which is often much smaller than the camera spread | Use `--origin-axes-scale 0.6` (or similar) independently of `--scale`. | +| 6 | Bird-eye view shows unexpected orientation | `--birdseye` uses orthographic projection looking down the Y axis. 
In CV convention, Y is "down" so this is looking from below the scene upward. | Expected behavior. The bird-eye view shows the X-Z plane as seen from the -Y direction (below the cameras). | + +--- + +## Canonical Rules Going Forward + +1. **Single convention**: All visualization data is in OpenCV frame. No basis switching. +2. **`world_to_plot()` is identity**: It exists as a hook but performs no transform. + If a future need arises for basis conversion, it should be the *only* place it happens. +3. **Plotly camera settings are view-only**: Never use `autorange: reversed` or axis + negation to simulate a coordinate change. Use `camera.up` and `camera.eye` only. +4. **Poses are `world_from_cam`**: The 4×4 matrix maps camera-local points to world. + Translation = camera position in world. Rotation columns = camera axes in world. +5. **Colors are RGB = XYZ**: Red = X (right), Green = Y (down), Blue = Z (forward). + This applies to both per-camera axis triads and the world-origin triad. +6. **Units are meters**: Consistent with marker parquet geometry and calibration output. 
+ +--- + +## Current CLI Behavior + +### Available Flags + +``` +visualize_extrinsics.py + -i, --input TEXT [required] Path to JSON extrinsics file + -o, --output TEXT Output path (.html or .png) + --show Open interactive Plotly viewer + --scale FLOAT Camera axis length (default: 0.2) + --frustum-scale FLOAT Frustum depth (default: 0.5) + --fov FLOAT Horizontal FOV degrees (default: 60.0) + --birdseye Top-down orthographic view + --show-ground/--no-show-ground Ground plane toggle + --ground-y FLOAT Ground plane Y position (default: 0.0) + --ground-size FLOAT Ground plane side length (default: 8.0) + --show-origin-axes/--no-show-origin-axes Origin triad toggle (default: on) + --origin-axes-scale FLOAT Origin triad size (defaults to --scale) + --zed-configs TEXT ZED calibration file(s) for accurate frustums + --resolution [FHD1200|FHD|2K|HD|SVGA|VGA] + --eye [left|right] +``` + +### Removed Flags (Historical Only) + +| Flag | Removed In | Reason | +|------|-----------|--------| +| `--world-basis` | `d07c244` | Caused partial/inconsistent transforms. Single CV convention is simpler. | +| `--pose-convention` | `d07c244` | Only `world_from_cam` is supported. No need for a flag. | +| `--diagnose` | `d07c244` | Diagnostic checks moved out of the visualizer. | +| `--render-space` | `79f2ab0` | Renamed to `--world-basis`, then removed. | + +> **Note**: The README.md still contains stale references to `--world-basis`, +> `--pose-convention`, and `--diagnose` in the Troubleshooting section. These should +> be cleaned up to match the current CLI. + +--- + +## Verification Playbook + +### Quick Sanity Check + +```bash +# Render with origin triad at 0.6m scale, save as PNG +uv run visualize_extrinsics.py \ + --input output/e2e_refine_depth_smoke_rerun.json \ + --output output/_final_opencv_origin_axes_scaled.png \ + --origin-axes-scale 0.6 +``` + +**Expected result**: +- Origin triad at (0,0,0) with Red→+X (right), Green→+Y (down), Blue→+Z (forward). 
+- Camera frustums pointing along each camera's local +Z (blue axis). +- Camera positions spread out in world space (not bunched at origin). +- Y values for cameras should be negative (cameras are above the marker board, + which is at Y≈0; "above" in CV convention means negative Y). + +### Interactive Validation + +```bash +# Open interactive HTML for rotation/inspection +uv run visualize_extrinsics.py \ + --input output/e2e_refine_depth_smoke_rerun.json \ + --show \ + --origin-axes-scale 0.6 +``` + +**What to check**: +1. **Rotate the view**: The origin triad should remain consistent — Red/Green/Blue + always point in the same data-space directions regardless of view angle. +2. **Hover over camera centers**: Tooltip shows the camera serial number. +3. **Frustum orientation**: Each frustum's open end faces away from the camera center + along the camera's blue (Z) axis. + +### Bird-Eye Sanity Check + +```bash +uv run visualize_extrinsics.py \ + --input output/e2e_refine_depth_smoke_rerun.json \ + --birdseye --show \ + --origin-axes-scale 0.6 +``` + +**Expected**: Top-down view of the X-Z plane. Cameras should form a recognizable +spatial layout matching the physical installation. The Red (X) axis points right, +Blue (Z) axis points "up" on screen (forward in world). + +--- + +## FAQ + +### "Why does an OpenGL-like view look strange?" + +Because the data is in OpenCV convention (Y-down, Z-forward) and Plotly defaults to +Y-up. When you try to make Plotly act like an OpenGL viewer (Y-up, Z-backward), you +need to either: + +1. **Transform all data** by applying `diag(1, -1, -1)` — correct but doubles the + code paths and creates consistency risks. +2. **Adjust the Plotly camera** — only changes the view, not the data. Axis labels + and hover values still show CV coordinates. + +We chose option (2) with `camera.up = {y:-1}`: minimal code, no data transformation, +axis labels match the actual coordinate values. 
The trade-off is that the default +Plotly orbit feels "inverted" compared to a Y-up 3D viewer. This is expected. + +### "Does flipping axes in the view equal changing the world frame?" + +**No.** Plotly's `camera.up`, `camera.eye`, and `autorange: reversed` are purely +view transforms. They change how the data is *displayed* but not what the coordinates +*mean*. The data always lives in the frame it was computed in (OpenCV/ArUco world frame). + +If you set `camera.up = {y:1}` (Plotly default), the plot will render Y-up on screen, +but the data values are still Y-down. This creates a visual inversion that looks like +"the cameras are upside down" — they're not; the view is just flipped. + +### "How do I compare with the C++ viewer and `inside_network.json`?" + +The C++ ZED Fusion viewer and `inside_network.json` use a **different world frame** +than `calibrate_extrinsics.py`: + +| Property | `calibrate_extrinsics.py` | ZED Fusion / `inside_network.json` | +|----------|--------------------------|-------------------------------------| +| World origin | ArUco marker object center | Gravity-aligned, first camera or user-defined | +| Y direction | Down (OpenCV) | Up (gravity-aligned) | +| Pose meaning | `T_world_from_cam` | `T_world_from_cam` (same semantics, different world) | +| Units | Meters | Meters | + +To compare numerically: +1. The **relative** poses between cameras should match (up to the alignment transform). +2. The **absolute** positions will differ because the world origins are different. +3. To convert: apply the alignment rotation that maps the ArUco world frame to the + Fusion world frame. If `--auto-align` was used with a ground face, the ArUco frame + is partially aligned (ground = XZ plane), but the origin and yaw may still differ. + +**Quick visual comparison**: Look at the *shape* of the camera arrangement (distances +and angles between cameras), not the absolute positions. If the shape matches, the +calibration is consistent. 
+ +### "Why are camera Y-positions negative?" + +In OpenCV convention, +Y is down. Cameras mounted above the marker board (which defines +Y≈0) have negative Y values. This is correct. A camera at `Y = -1.3` is 1.3 meters +above the board. + +### "What does `inside_network.json` camera 41831756's pose mean?" + +``` +Translation: [0.0, -1.175, 0.0] +Rotation: Identity +``` + +This camera is the reference frame origin (identity rotation) positioned 1.175m in the +-Y direction. In the Fusion frame (Y-up), this means 1.175m *below* the world origin. +In practice, this is the height offset of the camera relative to the Fusion coordinate +system's origin. + +--- + +## Appendix: Stale README References + +The following lines in `py_workspace/README.md` reference removed flags and should be +updated: + +- **Line ~104**: References `--pose-convention` (removed). +- **Line ~105**: References `--world-basis opengl` (removed). +- **Line ~116**: References `--diagnose` (removed). + +These were left from earlier iterations and do not reflect the current CLI. 
diff --git a/py_workspace/visualize_extrinsics.py b/py_workspace/visualize_extrinsics.py index b9982ba..255ae33 100644 --- a/py_workspace/visualize_extrinsics.py +++ b/py_workspace/visualize_extrinsics.py @@ -6,7 +6,7 @@ import json import click import numpy as np import plotly.graph_objects as go -from typing import Any, Dict, Optional, List +from typing import Any, Dict, Optional, List, Literal import configparser from pathlib import Path import re @@ -22,6 +22,10 @@ RESOLUTION_MAP = { } +ResolutionLiteral = Literal["FHD1200", "FHD", "2K", "HD", "SVGA", "VGA"] +EyeLiteral = Literal["left", "right"] + + def parse_pose(pose_str: str) -> np.ndarray: """Parses a 16-float pose string into a 4x4 matrix.""" try: @@ -48,7 +52,7 @@ def world_to_plot(points: np.ndarray) -> np.ndarray: def load_zed_configs( - paths: List[str], resolution: str, eye: str + paths: List[str], resolution: ResolutionLiteral, eye: EyeLiteral ) -> Dict[str, Dict[str, float]]: """ Loads ZED intrinsics from config files. @@ -373,8 +377,8 @@ def main( frustum_scale: float, fov: float, zed_configs: List[str], - resolution: str, - eye: str, + resolution: ResolutionLiteral, + eye: EyeLiteral, show_ground: bool, ground_y: float, ground_size: float, @@ -544,7 +548,7 @@ def main( ) fig.update_layout( - title=f"Camera Extrinsics
World Basis: CV (+Y down, +Z fwd)", + title=f"Camera Extrinsics", scene=scene_dict, margin=dict(l=0, r=0, b=0, t=60), legend=dict(x=0, y=1),