feat(cli): add depth verify/refine outputs and tests

- Retrieve depth + confidence measures from SVOReader when depth enabled - Compute depth residual metrics and attach to output JSON - Optionally write per-corner residual CSV via --report-csv - Post-process refinement: optimize final pose and report pre/post metrics - Add unit tests for depth verification and refinement modules - Add basedpyright dev dependency for diagnostics
2026-02-05 04:44:34 +00:00
parent 882d9348a6
commit 6d3c5cc5c1
10 changed files with 514 additions and 76 deletions
@@ -1,5 +1,5 @@
 import numpy as np
-from typing import Dict, Tuple, Optional
+from typing import Dict, Tuple
 from scipy.optimize import minimize
 from .pose_math import rvec_tvec_to_matrix, matrix_to_rvec_tvec
 from .depth_verify import compute_depth_residual
@@ -44,7 +44,7 @@ def depth_residual_objective(
    translation_diff = np.linalg.norm(param_diff[3:])
    regularization = regularization_weight * (rotation_diff + translation_diff)

-    return data_term + regularization
+    return float(np.real(data_term + regularization))


 def refine_extrinsics_with_depth(
@@ -55,7 +55,7 @@ def refine_extrinsics_with_depth(
    max_translation_m: float = 0.05,
    max_rotation_deg: float = 5.0,
    regularization_weight: float = 0.1,
-) -> Tuple[np.ndarray, dict]:
+) -> Tuple[np.ndarray, dict[str, float]]:
    initial_params = extrinsics_to_params(T_initial)

    max_rotation_rad = np.deg2rad(max_rotation_deg)
@@ -1,12 +1,12 @@
 import numpy as np
 from dataclasses import dataclass
-from typing import Optional, Dict
+from typing import Optional, Dict, List, Tuple
 from .pose_math import invert_transform


@dataclass
 class DepthVerificationResult:
-    residuals: list
+    residuals: List[Tuple[int, int, float]]  # (marker_id, corner_idx, residual)
    rmse: float
    mean_abs: float
    median: float
@@ -40,7 +40,7 @@ def compute_depth_residual(
        return None

    u, v = project_point_to_pixel(P_cam, K)
-    if u is None:
+    if u is None or v is None:
        return None

    h, w = depth_map.shape[:2]
@@ -67,6 +67,43 @@ def compute_depth_residual(
    return float(z_measured - z_predicted)


+def compute_marker_corner_residuals(
+    T_world_cam: np.ndarray,
+    marker_corners_world: Dict[int, np.ndarray],
+    depth_map: np.ndarray,
+    K: np.ndarray,
+    confidence_map: Optional[np.ndarray] = None,
+    confidence_thresh: float = 50,
+) -> List[Tuple[int, int, float]]:
+    detailed_residuals = []
+
+    for marker_id, corners in marker_corners_world.items():
+        for corner_idx, corner in enumerate(corners):
+            # Check confidence if map is provided
+            if confidence_map is not None:
+                T_cam_world = invert_transform(T_world_cam)
+                P_world_h = np.append(corner, 1.0)
+                P_cam = (T_cam_world @ P_world_h)[:3]
+                u_proj, v_proj = project_point_to_pixel(P_cam, K)
+
+                if u_proj is not None and v_proj is not None:
+                    h, w = confidence_map.shape[:2]
+                    if 0 <= u_proj < w and 0 <= v_proj < h:
+                        confidence = confidence_map[v_proj, u_proj]
+                        # Higher confidence value means LESS confident in ZED SDK
+                        # Range [1, 100], where 100 is typically occlusion/invalid
+                        if confidence > confidence_thresh:
+                            continue
+
+            residual = compute_depth_residual(
+                corner, T_world_cam, depth_map, K, window_size=5
+            )
+            if residual is not None:
+                detailed_residuals.append((int(marker_id), corner_idx, residual))
+
+    return detailed_residuals
+
+
 def verify_extrinsics_with_depth(
    T_world_cam: np.ndarray,
    marker_corners_world: Dict[int, np.ndarray],
@@ -75,28 +112,17 @@ def verify_extrinsics_with_depth(
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 50,
 ) -> DepthVerificationResult:
-    residuals = []
-    n_total = 0
-
-    for marker_id, corners in marker_corners_world.items():
-        for corner in corners:
-            n_total += 1
-
-            if confidence_map is not None:
-                u = int(round(corner[0]))
-                v = int(round(corner[1]))
-                h, w = confidence_map.shape[:2]
-                if 0 <= u < w and 0 <= v < h:
-                    confidence = confidence_map[v, u]
-                    if confidence > confidence_thresh:
-                        continue
-
-            residual = compute_depth_residual(
-                corner, T_world_cam, depth_map, K, window_size=5
-            )
-            if residual is not None:
-                residuals.append(residual)
+    detailed_residuals = compute_marker_corner_residuals(
+        T_world_cam,
+        marker_corners_world,
+        depth_map,
+        K,
+        confidence_map,
+        confidence_thresh,
+    )

+    residuals = [r[2] for r in detailed_residuals]
+    n_total = sum(len(corners) for corners in marker_corners_world.values())
    n_valid = len(residuals)

    if n_valid == 0:
@@ -128,10 +154,10 @@ def verify_extrinsics_with_depth(
        if depths:
            mean_depth = np.mean(depths)
            if mean_depth > 0:
-                depth_normalized_rmse = rmse / mean_depth
+                depth_normalized_rmse = float(rmse / mean_depth)

    return DepthVerificationResult(
-        residuals=residuals,
+        residuals=detailed_residuals,
        rmse=rmse,
        mean_abs=mean_abs,
        median=median,
@@ -1,7 +1,7 @@
 import pyzed.sl as sl
 import numpy as np
 from dataclasses import dataclass
-from typing import List, Optional, Dict
+from typing import Any
 import os


@@ -13,18 +13,22 @@ class FrameData:
    timestamp_ns: int
    frame_index: int
    serial_number: int
-    depth_map: Optional[np.ndarray] = None
+    depth_map: np.ndarray | None = None


 class SVOReader:
    """Handles synchronized playback of multiple SVO files."""

+    svo_paths: list[str]
+    runtime_params: sl.RuntimeParameters
+    _depth_mode: sl.DEPTH_MODE
+
    def __init__(
-        self, svo_paths: List[str], depth_mode: sl.DEPTH_MODE = sl.DEPTH_MODE.NONE
+        self, svo_paths: list[str], depth_mode: sl.DEPTH_MODE = sl.DEPTH_MODE.NONE
    ):
        self.svo_paths = svo_paths
-        self.cameras: List[sl.Camera] = []
-        self.camera_info: List[Dict] = []
+        self.cameras: list[sl.Camera] = []
+        self.camera_info: list[dict[str, Any]] = []
        self.runtime_params = sl.RuntimeParameters()
        self._depth_mode = depth_mode

@@ -83,9 +87,9 @@ class SVOReader:
            else:
                cam.set_svo_position(0)

-    def grab_all(self) -> List[Optional[FrameData]]:
+    def grab_all(self) -> list[FrameData | None]:
        """Grabs a frame from all cameras without strict synchronization."""
-        frames = []
+        frames: list[FrameData | None] = []
        for i, cam in enumerate(self.cameras):
            err = cam.grab(self.runtime_params)
            if err == sl.ERROR_CODE.SUCCESS:
@@ -110,7 +114,7 @@ class SVOReader:
                frames.append(None)
        return frames

-    def grab_synced(self, tolerance_ms: int = 33) -> List[Optional[FrameData]]:
+    def grab_synced(self, tolerance_ms: int = 33) -> list[FrameData | None]:
        """
        Grabs frames from all cameras, attempting to keep them within tolerance_ms.
        If a camera falls behind, it skips frames.
@@ -168,14 +172,14 @@ class SVOReader:
    def enable_depth(self) -> bool:
        return self._depth_mode != sl.DEPTH_MODE.NONE

-    def _retrieve_depth(self, cam: sl.Camera) -> Optional[np.ndarray]:
+    def _retrieve_depth(self, cam: sl.Camera) -> np.ndarray | None:
        if not self.enable_depth:
            return None
        depth_mat = sl.Mat()
        cam.retrieve_measure(depth_mat, sl.MEASURE.DEPTH)
        return depth_mat.get_data().copy()

-    def get_depth_at(self, frame: FrameData, x: int, y: int) -> Optional[float]:
+    def get_depth_at(self, frame: FrameData, x: int, y: int) -> float | None:
        if frame.depth_map is None:
            return None
        h, w = frame.depth_map.shape[:2]
@@ -188,7 +192,7 @@ class SVOReader:

    def get_depth_window_median(
        self, frame: FrameData, x: int, y: int, size: int = 5
-    ) -> Optional[float]:
+    ) -> float | None:
        if frame.depth_map is None:
            return None
        if size % 2 == 0: