feat(cli): add depth verify/refine outputs and tests

- Retrieve depth + confidence measures from SVOReader when depth enabled
- Compute depth residual metrics and attach to output JSON
- Optionally write per-corner residual CSV via --report-csv
- Post-process refinement: optimize final pose and report pre/post metrics
- Add unit tests for depth verification and refinement modules
- Add basedpyright dev dependency for diagnostics
This commit is contained in:
2026-02-05 04:44:34 +00:00
parent 882d9348a6
commit 6d3c5cc5c1
10 changed files with 514 additions and 76 deletions
+3 -3
View File
@@ -1,5 +1,5 @@
import numpy as np
from typing import Dict, Tuple, Optional
from typing import Dict, Tuple
from scipy.optimize import minimize
from .pose_math import rvec_tvec_to_matrix, matrix_to_rvec_tvec
from .depth_verify import compute_depth_residual
@@ -44,7 +44,7 @@ def depth_residual_objective(
translation_diff = np.linalg.norm(param_diff[3:])
regularization = regularization_weight * (rotation_diff + translation_diff)
return data_term + regularization
return float(np.real(data_term + regularization))
def refine_extrinsics_with_depth(
@@ -55,7 +55,7 @@ def refine_extrinsics_with_depth(
max_translation_m: float = 0.05,
max_rotation_deg: float = 5.0,
regularization_weight: float = 0.1,
) -> Tuple[np.ndarray, dict]:
) -> Tuple[np.ndarray, dict[str, float]]:
initial_params = extrinsics_to_params(T_initial)
max_rotation_rad = np.deg2rad(max_rotation_deg)
+52 -26
View File
@@ -1,12 +1,12 @@
import numpy as np
from dataclasses import dataclass
from typing import Optional, Dict
from typing import Optional, Dict, List, Tuple
from .pose_math import invert_transform
@dataclass
class DepthVerificationResult:
residuals: list
residuals: List[Tuple[int, int, float]] # (marker_id, corner_idx, residual)
rmse: float
mean_abs: float
median: float
@@ -40,7 +40,7 @@ def compute_depth_residual(
return None
u, v = project_point_to_pixel(P_cam, K)
if u is None:
if u is None or v is None:
return None
h, w = depth_map.shape[:2]
@@ -67,6 +67,43 @@ def compute_depth_residual(
return float(z_measured - z_predicted)
def compute_marker_corner_residuals(
T_world_cam: np.ndarray,
marker_corners_world: Dict[int, np.ndarray],
depth_map: np.ndarray,
K: np.ndarray,
confidence_map: Optional[np.ndarray] = None,
confidence_thresh: float = 50,
) -> List[Tuple[int, int, float]]:
detailed_residuals = []
for marker_id, corners in marker_corners_world.items():
for corner_idx, corner in enumerate(corners):
# Check confidence if map is provided
if confidence_map is not None:
T_cam_world = invert_transform(T_world_cam)
P_world_h = np.append(corner, 1.0)
P_cam = (T_cam_world @ P_world_h)[:3]
u_proj, v_proj = project_point_to_pixel(P_cam, K)
if u_proj is not None and v_proj is not None:
h, w = confidence_map.shape[:2]
if 0 <= u_proj < w and 0 <= v_proj < h:
confidence = confidence_map[v_proj, u_proj]
# Higher confidence value means LESS confident in ZED SDK
# Range [1, 100], where 100 is typically occlusion/invalid
if confidence > confidence_thresh:
continue
residual = compute_depth_residual(
corner, T_world_cam, depth_map, K, window_size=5
)
if residual is not None:
detailed_residuals.append((int(marker_id), corner_idx, residual))
return detailed_residuals
def verify_extrinsics_with_depth(
T_world_cam: np.ndarray,
marker_corners_world: Dict[int, np.ndarray],
@@ -75,28 +112,17 @@ def verify_extrinsics_with_depth(
confidence_map: Optional[np.ndarray] = None,
confidence_thresh: float = 50,
) -> DepthVerificationResult:
residuals = []
n_total = 0
for marker_id, corners in marker_corners_world.items():
for corner in corners:
n_total += 1
if confidence_map is not None:
u = int(round(corner[0]))
v = int(round(corner[1]))
h, w = confidence_map.shape[:2]
if 0 <= u < w and 0 <= v < h:
confidence = confidence_map[v, u]
if confidence > confidence_thresh:
continue
residual = compute_depth_residual(
corner, T_world_cam, depth_map, K, window_size=5
)
if residual is not None:
residuals.append(residual)
detailed_residuals = compute_marker_corner_residuals(
T_world_cam,
marker_corners_world,
depth_map,
K,
confidence_map,
confidence_thresh,
)
residuals = [r[2] for r in detailed_residuals]
n_total = sum(len(corners) for corners in marker_corners_world.values())
n_valid = len(residuals)
if n_valid == 0:
@@ -128,10 +154,10 @@ def verify_extrinsics_with_depth(
if depths:
mean_depth = np.mean(depths)
if mean_depth > 0:
depth_normalized_rmse = rmse / mean_depth
depth_normalized_rmse = float(rmse / mean_depth)
return DepthVerificationResult(
residuals=residuals,
residuals=detailed_residuals,
rmse=rmse,
mean_abs=mean_abs,
median=median,
+15 -11
View File
@@ -1,7 +1,7 @@
import pyzed.sl as sl
import numpy as np
from dataclasses import dataclass
from typing import List, Optional, Dict
from typing import Any
import os
@@ -13,18 +13,22 @@ class FrameData:
timestamp_ns: int
frame_index: int
serial_number: int
depth_map: Optional[np.ndarray] = None
depth_map: np.ndarray | None = None
class SVOReader:
"""Handles synchronized playback of multiple SVO files."""
svo_paths: list[str]
runtime_params: sl.RuntimeParameters
_depth_mode: sl.DEPTH_MODE
def __init__(
self, svo_paths: List[str], depth_mode: sl.DEPTH_MODE = sl.DEPTH_MODE.NONE
self, svo_paths: list[str], depth_mode: sl.DEPTH_MODE = sl.DEPTH_MODE.NONE
):
self.svo_paths = svo_paths
self.cameras: List[sl.Camera] = []
self.camera_info: List[Dict] = []
self.cameras: list[sl.Camera] = []
self.camera_info: list[dict[str, Any]] = []
self.runtime_params = sl.RuntimeParameters()
self._depth_mode = depth_mode
@@ -83,9 +87,9 @@ class SVOReader:
else:
cam.set_svo_position(0)
def grab_all(self) -> List[Optional[FrameData]]:
def grab_all(self) -> list[FrameData | None]:
"""Grabs a frame from all cameras without strict synchronization."""
frames = []
frames: list[FrameData | None] = []
for i, cam in enumerate(self.cameras):
err = cam.grab(self.runtime_params)
if err == sl.ERROR_CODE.SUCCESS:
@@ -110,7 +114,7 @@ class SVOReader:
frames.append(None)
return frames
def grab_synced(self, tolerance_ms: int = 33) -> List[Optional[FrameData]]:
def grab_synced(self, tolerance_ms: int = 33) -> list[FrameData | None]:
"""
Grabs frames from all cameras, attempting to keep them within tolerance_ms.
If a camera falls behind, it skips frames.
@@ -168,14 +172,14 @@ class SVOReader:
def enable_depth(self) -> bool:
return self._depth_mode != sl.DEPTH_MODE.NONE
def _retrieve_depth(self, cam: sl.Camera) -> Optional[np.ndarray]:
def _retrieve_depth(self, cam: sl.Camera) -> np.ndarray | None:
if not self.enable_depth:
return None
depth_mat = sl.Mat()
cam.retrieve_measure(depth_mat, sl.MEASURE.DEPTH)
return depth_mat.get_data().copy()
def get_depth_at(self, frame: FrameData, x: int, y: int) -> Optional[float]:
def get_depth_at(self, frame: FrameData, x: int, y: int) -> float | None:
if frame.depth_map is None:
return None
h, w = frame.depth_map.shape[:2]
@@ -188,7 +192,7 @@ class SVOReader:
def get_depth_window_median(
self, frame: FrameData, x: int, y: int, size: int = 5
) -> Optional[float]:
) -> float | None:
if frame.depth_map is None:
return None
if size % 2 == 0: