import numpy as np
from dataclasses import dataclass
from typing import Optional, Dict, List, Tuple

from .pose_math import invert_transform


@dataclass
class DepthVerificationResult:
    """Summary statistics comparing measured depth against depth predicted
    from camera extrinsics at known marker-corner world positions."""

    # One (marker_id, corner_idx, residual) entry per corner that produced
    # a valid depth sample; residual = z_measured - z_predicted.
    residuals: List[Tuple[int, int, float]]
    rmse: float                    # root-mean-square residual (depth-map units)
    mean_abs: float                # mean absolute residual
    median: float                  # median signed residual
    depth_normalized_rmse: float   # rmse / mean predicted corner depth
    n_valid: int                   # corners that yielded a residual
    n_total: int                   # corners considered overall


def project_point_to_pixel(P_cam: np.ndarray, K: np.ndarray):
    """Project a 3D point in the camera frame to integer pixel coordinates.

    Args:
        P_cam: (3,) point [X, Y, Z] in the camera frame.
        K: 3x3 camera intrinsics matrix.

    Returns:
        (u, v) pixel coordinates, or (None, None) if the point is at or
        behind the image plane (Z <= 0).
    """
    X, Y, Z = P_cam
    if Z <= 0:
        return None, None
    u = int(round(K[0, 0] * X / Z + K[0, 2]))
    v = int(round(K[1, 1] * Y / Z + K[1, 2]))
    return u, v


def get_confidence_weight(confidence: float, threshold: float = 100.0) -> float:
    """
    Convert ZED confidence value to a weight in [0, 1].

    ZED semantics: 1 is most confident, 100 is least confident.
    Non-finite or negative confidence values map to weight 0.
    """
    if not np.isfinite(confidence) or confidence < 0:
        return 0.0
    # Linear weight from 1.0 (at confidence=0) to 0.0 (at confidence=threshold)
    weight = 1.0 - (confidence / threshold)
    return float(np.clip(weight, 0.0, 1.0))


def compute_depth_residual(
    P_world: np.ndarray,
    T_world_cam: np.ndarray,
    depth_map: np.ndarray,
    K: np.ndarray,
    window_size: int = 5,
) -> Optional[float]:
    """Return z_measured - z_predicted for one world point, or None.

    The point is transformed into the camera frame, projected to a pixel,
    and compared against the median of the valid depth values in a
    window_size x window_size patch (or the single pixel if window_size <= 1).

    Returns None when the point is behind the camera, projects outside the
    image, or no finite positive depth sample exists at that location.
    """
    T_cam_world = invert_transform(T_world_cam)
    P_world_h = np.append(P_world, 1.0)
    P_cam = (T_cam_world @ P_world_h)[:3]
    z_predicted = P_cam[2]
    if z_predicted <= 0:
        return None

    u, v = project_point_to_pixel(P_cam, K)
    if u is None or v is None:
        return None
    h, w = depth_map.shape[:2]
    if u < 0 or u >= w or v < 0 or v >= h:
        return None

    if window_size <= 1:
        z_measured = depth_map[v, u]
    else:
        # Median over a small window is robust to isolated bad depth pixels.
        half = window_size // 2
        x_min = max(0, u - half)
        x_max = min(w, u + half + 1)
        y_min = max(0, v - half)
        y_max = min(h, v + half + 1)
        window = depth_map[y_min:y_max, x_min:x_max]
        valid_depths = window[np.isfinite(window) & (window > 0)]
        if len(valid_depths) == 0:
            return None
        z_measured = np.median(valid_depths)

    if not np.isfinite(z_measured) or z_measured <= 0:
        return None
    return float(z_measured - z_predicted)


def _corner_is_low_confidence(
    corner: np.ndarray,
    T_cam_world: np.ndarray,
    K: np.ndarray,
    confidence_map: np.ndarray,
    confidence_thresh: float,
) -> bool:
    """True when the corner projects onto a confidence pixel above threshold.

    ZED semantics: a HIGHER confidence value means LESS confident; values
    near 100 typically indicate occlusion or invalid depth. Corners that
    project behind the camera or outside the map are NOT rejected here —
    compute_depth_residual will reject them on its own.
    """
    P_cam = (T_cam_world @ np.append(corner, 1.0))[:3]
    u, v = project_point_to_pixel(P_cam, K)
    if u is None or v is None:
        return False
    h, w = confidence_map.shape[:2]
    if not (0 <= u < w and 0 <= v < h):
        return False
    return confidence_map[v, u] > confidence_thresh


def compute_marker_corner_residuals(
    T_world_cam: np.ndarray,
    marker_corners_world: Dict[int, np.ndarray],
    depth_map: np.ndarray,
    K: np.ndarray,
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 50,
) -> List[Tuple[int, int, float]]:
    """Compute per-corner depth residuals for all markers.

    Args:
        T_world_cam: 4x4 camera pose in the world frame.
        marker_corners_world: marker_id -> (N, 3) corner positions in world.
        depth_map: measured depth image.
        K: 3x3 camera intrinsics.
        confidence_map: optional ZED confidence image; corners whose
            projected confidence exceeds confidence_thresh are skipped.
        confidence_thresh: rejection threshold (ZED: higher = less confident).

    Returns:
        List of (marker_id, corner_idx, residual) for corners with a valid
        depth sample.
    """
    detailed_residuals: List[Tuple[int, int, float]] = []
    # The transform inversion is identical for every corner — hoist it out
    # of the loops (it was previously recomputed per corner).
    T_cam_world = invert_transform(T_world_cam)

    for marker_id, corners in marker_corners_world.items():
        for corner_idx, corner in enumerate(corners):
            if confidence_map is not None and _corner_is_low_confidence(
                corner, T_cam_world, K, confidence_map, confidence_thresh
            ):
                continue
            residual = compute_depth_residual(
                corner, T_world_cam, depth_map, K, window_size=5
            )
            if residual is not None:
                detailed_residuals.append((int(marker_id), corner_idx, residual))
    return detailed_residuals


def verify_extrinsics_with_depth(
    T_world_cam: np.ndarray,
    marker_corners_world: Dict[int, np.ndarray],
    depth_map: np.ndarray,
    K: np.ndarray,
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 50,
) -> DepthVerificationResult:
    """Verify camera extrinsics by comparing predicted and measured depth.

    Aggregates per-corner residuals into RMSE / mean-abs / median, plus an
    RMSE normalized by the mean predicted corner depth so results are
    comparable across scenes at different ranges.
    """
    detailed_residuals = compute_marker_corner_residuals(
        T_world_cam,
        marker_corners_world,
        depth_map,
        K,
        confidence_map,
        confidence_thresh,
    )
    residuals = [r[2] for r in detailed_residuals]
    n_total = sum(len(corners) for corners in marker_corners_world.values())
    n_valid = len(residuals)

    if n_valid == 0:
        return DepthVerificationResult(
            residuals=[],
            rmse=0.0,
            mean_abs=0.0,
            median=0.0,
            depth_normalized_rmse=0.0,
            n_valid=0,
            n_total=n_total,
        )

    residuals_array = np.array(residuals)
    rmse = float(np.sqrt(np.mean(residuals_array**2)))
    mean_abs = float(np.mean(np.abs(residuals_array)))
    median = float(np.median(residuals_array))

    # Normalize RMSE by the mean predicted depth of all in-front-of-camera
    # corners. Hoist the (loop-invariant) transform inversion; the previous
    # redundant `if n_valid > 0` guard is dropped — we returned above.
    depth_normalized_rmse = 0.0
    T_cam_world = invert_transform(T_world_cam)
    depths = []
    for corners in marker_corners_world.values():
        for corner in corners:
            z = (T_cam_world @ np.append(corner, 1.0))[2]
            if z > 0:
                depths.append(z)
    if depths:
        mean_depth = float(np.mean(depths))
        if mean_depth > 0:
            depth_normalized_rmse = rmse / mean_depth

    return DepthVerificationResult(
        residuals=detailed_residuals,
        rmse=rmse,
        mean_abs=mean_abs,
        median=median,
        depth_normalized_rmse=depth_normalized_rmse,
        n_valid=n_valid,
        n_total=n_total,
    )