zed-playground/py_workspace/aruco/depth_verify.py

import numpy as np
from dataclasses import dataclass
from typing import Optional, Dict, List, Tuple
from .pose_math import invert_transform


@dataclass
class DepthVerificationResult:
    residuals: List[Tuple[int, int, float]]  # (marker_id, corner_idx, residual)
    rmse: float
    mean_abs: float
    median: float
    depth_normalized_rmse: float
    n_valid: int
    n_total: int


def project_point_to_pixel(P_cam: np.ndarray, K: np.ndarray):
    X, Y, Z = P_cam
    if Z <= 0:
        return None, None
    u = int(round(K[0, 0] * X / Z + K[0, 2]))
    v = int(round(K[1, 1] * Y / Z + K[1, 2]))
    return u, v


def get_confidence_weight(confidence: float, threshold: float = 100.0) -> float:
    """
    Convert ZED confidence value to a weight in [0, 1].
    ZED semantics: 1 is most confident, 100 is least confident.
    """
    if not np.isfinite(confidence) or confidence < 0:
        return 0.0
    # Linear weight from 1.0 (at confidence=0) to 0.0 (at confidence=threshold)
    weight = 1.0 - (confidence / threshold)
    return float(np.clip(weight, 0.0, 1.0))


def compute_depth_residual(
    P_world: np.ndarray,
    T_world_cam: np.ndarray,
    depth_map: np.ndarray,
    K: np.ndarray,
    window_size: int = 5,
) -> Optional[float]:
    T_cam_world = invert_transform(T_world_cam)
    P_world_h = np.append(P_world, 1.0)
    P_cam = (T_cam_world @ P_world_h)[:3]

    z_predicted = P_cam[2]
    if z_predicted <= 0:
        return None

    u, v = project_point_to_pixel(P_cam, K)
    if u is None or v is None:
        return None

    h, w = depth_map.shape[:2]
    if u < 0 or u >= w or v < 0 or v >= h:
        return None

    if window_size <= 1:
        z_measured = depth_map[v, u]
    else:
        half = window_size // 2
        x_min = max(0, u - half)
        x_max = min(w, u + half + 1)
        y_min = max(0, v - half)
        y_max = min(h, v + half + 1)
        window = depth_map[y_min:y_max, x_min:x_max]
        valid_depths = window[np.isfinite(window) & (window > 0)]
        if len(valid_depths) == 0:
            return None
        z_measured = np.median(valid_depths)

    if not np.isfinite(z_measured) or z_measured <= 0:
        return None

    return float(z_measured - z_predicted)


def compute_marker_corner_residuals(
    T_world_cam: np.ndarray,
    marker_corners_world: Dict[int, np.ndarray],
    depth_map: np.ndarray,
    K: np.ndarray,
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 50,
) -> List[Tuple[int, int, float]]:
    detailed_residuals = []

    for marker_id, corners in marker_corners_world.items():
        for corner_idx, corner in enumerate(corners):
            # Check confidence if map is provided
            if confidence_map is not None:
                T_cam_world = invert_transform(T_world_cam)
                P_world_h = np.append(corner, 1.0)
                P_cam = (T_cam_world @ P_world_h)[:3]
                u_proj, v_proj = project_point_to_pixel(P_cam, K)

                if u_proj is not None and v_proj is not None:
                    h, w = confidence_map.shape[:2]
                    if 0 <= u_proj < w and 0 <= v_proj < h:
                        confidence = confidence_map[v_proj, u_proj]
                        # Higher confidence value means LESS confident in ZED SDK
                        # Range [1, 100], where 100 is typically occlusion/invalid
                        if confidence > confidence_thresh:
                            continue

            residual = compute_depth_residual(
                corner, T_world_cam, depth_map, K, window_size=5
            )
            if residual is not None:
                detailed_residuals.append((int(marker_id), corner_idx, residual))

    return detailed_residuals


def verify_extrinsics_with_depth(
    T_world_cam: np.ndarray,
    marker_corners_world: Dict[int, np.ndarray],
    depth_map: np.ndarray,
    K: np.ndarray,
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 50,
) -> DepthVerificationResult:
    detailed_residuals = compute_marker_corner_residuals(
        T_world_cam,
        marker_corners_world,
        depth_map,
        K,
        confidence_map,
        confidence_thresh,
    )

    residuals = [r[2] for r in detailed_residuals]
    n_total = sum(len(corners) for corners in marker_corners_world.values())
    n_valid = len(residuals)

    if n_valid == 0:
        return DepthVerificationResult(
            residuals=[],
            rmse=0.0,
            mean_abs=0.0,
            median=0.0,
            depth_normalized_rmse=0.0,
            n_valid=0,
            n_total=n_total,
        )

    residuals_array = np.array(residuals)
    rmse = float(np.sqrt(np.mean(residuals_array**2)))
    mean_abs = float(np.mean(np.abs(residuals_array)))
    median = float(np.median(residuals_array))

    depth_normalized_rmse = 0.0
    if n_valid > 0:
        depths = []
        for marker_id, corners in marker_corners_world.items():
            for corner in corners:
                T_cam_world = invert_transform(T_world_cam)
                P_world_h = np.append(corner, 1.0)
                P_cam = (T_cam_world @ P_world_h)[:3]
                if P_cam[2] > 0:
                    depths.append(P_cam[2])
        if depths:
            mean_depth = np.mean(depths)
            if mean_depth > 0:
                depth_normalized_rmse = float(rmse / mean_depth)

    return DepthVerificationResult(
        residuals=detailed_residuals,
        rmse=rmse,
        mean_abs=mean_abs,
        median=median,
        depth_normalized_rmse=depth_normalized_rmse,
        n_valid=n_valid,
        n_total=n_total,
    )