zed-playground/py_workspace/aruco/depth_refine.py

import numpy as np
from typing import Dict, Tuple, Any, Optional, List
from scipy.optimize import least_squares
from .pose_math import rvec_tvec_to_matrix, matrix_to_rvec_tvec
from .depth_verify import (
    compute_depth_residual,
    get_confidence_weight,
    project_point_to_pixel,
)


def extrinsics_to_params(T: np.ndarray) -> np.ndarray:
    rvec, tvec = matrix_to_rvec_tvec(T)
    return np.concatenate([rvec, tvec])


def params_to_extrinsics(params: np.ndarray) -> np.ndarray:
    rvec = params[:3]
    tvec = params[3:]
    return rvec_tvec_to_matrix(rvec, tvec)


def depth_residuals(
    params: np.ndarray,
    active_corners: List[np.ndarray],
    depth_map: np.ndarray,
    K: np.ndarray,
    initial_params: np.ndarray,
    reg_rot: float = 0.1,
    reg_trans: float = 1.0,
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 100.0,
) -> np.ndarray:
    T = params_to_extrinsics(params)
    residuals = []

    for corner in active_corners:
        residual = compute_depth_residual(corner, T, depth_map, K, window_size=5)
        if residual is None:
            # If a point becomes invalid during optimization, assign a large residual
            # to discourage the optimizer from going there, but keep the vector length fixed.
            # 10.0m is a safe "large" value for depth residuals.
            residual = 10.0
        else:
            if confidence_map is not None:
                u, v = project_point_to_pixel(
                    (np.linalg.inv(T) @ np.append(corner, 1.0))[:3], K
                )
                if u is not None and v is not None:
                    h, w = confidence_map.shape[:2]
                    if 0 <= u < w and 0 <= v < h:
                        conf = confidence_map[v, u]
                        weight = get_confidence_weight(conf, confidence_thresh)
                        residual *= np.sqrt(weight)
        residuals.append(residual)

    # Regularization as pseudo-residuals
    param_diff = params - initial_params

    # Rotation regularization (first 3 params)
    if reg_rot > 0:
        residuals.extend(param_diff[:3] * reg_rot)

    # Translation regularization (last 3 params)
    if reg_trans > 0:
        residuals.extend(param_diff[3:] * reg_trans)

    return np.array(residuals)


def refine_extrinsics_with_depth(
    T_initial: np.ndarray,
    marker_corners_world: Dict[int, np.ndarray],
    depth_map: np.ndarray,
    K: np.ndarray,
    max_translation_m: float = 0.05,
    max_rotation_deg: float = 5.0,
    regularization_weight: float = 0.1,
    loss: str = "soft_l1",
    f_scale: float = 0.1,
    reg_rot: float | None = None,
    reg_trans: float | None = None,
    confidence_map: Optional[np.ndarray] = None,
    confidence_thresh: float = 100.0,
) -> Tuple[np.ndarray, dict[str, Any]]:
    initial_params = extrinsics_to_params(T_initial)

    # Handle legacy regularization_weight if specific weights aren't provided
    # Default behavior: reg_rot = weight, reg_trans = weight * 10
    # This matches the plan's default of (0.1, 1.0) when weight is 0.1
    if reg_rot is None:
        reg_rot = regularization_weight
    if reg_trans is None:
        reg_trans = regularization_weight * 10.0

    # Check for valid depth points first
    active_corners = []
    n_points_total = 0
    n_depth_valid = 0
    n_confidence_rejected = 0

    for marker_id, corners in marker_corners_world.items():
        for corner in corners:
            n_points_total += 1
            res = compute_depth_residual(corner, T_initial, depth_map, K, window_size=5)
            if res is not None:
                n_depth_valid += 1
                is_confident = True
                if confidence_map is not None:
                    u, v = project_point_to_pixel(
                        (np.linalg.inv(T_initial) @ np.append(corner, 1.0))[:3], K
                    )
                    if u is not None and v is not None:
                        h, w = confidence_map.shape[:2]
                        if 0 <= u < w and 0 <= v < h:
                            conf = confidence_map[v, u]
                            weight = get_confidence_weight(conf, confidence_thresh)
                            if weight <= 0:
                                n_confidence_rejected += 1
                                is_confident = False

                if is_confident:
                    active_corners.append(corner)

    if not active_corners:
        return T_initial, {
            "success": False,
            "reason": "no_valid_depth_points",
            "initial_cost": 0.0,
            "final_cost": 0.0,
            "iterations": 0,
            "delta_rotation_deg": 0.0,
            "delta_translation_norm_m": 0.0,
            "termination_message": "No valid depth points found at marker corners",
            "termination_status": -1,
            "nfev": 0,
            "njev": 0,
            "optimality": 0.0,
            "n_active_bounds": 0,
            "active_mask": np.zeros(6, dtype=int),
            "cost": 0.0,
            "n_points_total": n_points_total,
            "n_depth_valid": n_depth_valid,
            "n_confidence_rejected": n_confidence_rejected,
            "loss_function": loss,
            "f_scale": f_scale,
        }

    max_rotation_rad = np.deg2rad(max_rotation_deg)

    lower_bounds = initial_params.copy()
    upper_bounds = initial_params.copy()

    lower_bounds[:3] -= max_rotation_rad
    upper_bounds[:3] += max_rotation_rad
    lower_bounds[3:] -= max_translation_m
    upper_bounds[3:] += max_translation_m

    bounds = (lower_bounds, upper_bounds)

    result = least_squares(
        depth_residuals,
        initial_params,
        args=(
            active_corners,
            depth_map,
            K,
            initial_params,
            reg_rot,
            reg_trans,
            confidence_map,
            confidence_thresh,
        ),
        method="trf",
        loss=loss,
        f_scale=f_scale,
        bounds=bounds,
        x_scale="jac",
        max_nfev=200,
    )

    T_refined = params_to_extrinsics(result.x)

    delta_params = result.x - initial_params
    delta_rotation_rad = np.linalg.norm(delta_params[:3])
    delta_rotation_deg = np.rad2deg(delta_rotation_rad)
    delta_translation = np.linalg.norm(delta_params[3:])

    # Calculate initial cost for comparison
    initial_residuals = depth_residuals(
        initial_params,
        active_corners,
        depth_map,
        K,
        initial_params,
        reg_rot,
        reg_trans,
        confidence_map,
        confidence_thresh,
    )
    initial_cost = 0.5 * np.sum(initial_residuals**2)

    stats = {
        "iterations": result.nfev,
        "success": result.success,
        "initial_cost": float(initial_cost),
        "final_cost": float(result.cost),
        "delta_rotation_deg": float(delta_rotation_deg),
        "delta_translation_norm_m": float(delta_translation),
        "termination_message": result.message,
        "termination_status": int(result.status),
        "nfev": int(result.nfev),
        "njev": int(getattr(result, "njev", 0)),
        "optimality": float(result.optimality),
        "n_active_bounds": int(np.sum(result.active_mask != 0)),
        "active_mask": result.active_mask.tolist()
        if hasattr(result.active_mask, "tolist")
        else result.active_mask,
        "cost": float(result.cost),
        "n_points_total": n_points_total,
        "n_depth_valid": n_depth_valid,
        "n_confidence_rejected": n_confidence_rejected,
        "loss_function": loss,
        "f_scale": f_scale,
    }

    return T_refined, stats