diff --git a/py_workspace/aruco/depth_verify.py b/py_workspace/aruco/depth_verify.py
new file mode 100644
index 0000000..cb2ab09
--- /dev/null
+++ b/py_workspace/aruco/depth_verify.py
@@ -0,0 +1,141 @@
+import numpy as np
+from dataclasses import dataclass
+from typing import Optional, Dict
+from .pose_math import invert_transform
+
+
+@dataclass
+class DepthVerificationResult:
+    residuals: list
+    rmse: float
+    mean_abs: float
+    median: float
+    depth_normalized_rmse: float
+    n_valid: int
+    n_total: int
+
+
+def project_point_to_pixel(P_cam: np.ndarray, K: np.ndarray):
+    X, Y, Z = P_cam
+    if Z <= 0:
+        return None, None
+    u = int(round(K[0, 0] * X / Z + K[0, 2]))
+    v = int(round(K[1, 1] * Y / Z + K[1, 2]))
+    return u, v
+
+
+def compute_depth_residual(
+    P_world: np.ndarray,
+    T_world_cam: np.ndarray,
+    depth_map: np.ndarray,
+    K: np.ndarray,
+    window_size: int = 5,
+) -> Optional[float]:
+    T_cam_world = invert_transform(T_world_cam)
+    P_world_h = np.append(P_world, 1.0)
+    P_cam = (T_cam_world @ P_world_h)[:3]
+
+    z_predicted = P_cam[2]
+    if z_predicted <= 0:
+        return None
+
+    u, v = project_point_to_pixel(P_cam, K)
+    if u is None:
+        return None
+
+    h, w = depth_map.shape[:2]
+    if u < 0 or u >= w or v < 0 or v >= h:
+        return None
+
+    if window_size <= 1:
+        z_measured = depth_map[v, u]
+    else:
+        half = window_size // 2
+        x_min = max(0, u - half)
+        x_max = min(w, u + half + 1)
+        y_min = max(0, v - half)
+        y_max = min(h, v + half + 1)
+        window = depth_map[y_min:y_max, x_min:x_max]
+        valid_depths = window[np.isfinite(window) & (window > 0)]
+        if len(valid_depths) == 0:
+            return None
+        z_measured = np.median(valid_depths)
+
+    if not np.isfinite(z_measured) or z_measured <= 0:
+        return None
+
+    return float(z_measured - z_predicted)
+
+
+def verify_extrinsics_with_depth(
+    T_world_cam: np.ndarray,
+    marker_corners_world: Dict[int, np.ndarray],
+    depth_map: np.ndarray,
+    K: np.ndarray,
+    confidence_map: Optional[np.ndarray] = None,
+    confidence_thresh: float = 50,
+) -> DepthVerificationResult:
+    residuals = []
+    n_total = 0
+
+    for marker_id, corners in marker_corners_world.items():
+        for corner in corners:
+            n_total += 1
+
+            if confidence_map is not None:
+                u = int(round(corner[0]))
+                v = int(round(corner[1]))
+                h, w = confidence_map.shape[:2]
+                if 0 <= u < w and 0 <= v < h:
+                    confidence = confidence_map[v, u]
+                    if confidence > confidence_thresh:
+                        continue
+
+            residual = compute_depth_residual(
+                corner, T_world_cam, depth_map, K, window_size=5
+            )
+            if residual is not None:
+                residuals.append(residual)
+
+    n_valid = len(residuals)
+
+    if n_valid == 0:
+        return DepthVerificationResult(
+            residuals=[],
+            rmse=0.0,
+            mean_abs=0.0,
+            median=0.0,
+            depth_normalized_rmse=0.0,
+            n_valid=0,
+            n_total=n_total,
+        )
+
+    residuals_array = np.array(residuals)
+    rmse = float(np.sqrt(np.mean(residuals_array**2)))
+    mean_abs = float(np.mean(np.abs(residuals_array)))
+    median = float(np.median(residuals_array))
+
+    depth_normalized_rmse = 0.0
+    if n_valid > 0:
+        depths = []
+        for marker_id, corners in marker_corners_world.items():
+            for corner in corners:
+                T_cam_world = invert_transform(T_world_cam)
+                P_world_h = np.append(corner, 1.0)
+                P_cam = (T_cam_world @ P_world_h)[:3]
+                if P_cam[2] > 0:
+                    depths.append(P_cam[2])
+        if depths:
+            mean_depth = np.mean(depths)
+            if mean_depth > 0:
+                depth_normalized_rmse = rmse / mean_depth
+
+    return DepthVerificationResult(
+        residuals=residuals,
+        rmse=rmse,
+        mean_abs=mean_abs,
+        median=median,
+        depth_normalized_rmse=depth_normalized_rmse,
+        n_valid=n_valid,
+        n_total=n_total,
+    )