feat(calibrate): integrate multi-frame depth pooling with --depth-pool-size flag

This commit is contained in:
2026-02-07 08:10:01 +00:00
parent dad1f2a69f
commit 4fc8de4bdc
6 changed files with 774 additions and 82 deletions
+205 -45
View File
@@ -24,6 +24,7 @@ from aruco.pose_averaging import PoseAccumulator
from aruco.preview import draw_detected_markers, draw_pose_axes, show_preview
from aruco.depth_verify import verify_extrinsics_with_depth
from aruco.depth_refine import refine_extrinsics_with_depth
from aruco.depth_pool import pool_depth_maps
from aruco.alignment import (
get_face_normal_from_geometry,
detect_ground_face,
@@ -117,13 +118,14 @@ def score_frame(
def apply_depth_verify_refine_postprocess(
results: Dict[str, Any],
verification_frames: Dict[str, Any],
verification_frames: Dict[int, List[Dict[str, Any]]],
marker_geometry: Dict[int, Any],
camera_matrices: Dict[str, Any],
camera_matrices: Dict[int, Any],
verify_depth: bool,
refine_depth: bool,
use_confidence_weights: bool,
depth_confidence_threshold: int,
depth_pool_size: int = 1,
report_csv_path: Optional[str] = None,
) -> Tuple[Dict[str, Any], List[List[Any]]]:
"""
@@ -137,12 +139,117 @@ def apply_depth_verify_refine_postprocess(
click.echo("\nRunning depth verification/refinement on computed extrinsics...")
for serial, vf in verification_frames.items():
for serial, vfs in verification_frames.items():
if str(serial) not in results:
continue
frame = vf["frame"]
ids = vf["ids"]
# Extract depth maps and confidence maps from the top-N frames
# vfs is already sorted by score descending and truncated to depth_pool_size
depth_maps = []
confidence_maps = []
# We need at least one frame with depth
valid_frames = []
for vf in vfs:
frame = vf["frame"]
if frame.depth_map is not None:
depth_maps.append(frame.depth_map)
confidence_maps.append(frame.confidence_map)
valid_frames.append(vf)
if not valid_frames:
click.echo(
f"Camera {serial}: No frames with depth map available for verification."
)
continue
# Use the best frame (first in the list) for marker IDs and corners
# This ensures we use the highest quality detection for geometry
best_vf = valid_frames[0]
ids = best_vf["ids"]
# Determine if we should pool or use single frame
use_pooling = depth_pool_size > 1 and len(depth_maps) > 1
if use_pooling:
try:
pooled_depth, pooled_conf = pool_depth_maps(
depth_maps,
confidence_maps,
confidence_thresh=depth_confidence_threshold,
)
# Check if pooling resulted in a valid map (enough valid pixels)
# We'll do a quick check against the best single frame
                # If the pooled map has significantly fewer valid pixels, fall back
best_depth = depth_maps[0]
best_conf = confidence_maps[0]
# Simple validity check (finite and > 0)
# We don't need to be perfect here, just catch catastrophic pooling failure
n_valid_pooled = np.count_nonzero(
np.isfinite(pooled_depth) & (pooled_depth > 0)
)
# For best frame, we also respect confidence threshold if provided
mask_best = np.isfinite(best_depth) & (best_depth > 0)
if best_conf is not None:
mask_best &= best_conf <= depth_confidence_threshold
n_valid_best = np.count_nonzero(mask_best)
                # If the pooled result is much worse (e.g. < 50% of the valid points of the single frame), fall back
# This can happen if frames are misaligned or pooling logic fails
if n_valid_pooled < (n_valid_best * 0.5):
click.echo(
f"Camera {serial}: Pooled depth has too few valid points ({n_valid_pooled} vs {n_valid_best}). "
"Falling back to best single frame."
)
final_depth = best_depth
final_conf = best_conf
pool_metadata = {
"pool_size_requested": depth_pool_size,
"pool_size_actual": len(depth_maps),
"pooled": False,
"fallback_reason": "insufficient_valid_points",
}
else:
final_depth = pooled_depth
final_conf = pooled_conf
pool_metadata = {
"pool_size_requested": depth_pool_size,
"pool_size_actual": len(depth_maps),
"pooled": True,
}
click.echo(
f"Camera {serial}: Using pooled depth from {len(depth_maps)} frames."
)
except Exception as e:
click.echo(
f"Camera {serial}: Pooling failed with error: {e}. Falling back to single frame.",
err=True,
)
final_depth = depth_maps[0]
final_conf = confidence_maps[0]
pool_metadata = {
"pool_size_requested": depth_pool_size,
"pool_size_actual": len(depth_maps),
"pooled": False,
"fallback_reason": f"exception: {str(e)}",
}
else:
# Single frame case (N=1 or only 1 available)
final_depth = depth_maps[0]
final_conf = confidence_maps[0]
# Only add metadata if pooling was requested but not possible due to lack of frames
if depth_pool_size > 1:
pool_metadata = {
"pool_size_requested": depth_pool_size,
"pool_size_actual": len(depth_maps),
"pooled": False,
"fallback_reason": "insufficient_frames",
}
else:
pool_metadata = None
# Use the FINAL COMPUTED POSE for verification
pose_str = results[str(serial)]["pose"]
@@ -155,13 +262,13 @@ def apply_depth_verify_refine_postprocess(
if int(mid) in marker_geometry
}
if marker_corners_world and frame.depth_map is not None:
if marker_corners_world and final_depth is not None:
verify_res = verify_extrinsics_with_depth(
T_mean,
marker_corners_world,
frame.depth_map,
final_depth,
cam_matrix,
confidence_map=frame.confidence_map,
confidence_map=final_conf,
confidence_thresh=depth_confidence_threshold,
)
@@ -174,6 +281,9 @@ def apply_depth_verify_refine_postprocess(
"n_total": verify_res.n_total,
}
if pool_metadata:
results[str(serial)]["depth_pool"] = pool_metadata
click.echo(
f"Camera {serial} verification: RMSE={verify_res.rmse:.3f}m, "
f"Valid={verify_res.n_valid}/{verify_res.n_total}"
@@ -189,20 +299,18 @@ def apply_depth_verify_refine_postprocess(
T_refined, refine_stats = refine_extrinsics_with_depth(
T_mean,
marker_corners_world,
frame.depth_map,
final_depth,
cam_matrix,
confidence_map=frame.confidence_map
if use_confidence_weights
else None,
confidence_map=(final_conf if use_confidence_weights else None),
confidence_thresh=depth_confidence_threshold,
)
verify_res_post = verify_extrinsics_with_depth(
T_refined,
marker_corners_world,
frame.depth_map,
final_depth,
cam_matrix,
confidence_map=frame.confidence_map,
confidence_map=final_conf,
confidence_thresh=depth_confidence_threshold,
)
@@ -218,6 +326,9 @@ def apply_depth_verify_refine_postprocess(
"n_total": verify_res_post.n_total,
}
if pool_metadata:
results[str(serial)]["depth_pool"] = pool_metadata
improvement = verify_res.rmse - verify_res_post.rmse
results[str(serial)]["refine_depth"]["improvement_rmse"] = (
improvement
@@ -260,10 +371,10 @@ def apply_depth_verify_refine_postprocess(
def run_benchmark_matrix(
results: Dict[str, Any],
verification_frames: Dict[Any, Any],
first_frames: Dict[Any, Any],
verification_frames: Dict[int, List[Dict[str, Any]]],
first_frames: Dict[int, Dict[str, Any]],
marker_geometry: Dict[int, Any],
camera_matrices: Dict[Any, Any],
camera_matrices: Dict[int, Any],
depth_confidence_threshold: int,
) -> Dict[str, Any]:
"""
@@ -318,11 +429,10 @@ def run_benchmark_matrix(
for config in configs:
name = config["name"]
use_best = config["use_best_frame"]
vf = (
verification_frames[serial_int]
if use_best
else first_frames[serial_int]
)
if use_best:
vf = verification_frames[serial_int][0]
else:
vf = first_frames[serial_int]
frame = vf["frame"]
ids = vf["ids"]
@@ -351,9 +461,9 @@ def run_benchmark_matrix(
marker_corners_world,
frame.depth_map,
cam_matrix,
confidence_map=frame.confidence_map
if config["use_confidence"]
else None,
confidence_map=(
frame.confidence_map if config["use_confidence"] else None
),
confidence_thresh=depth_confidence_threshold,
loss=str(config["loss"]),
f_scale=0.1,
@@ -430,9 +540,9 @@ def run_benchmark_matrix(
)
@click.option(
"--depth-mode",
default="NEURAL",
type=click.Choice(["NEURAL", "ULTRA", "PERFORMANCE", "NONE"]),
help="Depth computation mode.",
default=None,
type=click.Choice(["NEURAL", "NEURAL_PLUS", "NEURAL_LIGHT", "NONE"]),
help="Depth computation mode. Defaults to NEURAL_PLUS if depth verification/refinement is enabled, otherwise NONE.",
)
@click.option(
"--depth-confidence-threshold",
@@ -440,6 +550,12 @@ def run_benchmark_matrix(
type=int,
help="Confidence threshold for depth filtering (lower = more confident).",
)
@click.option(
"--depth-pool-size",
default=1,
type=click.IntRange(min=1, max=10),
help="Number of best frames to pool for depth verification/refinement (1=single best frame).",
)
@click.option(
"--report-csv", type=click.Path(), help="Optional path for per-frame CSV report."
)
@@ -494,8 +610,9 @@ def main(
verify_depth: bool,
refine_depth: bool,
use_confidence_weights: bool,
depth_mode: str,
depth_mode: str | None,
depth_confidence_threshold: int,
depth_pool_size: int,
report_csv: str | None,
auto_align: bool,
ground_face: str | None,
@@ -519,14 +636,18 @@ def main(
depth_mode_map = {
"NEURAL": sl.DEPTH_MODE.NEURAL,
"ULTRA": sl.DEPTH_MODE.ULTRA,
"PERFORMANCE": sl.DEPTH_MODE.PERFORMANCE,
"NEURAL_PLUS": sl.DEPTH_MODE.NEURAL_PLUS,
"NEURAL_LIGHT": sl.DEPTH_MODE.NEURAL_LIGHT,
"NONE": sl.DEPTH_MODE.NONE,
}
sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)
if not (verify_depth or refine_depth or benchmark_matrix):
sl_depth_mode = sl.DEPTH_MODE.NONE
if depth_mode is None:
if verify_depth or refine_depth or benchmark_matrix:
sl_depth_mode = sl.DEPTH_MODE.NEURAL_PLUS
else:
sl_depth_mode = sl.DEPTH_MODE.NONE
else:
sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)
# Expand SVO paths (files or directories)
expanded_svo = []
@@ -617,9 +738,9 @@ def main(
}
# Store verification frames for post-process check
verification_frames = {}
verification_frames: Dict[int, List[Dict[str, Any]]] = {}
# Store first valid frame for benchmarking
first_frames = {}
first_frames: Dict[int, Dict[str, Any]] = {}
# Track all visible marker IDs for heuristic ground detection
all_visible_ids = set()
@@ -696,21 +817,29 @@ def main(
"frame_index": frame_count,
}
best_so_far = verification_frames.get(serial)
if (
best_so_far is None
or current_score > best_so_far["score"]
):
verification_frames[serial] = {
if serial not in verification_frames:
verification_frames[serial] = []
verification_frames[serial].append(
{
"frame": frame,
"ids": ids,
"corners": corners,
"score": current_score,
"frame_index": frame_count,
}
logger.debug(
f"Cam {serial}: New best frame {frame_count} with score {current_score:.2f}"
)
)
# Sort by score descending and truncate to pool size
verification_frames[serial].sort(
key=lambda x: x["score"], reverse=True
)
verification_frames[serial] = verification_frames[
serial
][:depth_pool_size]
logger.debug(
f"Cam {serial}: Updated verification pool (size {len(verification_frames[serial])}), top score {verification_frames[serial][0]['score']:.2f}"
)
accumulators[serial].add_pose(
T_world_cam, reproj_err, frame_count
@@ -794,6 +923,7 @@ def main(
refine_depth,
use_confidence_weights,
depth_confidence_threshold,
depth_pool_size,
report_csv,
)
@@ -890,6 +1020,36 @@ def main(
)
raise SystemExit(1)
# Verify depth-quality outliers if depth verification ran
depth_rmse_by_cam = {}
for serial, data in results.items():
depth_metrics = data.get("depth_verify_post") or data.get("depth_verify")
if depth_metrics and "rmse" in depth_metrics:
depth_rmse_by_cam[serial] = float(depth_metrics["rmse"])
if len(depth_rmse_by_cam) >= 2:
rmse_values = sorted(depth_rmse_by_cam.values())
median_rmse = float(np.median(np.array(rmse_values)))
outlier_factor = 2.5
min_outlier_rmse_m = 0.08
failed_depth_cams = []
for serial, rmse in depth_rmse_by_cam.items():
if rmse > max(min_outlier_rmse_m, outlier_factor * median_rmse):
failed_depth_cams.append((serial, rmse))
if failed_depth_cams:
failed_str = ", ".join(
f"{serial}:{rmse:.3f}m"
for serial, rmse in sorted(failed_depth_cams)
)
click.echo(
"Error: Calibration failed depth outlier self-check "
f"(median RMSE={median_rmse:.3f}m, outliers={failed_str}).",
err=True,
)
raise SystemExit(1)
# Simple check: verify distance between cameras if multiple
if len(results) >= 2:
serials_list = sorted(results.keys())