feat(calibrate): integrate multi-frame depth pooling with --depth-pool-size flag
This commit is contained in:
@@ -24,6 +24,7 @@ from aruco.pose_averaging import PoseAccumulator
|
||||
from aruco.preview import draw_detected_markers, draw_pose_axes, show_preview
|
||||
from aruco.depth_verify import verify_extrinsics_with_depth
|
||||
from aruco.depth_refine import refine_extrinsics_with_depth
|
||||
from aruco.depth_pool import pool_depth_maps
|
||||
from aruco.alignment import (
|
||||
get_face_normal_from_geometry,
|
||||
detect_ground_face,
|
||||
@@ -117,13 +118,14 @@ def score_frame(
|
||||
|
||||
def apply_depth_verify_refine_postprocess(
|
||||
results: Dict[str, Any],
|
||||
verification_frames: Dict[str, Any],
|
||||
verification_frames: Dict[int, List[Dict[str, Any]]],
|
||||
marker_geometry: Dict[int, Any],
|
||||
camera_matrices: Dict[str, Any],
|
||||
camera_matrices: Dict[int, Any],
|
||||
verify_depth: bool,
|
||||
refine_depth: bool,
|
||||
use_confidence_weights: bool,
|
||||
depth_confidence_threshold: int,
|
||||
depth_pool_size: int = 1,
|
||||
report_csv_path: Optional[str] = None,
|
||||
) -> Tuple[Dict[str, Any], List[List[Any]]]:
|
||||
"""
|
||||
@@ -137,12 +139,117 @@ def apply_depth_verify_refine_postprocess(
|
||||
|
||||
click.echo("\nRunning depth verification/refinement on computed extrinsics...")
|
||||
|
||||
for serial, vf in verification_frames.items():
|
||||
for serial, vfs in verification_frames.items():
|
||||
if str(serial) not in results:
|
||||
continue
|
||||
|
||||
frame = vf["frame"]
|
||||
ids = vf["ids"]
|
||||
# Extract depth maps and confidence maps from the top-N frames
|
||||
# vfs is already sorted by score descending and truncated to depth_pool_size
|
||||
depth_maps = []
|
||||
confidence_maps = []
|
||||
|
||||
# We need at least one frame with depth
|
||||
valid_frames = []
|
||||
for vf in vfs:
|
||||
frame = vf["frame"]
|
||||
if frame.depth_map is not None:
|
||||
depth_maps.append(frame.depth_map)
|
||||
confidence_maps.append(frame.confidence_map)
|
||||
valid_frames.append(vf)
|
||||
|
||||
if not valid_frames:
|
||||
click.echo(
|
||||
f"Camera {serial}: No frames with depth map available for verification."
|
||||
)
|
||||
continue
|
||||
|
||||
# Use the best frame (first in the list) for marker IDs and corners
|
||||
# This ensures we use the highest quality detection for geometry
|
||||
best_vf = valid_frames[0]
|
||||
ids = best_vf["ids"]
|
||||
|
||||
# Determine if we should pool or use single frame
|
||||
use_pooling = depth_pool_size > 1 and len(depth_maps) > 1
|
||||
|
||||
if use_pooling:
|
||||
try:
|
||||
pooled_depth, pooled_conf = pool_depth_maps(
|
||||
depth_maps,
|
||||
confidence_maps,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
# Check if pooling resulted in a valid map (enough valid pixels)
|
||||
# We'll do a quick check against the best single frame
|
||||
# If pooled map has significantly fewer valid pixels, fallback
|
||||
best_depth = depth_maps[0]
|
||||
best_conf = confidence_maps[0]
|
||||
|
||||
# Simple validity check (finite and > 0)
|
||||
# We don't need to be perfect here, just catch catastrophic pooling failure
|
||||
n_valid_pooled = np.count_nonzero(
|
||||
np.isfinite(pooled_depth) & (pooled_depth > 0)
|
||||
)
|
||||
|
||||
# For best frame, we also respect confidence threshold if provided
|
||||
mask_best = np.isfinite(best_depth) & (best_depth > 0)
|
||||
if best_conf is not None:
|
||||
mask_best &= best_conf <= depth_confidence_threshold
|
||||
n_valid_best = np.count_nonzero(mask_best)
|
||||
|
||||
# If pooled result is much worse (e.g. < 50% of valid points of single frame), fallback
|
||||
# This can happen if frames are misaligned or pooling logic fails
|
||||
if n_valid_pooled < (n_valid_best * 0.5):
|
||||
click.echo(
|
||||
f"Camera {serial}: Pooled depth has too few valid points ({n_valid_pooled} vs {n_valid_best}). "
|
||||
"Falling back to best single frame."
|
||||
)
|
||||
final_depth = best_depth
|
||||
final_conf = best_conf
|
||||
pool_metadata = {
|
||||
"pool_size_requested": depth_pool_size,
|
||||
"pool_size_actual": len(depth_maps),
|
||||
"pooled": False,
|
||||
"fallback_reason": "insufficient_valid_points",
|
||||
}
|
||||
else:
|
||||
final_depth = pooled_depth
|
||||
final_conf = pooled_conf
|
||||
pool_metadata = {
|
||||
"pool_size_requested": depth_pool_size,
|
||||
"pool_size_actual": len(depth_maps),
|
||||
"pooled": True,
|
||||
}
|
||||
click.echo(
|
||||
f"Camera {serial}: Using pooled depth from {len(depth_maps)} frames."
|
||||
)
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
f"Camera {serial}: Pooling failed with error: {e}. Falling back to single frame.",
|
||||
err=True,
|
||||
)
|
||||
final_depth = depth_maps[0]
|
||||
final_conf = confidence_maps[0]
|
||||
pool_metadata = {
|
||||
"pool_size_requested": depth_pool_size,
|
||||
"pool_size_actual": len(depth_maps),
|
||||
"pooled": False,
|
||||
"fallback_reason": f"exception: {str(e)}",
|
||||
}
|
||||
else:
|
||||
# Single frame case (N=1 or only 1 available)
|
||||
final_depth = depth_maps[0]
|
||||
final_conf = confidence_maps[0]
|
||||
# Only add metadata if pooling was requested but not possible due to lack of frames
|
||||
if depth_pool_size > 1:
|
||||
pool_metadata = {
|
||||
"pool_size_requested": depth_pool_size,
|
||||
"pool_size_actual": len(depth_maps),
|
||||
"pooled": False,
|
||||
"fallback_reason": "insufficient_frames",
|
||||
}
|
||||
else:
|
||||
pool_metadata = None
|
||||
|
||||
# Use the FINAL COMPUTED POSE for verification
|
||||
pose_str = results[str(serial)]["pose"]
|
||||
@@ -155,13 +262,13 @@ def apply_depth_verify_refine_postprocess(
|
||||
if int(mid) in marker_geometry
|
||||
}
|
||||
|
||||
if marker_corners_world and frame.depth_map is not None:
|
||||
if marker_corners_world and final_depth is not None:
|
||||
verify_res = verify_extrinsics_with_depth(
|
||||
T_mean,
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
final_depth,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map,
|
||||
confidence_map=final_conf,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
@@ -174,6 +281,9 @@ def apply_depth_verify_refine_postprocess(
|
||||
"n_total": verify_res.n_total,
|
||||
}
|
||||
|
||||
if pool_metadata:
|
||||
results[str(serial)]["depth_pool"] = pool_metadata
|
||||
|
||||
click.echo(
|
||||
f"Camera {serial} verification: RMSE={verify_res.rmse:.3f}m, "
|
||||
f"Valid={verify_res.n_valid}/{verify_res.n_total}"
|
||||
@@ -189,20 +299,18 @@ def apply_depth_verify_refine_postprocess(
|
||||
T_refined, refine_stats = refine_extrinsics_with_depth(
|
||||
T_mean,
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
final_depth,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map
|
||||
if use_confidence_weights
|
||||
else None,
|
||||
confidence_map=(final_conf if use_confidence_weights else None),
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
verify_res_post = verify_extrinsics_with_depth(
|
||||
T_refined,
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
final_depth,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map,
|
||||
confidence_map=final_conf,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
@@ -218,6 +326,9 @@ def apply_depth_verify_refine_postprocess(
|
||||
"n_total": verify_res_post.n_total,
|
||||
}
|
||||
|
||||
if pool_metadata:
|
||||
results[str(serial)]["depth_pool"] = pool_metadata
|
||||
|
||||
improvement = verify_res.rmse - verify_res_post.rmse
|
||||
results[str(serial)]["refine_depth"]["improvement_rmse"] = (
|
||||
improvement
|
||||
@@ -260,10 +371,10 @@ def apply_depth_verify_refine_postprocess(
|
||||
|
||||
def run_benchmark_matrix(
|
||||
results: Dict[str, Any],
|
||||
verification_frames: Dict[Any, Any],
|
||||
first_frames: Dict[Any, Any],
|
||||
verification_frames: Dict[int, List[Dict[str, Any]]],
|
||||
first_frames: Dict[int, Dict[str, Any]],
|
||||
marker_geometry: Dict[int, Any],
|
||||
camera_matrices: Dict[Any, Any],
|
||||
camera_matrices: Dict[int, Any],
|
||||
depth_confidence_threshold: int,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -318,11 +429,10 @@ def run_benchmark_matrix(
|
||||
for config in configs:
|
||||
name = config["name"]
|
||||
use_best = config["use_best_frame"]
|
||||
vf = (
|
||||
verification_frames[serial_int]
|
||||
if use_best
|
||||
else first_frames[serial_int]
|
||||
)
|
||||
if use_best:
|
||||
vf = verification_frames[serial_int][0]
|
||||
else:
|
||||
vf = first_frames[serial_int]
|
||||
|
||||
frame = vf["frame"]
|
||||
ids = vf["ids"]
|
||||
@@ -351,9 +461,9 @@ def run_benchmark_matrix(
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map
|
||||
if config["use_confidence"]
|
||||
else None,
|
||||
confidence_map=(
|
||||
frame.confidence_map if config["use_confidence"] else None
|
||||
),
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
loss=str(config["loss"]),
|
||||
f_scale=0.1,
|
||||
@@ -430,9 +540,9 @@ def run_benchmark_matrix(
|
||||
)
|
||||
@click.option(
|
||||
"--depth-mode",
|
||||
default="NEURAL",
|
||||
type=click.Choice(["NEURAL", "ULTRA", "PERFORMANCE", "NONE"]),
|
||||
help="Depth computation mode.",
|
||||
default=None,
|
||||
type=click.Choice(["NEURAL", "NEURAL_PLUS", "NEURAL_LIGHT", "NONE"]),
|
||||
help="Depth computation mode. Defaults to NEURAL_PLUS if depth verification/refinement is enabled, otherwise NONE.",
|
||||
)
|
||||
@click.option(
|
||||
"--depth-confidence-threshold",
|
||||
@@ -440,6 +550,12 @@ def run_benchmark_matrix(
|
||||
type=int,
|
||||
help="Confidence threshold for depth filtering (lower = more confident).",
|
||||
)
|
||||
@click.option(
|
||||
"--depth-pool-size",
|
||||
default=1,
|
||||
type=click.IntRange(min=1, max=10),
|
||||
help="Number of best frames to pool for depth verification/refinement (1=single best frame).",
|
||||
)
|
||||
@click.option(
|
||||
"--report-csv", type=click.Path(), help="Optional path for per-frame CSV report."
|
||||
)
|
||||
@@ -494,8 +610,9 @@ def main(
|
||||
verify_depth: bool,
|
||||
refine_depth: bool,
|
||||
use_confidence_weights: bool,
|
||||
depth_mode: str,
|
||||
depth_mode: str | None,
|
||||
depth_confidence_threshold: int,
|
||||
depth_pool_size: int,
|
||||
report_csv: str | None,
|
||||
auto_align: bool,
|
||||
ground_face: str | None,
|
||||
@@ -519,14 +636,18 @@ def main(
|
||||
|
||||
depth_mode_map = {
|
||||
"NEURAL": sl.DEPTH_MODE.NEURAL,
|
||||
"ULTRA": sl.DEPTH_MODE.ULTRA,
|
||||
"PERFORMANCE": sl.DEPTH_MODE.PERFORMANCE,
|
||||
"NEURAL_PLUS": sl.DEPTH_MODE.NEURAL_PLUS,
|
||||
"NEURAL_LIGHT": sl.DEPTH_MODE.NEURAL_LIGHT,
|
||||
"NONE": sl.DEPTH_MODE.NONE,
|
||||
}
|
||||
sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)
|
||||
|
||||
if not (verify_depth or refine_depth or benchmark_matrix):
|
||||
sl_depth_mode = sl.DEPTH_MODE.NONE
|
||||
if depth_mode is None:
|
||||
if verify_depth or refine_depth or benchmark_matrix:
|
||||
sl_depth_mode = sl.DEPTH_MODE.NEURAL_PLUS
|
||||
else:
|
||||
sl_depth_mode = sl.DEPTH_MODE.NONE
|
||||
else:
|
||||
sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)
|
||||
|
||||
# Expand SVO paths (files or directories)
|
||||
expanded_svo = []
|
||||
@@ -617,9 +738,9 @@ def main(
|
||||
}
|
||||
|
||||
# Store verification frames for post-process check
|
||||
verification_frames = {}
|
||||
verification_frames: Dict[int, List[Dict[str, Any]]] = {}
|
||||
# Store first valid frame for benchmarking
|
||||
first_frames = {}
|
||||
first_frames: Dict[int, Dict[str, Any]] = {}
|
||||
|
||||
# Track all visible marker IDs for heuristic ground detection
|
||||
all_visible_ids = set()
|
||||
@@ -696,21 +817,29 @@ def main(
|
||||
"frame_index": frame_count,
|
||||
}
|
||||
|
||||
best_so_far = verification_frames.get(serial)
|
||||
if (
|
||||
best_so_far is None
|
||||
or current_score > best_so_far["score"]
|
||||
):
|
||||
verification_frames[serial] = {
|
||||
if serial not in verification_frames:
|
||||
verification_frames[serial] = []
|
||||
|
||||
verification_frames[serial].append(
|
||||
{
|
||||
"frame": frame,
|
||||
"ids": ids,
|
||||
"corners": corners,
|
||||
"score": current_score,
|
||||
"frame_index": frame_count,
|
||||
}
|
||||
logger.debug(
|
||||
f"Cam {serial}: New best frame {frame_count} with score {current_score:.2f}"
|
||||
)
|
||||
)
|
||||
# Sort by score descending and truncate to pool size
|
||||
verification_frames[serial].sort(
|
||||
key=lambda x: x["score"], reverse=True
|
||||
)
|
||||
verification_frames[serial] = verification_frames[
|
||||
serial
|
||||
][:depth_pool_size]
|
||||
|
||||
logger.debug(
|
||||
f"Cam {serial}: Updated verification pool (size {len(verification_frames[serial])}), top score {verification_frames[serial][0]['score']:.2f}"
|
||||
)
|
||||
|
||||
accumulators[serial].add_pose(
|
||||
T_world_cam, reproj_err, frame_count
|
||||
@@ -794,6 +923,7 @@ def main(
|
||||
refine_depth,
|
||||
use_confidence_weights,
|
||||
depth_confidence_threshold,
|
||||
depth_pool_size,
|
||||
report_csv,
|
||||
)
|
||||
|
||||
@@ -890,6 +1020,36 @@ def main(
|
||||
)
|
||||
raise SystemExit(1)
|
||||
|
||||
# Verify depth-quality outliers if depth verification ran
|
||||
depth_rmse_by_cam = {}
|
||||
for serial, data in results.items():
|
||||
depth_metrics = data.get("depth_verify_post") or data.get("depth_verify")
|
||||
if depth_metrics and "rmse" in depth_metrics:
|
||||
depth_rmse_by_cam[serial] = float(depth_metrics["rmse"])
|
||||
|
||||
if len(depth_rmse_by_cam) >= 2:
|
||||
rmse_values = sorted(depth_rmse_by_cam.values())
|
||||
median_rmse = float(np.median(np.array(rmse_values)))
|
||||
outlier_factor = 2.5
|
||||
min_outlier_rmse_m = 0.08
|
||||
|
||||
failed_depth_cams = []
|
||||
for serial, rmse in depth_rmse_by_cam.items():
|
||||
if rmse > max(min_outlier_rmse_m, outlier_factor * median_rmse):
|
||||
failed_depth_cams.append((serial, rmse))
|
||||
|
||||
if failed_depth_cams:
|
||||
failed_str = ", ".join(
|
||||
f"{serial}:{rmse:.3f}m"
|
||||
for serial, rmse in sorted(failed_depth_cams)
|
||||
)
|
||||
click.echo(
|
||||
"Error: Calibration failed depth outlier self-check "
|
||||
f"(median RMSE={median_rmse:.3f}m, outliers={failed_str}).",
|
||||
err=True,
|
||||
)
|
||||
raise SystemExit(1)
|
||||
|
||||
# Simple check: verify distance between cameras if multiple
|
||||
if len(results) >= 2:
|
||||
serials_list = sorted(results.keys())
|
||||
|
||||
Reference in New Issue
Block a user