feat(calibration): robust depth refinement pipeline with diagnostics and benchmarking

This commit is contained in:
2026-02-07 05:51:07 +00:00
parent ead3796cdb
commit dad1f2a69f
17 changed files with 1876 additions and 261 deletions
+267 -9
View File
@@ -70,6 +70,51 @@ ARUCO_DICT_MAP = {
}
def score_frame(
n_markers: int,
reproj_err: float,
corners: np.ndarray,
depth_map: Optional[np.ndarray],
depth_confidence_threshold: int = 50,
confidence_map: Optional[np.ndarray] = None,
) -> float:
"""
Compute a quality score for a frame to select the best one for depth verification.
Higher is better.
"""
# Base score: more markers is better, lower reprojection error is better.
# We weight markers heavily as they provide more constraints.
score = n_markers * 100.0 - reproj_err
if depth_map is not None:
# Calculate depth validity ratio at marker corners.
# This ensures we pick a frame where depth is actually available where we need it.
valid_count = 0
total_count = 0
h, w = depth_map.shape[:2]
# corners shape is (N, 4, 2)
flat_corners = corners.reshape(-1, 2)
for pt in flat_corners:
x, y = int(round(pt[0])), int(round(pt[1]))
if 0 <= x < w and 0 <= y < h:
total_count += 1
d = depth_map[y, x]
if np.isfinite(d) and d > 0:
if confidence_map is not None:
# ZED confidence: lower is more confident
if confidence_map[y, x] <= depth_confidence_threshold:
valid_count += 1
else:
valid_count += 1
if total_count > 0:
depth_ratio = valid_count / total_count
score += depth_ratio * 50.0
return score
def apply_depth_verify_refine_postprocess(
results: Dict[str, Any],
verification_frames: Dict[str, Any],
@@ -77,6 +122,7 @@ def apply_depth_verify_refine_postprocess(
camera_matrices: Dict[str, Any],
verify_depth: bool,
refine_depth: bool,
use_confidence_weights: bool,
depth_confidence_threshold: int,
report_csv_path: Optional[str] = None,
) -> Tuple[Dict[str, Any], List[List[Any]]]:
@@ -145,6 +191,10 @@ def apply_depth_verify_refine_postprocess(
marker_corners_world,
frame.depth_map,
cam_matrix,
confidence_map=frame.confidence_map
if use_confidence_weights
else None,
confidence_thresh=depth_confidence_threshold,
)
verify_res_post = verify_extrinsics_with_depth(
@@ -180,6 +230,18 @@ def apply_depth_verify_refine_postprocess(
f"Trans={refine_stats['delta_translation_norm_m']:.3f}m"
)
# Warning gates
if improvement < 1e-4 and refine_stats["nfev"] > 5:
click.echo(
f" WARNING: Optimization ran for {refine_stats['nfev']} steps but improvement was negligible ({improvement:.6f}m).",
err=True,
)
if not refine_stats["success"] or refine_stats["nfev"] <= 1:
click.echo(
f" WARNING: Optimization might have failed or stalled. Success: {refine_stats['success']}, Steps: {refine_stats['nfev']}. Message: {refine_stats['termination_message']}",
err=True,
)
verify_res = verify_res_post
if report_csv_path:
@@ -196,6 +258,144 @@ def apply_depth_verify_refine_postprocess(
return results, csv_rows
def run_benchmark_matrix(
results: Dict[str, Any],
verification_frames: Dict[Any, Any],
first_frames: Dict[Any, Any],
marker_geometry: Dict[int, Any],
camera_matrices: Dict[Any, Any],
depth_confidence_threshold: int,
) -> Dict[str, Any]:
"""
Run benchmark matrix comparing 4 configurations:
1) baseline (linear loss, no confidence weights)
2) robust (soft_l1, f_scale=0.1, no confidence)
3) robust+confidence
4) robust+confidence+best-frame
"""
benchmark_results = {}
configs = [
{
"name": "baseline",
"loss": "linear",
"use_confidence": False,
"use_best_frame": False,
},
{
"name": "robust",
"loss": "soft_l1",
"use_confidence": False,
"use_best_frame": False,
},
{
"name": "robust+confidence",
"loss": "soft_l1",
"use_confidence": True,
"use_best_frame": False,
},
{
"name": "robust+confidence+best-frame",
"loss": "soft_l1",
"use_confidence": True,
"use_best_frame": True,
},
]
click.echo("\nRunning Benchmark Matrix...")
for serial in results.keys():
serial_int = int(serial)
if serial_int not in first_frames or serial_int not in verification_frames:
continue
cam_matrix = camera_matrices[serial_int]
pose_str = results[serial]["pose"]
T_initial = np.fromstring(pose_str, sep=" ").reshape(4, 4)
cam_bench = {}
for config in configs:
name = config["name"]
use_best = config["use_best_frame"]
vf = (
verification_frames[serial_int]
if use_best
else first_frames[serial_int]
)
frame = vf["frame"]
ids = vf["ids"]
marker_corners_world = {
int(mid): marker_geometry[int(mid)]
for mid in ids.flatten()
if int(mid) in marker_geometry
}
if not marker_corners_world or frame.depth_map is None:
continue
# Pre-refinement verification
verify_pre = verify_extrinsics_with_depth(
T_initial,
marker_corners_world,
frame.depth_map,
cam_matrix,
confidence_map=frame.confidence_map,
confidence_thresh=depth_confidence_threshold,
)
# Refinement
T_refined, refine_stats = refine_extrinsics_with_depth(
T_initial,
marker_corners_world,
frame.depth_map,
cam_matrix,
confidence_map=frame.confidence_map
if config["use_confidence"]
else None,
confidence_thresh=depth_confidence_threshold,
loss=str(config["loss"]),
f_scale=0.1,
)
# Post-refinement verification
verify_post = verify_extrinsics_with_depth(
T_refined,
marker_corners_world,
frame.depth_map,
cam_matrix,
confidence_map=frame.confidence_map,
confidence_thresh=depth_confidence_threshold,
)
cam_bench[name] = {
"rmse_pre": verify_pre.rmse,
"rmse_post": verify_post.rmse,
"improvement": verify_pre.rmse - verify_post.rmse,
"delta_rot_deg": refine_stats["delta_rotation_deg"],
"delta_trans_m": refine_stats["delta_translation_norm_m"],
"nfev": refine_stats["nfev"],
"success": refine_stats["success"],
"frame_index": vf["frame_index"],
}
benchmark_results[serial] = cam_bench
# Print summary table for this camera
click.echo(f"\nBenchmark Results for Camera {serial}:")
header = f"{'Config':<30} | {'RMSE Pre':<10} | {'RMSE Post':<10} | {'Improv':<10} | {'Iter':<5}"
click.echo(header)
click.echo("-" * len(header))
for name, stats in cam_bench.items():
click.echo(
f"{name:<30} | {stats['rmse_pre']:<10.4f} | {stats['rmse_post']:<10.4f} | "
f"{stats['improvement']:<10.4f} | {stats['nfev']:<5}"
)
return benchmark_results
@click.command()
@click.option("--svo", "-s", multiple=True, required=False, help="Path to SVO files.")
@click.option("--markers", "-m", required=True, help="Path to markers parquet file.")
@@ -223,6 +423,11 @@ def apply_depth_verify_refine_postprocess(
@click.option(
"--refine-depth/--no-refine-depth", default=False, help="Enable depth refinement."
)
@click.option(
"--use-confidence-weights/--no-confidence-weights",
default=False,
help="Use confidence-weighted residuals in depth refinement.",
)
@click.option(
"--depth-mode",
default="NEURAL",
@@ -272,6 +477,11 @@ def apply_depth_verify_refine_postprocess(
type=int,
help="Maximum number of samples to process before stopping.",
)
@click.option(
"--benchmark-matrix/--no-benchmark-matrix",
default=False,
help="Run benchmark matrix comparing different refinement configurations.",
)
def main(
svo: tuple[str, ...],
markers: str,
@@ -283,6 +493,7 @@ def main(
self_check: bool,
verify_depth: bool,
refine_depth: bool,
use_confidence_weights: bool,
depth_mode: str,
depth_confidence_threshold: int,
report_csv: str | None,
@@ -293,6 +504,7 @@ def main(
min_markers: int,
debug: bool,
max_samples: int | None,
benchmark_matrix: bool,
):
"""
Calibrate camera extrinsics relative to a global coordinate system defined by ArUco markers.
@@ -313,7 +525,7 @@ def main(
}
sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)
if not (verify_depth or refine_depth):
if not (verify_depth or refine_depth or benchmark_matrix):
sl_depth_mode = sl.DEPTH_MODE.NONE
# Expand SVO paths (files or directories)
@@ -406,6 +618,8 @@ def main(
# Store verification frames for post-process check
verification_frames = {}
# Store first valid frame for benchmarking
first_frames = {}
# Track all visible marker IDs for heuristic ground detection
all_visible_ids = set()
@@ -460,15 +674,43 @@ def main(
# We want T_world_from_cam
T_world_cam = invert_transform(T_cam_world)
# Save latest valid frame for verification
# Save best frame for verification based on scoring
if (
verify_depth or refine_depth
verify_depth or refine_depth or benchmark_matrix
) and frame.depth_map is not None:
verification_frames[serial] = {
"frame": frame,
"ids": ids,
"corners": corners,
}
current_score = score_frame(
n_markers,
reproj_err,
corners,
frame.depth_map,
depth_confidence_threshold,
frame.confidence_map,
)
if serial not in first_frames:
first_frames[serial] = {
"frame": frame,
"ids": ids,
"corners": corners,
"score": current_score,
"frame_index": frame_count,
}
best_so_far = verification_frames.get(serial)
if (
best_so_far is None
or current_score > best_so_far["score"]
):
verification_frames[serial] = {
"frame": frame,
"ids": ids,
"corners": corners,
"score": current_score,
"frame_index": frame_count,
}
logger.debug(
f"Cam {serial}: New best frame {frame_count} with score {current_score:.2f}"
)
accumulators[serial].add_pose(
T_world_cam, reproj_err, frame_count
@@ -550,11 +792,27 @@ def main(
camera_matrices,
verify_depth,
refine_depth,
use_confidence_weights,
depth_confidence_threshold,
report_csv,
)
# 5. Optional Ground Plane Alignment
# 5. Run Benchmark Matrix if requested
if benchmark_matrix:
benchmark_results = run_benchmark_matrix(
results,
verification_frames,
first_frames,
marker_geometry,
camera_matrices,
depth_confidence_threshold,
)
# Add to results for saving
for serial, bench in benchmark_results.items():
if serial in results:
results[serial]["benchmark"] = bench
# 6. Optional Ground Plane Alignment
if auto_align:
click.echo("\nPerforming ground plane alignment...")
target_face = ground_face