feat(calibration): robust depth refinement pipeline with diagnostics and benchmarking
This commit is contained in:
@@ -70,6 +70,51 @@ ARUCO_DICT_MAP = {
|
||||
}
|
||||
|
||||
|
||||
def score_frame(
|
||||
n_markers: int,
|
||||
reproj_err: float,
|
||||
corners: np.ndarray,
|
||||
depth_map: Optional[np.ndarray],
|
||||
depth_confidence_threshold: int = 50,
|
||||
confidence_map: Optional[np.ndarray] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Compute a quality score for a frame to select the best one for depth verification.
|
||||
Higher is better.
|
||||
"""
|
||||
# Base score: more markers is better, lower reprojection error is better.
|
||||
# We weight markers heavily as they provide more constraints.
|
||||
score = n_markers * 100.0 - reproj_err
|
||||
|
||||
if depth_map is not None:
|
||||
# Calculate depth validity ratio at marker corners.
|
||||
# This ensures we pick a frame where depth is actually available where we need it.
|
||||
valid_count = 0
|
||||
total_count = 0
|
||||
h, w = depth_map.shape[:2]
|
||||
|
||||
# corners shape is (N, 4, 2)
|
||||
flat_corners = corners.reshape(-1, 2)
|
||||
for pt in flat_corners:
|
||||
x, y = int(round(pt[0])), int(round(pt[1]))
|
||||
if 0 <= x < w and 0 <= y < h:
|
||||
total_count += 1
|
||||
d = depth_map[y, x]
|
||||
if np.isfinite(d) and d > 0:
|
||||
if confidence_map is not None:
|
||||
# ZED confidence: lower is more confident
|
||||
if confidence_map[y, x] <= depth_confidence_threshold:
|
||||
valid_count += 1
|
||||
else:
|
||||
valid_count += 1
|
||||
|
||||
if total_count > 0:
|
||||
depth_ratio = valid_count / total_count
|
||||
score += depth_ratio * 50.0
|
||||
|
||||
return score
|
||||
|
||||
|
||||
def apply_depth_verify_refine_postprocess(
|
||||
results: Dict[str, Any],
|
||||
verification_frames: Dict[str, Any],
|
||||
@@ -77,6 +122,7 @@ def apply_depth_verify_refine_postprocess(
|
||||
camera_matrices: Dict[str, Any],
|
||||
verify_depth: bool,
|
||||
refine_depth: bool,
|
||||
use_confidence_weights: bool,
|
||||
depth_confidence_threshold: int,
|
||||
report_csv_path: Optional[str] = None,
|
||||
) -> Tuple[Dict[str, Any], List[List[Any]]]:
|
||||
@@ -145,6 +191,10 @@ def apply_depth_verify_refine_postprocess(
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map
|
||||
if use_confidence_weights
|
||||
else None,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
verify_res_post = verify_extrinsics_with_depth(
|
||||
@@ -180,6 +230,18 @@ def apply_depth_verify_refine_postprocess(
|
||||
f"Trans={refine_stats['delta_translation_norm_m']:.3f}m"
|
||||
)
|
||||
|
||||
# Warning gates
|
||||
if improvement < 1e-4 and refine_stats["nfev"] > 5:
|
||||
click.echo(
|
||||
f" WARNING: Optimization ran for {refine_stats['nfev']} steps but improvement was negligible ({improvement:.6f}m).",
|
||||
err=True,
|
||||
)
|
||||
if not refine_stats["success"] or refine_stats["nfev"] <= 1:
|
||||
click.echo(
|
||||
f" WARNING: Optimization might have failed or stalled. Success: {refine_stats['success']}, Steps: {refine_stats['nfev']}. Message: {refine_stats['termination_message']}",
|
||||
err=True,
|
||||
)
|
||||
|
||||
verify_res = verify_res_post
|
||||
|
||||
if report_csv_path:
|
||||
@@ -196,6 +258,144 @@ def apply_depth_verify_refine_postprocess(
|
||||
return results, csv_rows
|
||||
|
||||
|
||||
def run_benchmark_matrix(
|
||||
results: Dict[str, Any],
|
||||
verification_frames: Dict[Any, Any],
|
||||
first_frames: Dict[Any, Any],
|
||||
marker_geometry: Dict[int, Any],
|
||||
camera_matrices: Dict[Any, Any],
|
||||
depth_confidence_threshold: int,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Run benchmark matrix comparing 4 configurations:
|
||||
1) baseline (linear loss, no confidence weights)
|
||||
2) robust (soft_l1, f_scale=0.1, no confidence)
|
||||
3) robust+confidence
|
||||
4) robust+confidence+best-frame
|
||||
"""
|
||||
benchmark_results = {}
|
||||
|
||||
configs = [
|
||||
{
|
||||
"name": "baseline",
|
||||
"loss": "linear",
|
||||
"use_confidence": False,
|
||||
"use_best_frame": False,
|
||||
},
|
||||
{
|
||||
"name": "robust",
|
||||
"loss": "soft_l1",
|
||||
"use_confidence": False,
|
||||
"use_best_frame": False,
|
||||
},
|
||||
{
|
||||
"name": "robust+confidence",
|
||||
"loss": "soft_l1",
|
||||
"use_confidence": True,
|
||||
"use_best_frame": False,
|
||||
},
|
||||
{
|
||||
"name": "robust+confidence+best-frame",
|
||||
"loss": "soft_l1",
|
||||
"use_confidence": True,
|
||||
"use_best_frame": True,
|
||||
},
|
||||
]
|
||||
|
||||
click.echo("\nRunning Benchmark Matrix...")
|
||||
|
||||
for serial in results.keys():
|
||||
serial_int = int(serial)
|
||||
if serial_int not in first_frames or serial_int not in verification_frames:
|
||||
continue
|
||||
|
||||
cam_matrix = camera_matrices[serial_int]
|
||||
pose_str = results[serial]["pose"]
|
||||
T_initial = np.fromstring(pose_str, sep=" ").reshape(4, 4)
|
||||
|
||||
cam_bench = {}
|
||||
|
||||
for config in configs:
|
||||
name = config["name"]
|
||||
use_best = config["use_best_frame"]
|
||||
vf = (
|
||||
verification_frames[serial_int]
|
||||
if use_best
|
||||
else first_frames[serial_int]
|
||||
)
|
||||
|
||||
frame = vf["frame"]
|
||||
ids = vf["ids"]
|
||||
marker_corners_world = {
|
||||
int(mid): marker_geometry[int(mid)]
|
||||
for mid in ids.flatten()
|
||||
if int(mid) in marker_geometry
|
||||
}
|
||||
|
||||
if not marker_corners_world or frame.depth_map is None:
|
||||
continue
|
||||
|
||||
# Pre-refinement verification
|
||||
verify_pre = verify_extrinsics_with_depth(
|
||||
T_initial,
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
# Refinement
|
||||
T_refined, refine_stats = refine_extrinsics_with_depth(
|
||||
T_initial,
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map
|
||||
if config["use_confidence"]
|
||||
else None,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
loss=str(config["loss"]),
|
||||
f_scale=0.1,
|
||||
)
|
||||
|
||||
# Post-refinement verification
|
||||
verify_post = verify_extrinsics_with_depth(
|
||||
T_refined,
|
||||
marker_corners_world,
|
||||
frame.depth_map,
|
||||
cam_matrix,
|
||||
confidence_map=frame.confidence_map,
|
||||
confidence_thresh=depth_confidence_threshold,
|
||||
)
|
||||
|
||||
cam_bench[name] = {
|
||||
"rmse_pre": verify_pre.rmse,
|
||||
"rmse_post": verify_post.rmse,
|
||||
"improvement": verify_pre.rmse - verify_post.rmse,
|
||||
"delta_rot_deg": refine_stats["delta_rotation_deg"],
|
||||
"delta_trans_m": refine_stats["delta_translation_norm_m"],
|
||||
"nfev": refine_stats["nfev"],
|
||||
"success": refine_stats["success"],
|
||||
"frame_index": vf["frame_index"],
|
||||
}
|
||||
|
||||
benchmark_results[serial] = cam_bench
|
||||
|
||||
# Print summary table for this camera
|
||||
click.echo(f"\nBenchmark Results for Camera {serial}:")
|
||||
header = f"{'Config':<30} | {'RMSE Pre':<10} | {'RMSE Post':<10} | {'Improv':<10} | {'Iter':<5}"
|
||||
click.echo(header)
|
||||
click.echo("-" * len(header))
|
||||
for name, stats in cam_bench.items():
|
||||
click.echo(
|
||||
f"{name:<30} | {stats['rmse_pre']:<10.4f} | {stats['rmse_post']:<10.4f} | "
|
||||
f"{stats['improvement']:<10.4f} | {stats['nfev']:<5}"
|
||||
)
|
||||
|
||||
return benchmark_results
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option("--svo", "-s", multiple=True, required=False, help="Path to SVO files.")
|
||||
@click.option("--markers", "-m", required=True, help="Path to markers parquet file.")
|
||||
@@ -223,6 +423,11 @@ def apply_depth_verify_refine_postprocess(
|
||||
@click.option(
|
||||
"--refine-depth/--no-refine-depth", default=False, help="Enable depth refinement."
|
||||
)
|
||||
@click.option(
|
||||
"--use-confidence-weights/--no-confidence-weights",
|
||||
default=False,
|
||||
help="Use confidence-weighted residuals in depth refinement.",
|
||||
)
|
||||
@click.option(
|
||||
"--depth-mode",
|
||||
default="NEURAL",
|
||||
@@ -272,6 +477,11 @@ def apply_depth_verify_refine_postprocess(
|
||||
type=int,
|
||||
help="Maximum number of samples to process before stopping.",
|
||||
)
|
||||
@click.option(
|
||||
"--benchmark-matrix/--no-benchmark-matrix",
|
||||
default=False,
|
||||
help="Run benchmark matrix comparing different refinement configurations.",
|
||||
)
|
||||
def main(
|
||||
svo: tuple[str, ...],
|
||||
markers: str,
|
||||
@@ -283,6 +493,7 @@ def main(
|
||||
self_check: bool,
|
||||
verify_depth: bool,
|
||||
refine_depth: bool,
|
||||
use_confidence_weights: bool,
|
||||
depth_mode: str,
|
||||
depth_confidence_threshold: int,
|
||||
report_csv: str | None,
|
||||
@@ -293,6 +504,7 @@ def main(
|
||||
min_markers: int,
|
||||
debug: bool,
|
||||
max_samples: int | None,
|
||||
benchmark_matrix: bool,
|
||||
):
|
||||
"""
|
||||
Calibrate camera extrinsics relative to a global coordinate system defined by ArUco markers.
|
||||
@@ -313,7 +525,7 @@ def main(
|
||||
}
|
||||
sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)
|
||||
|
||||
if not (verify_depth or refine_depth):
|
||||
if not (verify_depth or refine_depth or benchmark_matrix):
|
||||
sl_depth_mode = sl.DEPTH_MODE.NONE
|
||||
|
||||
# Expand SVO paths (files or directories)
|
||||
@@ -406,6 +618,8 @@ def main(
|
||||
|
||||
# Store verification frames for post-process check
|
||||
verification_frames = {}
|
||||
# Store first valid frame for benchmarking
|
||||
first_frames = {}
|
||||
|
||||
# Track all visible marker IDs for heuristic ground detection
|
||||
all_visible_ids = set()
|
||||
@@ -460,15 +674,43 @@ def main(
|
||||
# We want T_world_from_cam
|
||||
T_world_cam = invert_transform(T_cam_world)
|
||||
|
||||
# Save latest valid frame for verification
|
||||
# Save best frame for verification based on scoring
|
||||
if (
|
||||
verify_depth or refine_depth
|
||||
verify_depth or refine_depth or benchmark_matrix
|
||||
) and frame.depth_map is not None:
|
||||
verification_frames[serial] = {
|
||||
"frame": frame,
|
||||
"ids": ids,
|
||||
"corners": corners,
|
||||
}
|
||||
current_score = score_frame(
|
||||
n_markers,
|
||||
reproj_err,
|
||||
corners,
|
||||
frame.depth_map,
|
||||
depth_confidence_threshold,
|
||||
frame.confidence_map,
|
||||
)
|
||||
|
||||
if serial not in first_frames:
|
||||
first_frames[serial] = {
|
||||
"frame": frame,
|
||||
"ids": ids,
|
||||
"corners": corners,
|
||||
"score": current_score,
|
||||
"frame_index": frame_count,
|
||||
}
|
||||
|
||||
best_so_far = verification_frames.get(serial)
|
||||
if (
|
||||
best_so_far is None
|
||||
or current_score > best_so_far["score"]
|
||||
):
|
||||
verification_frames[serial] = {
|
||||
"frame": frame,
|
||||
"ids": ids,
|
||||
"corners": corners,
|
||||
"score": current_score,
|
||||
"frame_index": frame_count,
|
||||
}
|
||||
logger.debug(
|
||||
f"Cam {serial}: New best frame {frame_count} with score {current_score:.2f}"
|
||||
)
|
||||
|
||||
accumulators[serial].add_pose(
|
||||
T_world_cam, reproj_err, frame_count
|
||||
@@ -550,11 +792,27 @@ def main(
|
||||
camera_matrices,
|
||||
verify_depth,
|
||||
refine_depth,
|
||||
use_confidence_weights,
|
||||
depth_confidence_threshold,
|
||||
report_csv,
|
||||
)
|
||||
|
||||
# 5. Optional Ground Plane Alignment
|
||||
# 5. Run Benchmark Matrix if requested
|
||||
if benchmark_matrix:
|
||||
benchmark_results = run_benchmark_matrix(
|
||||
results,
|
||||
verification_frames,
|
||||
first_frames,
|
||||
marker_geometry,
|
||||
camera_matrices,
|
||||
depth_confidence_threshold,
|
||||
)
|
||||
# Add to results for saving
|
||||
for serial, bench in benchmark_results.items():
|
||||
if serial in results:
|
||||
results[serial]["benchmark"] = bench
|
||||
|
||||
# 6. Optional Ground Plane Alignment
|
||||
if auto_align:
|
||||
click.echo("\nPerforming ground plane alignment...")
|
||||
target_face = ground_face
|
||||
|
||||
Reference in New Issue
Block a user