feat(calibration): robust depth refinement pipeline with diagnostics and benchmarking

2026-02-07 05:51:07 +00:00
parent ead3796cdb
commit dad1f2a69f
17 changed files with 1876 additions and 261 deletions
@@ -70,6 +70,51 @@ ARUCO_DICT_MAP = {
 }


+def score_frame(
+    n_markers: int,
+    reproj_err: float,
+    corners: np.ndarray,
+    depth_map: Optional[np.ndarray],
+    depth_confidence_threshold: int = 50,
+    confidence_map: Optional[np.ndarray] = None,
+) -> float:
+    """
+    Compute a quality score for a frame to select the best one for depth verification.
+    Higher is better.
+    """
+    # Base score: more markers is better, lower reprojection error is better.
+    # We weight markers heavily as they provide more constraints.
+    score = n_markers * 100.0 - reproj_err
+
+    if depth_map is not None:
+        # Calculate depth validity ratio at marker corners.
+        # This ensures we pick a frame where depth is actually available where we need it.
+        valid_count = 0
+        total_count = 0
+        h, w = depth_map.shape[:2]
+
+        # corners shape is (N, 4, 2)
+        flat_corners = corners.reshape(-1, 2)
+        for pt in flat_corners:
+            x, y = int(round(pt[0])), int(round(pt[1]))
+            if 0 <= x < w and 0 <= y < h:
+                total_count += 1
+                d = depth_map[y, x]
+                if np.isfinite(d) and d > 0:
+                    if confidence_map is not None:
+                        # ZED confidence: lower is more confident
+                        if confidence_map[y, x] <= depth_confidence_threshold:
+                            valid_count += 1
+                    else:
+                        valid_count += 1
+
+        if total_count > 0:
+            depth_ratio = valid_count / total_count
+            score += depth_ratio * 50.0
+
+    return score
+
+
 def apply_depth_verify_refine_postprocess(
    results: Dict[str, Any],
    verification_frames: Dict[str, Any],
@@ -77,6 +122,7 @@ def apply_depth_verify_refine_postprocess(
    camera_matrices: Dict[str, Any],
    verify_depth: bool,
    refine_depth: bool,
+    use_confidence_weights: bool,
    depth_confidence_threshold: int,
    report_csv_path: Optional[str] = None,
 ) -> Tuple[Dict[str, Any], List[List[Any]]]:
@@ -145,6 +191,10 @@ def apply_depth_verify_refine_postprocess(
                        marker_corners_world,
                        frame.depth_map,
                        cam_matrix,
+                        confidence_map=frame.confidence_map
+                        if use_confidence_weights
+                        else None,
+                        confidence_thresh=depth_confidence_threshold,
                    )

                    verify_res_post = verify_extrinsics_with_depth(
@@ -180,6 +230,18 @@ def apply_depth_verify_refine_postprocess(
                        f"Trans={refine_stats['delta_translation_norm_m']:.3f}m"
                    )

+                    # Warning gates
+                    if improvement < 1e-4 and refine_stats["nfev"] > 5:
+                        click.echo(
+                            f"  WARNING: Optimization ran for {refine_stats['nfev']} steps but improvement was negligible ({improvement:.6f}m).",
+                            err=True,
+                        )
+                    if not refine_stats["success"] or refine_stats["nfev"] <= 1:
+                        click.echo(
+                            f"  WARNING: Optimization might have failed or stalled. Success: {refine_stats['success']}, Steps: {refine_stats['nfev']}. Message: {refine_stats['termination_message']}",
+                            err=True,
+                        )
+
                    verify_res = verify_res_post

            if report_csv_path:
@@ -196,6 +258,144 @@ def apply_depth_verify_refine_postprocess(
    return results, csv_rows


+def run_benchmark_matrix(
+    results: Dict[str, Any],
+    verification_frames: Dict[Any, Any],
+    first_frames: Dict[Any, Any],
+    marker_geometry: Dict[int, Any],
+    camera_matrices: Dict[Any, Any],
+    depth_confidence_threshold: int,
+) -> Dict[str, Any]:
+    """
+    Run benchmark matrix comparing 4 configurations:
+    1) baseline (linear loss, no confidence weights)
+    2) robust (soft_l1, f_scale=0.1, no confidence)
+    3) robust+confidence
+    4) robust+confidence+best-frame
+    """
+    benchmark_results = {}
+
+    configs = [
+        {
+            "name": "baseline",
+            "loss": "linear",
+            "use_confidence": False,
+            "use_best_frame": False,
+        },
+        {
+            "name": "robust",
+            "loss": "soft_l1",
+            "use_confidence": False,
+            "use_best_frame": False,
+        },
+        {
+            "name": "robust+confidence",
+            "loss": "soft_l1",
+            "use_confidence": True,
+            "use_best_frame": False,
+        },
+        {
+            "name": "robust+confidence+best-frame",
+            "loss": "soft_l1",
+            "use_confidence": True,
+            "use_best_frame": True,
+        },
+    ]
+
+    click.echo("\nRunning Benchmark Matrix...")
+
+    for serial in results.keys():
+        serial_int = int(serial)
+        if serial_int not in first_frames or serial_int not in verification_frames:
+            continue
+
+        cam_matrix = camera_matrices[serial_int]
+        pose_str = results[serial]["pose"]
+        T_initial = np.fromstring(pose_str, sep=" ").reshape(4, 4)
+
+        cam_bench = {}
+
+        for config in configs:
+            name = config["name"]
+            use_best = config["use_best_frame"]
+            vf = (
+                verification_frames[serial_int]
+                if use_best
+                else first_frames[serial_int]
+            )
+
+            frame = vf["frame"]
+            ids = vf["ids"]
+            marker_corners_world = {
+                int(mid): marker_geometry[int(mid)]
+                for mid in ids.flatten()
+                if int(mid) in marker_geometry
+            }
+
+            if not marker_corners_world or frame.depth_map is None:
+                continue
+
+            # Pre-refinement verification
+            verify_pre = verify_extrinsics_with_depth(
+                T_initial,
+                marker_corners_world,
+                frame.depth_map,
+                cam_matrix,
+                confidence_map=frame.confidence_map,
+                confidence_thresh=depth_confidence_threshold,
+            )
+
+            # Refinement
+            T_refined, refine_stats = refine_extrinsics_with_depth(
+                T_initial,
+                marker_corners_world,
+                frame.depth_map,
+                cam_matrix,
+                confidence_map=frame.confidence_map
+                if config["use_confidence"]
+                else None,
+                confidence_thresh=depth_confidence_threshold,
+                loss=str(config["loss"]),
+                f_scale=0.1,
+            )
+
+            # Post-refinement verification
+            verify_post = verify_extrinsics_with_depth(
+                T_refined,
+                marker_corners_world,
+                frame.depth_map,
+                cam_matrix,
+                confidence_map=frame.confidence_map,
+                confidence_thresh=depth_confidence_threshold,
+            )
+
+            cam_bench[name] = {
+                "rmse_pre": verify_pre.rmse,
+                "rmse_post": verify_post.rmse,
+                "improvement": verify_pre.rmse - verify_post.rmse,
+                "delta_rot_deg": refine_stats["delta_rotation_deg"],
+                "delta_trans_m": refine_stats["delta_translation_norm_m"],
+                "nfev": refine_stats["nfev"],
+                "success": refine_stats["success"],
+                "frame_index": vf["frame_index"],
+            }
+
+        benchmark_results[serial] = cam_bench
+
+        # Print summary table for this camera
+        click.echo(f"\nBenchmark Results for Camera {serial}:")
+        header = f"{'Config':<30} | {'RMSE Pre':<10} | {'RMSE Post':<10} | {'Improv':<10} | {'Iter':<5}"
+        click.echo(header)
+        click.echo("-" * len(header))
+        for name, stats in cam_bench.items():
+            click.echo(
+                f"{name:<30} | {stats['rmse_pre']:<10.4f} | {stats['rmse_post']:<10.4f} | "
+                f"{stats['improvement']:<10.4f} | {stats['nfev']:<5}"
+            )
+
+    return benchmark_results
+
+
@click.command()
@click.option("--svo", "-s", multiple=True, required=False, help="Path to SVO files.")
@click.option("--markers", "-m", required=True, help="Path to markers parquet file.")
@@ -223,6 +423,11 @@ def apply_depth_verify_refine_postprocess(
@click.option(
    "--refine-depth/--no-refine-depth", default=False, help="Enable depth refinement."
 )
+@click.option(
+    "--use-confidence-weights/--no-confidence-weights",
+    default=False,
+    help="Use confidence-weighted residuals in depth refinement.",
+)
@click.option(
    "--depth-mode",
    default="NEURAL",
@@ -272,6 +477,11 @@ def apply_depth_verify_refine_postprocess(
    type=int,
    help="Maximum number of samples to process before stopping.",
 )
+@click.option(
+    "--benchmark-matrix/--no-benchmark-matrix",
+    default=False,
+    help="Run benchmark matrix comparing different refinement configurations.",
+)
 def main(
    svo: tuple[str, ...],
    markers: str,
@@ -283,6 +493,7 @@ def main(
    self_check: bool,
    verify_depth: bool,
    refine_depth: bool,
+    use_confidence_weights: bool,
    depth_mode: str,
    depth_confidence_threshold: int,
    report_csv: str | None,
@@ -293,6 +504,7 @@ def main(
    min_markers: int,
    debug: bool,
    max_samples: int | None,
+    benchmark_matrix: bool,
 ):
    """
    Calibrate camera extrinsics relative to a global coordinate system defined by ArUco markers.
@@ -313,7 +525,7 @@ def main(
    }
    sl_depth_mode = depth_mode_map.get(depth_mode, sl.DEPTH_MODE.NONE)

-    if not (verify_depth or refine_depth):
+    if not (verify_depth or refine_depth or benchmark_matrix):
        sl_depth_mode = sl.DEPTH_MODE.NONE

    # Expand SVO paths (files or directories)
@@ -406,6 +618,8 @@ def main(

    # Store verification frames for post-process check
    verification_frames = {}
+    # Store first valid frame for benchmarking
+    first_frames = {}

    # Track all visible marker IDs for heuristic ground detection
    all_visible_ids = set()
@@ -460,15 +674,43 @@ def main(
                            # We want T_world_from_cam
                            T_world_cam = invert_transform(T_cam_world)

-                            # Save latest valid frame for verification
+                            # Save best frame for verification based on scoring
                            if (
-                                verify_depth or refine_depth
+                                verify_depth or refine_depth or benchmark_matrix
                            ) and frame.depth_map is not None:
-                                verification_frames[serial] = {
-                                    "frame": frame,
-                                    "ids": ids,
-                                    "corners": corners,
-                                }
+                                current_score = score_frame(
+                                    n_markers,
+                                    reproj_err,
+                                    corners,
+                                    frame.depth_map,
+                                    depth_confidence_threshold,
+                                    frame.confidence_map,
+                                )
+
+                                if serial not in first_frames:
+                                    first_frames[serial] = {
+                                        "frame": frame,
+                                        "ids": ids,
+                                        "corners": corners,
+                                        "score": current_score,
+                                        "frame_index": frame_count,
+                                    }
+
+                                best_so_far = verification_frames.get(serial)
+                                if (
+                                    best_so_far is None
+                                    or current_score > best_so_far["score"]
+                                ):
+                                    verification_frames[serial] = {
+                                        "frame": frame,
+                                        "ids": ids,
+                                        "corners": corners,
+                                        "score": current_score,
+                                        "frame_index": frame_count,
+                                    }
+                                    logger.debug(
+                                        f"Cam {serial}: New best frame {frame_count} with score {current_score:.2f}"
+                                    )

                            accumulators[serial].add_pose(
                                T_world_cam, reproj_err, frame_count
@@ -550,11 +792,27 @@ def main(
        camera_matrices,
        verify_depth,
        refine_depth,
+        use_confidence_weights,
        depth_confidence_threshold,
        report_csv,
    )

-    # 5. Optional Ground Plane Alignment
+    # 5. Run Benchmark Matrix if requested
+    if benchmark_matrix:
+        benchmark_results = run_benchmark_matrix(
+            results,
+            verification_frames,
+            first_frames,
+            marker_geometry,
+            camera_matrices,
+            depth_confidence_threshold,
+        )
+        # Add to results for saving
+        for serial, bench in benchmark_results.items():
+            if serial in results:
+                results[serial]["benchmark"] = bench
+
+    # 6. Optional Ground Plane Alignment
    if auto_align:
        click.echo("\nPerforming ground plane alignment...")
        target_face = ground_face