feat(calibration): robust depth refinement pipeline with diagnostics and benchmarking

This commit is contained in:
2026-02-07 05:51:07 +00:00
parent ead3796cdb
commit dad1f2a69f
17 changed files with 1876 additions and 261 deletions
@@ -14,7 +14,10 @@ sys.path.append(str(Path(__file__).parent.parent))
# Import the functions under test directly; the sys.path tweak above makes the
# top-level calibrate_extrinsics module resolvable from the test directory.
from calibrate_extrinsics import apply_depth_verify_refine_postprocess
from calibrate_extrinsics import (
apply_depth_verify_refine_postprocess,
run_benchmark_matrix,
)
@pytest.fixture
@@ -38,6 +41,9 @@ def mock_dependencies():
mock_refine_res_stats = {
"delta_rotation_deg": 1.0,
"delta_translation_norm_m": 0.1,
"success": True,
"nfev": 10,
"termination_message": "Success",
}
# refine returns (new_pose_matrix, stats)
mock_refine.return_value = (np.eye(4), mock_refine_res_stats)
@@ -45,6 +51,50 @@ def mock_dependencies():
yield mock_verify, mock_refine, mock_echo
def test_benchmark_matrix(mock_dependencies):
    """run_benchmark_matrix evaluates all four configurations for each serial.

    Verifies that the per-serial benchmark result contains one entry per
    configuration and that the mocked verify/refine helpers are called the
    expected number of times.
    """
    mock_verify, mock_refine, _ = mock_dependencies
    serial = "123456"
    serial_int = int(serial)
    results = {serial: {"pose": "1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1"}}
    frame_mock = MagicMock(
        depth_map=np.zeros((10, 10)), confidence_map=np.zeros((10, 10))
    )
    vf = {
        "frame": frame_mock,
        "ids": np.array([[1]]),
        "frame_index": 100,
    }
    verification_frames = {serial_int: vf}
    first_frames = {serial_int: vf}
    marker_geometry = {1: np.zeros((4, 3))}
    camera_matrices = {serial_int: np.eye(3)}

    bench_results = run_benchmark_matrix(
        results,
        verification_frames,
        first_frames,
        marker_geometry,
        camera_matrices,
        depth_confidence_threshold=50,
    )

    assert serial in bench_results
    assert "baseline" in bench_results[serial]
    assert "robust" in bench_results[serial]
    assert "robust+confidence" in bench_results[serial]
    assert "robust+confidence+best-frame" in bench_results[serial]
    # Each of the 4 configs performs one pre-refine verify and one post-refine
    # verify (4 * 2 = 8) and exactly one refine call (4 * 1 = 4).
    assert mock_verify.call_count == 8
    assert mock_refine.call_count == 4
def test_verify_only(mock_dependencies, tmp_path):
mock_verify, mock_refine, _ = mock_dependencies
@@ -75,6 +125,7 @@ def test_verify_only(mock_dependencies, tmp_path):
camera_matrices=camera_matrices,
verify_depth=True,
refine_depth=False,
use_confidence_weights=False,
depth_confidence_threshold=50,
report_csv_path=None,
)
@@ -130,6 +181,7 @@ def test_refine_depth(mock_dependencies):
camera_matrices=camera_matrices,
verify_depth=False, # refine implies verify usually, but let's check logic
refine_depth=True,
use_confidence_weights=False,
depth_confidence_threshold=50,
)
@@ -143,6 +195,103 @@ def test_refine_depth(mock_dependencies):
mock_refine.assert_called_once()
def test_refine_depth_warning_negligible_improvement(mock_dependencies):
    """A long optimizer run with ~zero RMSE gain must echo a 'negligible' warning."""
    mock_verify, mock_refine, mock_echo = mock_dependencies
    cam_serial = "123456"
    calib_results = {
        cam_serial: {"pose": "1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1", "stats": {}}
    }
    frames = {
        cam_serial: {
            "frame": MagicMock(depth_map=np.zeros((10, 10))),
            "ids": np.array([[1]]),
        }
    }
    geometry = {1: np.zeros((4, 3))}
    intrinsics = {cam_serial: np.eye(3)}

    # Pre- and post-refinement RMSE are nearly identical -> negligible gain.
    mock_verify.side_effect = [
        MagicMock(rmse=0.1, n_valid=10, residuals=[]),
        MagicMock(rmse=0.099999, n_valid=10, residuals=[]),
    ]
    # The optimizer reports many evaluations (nfev > 5) despite no real change.
    mock_refine.return_value = (
        np.eye(4),
        {
            "delta_rotation_deg": 0.0,
            "delta_translation_norm_m": 0.0,
            "success": True,
            "nfev": 10,
            "termination_message": "Converged",
        },
    )

    apply_depth_verify_refine_postprocess(
        results=calib_results,
        verification_frames=frames,
        marker_geometry=geometry,
        camera_matrices=intrinsics,
        verify_depth=False,
        refine_depth=True,
        use_confidence_weights=False,
        depth_confidence_threshold=50,
    )

    # One of the echoed messages should warn about negligible improvement.
    assert any(
        "negligible" in str(echoed.args[0]) for echoed in mock_echo.call_args_list
    )
def test_refine_depth_warning_failed_or_stalled(mock_dependencies):
    """An unsuccessful optimizer run must echo a 'failed or stalled' warning."""
    mock_verify, mock_refine, mock_echo = mock_dependencies
    cam_serial = "123456"
    calib_results = {
        cam_serial: {"pose": "1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1", "stats": {}}
    }
    frames = {
        cam_serial: {
            "frame": MagicMock(depth_map=np.zeros((10, 10))),
            "ids": np.array([[1]]),
        }
    }
    geometry = {1: np.zeros((4, 3))}
    intrinsics = {cam_serial: np.eye(3)}

    # RMSE is unchanged before and after refinement.
    mock_verify.side_effect = [
        MagicMock(rmse=0.1, n_valid=10, residuals=[]),
        MagicMock(rmse=0.1, n_valid=10, residuals=[]),
    ]
    # The optimizer reports failure after a single evaluation.
    mock_refine.return_value = (
        np.eye(4),
        {
            "delta_rotation_deg": 0.0,
            "delta_translation_norm_m": 0.0,
            "success": False,
            "nfev": 1,
            "termination_message": "Failed",
        },
    )

    apply_depth_verify_refine_postprocess(
        results=calib_results,
        verification_frames=frames,
        marker_geometry=geometry,
        camera_matrices=intrinsics,
        verify_depth=False,
        refine_depth=True,
        use_confidence_weights=False,
        depth_confidence_threshold=50,
    )

    assert any(
        "failed or stalled" in str(echoed.args[0])
        for echoed in mock_echo.call_args_list
    )
def test_csv_output(mock_dependencies, tmp_path):
mock_verify, _, _ = mock_dependencies
@@ -169,6 +318,7 @@ def test_csv_output(mock_dependencies, tmp_path):
camera_matrices=camera_matrices,
verify_depth=True,
refine_depth=False,
use_confidence_weights=False,
depth_confidence_threshold=50,
report_csv_path=str(csv_path),
)
+79 -20
View File
@@ -37,6 +37,14 @@ def test_refine_extrinsics_with_depth_no_change():
# np.testing.assert_allclose(T_initial, T_refined, atol=1e-5)
# assert stats["success"] is True
assert stats["final_cost"] <= stats["initial_cost"] + 1e-10
assert "termination_status" in stats
assert "nfev" in stats
assert "optimality" in stats
assert "n_active_bounds" in stats
assert "n_depth_valid" in stats
assert "n_points_total" in stats
assert "loss_function" in stats
assert "f_scale" in stats
def test_refine_extrinsics_with_depth_with_offset():
@@ -95,48 +103,50 @@ def test_refine_extrinsics_respects_bounds():
def test_robust_loss_handles_outliers():
K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]], dtype=np.float64)
# True pose: camera moved 0.1m forward
T_true = np.eye(4)
T_true[2, 3] = 0.1
# Initial pose: identity
T_initial = np.eye(4)
# Create synthetic depth map
# Marker at (0,0,2.1) in world -> (0,0,2.0) in camera (since cam moved 0.1 forward)
depth_map = np.full((720, 1280), 2.0, dtype=np.float32)
# Add outliers: 30% of pixels are garbage (e.g. 0.5m or 5.0m)
# We'll simulate this by having multiple markers, some with bad depth
marker_corners_world = {}
# 7 good markers (depth 2.0)
# 3 bad markers (depth 5.0 - huge outlier)
# We need to ensure these project to unique pixels.
# K = 1000 focal.
# x = 0.1 * i. Z = 2.1 (world).
# u = 1000 * x / Z + 640
marker_corners_world[0] = []
for i in range(10):
u = int(50 * i + 640)
v = 360
world_pt = np.array([0.1 * i, 0, 2.1])
marker_corners_world[0].append(world_pt)
# Paint a wide strip to cover T_initial to T_true movement
# u_initial = 47.6 * i + 640. u_true = 50 * i + 640.
# Diff is ~2.4 * i. Max diff (i=9) is ~22 pixels.
# So +/- 30 pixels should cover it.
if i < 7:
depth_map[v-5:v+6, u-30:u+31] = 2.0 # Good measurement
depth_map[v - 5 : v + 6, u - 30 : u + 31] = 2.0 # Good measurement
else:
depth_map[v-5:v+6, u-30:u+31] = 5.0 # Outlier measurement (3m error)
depth_map[v - 5 : v + 6, u - 30 : u + 31] = (
5.0 # Outlier measurement (3m error)
)
marker_corners_world[0] = np.array(marker_corners_world[0])
@@ -148,15 +158,17 @@ def test_robust_loss_handles_outliers():
K,
max_translation_m=0.2,
max_rotation_deg=5.0,
regularization_weight=0.0, # Disable reg to see if data term wins
regularization_weight=0.0, # Disable reg to see if data term wins
loss="soft_l1",
f_scale=0.1
f_scale=0.1,
)
# With robust loss, it should ignore the 3m errors and converge to the 0.1m shift
# The 0.1m shift explains the 7 inliers perfectly.
# T_refined[2, 3] should be close to 0.1
assert abs(T_refined[2, 3] - 0.1) < 0.02 # Allow small error due to outliers pulling slightly
assert (
abs(T_refined[2, 3] - 0.1) < 0.02
) # Allow small error due to outliers pulling slightly
assert stats["success"] is True
# Run with linear loss (MSE) - should fail or be pulled significantly
@@ -168,14 +180,61 @@ def test_robust_loss_handles_outliers():
max_translation_m=0.2,
max_rotation_deg=5.0,
regularization_weight=0.0,
loss="linear"
loss="linear",
)
# MSE will try to average 0.0 error (7 points) and 3.0 error (3 points)
# Mean error target ~ 0.9m
# So it will likely pull the camera way back to reduce the 3m errors
# The result should be WORSE than the robust one
error_robust = abs(T_refined[2, 3] - 0.1)
error_mse = abs(T_refined_mse[2, 3] - 0.1)
assert error_robust < error_mse
def test_refine_with_confidence_weights():
    """A low-confidence depth outlier should be suppressed when weights are on."""
    K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]], dtype=np.float64)
    T_initial = np.eye(4)

    # Two world points: one whose depth (2.0) matches a 0.1 m forward shift,
    # and one whose depth (5.0) is a gross outlier at its projected pixel.
    marker_corners_world = {1: np.array([[0, 0, 2.1], [0.5, 0, 2.1]])}
    outlier_u = int(1000 * 0.5 / 2.1 + 640)
    depth_map = np.full((720, 1280), 2.0, dtype=np.float32)
    depth_map[360, outlier_u] = 5.0
    # Confidence map: point 1 confident (1), point 2 NOT confident (90).
    confidence_map = np.full((720, 1280), 1.0, dtype=np.float32)
    confidence_map[360, outlier_u] = 90.0

    # 1. Without weights the outlier pulls the solution noticeably.
    T_no_weights, stats_no_weights = refine_extrinsics_with_depth(
        T_initial,
        marker_corners_world,
        depth_map,
        K,
        regularization_weight=0.0,
        confidence_map=None,
        loss="linear",  # linear loss makes the weighting effect more obvious
    )

    # 2. With weights the unreliable pixel is down-weighted.
    T_weighted, stats_weighted = refine_extrinsics_with_depth(
        T_initial,
        marker_corners_world,
        depth_map,
        K,
        regularization_weight=0.0,
        confidence_map=confidence_map,
        confidence_thresh=100.0,
        loss="linear",
    )

    error_no_weights = abs(T_no_weights[2, 3] - 0.1)
    error_weighted = abs(T_weighted[2, 3] - 0.1)
    # The weighted solve should land much closer to the true 0.1 m shift.
    assert error_weighted < error_no_weights
    assert error_weighted < 0.06
+59
View File
@@ -0,0 +1,59 @@
import numpy as np
import pyzed.sl as sl
from unittest.mock import MagicMock
from aruco.svo_sync import SVOReader
def test_retrieve_depth_unit_guard():
    """_retrieve_depth converts millimeter depth to meters, leaves meters as-is.

    _retrieve_depth constructs its own sl.Mat internally and calls get_data()
    on it, so the sl.Mat class is swapped for a mock whose instances return a
    synthetic depth array. The swap is wrapped in try/finally so that a failed
    assertion can never leak the patched class into other tests (the original
    version only restored sl.Mat on the success path).
    """
    reader = SVOReader([], depth_mode=sl.DEPTH_MODE.ULTRA)

    mock_cam = MagicMock(spec=sl.Camera)
    # Synthetic depth data: a uniform 2.0 reading.
    depth_data = np.full((100, 100), 2.0, dtype=np.float32)
    mock_cam.retrieve_measure.return_value = sl.ERROR_CODE.SUCCESS

    from aruco import svo_sync

    original_mat = svo_sync.sl.Mat
    mock_mat_class = MagicMock()
    mock_mat_class.return_value.get_data.return_value = depth_data
    svo_sync.sl.Mat = mock_mat_class
    try:
        # Case 1: units are METER -> values must pass through unchanged.
        mock_init_params_meter = MagicMock(spec=sl.InitParameters)
        mock_init_params_meter.coordinate_units = sl.UNIT.METER
        mock_cam.get_init_parameters.return_value = mock_init_params_meter
        depth_meter = reader._retrieve_depth(mock_cam)
        assert depth_meter is not None
        assert np.allclose(depth_meter, 2.0)

        # Case 2: units are MILLIMETER -> values must be divided by 1000.
        mock_init_params_mm = MagicMock(spec=sl.InitParameters)
        mock_init_params_mm.coordinate_units = sl.UNIT.MILLIMETER
        mock_cam.get_init_parameters.return_value = mock_init_params_mm
        depth_mm = reader._retrieve_depth(mock_cam)
        assert depth_mm is not None
        assert np.allclose(depth_mm, 0.002)
    finally:
        # Always restore the real sl.Mat, even if an assertion failed.
        svo_sync.sl.Mat = original_mat


if __name__ == "__main__":
    test_retrieve_depth_unit_guard()
+72
View File
@@ -0,0 +1,72 @@
import pytest
import numpy as np
from calibrate_extrinsics import score_frame
def test_score_frame_basic():
    """Detecting more markers must yield a strictly higher score, all else equal."""
    corners = np.zeros((1, 4, 2))
    one_marker = score_frame(
        n_markers=1, reproj_err=1.0, corners=corners, depth_map=None
    )
    two_markers = score_frame(
        n_markers=2, reproj_err=1.0, corners=corners, depth_map=None
    )
    assert two_markers > one_marker
def test_score_frame_reproj_err():
    """A lower reprojection error must yield a strictly higher score."""
    corners = np.zeros((1, 4, 2))
    worse = score_frame(n_markers=1, reproj_err=2.0, corners=corners, depth_map=None)
    better = score_frame(n_markers=1, reproj_err=1.0, corners=corners, depth_map=None)
    assert better > worse
def test_score_frame_depth_validity():
    """A NaN depth under a corner must lower the score versus valid depth."""
    depth_valid = np.ones((10, 10))
    # All four corners of the single marker sit on pixel (2, 2).
    corners = np.array([[[2, 2], [2, 2], [2, 2], [2, 2]]], dtype=np.float32)

    # Case 1: depth is valid at (2, 2).
    with_valid_depth = score_frame(
        n_markers=1, reproj_err=1.0, corners=corners, depth_map=depth_valid
    )

    # Case 2: depth is invalid (NaN) at (2, 2).
    depth_with_nan = depth_valid.copy()
    depth_with_nan[2, 2] = np.nan
    with_invalid_depth = score_frame(
        n_markers=1, reproj_err=1.0, corners=corners, depth_map=depth_with_nan
    )

    assert with_valid_depth > with_invalid_depth
def test_score_frame_confidence():
    """Confident depth readings must outscore unconfident ones."""
    depth_map = np.ones((10, 10))
    corners = np.array([[[2, 2], [2, 2], [2, 2], [2, 2]]], dtype=np.float32)

    # Case 1: high confidence everywhere (0 is the most confident value).
    confident_score = score_frame(
        n_markers=1,
        reproj_err=1.0,
        corners=corners,
        depth_map=depth_map,
        confidence_map=np.zeros((10, 10)),
        depth_confidence_threshold=50,
    )

    # Case 2: low confidence everywhere (100, above the 50 threshold).
    unconfident_score = score_frame(
        n_markers=1,
        reproj_err=1.0,
        corners=corners,
        depth_map=depth_map,
        confidence_map=np.ones((10, 10)) * 100,
        depth_confidence_threshold=50,
    )

    assert confident_score > unconfident_score