feat: implement ground plane orchestration

This commit is contained in:
2026-02-09 07:27:36 +00:00
parent 6f34cd48fe
commit 94d9a27724
2 changed files with 318 additions and 2 deletions
+158 -2
View File
@@ -1,9 +1,9 @@
import numpy as np import numpy as np
from typing import Optional, Tuple, List from typing import Optional, Tuple, List, Dict, Any
from jaxtyping import Float from jaxtyping import Float
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import open3d as o3d import open3d as o3d
from dataclasses import dataclass from dataclasses import dataclass, field
if TYPE_CHECKING: if TYPE_CHECKING:
Vec3 = Float[np.ndarray, "3"] Vec3 = Float[np.ndarray, "3"]
@@ -29,6 +29,36 @@ class FloorCorrection:
reason: str = "" reason: str = ""
@dataclass
class GroundPlaneConfig:
    """Tunable parameters for depth-based ground plane refinement."""

    enabled: bool = True                # master switch; when False, refinement is a no-op
    target_y: float = 0.0               # desired world-frame Y height of the floor plane
    stride: int = 8                     # pixel subsampling step when unprojecting depth maps
    depth_min: float = 0.2              # discard depth readings closer than this (presumably meters — confirm)
    depth_max: float = 5.0              # discard depth readings farther than this
    ransac_dist_thresh: float = 0.02    # inlier distance threshold for plane RANSAC
    ransac_n: int = 3                   # points sampled per RANSAC hypothesis
    ransac_iters: int = 1000            # RANSAC iteration budget
    max_rotation_deg: float = 5.0       # corrections rotating more than this are rejected as invalid
    max_translation_m: float = 0.1      # corrections translating more than this (meters) are rejected
    min_inliers: int = 500              # minimum plane inliers for a camera's plane to count
    min_valid_cameras: int = 2          # minimum cameras with a valid plane before computing consensus
@dataclass
class GroundPlaneMetrics:
    """Diagnostics and results produced by refine_ground_from_depth."""

    # NOTE(review): Mat44/FloorPlane annotations must be resolvable when this
    # class is created unless `from __future__ import annotations` is active
    # at the top of the module — verify (Vec3 appears under TYPE_CHECKING).
    success: bool = False               # True only when a correction was computed and applied
    correction_applied: bool = False    # set together with success
    num_cameras_total: int = 0          # cameras present in the input camera_data
    num_cameras_valid: int = 0          # cameras whose depth yielded a usable floor plane
    # 4x4 world-frame correction transform; identity when nothing was applied.
    correction_transform: Mat44 = field(default_factory=lambda: np.eye(4))
    rotation_deg: float = 0.0           # rotation magnitude of correction_transform
    translation_m: float = 0.0          # translation magnitude of correction_transform
    camera_planes: Dict[str, FloorPlane] = field(default_factory=dict)  # per-serial detected planes
    consensus_plane: Optional[FloorPlane] = None  # fused plane, if consensus succeeded
    message: str = ""                   # human-readable status / failure reason
def unproject_depth_to_points( def unproject_depth_to_points(
depth_map: np.ndarray, depth_map: np.ndarray,
K: np.ndarray, K: np.ndarray,
@@ -245,3 +275,129 @@ def compute_floor_correction(
T[:3, 3] = target_normal * t_y T[:3, 3] = target_normal * t_y
return FloorCorrection(transform=T.astype(np.float64), valid=True) return FloorCorrection(transform=T.astype(np.float64), valid=True)
def refine_ground_from_depth(
    camera_data: Dict[str, Dict[str, Any]],
    extrinsics: Dict[str, Mat44],
    config: Optional[GroundPlaneConfig] = None,
) -> Tuple[Dict[str, Mat44], GroundPlaneMetrics]:
    """
    Orchestrate ground plane refinement across multiple cameras.

    Detects the floor plane independently in each camera's depth map,
    fuses the per-camera planes into a consensus plane, computes a rigid
    correction that moves the consensus plane to y = config.target_y,
    and applies that correction to every camera extrinsic.

    Args:
        camera_data: Dict mapping serial -> {'depth': np.ndarray, 'K': np.ndarray}
        extrinsics: Dict mapping serial -> world_from_cam matrix (4x4)
        config: Configuration parameters. If None, a fresh GroundPlaneConfig()
            is created per call. (Previously the default was a single shared
            GroundPlaneConfig() instance — a mutable default argument that
            every caller would share and could accidentally mutate.)

    Returns:
        Tuple of (new_extrinsics, metrics). On any failure the ORIGINAL
        extrinsics mapping is returned unchanged and metrics.message
        explains why.
    """
    if config is None:
        # Build the default per call; a mutable dataclass instance used as a
        # default argument is evaluated once and shared across all calls.
        config = GroundPlaneConfig()

    metrics = GroundPlaneMetrics()
    metrics.num_cameras_total = len(camera_data)

    if not config.enabled:
        metrics.message = "Ground plane refinement disabled in config"
        return extrinsics, metrics

    valid_planes: List[FloorPlane] = []

    # 1. Detect planes in each camera.
    for serial, data in camera_data.items():
        if serial not in extrinsics:
            continue
        depth_map = data.get("depth")
        K = data.get("K")
        if depth_map is None or K is None:
            continue

        # Unproject to camera frame.
        points_cam = unproject_depth_to_points(
            depth_map,
            K,
            stride=config.stride,
            depth_min=config.depth_min,
            depth_max=config.depth_max,
        )
        # Fewer points than the inlier floor can never satisfy RANSAC.
        if len(points_cam) < config.min_inliers:
            continue

        # Transform the (N, 3) point array to the world frame:
        # p_world = R @ p_cam + t, vectorized as p_cam @ R.T + t.
        T_world_cam = extrinsics[serial]
        R = T_world_cam[:3, :3]
        t = T_world_cam[:3, 3]
        points_world = (points_cam @ R.T) + t

        # Detect the floor plane in world coordinates.
        plane = detect_floor_plane(
            points_world,
            distance_threshold=config.ransac_dist_thresh,
            ransac_n=config.ransac_n,
            num_iterations=config.ransac_iters,
        )
        if plane is not None and plane.num_inliers >= config.min_inliers:
            metrics.camera_planes[serial] = plane
            valid_planes.append(plane)

    metrics.num_cameras_valid = len(valid_planes)

    # 2. Check minimum requirements.
    if len(valid_planes) < config.min_valid_cameras:
        metrics.message = f"Found {len(valid_planes)} valid planes, required {config.min_valid_cameras}"
        return extrinsics, metrics

    # 3. Compute consensus across cameras.
    try:
        consensus = compute_consensus_plane(valid_planes)
        metrics.consensus_plane = consensus
    except ValueError as e:
        metrics.message = f"Consensus computation failed: {e}"
        return extrinsics, metrics

    # 4. Compute the rigid correction (bounded by the config safety limits).
    correction = compute_floor_correction(
        consensus,
        target_floor_y=config.target_y,
        max_rotation_deg=config.max_rotation_deg,
        max_translation_m=config.max_translation_m,
    )
    metrics.correction_transform = correction.transform
    if not correction.valid:
        metrics.message = f"Correction invalid: {correction.reason}"
        return extrinsics, metrics

    # 5. Apply correction.
    # T_corr moves the world frame: P' = T_corr * P. New extrinsics must
    # satisfy P' = T'_world_cam * P_cam, and since P = T_world_cam * P_cam,
    # T'_world_cam = T_corr * T_world_cam.
    T_corr = correction.transform
    new_extrinsics = {serial: T_corr @ T_old for serial, T_old in extrinsics.items()}

    # Rotation magnitude from the trace identity trace(R) = 1 + 2*cos(theta);
    # clip guards arccos against floating-point drift outside [-1, 1].
    trace = np.trace(T_corr[:3, :3])
    cos_angle = np.clip((trace - 1) / 2, -1.0, 1.0)
    metrics.rotation_deg = float(np.rad2deg(np.arccos(cos_angle)))
    metrics.translation_m = float(np.linalg.norm(T_corr[:3, 3]))

    metrics.success = True
    metrics.correction_applied = True
    metrics.message = "Success"
    return new_extrinsics, metrics
+160
View File
@@ -5,8 +5,11 @@ from aruco.ground_plane import (
detect_floor_plane, detect_floor_plane,
compute_consensus_plane, compute_consensus_plane,
compute_floor_correction, compute_floor_correction,
refine_ground_from_depth,
FloorPlane, FloorPlane,
FloorCorrection, FloorCorrection,
GroundPlaneConfig,
GroundPlaneMetrics,
) )
@@ -315,3 +318,160 @@ def test_compute_floor_correction_bounds():
assert not result.valid assert not result.valid
assert "exceeds limit" in result.reason assert "exceeds limit" in result.reason
def test_refine_ground_from_depth_disabled():
    """A disabled config must short-circuit and hand back the input extrinsics."""
    cams = {"cam1": {"depth": np.zeros((10, 10)), "K": np.eye(3)}}
    poses = {"cam1": np.eye(4)}
    result_poses, metrics = refine_ground_from_depth(
        cams, poses, GroundPlaneConfig(enabled=False)
    )
    assert not metrics.success
    assert "disabled" in metrics.message
    assert result_poses == poses
def test_refine_ground_from_depth_insufficient_cameras():
    """One valid camera plane is below the min_valid_cameras=2 threshold."""
    # stride=1 so a 20x20 map yields 400 points (the default stride of 8
    # would leave only 2x2 = 4 samples, below min_inliers=10).
    cfg = GroundPlaneConfig(min_valid_cameras=2, min_inliers=10, stride=1)

    # Intrinsics: focal 20 px, principal point at the image center (10, 10).
    K = np.array(
        [[20.0, 0.0, 10.0],
         [0.0, 20.0, 10.0],
         [0.0, 0.0, 1.0]]
    )

    # A constant-depth map is a fronto-parallel plane at Z=2.0 in the camera
    # frame — any plane works here; we only need detection to succeed once.
    side = 20
    flat_depth = np.full((side, side), 2.0, dtype=np.float32)

    result_poses, metrics = refine_ground_from_depth(
        {"cam1": {"depth": flat_depth, "K": K}},
        {"cam1": np.eye(4)},
        cfg,
    )

    assert not metrics.success
    assert "Found 1 valid planes" in metrics.message
    assert metrics.num_cameras_valid == 1
def test_refine_ground_from_depth_success():
    """Two cameras see the floor at world y=-1; refinement lifts it to y=0.

    Geometry: a constant-depth map is the camera-frame plane Z=2. With a
    -90 deg rotation about X, R maps (x, y, 2) -> (x, 2, -y); adding a
    translation of ty=-3 puts that plane at world Y = 2 - 3 = -1. The
    correction toward target_y=0 should therefore translate by +1 in Y.
    """
    cfg = GroundPlaneConfig(
        min_valid_cameras=2,
        min_inliers=10,
        target_y=0.0,
        max_translation_m=2.0,
        ransac_dist_thresh=0.05,
        stride=1,  # 20x20 / stride 1 = 400 samples; default stride 8 gives only 4
    )

    # Intrinsics: focal 20 px, principal point at the image center (10, 10).
    K = np.array(
        [[20.0, 0.0, 10.0],
         [0.0, 20.0, 10.0],
         [0.0, 0.0, 1.0]]
    )

    # world_from_cam: rotate -90 deg about X, translate to y=-3.
    cam_pose = np.eye(4)
    cam_pose[:3, :3] = np.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
    cam_pose[:3, 3] = np.array([0, -3, 0])

    side = 20
    flat_depth = np.full((side, side), 2.0, dtype=np.float32)

    new_poses, metrics = refine_ground_from_depth(
        {
            "cam1": {"depth": flat_depth, "K": K},
            "cam2": {"depth": flat_depth, "K": K},
        },
        {"cam1": cam_pose, "cam2": cam_pose},
        cfg,
    )

    assert metrics.success
    assert metrics.num_cameras_valid == 2
    assert metrics.correction_applied

    # Floor at y=-1 corrected to y=0 => correction translates ~+1.0 in Y.
    T_corr = metrics.correction_transform
    assert abs(T_corr[1, 3] - 1.0) < 0.1  # slack for RANSAC noise

    # New pose = T_corr @ old pose, so the camera origin moves from y=-3 to y=-2.
    T_new = new_poses["cam1"]
    assert abs(T_new[1, 3] - (-2.0)) < 0.1