feat: add HDF5 depth map persistence module

- Implement aruco/depth_save.py with save_depth_data/load_depth_data
- Add tests/test_depth_save.py covering roundtrip and edge cases
- Ensure type safety with basedpyright
- Support compression and metadata handling
This commit is contained in:
2026-02-09 07:18:00 +00:00
parent 43a441f2d4
commit 83a74d293b
3 changed files with 314 additions and 1 deletion
+181
View File
@@ -0,0 +1,181 @@
import h5py
import numpy as np
import json
import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional, cast
from loguru import logger
def save_depth_data(
    path: str | Path, camera_data: Dict[str, Any], schema_version: int = 1
) -> None:
    """
    Save depth data to HDF5 format.

    Layout: ``/meta`` holds global attributes (schema version, units,
    coordinate frame, creation timestamp); ``/cameras/<serial>`` holds each
    camera's intrinsics, resolution, gzip-compressed pooled depth map,
    optional pooled confidence map, JSON-serialized pool metadata, and an
    optional ``raw_frames`` subtree of per-frame depth/confidence maps.

    Args:
        path: Output file path (its parent directory must already exist)
        camera_data: Dictionary mapping serial numbers to camera data
        schema_version: Schema version number (default: 1)

    Raises:
        FileNotFoundError: If the parent directory of ``path`` does not exist.
        KeyError: If a camera entry lacks a required key
            ("intrinsics", "resolution", "pooled_depth").
    """
    path = Path(path)
    # Fail fast with a clear error instead of letting h5py raise an opaque
    # OSError when the destination directory is missing.
    if not path.parent.exists():
        raise FileNotFoundError(f"Parent directory does not exist: {path.parent}")
    # Shared compression settings for every large array dataset below.
    compression_kwargs: Dict[str, Any] = {
        "compression": "gzip",
        "compression_opts": 4,
        "shuffle": True,
    }
    with h5py.File(path, "w") as f:
        # Global metadata
        meta = f.create_group("meta")
        meta.attrs["schema_version"] = schema_version
        meta.attrs["units"] = "meters"
        meta.attrs["coordinate_frame"] = "world_from_cam"
        # Timezone-aware UTC timestamp: a naive datetime.now() depends on the
        # writing machine's local clock and is ambiguous when files move hosts.
        meta.attrs["created_at"] = datetime.datetime.now(
            datetime.timezone.utc
        ).isoformat()
        # Per-camera data
        cameras = f.create_group("cameras")
        for serial, data in camera_data.items():
            cam_group = cameras.create_group(serial)
            # Intrinsics and resolution (small, stored uncompressed)
            cam_group.create_dataset("intrinsics", data=data["intrinsics"])
            cam_group.attrs["resolution"] = data["resolution"]
            # Pooled depth map (compressed)
            cam_group.create_dataset(
                "pooled_depth", data=data["pooled_depth"], **compression_kwargs
            )
            # Optional pooled confidence
            if data.get("pooled_confidence") is not None:
                cam_group.create_dataset(
                    "pooled_confidence",
                    data=data["pooled_confidence"],
                    **compression_kwargs,
                )
            # Pool metadata (JSON serialized); skipped when missing or empty
            if data.get("pool_metadata"):
                cam_group.attrs["pool_metadata"] = json.dumps(data["pool_metadata"])
            # Raw frames: optional list of per-frame dicts
            if data.get("raw_frames"):
                raw_group = cam_group.create_group("raw_frames")
                for i, frame in enumerate(data["raw_frames"]):
                    frame_group = raw_group.create_group(f"frame_{i}")
                    frame_group.attrs["frame_index"] = frame["frame_index"]
                    frame_group.attrs["score"] = frame["score"]
                    if frame.get("depth_map") is not None:
                        frame_group.create_dataset(
                            "depth_map",
                            data=frame["depth_map"],
                            **compression_kwargs,
                        )
                    if frame.get("confidence_map") is not None:
                        frame_group.create_dataset(
                            "confidence_map",
                            data=frame["confidence_map"],
                            **compression_kwargs,
                        )
def load_depth_data(path: str | Path) -> Dict[str, Any]:
    """
    Load depth data from HDF5 file.

    Inverse of :func:`save_depth_data`: reads the ``/cameras/<serial>``
    groups back into per-camera dictionaries.

    Args:
        path: Input file path

    Returns:
        Dictionary mapping serial numbers to camera data

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    loaded: Dict[str, Any] = {}
    with h5py.File(path, "r") as f:
        # A missing or non-group "cameras" entry yields an empty result.
        if not isinstance(cameras_group := f.get("cameras"), h5py.Group):
            return {}
        for serial in cameras_group:
            cam_group = cameras_group[serial]
            if not isinstance(cam_group, h5py.Group):
                continue
            # np.array(...) copies dataset contents out of the (soon-closed)
            # file handle into plain numpy arrays.
            cam_data: Dict[str, Any] = {
                "intrinsics": np.array(cam_group["intrinsics"]),
                "resolution": tuple(cast(Any, cam_group.attrs["resolution"])),
                "pooled_depth": np.array(cam_group["pooled_depth"]),
            }
            if "pooled_confidence" in cam_group:
                cam_data["pooled_confidence"] = np.array(cam_group["pooled_confidence"])

            # Pool metadata is stored as a JSON string attribute; h5py may
            # hand it back as str or bytes. Anything else maps to None.
            raw_meta = cam_group.attrs.get("pool_metadata")
            if isinstance(raw_meta, bytes):
                raw_meta = raw_meta.decode("utf-8")
            cam_data["pool_metadata"] = (
                json.loads(raw_meta) if isinstance(raw_meta, str) else None
            )

            # Raw frames: groups named frame_<i>, restored in index order.
            frames: List[Dict[str, Any]] = []
            raw_group = cam_group.get("raw_frames")
            if isinstance(raw_group, h5py.Group):
                for key in sorted(raw_group, key=lambda name: int(name.split("_")[1])):
                    entry = raw_group[key]
                    if not isinstance(entry, h5py.Group):
                        continue
                    record: Dict[str, Any] = {
                        "frame_index": entry.attrs["frame_index"],
                        "score": entry.attrs["score"],
                    }
                    for ds_name in ("depth_map", "confidence_map"):
                        if ds_name in entry:
                            record[ds_name] = np.array(entry[ds_name])
                    frames.append(record)
            cam_data["raw_frames"] = frames

            loaded[serial] = cam_data
    return loaded