feat: add HDF5 depth map persistence module

- Implement aruco/depth_save.py with save_depth_data/load_depth_data
- Add tests/test_depth_save.py covering roundtrip and edge cases
- Ensure type safety with basedpyright
- Support compression and metadata handling
This commit is contained in:
2026-02-09 07:18:00 +00:00
parent 43a441f2d4
commit 83a74d293b
3 changed files with 314 additions and 1 deletion
+181
View File
@@ -0,0 +1,181 @@
import h5py
import numpy as np
import json
import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional, cast
from loguru import logger
def save_depth_data(
    path: str | Path, camera_data: Dict[str, Any], schema_version: int = 1
) -> None:
    """
    Save depth data to HDF5 format.

    Layout: ``/meta`` holds global attributes (schema version, units,
    coordinate frame, creation timestamp); ``/cameras/<serial>`` holds each
    camera's intrinsics, resolution, gzip-compressed pooled depth map,
    optional pooled confidence map, JSON-serialized pool metadata, and an
    optional ``raw_frames`` subtree of per-frame depth/confidence maps.

    Args:
        path: Output file path (its parent directory must already exist)
        camera_data: Dictionary mapping serial numbers to camera data
        schema_version: Schema version number (default: 1)

    Raises:
        FileNotFoundError: If the parent directory of ``path`` does not exist.
        KeyError: If a camera entry lacks a required key
            ("intrinsics", "resolution", "pooled_depth").
    """
    path = Path(path)
    # Fail fast with a clear error instead of letting h5py raise an opaque
    # OSError when the destination directory is missing.
    if not path.parent.exists():
        raise FileNotFoundError(f"Parent directory does not exist: {path.parent}")
    # Shared compression settings for every large array dataset below.
    compression_kwargs: Dict[str, Any] = {
        "compression": "gzip",
        "compression_opts": 4,
        "shuffle": True,
    }
    with h5py.File(path, "w") as f:
        # Global metadata
        meta = f.create_group("meta")
        meta.attrs["schema_version"] = schema_version
        meta.attrs["units"] = "meters"
        meta.attrs["coordinate_frame"] = "world_from_cam"
        # Timezone-aware UTC timestamp: a naive datetime.now() depends on the
        # writing machine's local clock and is ambiguous when files move hosts.
        meta.attrs["created_at"] = datetime.datetime.now(
            datetime.timezone.utc
        ).isoformat()
        # Per-camera data
        cameras = f.create_group("cameras")
        for serial, data in camera_data.items():
            cam_group = cameras.create_group(serial)
            # Intrinsics and resolution (small, stored uncompressed)
            cam_group.create_dataset("intrinsics", data=data["intrinsics"])
            cam_group.attrs["resolution"] = data["resolution"]
            # Pooled depth map (compressed)
            cam_group.create_dataset(
                "pooled_depth", data=data["pooled_depth"], **compression_kwargs
            )
            # Optional pooled confidence
            if data.get("pooled_confidence") is not None:
                cam_group.create_dataset(
                    "pooled_confidence",
                    data=data["pooled_confidence"],
                    **compression_kwargs,
                )
            # Pool metadata (JSON serialized); skipped when missing or empty
            if data.get("pool_metadata"):
                cam_group.attrs["pool_metadata"] = json.dumps(data["pool_metadata"])
            # Raw frames: optional list of per-frame dicts
            if data.get("raw_frames"):
                raw_group = cam_group.create_group("raw_frames")
                for i, frame in enumerate(data["raw_frames"]):
                    frame_group = raw_group.create_group(f"frame_{i}")
                    frame_group.attrs["frame_index"] = frame["frame_index"]
                    frame_group.attrs["score"] = frame["score"]
                    if frame.get("depth_map") is not None:
                        frame_group.create_dataset(
                            "depth_map",
                            data=frame["depth_map"],
                            **compression_kwargs,
                        )
                    if frame.get("confidence_map") is not None:
                        frame_group.create_dataset(
                            "confidence_map",
                            data=frame["confidence_map"],
                            **compression_kwargs,
                        )
def load_depth_data(path: str | Path) -> Dict[str, Any]:
    """
    Load depth data from HDF5 file.

    Inverse of :func:`save_depth_data`: reads the ``/cameras/<serial>``
    groups back into per-camera dictionaries.

    Args:
        path: Input file path

    Returns:
        Dictionary mapping serial numbers to camera data

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    loaded: Dict[str, Any] = {}
    with h5py.File(path, "r") as f:
        # A missing or non-group "cameras" entry yields an empty result.
        if not isinstance(cameras_group := f.get("cameras"), h5py.Group):
            return {}
        for serial in cameras_group:
            cam_group = cameras_group[serial]
            if not isinstance(cam_group, h5py.Group):
                continue
            # np.array(...) copies dataset contents out of the (soon-closed)
            # file handle into plain numpy arrays.
            cam_data: Dict[str, Any] = {
                "intrinsics": np.array(cam_group["intrinsics"]),
                "resolution": tuple(cast(Any, cam_group.attrs["resolution"])),
                "pooled_depth": np.array(cam_group["pooled_depth"]),
            }
            if "pooled_confidence" in cam_group:
                cam_data["pooled_confidence"] = np.array(cam_group["pooled_confidence"])

            # Pool metadata is stored as a JSON string attribute; h5py may
            # hand it back as str or bytes. Anything else maps to None.
            raw_meta = cam_group.attrs.get("pool_metadata")
            if isinstance(raw_meta, bytes):
                raw_meta = raw_meta.decode("utf-8")
            cam_data["pool_metadata"] = (
                json.loads(raw_meta) if isinstance(raw_meta, str) else None
            )

            # Raw frames: groups named frame_<i>, restored in index order.
            frames: List[Dict[str, Any]] = []
            raw_group = cam_group.get("raw_frames")
            if isinstance(raw_group, h5py.Group):
                for key in sorted(raw_group, key=lambda name: int(name.split("_")[1])):
                    entry = raw_group[key]
                    if not isinstance(entry, h5py.Group):
                        continue
                    record: Dict[str, Any] = {
                        "frame_index": entry.attrs["frame_index"],
                        "score": entry.attrs["score"],
                    }
                    for ds_name in ("depth_map", "confidence_map"):
                        if ds_name in entry:
                            record[ds_name] = np.array(entry[ds_name])
                    frames.append(record)
            cam_data["raw_frames"] = frames

            loaded[serial] = cam_data
    return loaded