e3a423433e
Add a new mcap_video_bounds helper binary plus a zed_segment_time_index.py CLI that builds and queries an embedded DuckDB index for bundled ZED segment recordings. The index stores segment folders, MCAP paths, video time bounds, durations, camera labels, and dataset metadata, and reuses the existing recursive multi-camera segment discovery logic so nested kindergarten layouts are indexed correctly. Infer a dataset default timezone from folder names versus MCAP timestamps, and make point queries precision-aware so second-level folder timestamps like 2026-03-18T12-00-23 resolve to the matching segment instead of missing due to subsecond start offsets. Verification: - uv add 'duckdb>=1.0' - cmake --build build --target mcap_video_bounds - uv run python -m unittest tests.test_zed_segment_time_index - uv run python scripts/zed_segment_time_index.py build /workspaces/data/kindergarten --jobs 8 - uv run python scripts/zed_segment_time_index.py query /workspaces/data/kindergarten --at 2026-03-18T12-00-23
140 lines
5.6 KiB
Python
140 lines
5.6 KiB
Python
from __future__ import annotations
|
|
|
|
import datetime as dt
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
import duckdb
|
|
|
|
from scripts.zed_segment_time_index import (
|
|
BoundsRow,
|
|
format_ns_iso,
|
|
infer_dataset_timezone,
|
|
parse_timestamp_to_ns,
|
|
parse_timestamp_window,
|
|
require_query_window,
|
|
scan_segment_dir,
|
|
write_index,
|
|
)
|
|
|
|
|
|
class TimestampParseTests(unittest.TestCase):
    """Exercise the timestamp parsing, windowing, and formatting helpers."""

    def test_parse_folder_style_timestamp(self) -> None:
        """A dash-separated folder timestamp parses to the same value as its ISO form."""
        from_folder_name = parse_timestamp_to_ns("2026-03-18T12-00-23", "UTC")
        from_iso_text = parse_timestamp_to_ns("2026-03-18T12:00:23+00:00", "UTC")
        self.assertEqual(from_folder_name, from_iso_text)

    def test_parse_integer_epoch_milliseconds(self) -> None:
        """The integer string 1710000000123 is interpreted as epoch milliseconds."""
        parsed_ns = parse_timestamp_to_ns("1710000000123", "UTC")
        # Milliseconds -> nanoseconds is a factor of one million.
        self.assertEqual(parsed_ns, 1710000000123 * 1_000_000)

    def test_parse_timestamp_window_for_second_precision_text(self) -> None:
        """A second-precision timestamp yields a window ending 999,999,999 ns after its start."""
        window_start_ns, window_end_ns = parse_timestamp_window("2026-03-18T12-00-23", "UTC")
        span_ns = window_end_ns - window_start_ns
        self.assertEqual(span_ns, 999_999_999)

    def test_require_query_window_rejects_mixed_modes(self) -> None:
        """Supplying all three leading arguments at once must raise."""
        # NOTE(review): presumably the first three arguments are the point and
        # range selectors; confirm against require_query_window's signature.
        self.assertRaises(Exception, require_query_window, "1", "2", "3", "UTC")

    def test_format_ns_iso_utc(self) -> None:
        """Formatting in UTC keeps nanosecond digits in the rendered timestamp."""
        expected_prefix = "2024-03-09T16:00:00.123000000"
        text = format_ns_iso(1_710_000_000_123_000_000, dt.timezone.utc)
        # Only the prefix is pinned; whatever suffix follows is not checked here.
        self.assertTrue(text.startswith(expected_prefix))
|
|
|
|
|
|
class SegmentDiscoveryTests(unittest.TestCase):
    """Check how scan_segment_dir classifies directories of .svo2 recordings."""

    def test_scan_segment_dir_accepts_multicamera_dir(self) -> None:
        """A directory holding zed1..zed4 recordings is valid and reports all four labels."""
        expected_labels = ("zed1", "zed2", "zed3", "zed4")
        with tempfile.TemporaryDirectory() as tmp_name:
            segment_dir = Path(tmp_name)
            # Empty files are enough: the assertions only look at validity and labels.
            for camera in expected_labels:
                recording = segment_dir / f"2026-03-18T12-00-23_{camera}.svo2"
                recording.write_bytes(b"")
            result = scan_segment_dir(segment_dir)
            self.assertTrue(result.is_valid)
            self.assertEqual(result.camera_labels, expected_labels)

    def test_scan_segment_dir_rejects_partial_dir(self) -> None:
        """A directory with only a single camera recording is not a valid segment."""
        with tempfile.TemporaryDirectory() as tmp_name:
            segment_dir = Path(tmp_name)
            lone_recording = segment_dir / "2026-03-18T12-00-23_zed1.svo2"
            lone_recording.write_bytes(b"")
            result = scan_segment_dir(segment_dir)
            self.assertFalse(result.is_valid)
|
|
|
|
|
|
class DuckDbIndexTests(unittest.TestCase):
    """Cover timezone inference and the DuckDB index written by write_index."""

    def test_infer_dataset_timezone_from_folder_names(self) -> None:
        """Infer the dataset timezone from a folder name versus its UTC bounds.

        The folder is named 11-59-41 while the recorded start is 03:59:41Z,
        i.e. the folder clock runs eight hours ahead of UTC, so the expected
        label is "UTC+08:00".
        """
        row = BoundsRow(
            segment_dir=Path("/tmp/bar/2026-03-18T11-59-41"),
            relative_segment_dir="bar/2026-03-18T11-59-41",
            group_path="bar",
            activity="bar",
            segment_name="2026-03-18T11-59-41",
            mcap_path=Path("/tmp/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap"),
            start_ns=1_773_806_381_201_081_000,
            end_ns=1_773_806_392_268_226_000,
            duration_ns=11_067_145_000,
            start_iso_utc="2026-03-18T03:59:41.201081000Z",
            end_iso_utc="2026-03-18T03:59:52.268226000Z",
            camera_count=4,
            camera_labels="zed1,zed2,zed3,zed4",
            video_message_count=1330,
            index_source="mcap_video_bounds",
        )
        self.assertEqual(infer_dataset_timezone([row]), "UTC+08:00")

    def test_write_index_and_query_overlap(self) -> None:
        """Write two rows, then select the segments overlapping a query window.

        The rows span [100, 200] ns and [250, 400] ns. The query window is
        [180, 300] ns, expressed by the usual interval-overlap predicate
        ``start_ns <= window_end AND end_ns >= window_start``, so both rows
        must come back, ordered by ``start_ns``.
        """
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            root.mkdir()
            index_path = root / "segment_time_index.duckdb"

            rows = [
                BoundsRow(
                    segment_dir=root / "bar" / "2026-03-18T12-00-23",
                    relative_segment_dir="bar/2026-03-18T12-00-23",
                    group_path="bar",
                    activity="bar",
                    segment_name="2026-03-18T12-00-23",
                    mcap_path=root / "bar" / "2026-03-18T12-00-23" / "2026-03-18T12-00-23.mcap",
                    start_ns=100,
                    end_ns=200,
                    duration_ns=100,
                    start_iso_utc="1970-01-01T00:00:00.000000100Z",
                    end_iso_utc="1970-01-01T00:00:00.000000200Z",
                    camera_count=4,
                    camera_labels="zed1,zed2,zed3,zed4",
                    video_message_count=1330,
                    index_source="mcap_video_bounds",
                ),
                BoundsRow(
                    segment_dir=root / "run" / "2026-03-18T12-01-00",
                    relative_segment_dir="run/2026-03-18T12-01-00",
                    group_path="run",
                    activity="run",
                    segment_name="2026-03-18T12-01-00",
                    mcap_path=root / "run" / "2026-03-18T12-01-00" / "2026-03-18T12-01-00.mcap",
                    start_ns=250,
                    end_ns=400,
                    duration_ns=150,
                    start_iso_utc="1970-01-01T00:00:00.000000250Z",
                    end_iso_utc="1970-01-01T00:00:00.000000400Z",
                    camera_count=4,
                    camera_labels="zed1,zed2,zed3,zed4",
                    video_message_count=1400,
                    index_source="mcap_video_bounds",
                ),
            ]
            write_index(index_path, root, rows)

            # The connection is a context manager: it is closed on exit even if
            # the query raises, and before the temporary directory is removed.
            with duckdb.connect(str(index_path), read_only=True) as conn:
                matches = conn.execute(
                    "SELECT relative_segment_dir FROM segments WHERE start_ns <= ? AND end_ns >= ? ORDER BY start_ns",
                    [300, 180],  # bound in order: window end, then window start
                ).fetchall()

            self.assertEqual(matches, [("bar/2026-03-18T12-00-23",), ("run/2026-03-18T12-01-00",)])
|
|
|
|
|
|
# Script entry point: hand control to unittest's command-line runner when this
# module is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|