diff --git a/README.md b/README.md index 3d76057..a3d1f84 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ Use the batch wrapper to run `zed_svo_grid_to_mp4` over many segment directories ```bash uv run python scripts/zed_batch_svo_grid_to_mp4.py \ - \ + --dataset-root \ --recursive \ --jobs 2 \ --encoder-device auto \ @@ -173,8 +173,8 @@ You can also provide the exact segments to convert: ```bash uv run python scripts/zed_batch_svo_grid_to_mp4.py \ - --segment-dir \ - --segment-dir \ + --segment \ + --segment \ --jobs 2 ``` @@ -193,7 +193,7 @@ When you suspect a previous run left behind partial MP4 files, opt into `ffprobe ```bash uv run python scripts/zed_batch_svo_grid_to_mp4.py \ - \ + --dataset-root \ --probe-existing \ --jobs 2 ``` @@ -202,7 +202,7 @@ Use `--report-existing` to audit existing outputs without launching conversions. ```bash uv run python scripts/zed_batch_svo_grid_to_mp4.py \ - \ + --dataset-root \ --report-existing ``` @@ -236,7 +236,7 @@ Use the wrapper to recurse through a dataset root, run `zed_svo_to_mcap --segmen ```bash uv run python scripts/zed_batch_svo_to_mcap.py \ - \ + --dataset-root \ --recursive \ --jobs 2 \ --cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \ @@ -258,7 +258,7 @@ Enable per-camera pose export when the segment has valid tracking: ```bash uv run python scripts/zed_batch_svo_to_mcap.py \ - --segment-dir \ + --segment \ --with-pose \ --pose-config ``` @@ -275,6 +275,8 @@ Single-source `zed_svo_to_mcap` now writes the one-camera `copy` shape by defaul For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`. +For calibration-based depth/RGB mapping, use `scripts/mcap_depth_alignment.py` and [docs/depth_alignment.md](./docs/depth_alignment.md). That helper explains the current affine mapping implied by the exported calibration topics and can export example aligned-depth and overlay PNGs from a chosen MCAP frame. + ### MCAP RGBD Viewer The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`. @@ -322,7 +324,7 @@ That is why the batch wrapper supports mixed pools such as two NVENC workers plu ```bash uv run python scripts/zed_batch_svo_to_mcap.py \ - \ + --dataset-root \ --recursive \ --overwrite \ --hardware-jobs 2 \ @@ -340,7 +342,7 @@ Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid ```bash uv run python scripts/zed_batch_svo_to_mcap.py \ - \ + --dataset-root \ --probe-existing \ --jobs 2 ``` @@ -349,7 +351,7 @@ Use `--report-existing` to audit existing MCAPs without launching conversions: ```bash uv run python scripts/zed_batch_svo_to_mcap.py \ - \ + --dataset-root \ --report-existing ``` diff --git a/pyproject.toml b/pyproject.toml index 8ba0701..1fbfc86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "opencv-python-headless>=4.11", "progress-table>=3.2", "protobuf>=5.29", + "tqdm>=4.67", "zstandard>=0.23", ] diff --git a/scripts/zed_batch_segment_sources.py b/scripts/zed_batch_segment_sources.py new file mode 100644 index 0000000..24a7651 --- /dev/null +++ b/scripts/zed_batch_segment_sources.py @@ -0,0 +1,255 @@ +from __future__ import annotations + +import csv +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Generic, Protocol, TypeVar + +import click +from click.core import ParameterSource + + +class SegmentScanLike(Protocol): + segment_dir: Path + matched_files: int + is_valid: bool + + +ScanT = TypeVar("ScanT", bound=SegmentScanLike) + + +@dataclass(slots=True, frozen=True) +class SourceResolution(Generic[ScanT]): + mode: str + segment_dirs: tuple[Path, ...] + ignored_partial_dirs: tuple[ScanT, ...] + + +def dedupe_paths(paths: list[Path]) -> list[Path]: + ordered: list[Path] = [] + seen: set[Path] = set() + for path in paths: + resolved = path.expanduser().resolve() + if resolved in seen: + continue + seen.add(resolved) + ordered.append(resolved) + return ordered + + +def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]: + csv_path = csv_path.expanduser().resolve() + if not csv_path.is_file(): + raise click.ClickException(f"CSV not found: {csv_path}") + + if csv_root is not None: + base_dir = csv_root.expanduser().resolve() + if not base_dir.is_dir(): + raise click.ClickException(f"CSV root is not a directory: {base_dir}") + else: + base_dir = csv_path.parent + + segment_dirs: list[Path] = [] + seen: set[Path] = set() + with csv_path.open(newline="") as stream: + reader = csv.DictReader(stream) + if reader.fieldnames is None or "segment_dir" not in reader.fieldnames: + raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header") + + for row_number, row in enumerate(reader, start=2): + raw_segment_dir = (row.get("segment_dir") or "").strip() + if not raw_segment_dir: + raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value") + segment_dir = Path(raw_segment_dir) + resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir + resolved = resolved.expanduser().resolve() + if resolved in seen: + continue + seen.add(resolved) + segment_dirs.append(resolved) + + if not segment_dirs: + raise click.ClickException(f"{csv_path} did not contain any segment_dir rows") + return tuple(segment_dirs) + + +def discover_segment_dirs( + root: Path, + recursive: bool, + *, + scan_segment_dir: Callable[[Path], ScanT], + no_matches_message: Callable[[Path], str], +) -> SourceResolution[ScanT]: + resolved_root = root.expanduser().resolve() + if not resolved_root.is_dir(): + raise click.ClickException(f"dataset root does not exist: {resolved_root}") + + candidate_dirs = {resolved_root} + iterator = resolved_root.rglob("*") if recursive else resolved_root.iterdir() + for path in iterator: + if path.is_dir(): + candidate_dirs.add(path.resolve()) + + valid_dirs: list[Path] = [] + ignored_partial_dirs: list[ScanT] = [] + for segment_dir in sorted(candidate_dirs): + scan = scan_segment_dir(segment_dir) + if scan.is_valid: + valid_dirs.append(segment_dir) + elif scan.matched_files > 0: + ignored_partial_dirs.append(scan) + + if not valid_dirs: + raise click.ClickException(no_matches_message(resolved_root)) + + return SourceResolution( + mode="dataset-root", + segment_dirs=tuple(valid_dirs), + ignored_partial_dirs=tuple(ignored_partial_dirs), + ) + + +def raise_if_recursive_flag_is_incompatible( + ctx: click.Context, + dataset_root: Path | None, + *, + dataset_root_flag: str = "--dataset-root", +) -> None: + if ctx.get_parameter_source("recursive") is ParameterSource.DEFAULT: + return + if dataset_root is None: + raise click.ClickException(f"--recursive/--no-recursive can only be used with {dataset_root_flag}") + + +def raise_for_legacy_source_args( + legacy_input_dir: Path | None, + legacy_segment_dirs: tuple[Path, ...], + *, + dataset_root_flag: str = "--dataset-root", + segment_flag: str = "--segment", +) -> None: + if legacy_input_dir is not None: + resolved = legacy_input_dir.expanduser().resolve() + raise click.ClickException( + f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}" + ) + + if legacy_segment_dirs: + resolved = legacy_segment_dirs[0].expanduser().resolve() + raise click.ClickException( + f"--segment-dir is no longer supported in this batch wrapper; use {segment_flag} {resolved} " + f"for an explicit segment directory, or {dataset_root_flag} --recursive for discovery" + ) + + +def raise_for_legacy_extra_args( + extra_args: list[str], + *, + dataset_root_flag: str = "--dataset-root", +) -> None: + if not extra_args: + return + + first = extra_args[0] + if first.startswith("-"): + extras_text = " ".join(extra_args) + raise click.ClickException(f"unexpected extra arguments: {extras_text}") + + resolved = Path(first).expanduser().resolve() + raise click.ClickException( + f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}" + ) + + +def raise_if_segment_path_looks_like_dataset_root( + segment_dir: Path, + *, + scan_segment_dir: Callable[[Path], ScanT], + dataset_root_flag: str = "--dataset-root", + segment_flag: str = "--segment", +) -> None: + resolved = segment_dir.expanduser().resolve() + if not resolved.is_dir(): + return + + scan = scan_segment_dir(resolved) + if scan.is_valid or scan.matched_files > 0: + return + + nested_segments = _find_nested_valid_segment_dirs(resolved, scan_segment_dir=scan_segment_dir) + if not nested_segments: + return + + example = nested_segments[0] + raise click.ClickException( + f"{resolved} looks like a dataset root, not a segment directory. " + f"{segment_flag} expects a directory that directly contains *_zedN.svo or *_zedN.svo2 files. " + f"Use {dataset_root_flag} {resolved} to discover nested segments such as {example}" + ) + + +def resolve_sources( + dataset_root: Path | None, + segment_dirs: tuple[Path, ...], + segments_csv: Path | None, + csv_root: Path | None, + recursive: bool, + *, + scan_segment_dir: Callable[[Path], ScanT], + no_matches_message: Callable[[Path], str], +) -> SourceResolution[ScanT]: + source_count = sum( + ( + 1 if dataset_root is not None else 0, + 1 if segment_dirs else 0, + 1 if segments_csv is not None else 0, + ) + ) + if source_count != 1: + raise click.ClickException( + "provide exactly one source mode: --dataset-root, --segment, or --segments-csv" + ) + + if dataset_root is not None: + return discover_segment_dirs( + dataset_root, + recursive, + scan_segment_dir=scan_segment_dir, + no_matches_message=no_matches_message, + ) + + if segment_dirs: + ordered_dirs = dedupe_paths(list(segment_dirs)) + for segment_dir in ordered_dirs: + raise_if_segment_path_looks_like_dataset_root( + segment_dir, + scan_segment_dir=scan_segment_dir, + ) + return SourceResolution(mode="segments", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=()) + + return SourceResolution( + mode="segments-csv", + segment_dirs=parse_segments_csv(segments_csv, csv_root), + ignored_partial_dirs=(), + ) + + +def _find_nested_valid_segment_dirs( + root: Path, + *, + scan_segment_dir: Callable[[Path], ScanT], + limit: int = 3, +) -> tuple[Path, ...]: + matches: list[Path] = [] + for path in sorted(root.rglob("*")): + if not path.is_dir(): + continue + resolved = path.resolve() + if resolved == root: + continue + scan = scan_segment_dir(resolved) + if scan.is_valid: + matches.append(resolved) + if len(matches) >= limit: + break + return tuple(matches) diff --git a/scripts/zed_batch_svo_grid_to_mp4.py b/scripts/zed_batch_svo_grid_to_mp4.py index 2d27800..a6d51ff 100644 --- a/scripts/zed_batch_svo_grid_to_mp4.py +++ b/scripts/zed_batch_svo_grid_to_mp4.py @@ -3,7 +3,6 @@ from __future__ import annotations import concurrent.futures -import csv import json import math import os @@ -17,6 +16,11 @@ from pathlib import Path import click from tqdm import tqdm +try: + from scripts import zed_batch_segment_sources as segment_sources +except ModuleNotFoundError: + import zed_batch_segment_sources as segment_sources + SCRIPT_PATH = Path(__file__).resolve() REPO_ROOT = SCRIPT_PATH.parents[1] @@ -70,13 +74,6 @@ class SegmentScan: reason: str | None = None -@dataclass(slots=True, frozen=True) -class SourceResolution: - mode: str - segment_dirs: tuple[Path, ...] - ignored_partial_dirs: tuple[SegmentScan, ...] - - @dataclass(slots=True, frozen=True) class OutputProbeResult: output_path: Path @@ -157,116 +154,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan: return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True) -def dedupe_paths(paths: list[Path]) -> list[Path]: - ordered: list[Path] = [] - seen: set[Path] = set() - for path in paths: - resolved = path.expanduser().resolve() - if resolved in seen: - continue - seen.add(resolved) - ordered.append(resolved) - return ordered - - -def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution: - if not root.is_dir(): - raise click.ClickException(f"input directory does not exist: {root}") - - candidate_dirs = {root.resolve()} - iterator = root.rglob("*") if recursive else root.iterdir() - for path in iterator: - if path.is_dir(): - candidate_dirs.add(path.resolve()) - - valid_dirs: list[Path] = [] - ignored_partial_dirs: list[SegmentScan] = [] - for segment_dir in sorted(candidate_dirs): - scan = scan_segment_dir(segment_dir) - if scan.is_valid: - valid_dirs.append(segment_dir) - elif scan.matched_files > 0: - ignored_partial_dirs.append(scan) - - if not valid_dirs: - raise click.ClickException(f"no complete four-camera segments found under {root}") - - return SourceResolution( - mode="discovery", - segment_dirs=tuple(valid_dirs), - ignored_partial_dirs=tuple(ignored_partial_dirs), - ) - - -def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]: - csv_path = csv_path.expanduser().resolve() - if not csv_path.is_file(): - raise click.ClickException(f"CSV not found: {csv_path}") - - if csv_root is not None: - base_dir = csv_root.expanduser().resolve() - if not base_dir.is_dir(): - raise click.ClickException(f"CSV root is not a directory: {base_dir}") - else: - base_dir = csv_path.parent - - segment_dirs: list[Path] = [] - seen: set[Path] = set() - with csv_path.open(newline="") as stream: - reader = csv.DictReader(stream) - if reader.fieldnames is None or "segment_dir" not in reader.fieldnames: - raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header") - - for row_number, row in enumerate(reader, start=2): - raw_segment_dir = (row.get("segment_dir") or "").strip() - if not raw_segment_dir: - raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value") - segment_dir = Path(raw_segment_dir) - resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir - resolved = resolved.expanduser().resolve() - if resolved in seen: - continue - seen.add(resolved) - segment_dirs.append(resolved) - - if not segment_dirs: - raise click.ClickException(f"{csv_path} did not contain any segment_dir rows") - return tuple(segment_dirs) - - -def resolve_sources( - input_dir: Path | None, - segment_dirs: tuple[Path, ...], - segments_csv: Path | None, - csv_root: Path | None, - recursive: bool, -) -> SourceResolution: - source_count = sum( - ( - 1 if input_dir is not None else 0, - 1 if segment_dirs else 0, - 1 if segments_csv is not None else 0, - ) - ) - if source_count != 1: - raise click.ClickException( - "provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv" - ) - - if input_dir is not None: - return discover_segment_dirs(input_dir.expanduser().resolve(), recursive) - - if segment_dirs: - ordered_dirs = dedupe_paths(list(segment_dirs)) - return SourceResolution(mode="segment-dir", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=()) - - return SourceResolution( - mode="segments-csv", - segment_dirs=parse_segments_csv(segments_csv, csv_root), - ignored_partial_dirs=(), - ) - - def output_path_for(segment_dir: Path) -> Path: return segment_dir / f"{segment_dir.name}_grid.mp4" @@ -514,30 +401,45 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) - return results, aborted_count -@click.command() -@click.argument( - "input_dir", - required=False, +@click.command(context_settings={"allow_extra_args": True}) +@click.option( + "--dataset-root", type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), + help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.", +) +@click.option( + "--segment", + "segment_dirs", + multiple=True, + type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True), + help=( + "Explicit segment directory. Repeatable. The directory must directly contain " + "*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv." + ), ) @click.option( "--segment-dir", - "segment_dirs", + "legacy_segment_dirs", multiple=True, type=click.Path(path_type=Path, file_okay=False, dir_okay=True), - help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.", + hidden=True, ) @click.option( "--segments-csv", type=click.Path(path_type=Path, dir_okay=False), - help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.", + help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.", ) @click.option( "--csv-root", type=click.Path(path_type=Path, file_okay=False, dir_okay=True), help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", ) -@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.") +@click.option( + "--recursive/--no-recursive", + default=True, + show_default=True, + help="Recurse when discovering segment directories from --dataset-root.", +) @click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") @click.option( "--zed-bin", @@ -625,9 +527,12 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) - show_default=True, help="Scale each tile relative to the source resolution.", ) +@click.pass_context def main( - input_dir: Path | None, + ctx: click.Context, + dataset_root: Path | None, segment_dirs: tuple[Path, ...], + legacy_segment_dirs: tuple[Path, ...], segments_csv: Path | None, csv_root: Path | None, recursive: bool, @@ -653,14 +558,26 @@ def main( tile_scale: float, ) -> None: """Batch-convert synced four-camera ZED segments into grid MP4 files.""" + segment_sources.raise_for_legacy_extra_args(ctx.args) + segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs) + segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root) + if b_frames > gop: raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames") if report_existing and dry_run: raise click.ClickException("--report-existing and --dry-run are mutually exclusive") + sources = segment_sources.resolve_sources( + dataset_root, + segment_dirs, + segments_csv, + csv_root, + recursive, + scan_segment_dir=scan_segment_dir, + no_matches_message=lambda root: f"no complete four-camera segments found under {root}", + ) ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None binary_path = None if report_existing else locate_binary(zed_bin) - sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive) config = BatchConfig( zed_bin=binary_path, ffprobe_bin=ffprobe_path, diff --git a/scripts/zed_batch_svo_to_mcap.py b/scripts/zed_batch_svo_to_mcap.py index 9dcecde..8a85216 100644 --- a/scripts/zed_batch_svo_to_mcap.py +++ b/scripts/zed_batch_svo_to_mcap.py @@ -3,7 +3,6 @@ from __future__ import annotations import concurrent.futures -import csv import importlib import os import re @@ -17,6 +16,11 @@ from pathlib import Path import click from progress_table import ProgressTable +try: + from scripts import zed_batch_segment_sources as segment_sources +except ModuleNotFoundError: + import zed_batch_segment_sources as segment_sources + SCRIPT_PATH = Path(__file__).resolve() REPO_ROOT = SCRIPT_PATH.parents[1] @@ -82,13 +86,6 @@ class SegmentScan: reason: str | None = None -@dataclass(slots=True, frozen=True) -class SourceResolution: - mode: str - segment_dirs: tuple[Path, ...] - ignored_partial_dirs: tuple[SegmentScan, ...] - - @dataclass(slots=True, frozen=True) class OutputProbeResult: output_path: Path @@ -339,116 +336,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan: ) -def dedupe_paths(paths: list[Path]) -> list[Path]: - ordered: list[Path] = [] - seen: set[Path] = set() - for path in paths: - resolved = path.expanduser().resolve() - if resolved in seen: - continue - seen.add(resolved) - ordered.append(resolved) - return ordered - - -def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution: - if not root.is_dir(): - raise click.ClickException(f"input directory does not exist: {root}") - - candidate_dirs = {root.resolve()} - iterator = root.rglob("*") if recursive else root.iterdir() - for path in iterator: - if path.is_dir(): - candidate_dirs.add(path.resolve()) - - valid_dirs: list[Path] = [] - ignored_partial_dirs: list[SegmentScan] = [] - for segment_dir in sorted(candidate_dirs): - scan = scan_segment_dir(segment_dir) - if scan.is_valid: - valid_dirs.append(segment_dir) - elif scan.matched_files > 0: - ignored_partial_dirs.append(scan) - - if not valid_dirs: - raise click.ClickException(f"no multi-camera segments found under {root}") - - return SourceResolution( - mode="discovery", - segment_dirs=tuple(valid_dirs), - ignored_partial_dirs=tuple(ignored_partial_dirs), - ) - - -def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]: - csv_path = csv_path.expanduser().resolve() - if not csv_path.is_file(): - raise click.ClickException(f"CSV not found: {csv_path}") - - if csv_root is not None: - base_dir = csv_root.expanduser().resolve() - if not base_dir.is_dir(): - raise click.ClickException(f"CSV root is not a directory: {base_dir}") - else: - base_dir = csv_path.parent - - segment_dirs: list[Path] = [] - seen: set[Path] = set() - with csv_path.open(newline="") as stream: - reader = csv.DictReader(stream) - if reader.fieldnames is None or "segment_dir" not in reader.fieldnames: - raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header") - - for row_number, row in enumerate(reader, start=2): - raw_segment_dir = (row.get("segment_dir") or "").strip() - if not raw_segment_dir: - raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value") - segment_dir = Path(raw_segment_dir) - resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir - resolved = resolved.expanduser().resolve() - if resolved in seen: - continue - seen.add(resolved) - segment_dirs.append(resolved) - - if not segment_dirs: - raise click.ClickException(f"{csv_path} did not contain any segment_dir rows") - return tuple(segment_dirs) - - -def resolve_sources( - input_dir: Path | None, - segment_dirs: tuple[Path, ...], - segments_csv: Path | None, - csv_root: Path | None, - recursive: bool, -) -> SourceResolution: - source_count = sum( - ( - 1 if input_dir is not None else 0, - 1 if segment_dirs else 0, - 1 if segments_csv is not None else 0, - ) - ) - if source_count != 1: - raise click.ClickException( - "provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv" - ) - - if input_dir is not None: - return discover_segment_dirs(input_dir.expanduser().resolve(), recursive) - - if segment_dirs: - ordered_dirs = dedupe_paths(list(segment_dirs)) - return SourceResolution(mode="segment-dir", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=()) - - return SourceResolution( - mode="segments-csv", - segment_dirs=parse_segments_csv(segments_csv, csv_root), - ignored_partial_dirs=(), - ) - - def output_path_for(segment_dir: Path) -> Path: return segment_dir / f"{segment_dir.name}.mcap" @@ -469,7 +356,7 @@ def display_name_for_segment( input_root: Path | None, common_parent: Path | None, ) -> str: - if source_mode == "discovery" and input_root is not None: + if source_mode == "dataset-root" and input_root is not None: try: return str(segment_dir.relative_to(input_root)) except ValueError: @@ -1071,30 +958,45 @@ def build_worker_slots( return worker_slots -@click.command() -@click.argument( - "input_dir", - required=False, +@click.command(context_settings={"allow_extra_args": True}) +@click.option( + "--dataset-root", type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), + help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.", +) +@click.option( + "--segment", + "segment_dirs", + multiple=True, + type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True), + help=( + "Explicit segment directory. Repeatable. The directory must directly contain " + "*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv." + ), ) @click.option( "--segment-dir", - "segment_dirs", + "legacy_segment_dirs", multiple=True, type=click.Path(path_type=Path, file_okay=False, dir_okay=True), - help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.", + hidden=True, ) @click.option( "--segments-csv", type=click.Path(path_type=Path, dir_okay=False), - help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.", + help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.", ) @click.option( "--csv-root", type=click.Path(path_type=Path, file_okay=False, dir_okay=True), help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", ) -@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.") +@click.option( + "--recursive/--no-recursive", + default=True, + show_default=True, + help="Recurse when discovering segment directories from --dataset-root.", +) @click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") @click.option( "--hardware-jobs", @@ -1231,9 +1133,12 @@ def build_worker_slots( show_default=True, help="Progress output mode. Auto uses a table on TTY and text logging otherwise.", ) +@click.pass_context def main( - input_dir: Path | None, + ctx: click.Context, + dataset_root: Path | None, segment_dirs: tuple[Path, ...], + legacy_segment_dirs: tuple[Path, ...], segments_csv: Path | None, csv_root: Path | None, recursive: bool, @@ -1266,6 +1171,10 @@ def main( progress_ui: str, ) -> None: """Batch-convert multi-camera ZED segments into grouped MCAP files.""" + segment_sources.raise_for_legacy_extra_args(ctx.args) + segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs) + segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root) + if report_existing and dry_run: raise click.ClickException("--report-existing and --dry-run are mutually exclusive") if bundle_policy == "copy": @@ -1276,8 +1185,16 @@ def main( if bundle_topic != "/bundle": raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy") + sources = segment_sources.resolve_sources( + dataset_root, + segment_dirs, + segments_csv, + csv_root, + recursive, + scan_segment_dir=scan_segment_dir, + no_matches_message=lambda root: f"no multi-camera segments found under {root}", + ) binary_path = None if report_existing else locate_binary(zed_bin) - sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive) worker_slots = build_worker_slots( jobs=jobs, encoder_device=encoder_device, @@ -1307,7 +1224,7 @@ def main( sync_tolerance_ms=sync_tolerance_ms, progress_ui=progress_ui, ) - input_root = input_dir.expanduser().resolve() if input_dir is not None else None + input_root = dataset_root.expanduser().resolve() if dataset_root is not None else None display_parent = common_segment_parent(sources.segment_dirs) skipped_results: list[JobResult] = [] diff --git a/tests/test_zed_batch_segment_sources.py b/tests/test_zed_batch_segment_sources.py new file mode 100644 index 0000000..faa04df --- /dev/null +++ b/tests/test_zed_batch_segment_sources.py @@ -0,0 +1,268 @@ +from __future__ import annotations + +import dataclasses +import tempfile +import unittest +from pathlib import Path + +import click +from click.testing import CliRunner + +from scripts import zed_batch_segment_sources as segment_sources +from scripts.zed_batch_svo_grid_to_mp4 import main as grid_main +from scripts.zed_batch_svo_to_mcap import main as mcap_main + + +@dataclasses.dataclass(slots=True, frozen=True) +class FakeScan: + segment_dir: Path + matched_files: int + is_valid: bool + reason: str | None = None + + +def fake_scan(segment_dir: Path) -> FakeScan: + if not segment_dir.is_dir(): + return FakeScan(segment_dir=segment_dir, matched_files=0, is_valid=False, reason="missing directory") + if (segment_dir / "valid.segment").is_file(): + return FakeScan(segment_dir=segment_dir, matched_files=2, is_valid=True) + if (segment_dir / "partial.segment").is_file(): + return FakeScan(segment_dir=segment_dir, matched_files=1, is_valid=False, reason="partial segment") + return FakeScan(segment_dir=segment_dir, matched_files=0, is_valid=False, reason="no camera files") + + +def create_multicamera_segment(parent: Path, segment_name: str) -> Path: + segment_dir = parent / segment_name + segment_dir.mkdir(parents=True) + for camera_index in range(1, 5): + (segment_dir / f"{segment_name}_zed{camera_index}.svo2").write_bytes(b"") + return segment_dir + + +class SharedSourceResolutionTests(unittest.TestCase): + def test_dataset_root_recursive_discovers_nested_segments(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dataset_root = Path(tmp) / "dataset" + segment_dir = dataset_root / "run" / "2026-04-08T11-50-32" + segment_dir.mkdir(parents=True) + (segment_dir / "valid.segment").write_text("", encoding="utf-8") + + sources = segment_sources.resolve_sources( + dataset_root, + (), + None, + None, + True, + scan_segment_dir=fake_scan, + no_matches_message=lambda root: f"no segments under {root}", + ) + + self.assertEqual(sources.mode, "dataset-root") + self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),)) + + def test_dataset_root_without_recursive_does_not_descend(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dataset_root = Path(tmp) / "dataset" + segment_dir = dataset_root / "run" / "2026-04-08T11-50-32" + segment_dir.mkdir(parents=True) + (segment_dir / "valid.segment").write_text("", encoding="utf-8") + + with self.assertRaises(click.ClickException) as error: + segment_sources.resolve_sources( + dataset_root, + (), + None, + None, + False, + scan_segment_dir=fake_scan, + no_matches_message=lambda root: f"no segments under {root}", + ) + + self.assertIn("no segments under", str(error.exception)) + + def test_explicit_segments_are_deduped(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + segment_dir = Path(tmp) / "2026-04-08T11-50-32" + segment_dir.mkdir() + (segment_dir / "valid.segment").write_text("", encoding="utf-8") + + sources = segment_sources.resolve_sources( + None, + (segment_dir, segment_dir), + None, + None, + True, + scan_segment_dir=fake_scan, + no_matches_message=lambda root: f"no segments under {root}", + ) + + self.assertEqual(sources.mode, "segments") + self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),)) + + def test_segments_csv_uses_segment_dir_column(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + segment_dir = tmp_path / "segments" / "2026-04-08T11-50-32" + segment_dir.mkdir(parents=True) + (segment_dir / "valid.segment").write_text("", encoding="utf-8") + csv_path = tmp_path / "segments.csv" + csv_path.write_text("segment_dir\nsegments/2026-04-08T11-50-32\n", encoding="utf-8") + + sources = segment_sources.resolve_sources( + None, + (), + csv_path, + None, + True, + scan_segment_dir=fake_scan, + no_matches_message=lambda root: f"no segments under {root}", + ) + + self.assertEqual(sources.mode, "segments-csv") + self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),)) + + def test_segment_path_like_dataset_root_has_hint(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dataset_root = Path(tmp) / "dataset" + segment_dir = dataset_root / "run" / "2026-04-08T11-50-32" + segment_dir.mkdir(parents=True) + (segment_dir / "valid.segment").write_text("", encoding="utf-8") + + with self.assertRaises(click.ClickException) as error: + segment_sources.resolve_sources( + None, + (dataset_root,), + None, + None, + True, + scan_segment_dir=fake_scan, + no_matches_message=lambda root: f"no segments under {root}", + ) + + message = str(error.exception) + self.assertIn("looks like a dataset root", message) + self.assertIn("--dataset-root", message) + + +class BatchCliSmokeTests(unittest.TestCase): + def setUp(self) -> None: + self.runner = CliRunner() + + def test_mcap_dataset_root_flag_discovers_segments(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dataset_root = Path(tmp) / "dataset" + create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32") + + result = self.runner.invoke( + mcap_main, + [ + "--dataset-root", + str(dataset_root), + "--recursive", + "--dry-run", + "--zed-bin", + "/bin/true", + ], + ) + + self.assertEqual(result.exit_code, 0, result.output) + self.assertIn("source=dataset-root matched=1 pending=1", result.output) + + def test_mcap_segment_flag_rejects_dataset_root_with_hint(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dataset_root = Path(tmp) / "dataset" + create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32") + + result = self.runner.invoke( + mcap_main, + [ + "--segment", + str(dataset_root), + "--dry-run", + "--zed-bin", + "/bin/true", + ], + ) + + self.assertNotEqual(result.exit_code, 0) + self.assertIn("looks like a dataset root", result.output) + self.assertIn("--dataset-root", result.output) + + def test_mcap_rejects_legacy_positional_dataset_root(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dataset_root = Path(tmp) / "dataset" + create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32") + + result = self.runner.invoke( + mcap_main, + [ + str(dataset_root), + "--dry-run", + "--zed-bin", + "/bin/true", + ], + ) + + self.assertNotEqual(result.exit_code, 0) + self.assertIn("positional dataset paths are no longer supported", result.output) + self.assertIn("--dataset-root", result.output) + + def test_mcap_rejects_recursive_without_dataset_root(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32") + + result = self.runner.invoke( + mcap_main, + [ + "--segment", + str(segment_dir), + "--no-recursive", + "--dry-run", + "--zed-bin", + "/bin/true", + ], + ) + + self.assertNotEqual(result.exit_code, 0) + self.assertIn("--recursive/--no-recursive can only be used with --dataset-root", result.output) + + def test_grid_segment_flag_discovers_one_segment(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32") + + result = self.runner.invoke( + grid_main, + [ + "--segment", + str(segment_dir), + "--dry-run", + "--zed-bin", + "/bin/true", + ], + ) + + self.assertEqual(result.exit_code, 0, result.output) + self.assertIn("source=segments matched=1 pending=1", result.output) + + def test_grid_rejects_legacy_segment_dir_flag(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32") + + result = self.runner.invoke( + grid_main, + [ + "--segment-dir", + str(segment_dir), + "--dry-run", + "--zed-bin", + "/bin/true", + ], + ) + + self.assertNotEqual(result.exit_code, 0) + self.assertIn("--segment-dir is no longer supported", result.output) + self.assertIn("--segment", result.output) + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index 482d153..72c8256 100644 --- a/uv.lock +++ b/uv.lock @@ -40,6 +40,7 @@ dependencies = [ { name = "opencv-python-headless" }, { name = "progress-table" }, { name = "protobuf" }, + { name = "tqdm" }, { name = "zstandard" }, ] @@ -59,6 +60,7 @@ requires-dist = [ { name = "progress-table", specifier = ">=3.2" }, { name = "protobuf", specifier = ">=5.29" }, { name = "rvl-impl", marker = "python_full_version >= '3.12' and extra == 'viewer'", git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9" }, + { name = "tqdm", specifier = ">=4.67" }, { name = "zstandard", specifier = ">=0.23" }, ] provides-extras = ["viewer"] @@ -327,6 +329,18 @@ name = "rvl-impl" version = "0.1.0" source = { git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9#74308bcaf184cb39428237e8f4f99a67a6de22d9" } +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + [[package]] name = "wcwidth" version = "0.6.0"