Redesign batch segment source selection

This commit is contained in:
2026-04-08 08:07:05 +00:00
parent c320bf01af
commit 0a3da46f19
7 changed files with 642 additions and 268 deletions
+12 -10
View File
@@ -162,7 +162,7 @@ Use the batch wrapper to run `zed_svo_grid_to_mp4` over many segment directories
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--recursive \ --recursive \
--jobs 2 \ --jobs 2 \
--encoder-device auto \ --encoder-device auto \
@@ -173,8 +173,8 @@ You can also provide the exact segments to convert:
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--segment-dir <SEGMENT_DIR_A> \ --segment <SEGMENT_DIR_A> \
--segment-dir <SEGMENT_DIR_B> \ --segment <SEGMENT_DIR_B> \
--jobs 2 --jobs 2
``` ```
@@ -193,7 +193,7 @@ When you suspect a previous run left behind partial MP4 files, opt into `ffprobe
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--probe-existing \ --probe-existing \
--jobs 2 --jobs 2
``` ```
@@ -202,7 +202,7 @@ Use `--report-existing` to audit existing outputs without launching conversions.
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--report-existing --report-existing
``` ```
@@ -236,7 +236,7 @@ Use the wrapper to recurse through a dataset root, run `zed_svo_to_mcap --segmen
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--recursive \ --recursive \
--jobs 2 \ --jobs 2 \
--cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \ --cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
@@ -258,7 +258,7 @@ Enable per-camera pose export when the segment has valid tracking:
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
--segment-dir <SEGMENT_DIR> \ --segment <SEGMENT_DIR> \
--with-pose \ --with-pose \
--pose-config <POSE_CONFIG> --pose-config <POSE_CONFIG>
``` ```
@@ -275,6 +275,8 @@ Single-source `zed_svo_to_mcap` now writes the one-camera `copy` shape by defaul
For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`. For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`.
For calibration-based depth/RGB mapping, use `scripts/mcap_depth_alignment.py` and [docs/depth_alignment.md](./docs/depth_alignment.md). That helper explains the current affine mapping implied by the exported calibration topics and can export example aligned-depth and overlay PNGs from a chosen MCAP frame.
### MCAP RGBD Viewer ### MCAP RGBD Viewer
The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`. The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`.
@@ -322,7 +324,7 @@ That is why the batch wrapper supports mixed pools such as two NVENC workers plu
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--recursive \ --recursive \
--overwrite \ --overwrite \
--hardware-jobs 2 \ --hardware-jobs 2 \
@@ -340,7 +342,7 @@ Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--probe-existing \ --probe-existing \
--jobs 2 --jobs 2
``` ```
@@ -349,7 +351,7 @@ Use `--report-existing` to audit existing MCAPs without launching conversions:
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--report-existing --report-existing
``` ```
+1
View File
@@ -9,6 +9,7 @@ dependencies = [
"opencv-python-headless>=4.11", "opencv-python-headless>=4.11",
"progress-table>=3.2", "progress-table>=3.2",
"protobuf>=5.29", "protobuf>=5.29",
"tqdm>=4.67",
"zstandard>=0.23", "zstandard>=0.23",
] ]
+255
View File
@@ -0,0 +1,255 @@
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Generic, Protocol, TypeVar
import click
from click.core import ParameterSource
class SegmentScanLike(Protocol):
    """Structural interface for per-segment scan results shared by the batch wrappers."""

    # Directory that was scanned for camera recordings.
    segment_dir: Path
    # Number of camera files found in the directory.
    matched_files: int
    # True when the directory contains a complete, usable segment.
    is_valid: bool
ScanT = TypeVar("ScanT", bound=SegmentScanLike)
@dataclass(slots=True, frozen=True)
class SourceResolution(Generic[ScanT]):
    """Outcome of resolving the CLI source options into concrete segment directories."""

    # Which source mode produced the result (e.g. "dataset-root", "segments", "segments-csv").
    mode: str
    # Resolved, deduplicated segment directories to process.
    segment_dirs: tuple[Path, ...]
    # Scans for directories that matched some camera files but were not complete segments.
    ignored_partial_dirs: tuple[ScanT, ...]
def dedupe_paths(paths: list[Path]) -> list[Path]:
    """Resolve each path and return them in first-seen order without duplicates."""
    # Dict insertion order (guaranteed since 3.7) doubles as the dedupe record.
    unique: dict[Path, None] = {}
    for candidate in paths:
        unique.setdefault(candidate.expanduser().resolve(), None)
    return list(unique)
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
def discover_segment_dirs(
    root: Path,
    recursive: bool,
    *,
    scan_segment_dir: Callable[[Path], ScanT],
    no_matches_message: Callable[[Path], str],
) -> SourceResolution[ScanT]:
    """Walk *root* and collect every directory that scans as a complete segment.

    Directories that matched some camera files but are incomplete are recorded
    as ignored partial scans. Raises click.ClickException when *root* is not a
    directory or when no complete segment is found.
    """
    resolved_root = root.expanduser().resolve()
    if not resolved_root.is_dir():
        raise click.ClickException(f"dataset root does not exist: {resolved_root}")
    # The root itself is always a candidate; descendants depend on `recursive`.
    walker = resolved_root.rglob("*") if recursive else resolved_root.iterdir()
    candidates = {resolved_root} | {entry.resolve() for entry in walker if entry.is_dir()}
    complete: list[Path] = []
    partial_scans: list[ScanT] = []
    for candidate in sorted(candidates):
        scan = scan_segment_dir(candidate)
        if scan.is_valid:
            complete.append(candidate)
        elif scan.matched_files > 0:
            partial_scans.append(scan)
    if not complete:
        raise click.ClickException(no_matches_message(resolved_root))
    return SourceResolution(
        mode="dataset-root",
        segment_dirs=tuple(complete),
        ignored_partial_dirs=tuple(partial_scans),
    )
def raise_if_recursive_flag_is_incompatible(
    ctx: click.Context,
    dataset_root: Path | None,
    *,
    dataset_root_flag: str = "--dataset-root",
) -> None:
    """Reject an explicitly passed --recursive/--no-recursive outside dataset-root mode."""
    explicitly_set = ctx.get_parameter_source("recursive") is not ParameterSource.DEFAULT
    if explicitly_set and dataset_root is None:
        raise click.ClickException(f"--recursive/--no-recursive can only be used with {dataset_root_flag}")
def raise_for_legacy_source_args(
legacy_input_dir: Path | None,
legacy_segment_dirs: tuple[Path, ...],
*,
dataset_root_flag: str = "--dataset-root",
segment_flag: str = "--segment",
) -> None:
if legacy_input_dir is not None:
resolved = legacy_input_dir.expanduser().resolve()
raise click.ClickException(
f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}"
)
if legacy_segment_dirs:
resolved = legacy_segment_dirs[0].expanduser().resolve()
raise click.ClickException(
f"--segment-dir is no longer supported in this batch wrapper; use {segment_flag} {resolved} "
f"for an explicit segment directory, or {dataset_root_flag} <DATASET_ROOT> --recursive for discovery"
)
def raise_for_legacy_extra_args(
    extra_args: list[str],
    *,
    dataset_root_flag: str = "--dataset-root",
) -> None:
    """Turn leftover positional CLI arguments into actionable migration errors."""
    if not extra_args:
        return
    leading = extra_args[0]
    if leading.startswith("-"):
        # Unknown flags: report everything that was left over.
        extras_text = " ".join(extra_args)
        raise click.ClickException(f"unexpected extra arguments: {extras_text}")
    # A bare path: assume it was the removed positional dataset argument.
    suggested = Path(leading).expanduser().resolve()
    raise click.ClickException(
        f"positional dataset paths are no longer supported; use {dataset_root_flag} {suggested}"
    )
def raise_if_segment_path_looks_like_dataset_root(
segment_dir: Path,
*,
scan_segment_dir: Callable[[Path], ScanT],
dataset_root_flag: str = "--dataset-root",
segment_flag: str = "--segment",
) -> None:
resolved = segment_dir.expanduser().resolve()
if not resolved.is_dir():
return
scan = scan_segment_dir(resolved)
if scan.is_valid or scan.matched_files > 0:
return
nested_segments = _find_nested_valid_segment_dirs(resolved, scan_segment_dir=scan_segment_dir)
if not nested_segments:
return
example = nested_segments[0]
raise click.ClickException(
f"{resolved} looks like a dataset root, not a segment directory. "
f"{segment_flag} expects a directory that directly contains *_zedN.svo or *_zedN.svo2 files. "
f"Use {dataset_root_flag} {resolved} to discover nested segments such as {example}"
)
def resolve_sources(
    dataset_root: Path | None,
    segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
    *,
    scan_segment_dir: Callable[[Path], ScanT],
    no_matches_message: Callable[[Path], str],
) -> SourceResolution[ScanT]:
    """Resolve exactly one of the three CLI source modes into segment directories.

    Raises click.ClickException unless precisely one of *dataset_root*,
    *segment_dirs*, or *segments_csv* was provided.
    """
    selected = [
        dataset_root is not None,
        bool(segment_dirs),
        segments_csv is not None,
    ]
    if selected.count(True) != 1:
        raise click.ClickException(
            "provide exactly one source mode: --dataset-root, --segment, or --segments-csv"
        )
    if dataset_root is not None:
        # Discovery mode: walk the root and keep complete segments.
        return discover_segment_dirs(
            dataset_root,
            recursive,
            scan_segment_dir=scan_segment_dir,
            no_matches_message=no_matches_message,
        )
    if segment_dirs:
        # Explicit mode: normalize, then guard against dataset roots passed by mistake.
        explicit_dirs = dedupe_paths(list(segment_dirs))
        for explicit_dir in explicit_dirs:
            raise_if_segment_path_looks_like_dataset_root(
                explicit_dir,
                scan_segment_dir=scan_segment_dir,
            )
        return SourceResolution(mode="segments", segment_dirs=tuple(explicit_dirs), ignored_partial_dirs=())
    # CSV mode: segments_csv is guaranteed non-None by the exclusivity check above.
    return SourceResolution(
        mode="segments-csv",
        segment_dirs=parse_segments_csv(segments_csv, csv_root),
        ignored_partial_dirs=(),
    )
def _find_nested_valid_segment_dirs(
root: Path,
*,
scan_segment_dir: Callable[[Path], ScanT],
limit: int = 3,
) -> tuple[Path, ...]:
matches: list[Path] = []
for path in sorted(root.rglob("*")):
if not path.is_dir():
continue
resolved = path.resolve()
if resolved == root:
continue
scan = scan_segment_dir(resolved)
if scan.is_valid:
matches.append(resolved)
if len(matches) >= limit:
break
return tuple(matches)
+45 -128
View File
@@ -3,7 +3,6 @@
from __future__ import annotations from __future__ import annotations
import concurrent.futures import concurrent.futures
import csv
import json import json
import math import math
import os import os
@@ -17,6 +16,11 @@ from pathlib import Path
import click import click
from tqdm import tqdm from tqdm import tqdm
try:
from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
import zed_batch_segment_sources as segment_sources
SCRIPT_PATH = Path(__file__).resolve() SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1] REPO_ROOT = SCRIPT_PATH.parents[1]
@@ -70,13 +74,6 @@ class SegmentScan:
reason: str | None = None reason: str | None = None
@dataclass(slots=True, frozen=True)
class SourceResolution:
mode: str
segment_dirs: tuple[Path, ...]
ignored_partial_dirs: tuple[SegmentScan, ...]
@dataclass(slots=True, frozen=True) @dataclass(slots=True, frozen=True)
class OutputProbeResult: class OutputProbeResult:
output_path: Path output_path: Path
@@ -157,116 +154,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan:
return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True) return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True)
def dedupe_paths(paths: list[Path]) -> list[Path]:
ordered: list[Path] = []
seen: set[Path] = set()
for path in paths:
resolved = path.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
ordered.append(resolved)
return ordered
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
if not root.is_dir():
raise click.ClickException(f"input directory does not exist: {root}")
candidate_dirs = {root.resolve()}
iterator = root.rglob("*") if recursive else root.iterdir()
for path in iterator:
if path.is_dir():
candidate_dirs.add(path.resolve())
valid_dirs: list[Path] = []
ignored_partial_dirs: list[SegmentScan] = []
for segment_dir in sorted(candidate_dirs):
scan = scan_segment_dir(segment_dir)
if scan.is_valid:
valid_dirs.append(segment_dir)
elif scan.matched_files > 0:
ignored_partial_dirs.append(scan)
if not valid_dirs:
raise click.ClickException(f"no complete four-camera segments found under {root}")
return SourceResolution(
mode="discovery",
segment_dirs=tuple(valid_dirs),
ignored_partial_dirs=tuple(ignored_partial_dirs),
)
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
def resolve_sources(
input_dir: Path | None,
segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
) -> SourceResolution:
source_count = sum(
(
1 if input_dir is not None else 0,
1 if segment_dirs else 0,
1 if segments_csv is not None else 0,
)
)
if source_count != 1:
raise click.ClickException(
"provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
)
if input_dir is not None:
return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)
if segment_dirs:
ordered_dirs = dedupe_paths(list(segment_dirs))
return SourceResolution(mode="segment-dir", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=())
return SourceResolution(
mode="segments-csv",
segment_dirs=parse_segments_csv(segments_csv, csv_root),
ignored_partial_dirs=(),
)
def output_path_for(segment_dir: Path) -> Path: def output_path_for(segment_dir: Path) -> Path:
return segment_dir / f"{segment_dir.name}_grid.mp4" return segment_dir / f"{segment_dir.name}_grid.mp4"
@@ -514,30 +401,45 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
return results, aborted_count return results, aborted_count
@click.command() @click.command(context_settings={"allow_extra_args": True})
@click.argument( @click.option(
"input_dir", "--dataset-root",
required=False,
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
)
@click.option(
"--segment",
"segment_dirs",
multiple=True,
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
help=(
"Explicit segment directory. Repeatable. The directory must directly contain "
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
),
) )
@click.option( @click.option(
"--segment-dir", "--segment-dir",
"segment_dirs", "legacy_segment_dirs",
multiple=True, multiple=True,
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.", hidden=True,
) )
@click.option( @click.option(
"--segments-csv", "--segments-csv",
type=click.Path(path_type=Path, dir_okay=False), type=click.Path(path_type=Path, dir_okay=False),
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.", help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
) )
@click.option( @click.option(
"--csv-root", "--csv-root",
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
) )
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.") @click.option(
"--recursive/--no-recursive",
default=True,
show_default=True,
help="Recurse when discovering segment directories from --dataset-root.",
)
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") @click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option( @click.option(
"--zed-bin", "--zed-bin",
@@ -625,9 +527,12 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
show_default=True, show_default=True,
help="Scale each tile relative to the source resolution.", help="Scale each tile relative to the source resolution.",
) )
@click.pass_context
def main( def main(
input_dir: Path | None, ctx: click.Context,
dataset_root: Path | None,
segment_dirs: tuple[Path, ...], segment_dirs: tuple[Path, ...],
legacy_segment_dirs: tuple[Path, ...],
segments_csv: Path | None, segments_csv: Path | None,
csv_root: Path | None, csv_root: Path | None,
recursive: bool, recursive: bool,
@@ -653,14 +558,26 @@ def main(
tile_scale: float, tile_scale: float,
) -> None: ) -> None:
"""Batch-convert synced four-camera ZED segments into grid MP4 files.""" """Batch-convert synced four-camera ZED segments into grid MP4 files."""
segment_sources.raise_for_legacy_extra_args(ctx.args)
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
if b_frames > gop: if b_frames > gop:
raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames") raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames")
if report_existing and dry_run: if report_existing and dry_run:
raise click.ClickException("--report-existing and --dry-run are mutually exclusive") raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
sources = segment_sources.resolve_sources(
dataset_root,
segment_dirs,
segments_csv,
csv_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=lambda root: f"no complete four-camera segments found under {root}",
)
ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None
binary_path = None if report_existing else locate_binary(zed_bin) binary_path = None if report_existing else locate_binary(zed_bin)
sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
config = BatchConfig( config = BatchConfig(
zed_bin=binary_path, zed_bin=binary_path,
ffprobe_bin=ffprobe_path, ffprobe_bin=ffprobe_path,
+47 -130
View File
@@ -3,7 +3,6 @@
from __future__ import annotations from __future__ import annotations
import concurrent.futures import concurrent.futures
import csv
import importlib import importlib
import os import os
import re import re
@@ -17,6 +16,11 @@ from pathlib import Path
import click import click
from progress_table import ProgressTable from progress_table import ProgressTable
try:
from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
import zed_batch_segment_sources as segment_sources
SCRIPT_PATH = Path(__file__).resolve() SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1] REPO_ROOT = SCRIPT_PATH.parents[1]
@@ -82,13 +86,6 @@ class SegmentScan:
reason: str | None = None reason: str | None = None
@dataclass(slots=True, frozen=True)
class SourceResolution:
mode: str
segment_dirs: tuple[Path, ...]
ignored_partial_dirs: tuple[SegmentScan, ...]
@dataclass(slots=True, frozen=True) @dataclass(slots=True, frozen=True)
class OutputProbeResult: class OutputProbeResult:
output_path: Path output_path: Path
@@ -339,116 +336,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan:
) )
def dedupe_paths(paths: list[Path]) -> list[Path]:
ordered: list[Path] = []
seen: set[Path] = set()
for path in paths:
resolved = path.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
ordered.append(resolved)
return ordered
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
if not root.is_dir():
raise click.ClickException(f"input directory does not exist: {root}")
candidate_dirs = {root.resolve()}
iterator = root.rglob("*") if recursive else root.iterdir()
for path in iterator:
if path.is_dir():
candidate_dirs.add(path.resolve())
valid_dirs: list[Path] = []
ignored_partial_dirs: list[SegmentScan] = []
for segment_dir in sorted(candidate_dirs):
scan = scan_segment_dir(segment_dir)
if scan.is_valid:
valid_dirs.append(segment_dir)
elif scan.matched_files > 0:
ignored_partial_dirs.append(scan)
if not valid_dirs:
raise click.ClickException(f"no multi-camera segments found under {root}")
return SourceResolution(
mode="discovery",
segment_dirs=tuple(valid_dirs),
ignored_partial_dirs=tuple(ignored_partial_dirs),
)
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
def resolve_sources(
input_dir: Path | None,
segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
) -> SourceResolution:
source_count = sum(
(
1 if input_dir is not None else 0,
1 if segment_dirs else 0,
1 if segments_csv is not None else 0,
)
)
if source_count != 1:
raise click.ClickException(
"provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
)
if input_dir is not None:
return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)
if segment_dirs:
ordered_dirs = dedupe_paths(list(segment_dirs))
return SourceResolution(mode="segment-dir", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=())
return SourceResolution(
mode="segments-csv",
segment_dirs=parse_segments_csv(segments_csv, csv_root),
ignored_partial_dirs=(),
)
def output_path_for(segment_dir: Path) -> Path: def output_path_for(segment_dir: Path) -> Path:
return segment_dir / f"{segment_dir.name}.mcap" return segment_dir / f"{segment_dir.name}.mcap"
@@ -469,7 +356,7 @@ def display_name_for_segment(
input_root: Path | None, input_root: Path | None,
common_parent: Path | None, common_parent: Path | None,
) -> str: ) -> str:
if source_mode == "discovery" and input_root is not None: if source_mode == "dataset-root" and input_root is not None:
try: try:
return str(segment_dir.relative_to(input_root)) return str(segment_dir.relative_to(input_root))
except ValueError: except ValueError:
@@ -1071,30 +958,45 @@ def build_worker_slots(
return worker_slots return worker_slots
@click.command() @click.command(context_settings={"allow_extra_args": True})
@click.argument( @click.option(
"input_dir", "--dataset-root",
required=False,
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
)
@click.option(
"--segment",
"segment_dirs",
multiple=True,
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
help=(
"Explicit segment directory. Repeatable. The directory must directly contain "
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
),
) )
@click.option( @click.option(
"--segment-dir", "--segment-dir",
"segment_dirs", "legacy_segment_dirs",
multiple=True, multiple=True,
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.", hidden=True,
) )
@click.option( @click.option(
"--segments-csv", "--segments-csv",
type=click.Path(path_type=Path, dir_okay=False), type=click.Path(path_type=Path, dir_okay=False),
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.", help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
) )
@click.option( @click.option(
"--csv-root", "--csv-root",
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
) )
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.") @click.option(
"--recursive/--no-recursive",
default=True,
show_default=True,
help="Recurse when discovering segment directories from --dataset-root.",
)
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") @click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option( @click.option(
"--hardware-jobs", "--hardware-jobs",
@@ -1231,9 +1133,12 @@ def build_worker_slots(
show_default=True, show_default=True,
help="Progress output mode. Auto uses a table on TTY and text logging otherwise.", help="Progress output mode. Auto uses a table on TTY and text logging otherwise.",
) )
@click.pass_context
def main( def main(
input_dir: Path | None, ctx: click.Context,
dataset_root: Path | None,
segment_dirs: tuple[Path, ...], segment_dirs: tuple[Path, ...],
legacy_segment_dirs: tuple[Path, ...],
segments_csv: Path | None, segments_csv: Path | None,
csv_root: Path | None, csv_root: Path | None,
recursive: bool, recursive: bool,
@@ -1266,6 +1171,10 @@ def main(
progress_ui: str, progress_ui: str,
) -> None: ) -> None:
"""Batch-convert multi-camera ZED segments into grouped MCAP files.""" """Batch-convert multi-camera ZED segments into grouped MCAP files."""
segment_sources.raise_for_legacy_extra_args(ctx.args)
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
if report_existing and dry_run: if report_existing and dry_run:
raise click.ClickException("--report-existing and --dry-run are mutually exclusive") raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
if bundle_policy == "copy": if bundle_policy == "copy":
@@ -1276,8 +1185,16 @@ def main(
if bundle_topic != "/bundle": if bundle_topic != "/bundle":
raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy") raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy")
sources = segment_sources.resolve_sources(
dataset_root,
segment_dirs,
segments_csv,
csv_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=lambda root: f"no multi-camera segments found under {root}",
)
binary_path = None if report_existing else locate_binary(zed_bin) binary_path = None if report_existing else locate_binary(zed_bin)
sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
worker_slots = build_worker_slots( worker_slots = build_worker_slots(
jobs=jobs, jobs=jobs,
encoder_device=encoder_device, encoder_device=encoder_device,
@@ -1307,7 +1224,7 @@ def main(
sync_tolerance_ms=sync_tolerance_ms, sync_tolerance_ms=sync_tolerance_ms,
progress_ui=progress_ui, progress_ui=progress_ui,
) )
input_root = input_dir.expanduser().resolve() if input_dir is not None else None input_root = dataset_root.expanduser().resolve() if dataset_root is not None else None
display_parent = common_segment_parent(sources.segment_dirs) display_parent = common_segment_parent(sources.segment_dirs)
skipped_results: list[JobResult] = [] skipped_results: list[JobResult] = []
+268
View File
@@ -0,0 +1,268 @@
from __future__ import annotations
import dataclasses
import tempfile
import unittest
from pathlib import Path
import click
from click.testing import CliRunner
from scripts import zed_batch_segment_sources as segment_sources
from scripts.zed_batch_svo_grid_to_mp4 import main as grid_main
from scripts.zed_batch_svo_to_mcap import main as mcap_main
@dataclasses.dataclass(slots=True, frozen=True)
class FakeScan:
    """Immutable stand-in for a segment scan result, produced by ``fake_scan``."""

    segment_dir: Path  # directory that was scanned
    matched_files: int  # number of camera files the fake scan reports
    is_valid: bool  # whether the segment counts as convertible
    reason: str | None = None  # human-readable explanation when is_valid is False
def fake_scan(segment_dir: Path) -> FakeScan:
    """Classify *segment_dir* using marker files instead of real camera data.

    A ``valid.segment`` marker yields a valid two-file scan, a
    ``partial.segment`` marker yields an invalid one-file scan, and anything
    else (including a missing directory) yields an invalid empty scan.
    """

    def result(matched: int, ok: bool, why: str | None = None) -> FakeScan:
        # Single construction point keeps every branch consistent.
        return FakeScan(segment_dir=segment_dir, matched_files=matched, is_valid=ok, reason=why)

    if not segment_dir.is_dir():
        return result(0, False, "missing directory")
    if (segment_dir / "valid.segment").is_file():
        return result(2, True)
    if (segment_dir / "partial.segment").is_file():
        return result(1, False, "partial segment")
    return result(0, False, "no camera files")
def create_multicamera_segment(parent: Path, segment_name: str) -> Path:
    """Create *segment_name* under *parent* populated with four empty ZED files.

    Returns the new segment directory; parents are created as needed.
    """
    target = parent / segment_name
    target.mkdir(parents=True)
    for cam in (1, 2, 3, 4):
        # One empty .svo2 per camera, named like real multi-camera recordings.
        camera_file = target / f"{segment_name}_zed{cam}.svo2"
        camera_file.write_bytes(b"")
    return target
class SharedSourceResolutionTests(unittest.TestCase):
    """Unit tests for the shared ``resolve_sources`` source-selection logic."""

    @staticmethod
    def _resolve(dataset_root, segments=(), segments_csv=None, csv_root=None, recursive=True):
        """Call resolve_sources with the fake scanner and a fixed no-match message."""
        return segment_sources.resolve_sources(
            dataset_root,
            segments,
            segments_csv,
            csv_root,
            recursive,
            scan_segment_dir=fake_scan,
            no_matches_message=lambda root: f"no segments under {root}",
        )

    @staticmethod
    def _make_valid_segment(segment_dir: Path) -> None:
        """Create *segment_dir* plus the marker file that fake_scan treats as valid."""
        segment_dir.mkdir(parents=True)
        (segment_dir / "valid.segment").write_text("", encoding="utf-8")

    def test_dataset_root_recursive_discovers_nested_segments(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            nested = root / "run" / "2026-04-08T11-50-32"
            self._make_valid_segment(nested)
            sources = self._resolve(root)
            self.assertEqual(sources.mode, "dataset-root")
            self.assertEqual(sources.segment_dirs, (nested.resolve(),))

    def test_dataset_root_without_recursive_does_not_descend(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            self._make_valid_segment(root / "run" / "2026-04-08T11-50-32")
            with self.assertRaises(click.ClickException) as error:
                self._resolve(root, recursive=False)
            self.assertIn("no segments under", str(error.exception))

    def test_explicit_segments_are_deduped(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = Path(tmp) / "2026-04-08T11-50-32"
            self._make_valid_segment(segment)
            # The same directory listed twice must collapse to one entry.
            sources = self._resolve(None, segments=(segment, segment))
            self.assertEqual(sources.mode, "segments")
            self.assertEqual(sources.segment_dirs, (segment.resolve(),))

    def test_segments_csv_uses_segment_dir_column(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            base = Path(tmp)
            segment = base / "segments" / "2026-04-08T11-50-32"
            self._make_valid_segment(segment)
            csv_path = base / "segments.csv"
            # Relative path in the CSV resolves against the CSV's directory.
            csv_path.write_text("segment_dir\nsegments/2026-04-08T11-50-32\n", encoding="utf-8")
            sources = self._resolve(None, segments_csv=csv_path)
            self.assertEqual(sources.mode, "segments-csv")
            self.assertEqual(sources.segment_dirs, (segment.resolve(),))

    def test_segment_path_like_dataset_root_has_hint(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            self._make_valid_segment(root / "run" / "2026-04-08T11-50-32")
            with self.assertRaises(click.ClickException) as error:
                # Passing a dataset root where a segment is expected should
                # point the user at the --dataset-root flag.
                self._resolve(None, segments=(root,))
            message = str(error.exception)
            self.assertIn("looks like a dataset root", message)
            self.assertIn("--dataset-root", message)
class BatchCliSmokeTests(unittest.TestCase):
    """CLI smoke tests for the batch MP4 and MCAP wrapper entry points."""

    def setUp(self) -> None:
        # A fresh runner per test keeps captured output isolated.
        self.runner = CliRunner()

    def _invoke(self, cli, args):
        """Run *cli* with *args* plus the flags shared by every smoke test."""
        return self.runner.invoke(cli, [*args, "--dry-run", "--zed-bin", "/bin/true"])

    def test_mcap_dataset_root_flag_discovers_segments(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            create_multicamera_segment(root / "run", "2026-04-08T11-50-32")
            result = self._invoke(mcap_main, ("--dataset-root", str(root), "--recursive"))
            self.assertEqual(result.exit_code, 0, result.output)
            self.assertIn("source=dataset-root matched=1 pending=1", result.output)

    def test_mcap_segment_flag_rejects_dataset_root_with_hint(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            create_multicamera_segment(root / "run", "2026-04-08T11-50-32")
            # A dataset root handed to --segment must fail with a redirect hint.
            result = self._invoke(mcap_main, ("--segment", str(root)))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("looks like a dataset root", result.output)
            self.assertIn("--dataset-root", result.output)

    def test_mcap_rejects_legacy_positional_dataset_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            create_multicamera_segment(root / "run", "2026-04-08T11-50-32")
            result = self._invoke(mcap_main, (str(root),))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("positional dataset paths are no longer supported", result.output)
            self.assertIn("--dataset-root", result.output)

    def test_mcap_rejects_recursive_without_dataset_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
            result = self._invoke(mcap_main, ("--segment", str(segment), "--no-recursive"))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("--recursive/--no-recursive can only be used with --dataset-root", result.output)

    def test_grid_segment_flag_discovers_one_segment(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
            result = self._invoke(grid_main, ("--segment", str(segment)))
            self.assertEqual(result.exit_code, 0, result.output)
            self.assertIn("source=segments matched=1 pending=1", result.output)

    def test_grid_rejects_legacy_segment_dir_flag(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
            result = self._invoke(grid_main, ("--segment-dir", str(segment)))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("--segment-dir is no longer supported", result.output)
            self.assertIn("--segment", result.output)
if __name__ == "__main__":
unittest.main()
Generated
+14
View File
@@ -40,6 +40,7 @@ dependencies = [
{ name = "opencv-python-headless" }, { name = "opencv-python-headless" },
{ name = "progress-table" }, { name = "progress-table" },
{ name = "protobuf" }, { name = "protobuf" },
{ name = "tqdm" },
{ name = "zstandard" }, { name = "zstandard" },
] ]
@@ -59,6 +60,7 @@ requires-dist = [
{ name = "progress-table", specifier = ">=3.2" }, { name = "progress-table", specifier = ">=3.2" },
{ name = "protobuf", specifier = ">=5.29" }, { name = "protobuf", specifier = ">=5.29" },
{ name = "rvl-impl", marker = "python_full_version >= '3.12' and extra == 'viewer'", git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9" }, { name = "rvl-impl", marker = "python_full_version >= '3.12' and extra == 'viewer'", git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9" },
{ name = "tqdm", specifier = ">=4.67" },
{ name = "zstandard", specifier = ">=0.23" }, { name = "zstandard", specifier = ">=0.23" },
] ]
provides-extras = ["viewer"] provides-extras = ["viewer"]
@@ -327,6 +329,18 @@ name = "rvl-impl"
version = "0.1.0" version = "0.1.0"
source = { git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9#74308bcaf184cb39428237e8f4f99a67a6de22d9" } source = { git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9#74308bcaf184cb39428237e8f4f99a67a6de22d9" }
[[package]]
name = "tqdm"
version = "4.67.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
]
[[package]] [[package]]
name = "wcwidth" name = "wcwidth"
version = "0.6.0" version = "0.6.0"