Redesign batch segment source selection

This commit is contained in:
2026-04-08 08:07:05 +00:00
parent c320bf01af
commit 0a3da46f19
7 changed files with 642 additions and 268 deletions
+12 -10
View File
@@ -162,7 +162,7 @@ Use the batch wrapper to run `zed_svo_grid_to_mp4` over many segment directories
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--recursive \ --recursive \
--jobs 2 \ --jobs 2 \
--encoder-device auto \ --encoder-device auto \
@@ -173,8 +173,8 @@ You can also provide the exact segments to convert:
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--segment-dir <SEGMENT_DIR_A> \ --segment <SEGMENT_DIR_A> \
--segment-dir <SEGMENT_DIR_B> \ --segment <SEGMENT_DIR_B> \
--jobs 2 --jobs 2
``` ```
@@ -193,7 +193,7 @@ When you suspect a previous run left behind partial MP4 files, opt into `ffprobe
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--probe-existing \ --probe-existing \
--jobs 2 --jobs 2
``` ```
@@ -202,7 +202,7 @@ Use `--report-existing` to audit existing outputs without launching conversions.
```bash ```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \ uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--report-existing --report-existing
``` ```
@@ -236,7 +236,7 @@ Use the wrapper to recurse through a dataset root, run `zed_svo_to_mcap --segmen
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--recursive \ --recursive \
--jobs 2 \ --jobs 2 \
--cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \ --cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
@@ -258,7 +258,7 @@ Enable per-camera pose export when the segment has valid tracking:
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
--segment-dir <SEGMENT_DIR> \ --segment <SEGMENT_DIR> \
--with-pose \ --with-pose \
--pose-config <POSE_CONFIG> --pose-config <POSE_CONFIG>
``` ```
@@ -275,6 +275,8 @@ Single-source `zed_svo_to_mcap` now writes the one-camera `copy` shape by defaul
For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`. For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`.
For calibration-based depth/RGB mapping, use `scripts/mcap_depth_alignment.py` and [docs/depth_alignment.md](./docs/depth_alignment.md). That helper explains the current affine mapping implied by the exported calibration topics and can export example aligned-depth and overlay PNGs from a chosen MCAP frame.
### MCAP RGBD Viewer ### MCAP RGBD Viewer
The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`. The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`.
@@ -322,7 +324,7 @@ That is why the batch wrapper supports mixed pools such as two NVENC workers plu
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--recursive \ --recursive \
--overwrite \ --overwrite \
--hardware-jobs 2 \ --hardware-jobs 2 \
@@ -340,7 +342,7 @@ Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--probe-existing \ --probe-existing \
--jobs 2 --jobs 2
``` ```
@@ -349,7 +351,7 @@ Use `--report-existing` to audit existing MCAPs without launching conversions:
```bash ```bash
uv run python scripts/zed_batch_svo_to_mcap.py \ uv run python scripts/zed_batch_svo_to_mcap.py \
<DATASET_ROOT> \ --dataset-root <DATASET_ROOT> \
--report-existing --report-existing
``` ```
+1
View File
@@ -9,6 +9,7 @@ dependencies = [
"opencv-python-headless>=4.11", "opencv-python-headless>=4.11",
"progress-table>=3.2", "progress-table>=3.2",
"protobuf>=5.29", "protobuf>=5.29",
"tqdm>=4.67",
"zstandard>=0.23", "zstandard>=0.23",
] ]
+255
View File
@@ -0,0 +1,255 @@
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Generic, Protocol, TypeVar
import click
from click.core import ParameterSource
class SegmentScanLike(Protocol):
    """Structural interface for per-segment scan results shared by the batch wrappers."""

    # Directory that was scanned for camera recordings.
    segment_dir: Path
    # Number of camera files found in the directory.
    matched_files: int
    # True when the directory contains a complete, usable segment.
    is_valid: bool
ScanT = TypeVar("ScanT", bound=SegmentScanLike)
@dataclass(slots=True, frozen=True)
class SourceResolution(Generic[ScanT]):
    """Outcome of resolving the CLI source options into concrete segment directories."""

    # Which source mode produced the result (e.g. "dataset-root", "segments", "segments-csv").
    mode: str
    # Resolved, deduplicated segment directories to process.
    segment_dirs: tuple[Path, ...]
    # Scans for directories that matched some camera files but were not complete segments.
    ignored_partial_dirs: tuple[ScanT, ...]
def dedupe_paths(paths: list[Path]) -> list[Path]:
    """Resolve each path and return them in first-seen order without duplicates."""
    # Dict insertion order (guaranteed since 3.7) doubles as the dedupe record.
    unique: dict[Path, None] = {}
    for candidate in paths:
        unique.setdefault(candidate.expanduser().resolve(), None)
    return list(unique)
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
def discover_segment_dirs(
    root: Path,
    recursive: bool,
    *,
    scan_segment_dir: Callable[[Path], ScanT],
    no_matches_message: Callable[[Path], str],
) -> SourceResolution[ScanT]:
    """Walk *root* and collect every directory that scans as a complete segment.

    Directories that matched some camera files but are incomplete are recorded
    as ignored partial scans. Raises click.ClickException when *root* is not a
    directory or when no complete segment is found.
    """
    resolved_root = root.expanduser().resolve()
    if not resolved_root.is_dir():
        raise click.ClickException(f"dataset root does not exist: {resolved_root}")
    # The root itself is always a candidate; descendants depend on `recursive`.
    walker = resolved_root.rglob("*") if recursive else resolved_root.iterdir()
    candidates = {resolved_root} | {entry.resolve() for entry in walker if entry.is_dir()}
    complete: list[Path] = []
    partial_scans: list[ScanT] = []
    for candidate in sorted(candidates):
        scan = scan_segment_dir(candidate)
        if scan.is_valid:
            complete.append(candidate)
        elif scan.matched_files > 0:
            partial_scans.append(scan)
    if not complete:
        raise click.ClickException(no_matches_message(resolved_root))
    return SourceResolution(
        mode="dataset-root",
        segment_dirs=tuple(complete),
        ignored_partial_dirs=tuple(partial_scans),
    )
def raise_if_recursive_flag_is_incompatible(
    ctx: click.Context,
    dataset_root: Path | None,
    *,
    dataset_root_flag: str = "--dataset-root",
) -> None:
    """Reject an explicitly passed --recursive/--no-recursive outside dataset-root mode."""
    explicitly_set = ctx.get_parameter_source("recursive") is not ParameterSource.DEFAULT
    if explicitly_set and dataset_root is None:
        raise click.ClickException(f"--recursive/--no-recursive can only be used with {dataset_root_flag}")
def raise_for_legacy_source_args(
legacy_input_dir: Path | None,
legacy_segment_dirs: tuple[Path, ...],
*,
dataset_root_flag: str = "--dataset-root",
segment_flag: str = "--segment",
) -> None:
if legacy_input_dir is not None:
resolved = legacy_input_dir.expanduser().resolve()
raise click.ClickException(
f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}"
)
if legacy_segment_dirs:
resolved = legacy_segment_dirs[0].expanduser().resolve()
raise click.ClickException(
f"--segment-dir is no longer supported in this batch wrapper; use {segment_flag} {resolved} "
f"for an explicit segment directory, or {dataset_root_flag} <DATASET_ROOT> --recursive for discovery"
)
def raise_for_legacy_extra_args(
    extra_args: list[str],
    *,
    dataset_root_flag: str = "--dataset-root",
) -> None:
    """Turn leftover positional CLI arguments into actionable migration errors."""
    if not extra_args:
        return
    leading = extra_args[0]
    if leading.startswith("-"):
        # Unknown flags: report everything that was left over.
        extras_text = " ".join(extra_args)
        raise click.ClickException(f"unexpected extra arguments: {extras_text}")
    # A bare path: assume it was the removed positional dataset argument.
    suggested = Path(leading).expanduser().resolve()
    raise click.ClickException(
        f"positional dataset paths are no longer supported; use {dataset_root_flag} {suggested}"
    )
def raise_if_segment_path_looks_like_dataset_root(
segment_dir: Path,
*,
scan_segment_dir: Callable[[Path], ScanT],
dataset_root_flag: str = "--dataset-root",
segment_flag: str = "--segment",
) -> None:
resolved = segment_dir.expanduser().resolve()
if not resolved.is_dir():
return
scan = scan_segment_dir(resolved)
if scan.is_valid or scan.matched_files > 0:
return
nested_segments = _find_nested_valid_segment_dirs(resolved, scan_segment_dir=scan_segment_dir)
if not nested_segments:
return
example = nested_segments[0]
raise click.ClickException(
f"{resolved} looks like a dataset root, not a segment directory. "
f"{segment_flag} expects a directory that directly contains *_zedN.svo or *_zedN.svo2 files. "
f"Use {dataset_root_flag} {resolved} to discover nested segments such as {example}"
)
def resolve_sources(
    dataset_root: Path | None,
    segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
    *,
    scan_segment_dir: Callable[[Path], ScanT],
    no_matches_message: Callable[[Path], str],
) -> SourceResolution[ScanT]:
    """Resolve exactly one of the three CLI source modes into segment directories.

    Raises click.ClickException unless precisely one of *dataset_root*,
    *segment_dirs*, or *segments_csv* was provided.
    """
    selected = [
        dataset_root is not None,
        bool(segment_dirs),
        segments_csv is not None,
    ]
    if selected.count(True) != 1:
        raise click.ClickException(
            "provide exactly one source mode: --dataset-root, --segment, or --segments-csv"
        )
    if dataset_root is not None:
        # Discovery mode: walk the root and keep complete segments.
        return discover_segment_dirs(
            dataset_root,
            recursive,
            scan_segment_dir=scan_segment_dir,
            no_matches_message=no_matches_message,
        )
    if segment_dirs:
        # Explicit mode: normalize, then guard against dataset roots passed by mistake.
        explicit_dirs = dedupe_paths(list(segment_dirs))
        for explicit_dir in explicit_dirs:
            raise_if_segment_path_looks_like_dataset_root(
                explicit_dir,
                scan_segment_dir=scan_segment_dir,
            )
        return SourceResolution(mode="segments", segment_dirs=tuple(explicit_dirs), ignored_partial_dirs=())
    # CSV mode: segments_csv is guaranteed non-None by the exclusivity check above.
    return SourceResolution(
        mode="segments-csv",
        segment_dirs=parse_segments_csv(segments_csv, csv_root),
        ignored_partial_dirs=(),
    )
def _find_nested_valid_segment_dirs(
root: Path,
*,
scan_segment_dir: Callable[[Path], ScanT],
limit: int = 3,
) -> tuple[Path, ...]:
matches: list[Path] = []
for path in sorted(root.rglob("*")):
if not path.is_dir():
continue
resolved = path.resolve()
if resolved == root:
continue
scan = scan_segment_dir(resolved)
if scan.is_valid:
matches.append(resolved)
if len(matches) >= limit:
break
return tuple(matches)
+45 -128
View File
@@ -3,7 +3,6 @@
from __future__ import annotations from __future__ import annotations
import concurrent.futures import concurrent.futures
import csv
import json import json
import math import math
import os import os
@@ -17,6 +16,11 @@ from pathlib import Path
import click import click
from tqdm import tqdm from tqdm import tqdm
try:
from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
import zed_batch_segment_sources as segment_sources
SCRIPT_PATH = Path(__file__).resolve() SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1] REPO_ROOT = SCRIPT_PATH.parents[1]
@@ -70,13 +74,6 @@ class SegmentScan:
reason: str | None = None reason: str | None = None
@dataclass(slots=True, frozen=True)
class SourceResolution:
mode: str
segment_dirs: tuple[Path, ...]
ignored_partial_dirs: tuple[SegmentScan, ...]
@dataclass(slots=True, frozen=True) @dataclass(slots=True, frozen=True)
class OutputProbeResult: class OutputProbeResult:
output_path: Path output_path: Path
@@ -157,116 +154,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan:
return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True) return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True)
def dedupe_paths(paths: list[Path]) -> list[Path]:
ordered: list[Path] = []
seen: set[Path] = set()
for path in paths:
resolved = path.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
ordered.append(resolved)
return ordered
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
if not root.is_dir():
raise click.ClickException(f"input directory does not exist: {root}")
candidate_dirs = {root.resolve()}
iterator = root.rglob("*") if recursive else root.iterdir()
for path in iterator:
if path.is_dir():
candidate_dirs.add(path.resolve())
valid_dirs: list[Path] = []
ignored_partial_dirs: list[SegmentScan] = []
for segment_dir in sorted(candidate_dirs):
scan = scan_segment_dir(segment_dir)
if scan.is_valid:
valid_dirs.append(segment_dir)
elif scan.matched_files > 0:
ignored_partial_dirs.append(scan)
if not valid_dirs:
raise click.ClickException(f"no complete four-camera segments found under {root}")
return SourceResolution(
mode="discovery",
segment_dirs=tuple(valid_dirs),
ignored_partial_dirs=tuple(ignored_partial_dirs),
)
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
def resolve_sources(
input_dir: Path | None,
segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
) -> SourceResolution:
source_count = sum(
(
1 if input_dir is not None else 0,
1 if segment_dirs else 0,
1 if segments_csv is not None else 0,
)
)
if source_count != 1:
raise click.ClickException(
"provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
)
if input_dir is not None:
return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)
if segment_dirs:
ordered_dirs = dedupe_paths(list(segment_dirs))
return SourceResolution(mode="segment-dir", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=())
return SourceResolution(
mode="segments-csv",
segment_dirs=parse_segments_csv(segments_csv, csv_root),
ignored_partial_dirs=(),
)
def output_path_for(segment_dir: Path) -> Path: def output_path_for(segment_dir: Path) -> Path:
return segment_dir / f"{segment_dir.name}_grid.mp4" return segment_dir / f"{segment_dir.name}_grid.mp4"
@@ -514,30 +401,45 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
return results, aborted_count return results, aborted_count
@click.command() @click.command(context_settings={"allow_extra_args": True})
@click.argument( @click.option(
"input_dir", "--dataset-root",
required=False,
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
)
@click.option(
"--segment",
"segment_dirs",
multiple=True,
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
help=(
"Explicit segment directory. Repeatable. The directory must directly contain "
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
),
) )
@click.option( @click.option(
"--segment-dir", "--segment-dir",
"segment_dirs", "legacy_segment_dirs",
multiple=True, multiple=True,
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.", hidden=True,
) )
@click.option( @click.option(
"--segments-csv", "--segments-csv",
type=click.Path(path_type=Path, dir_okay=False), type=click.Path(path_type=Path, dir_okay=False),
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.", help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
) )
@click.option( @click.option(
"--csv-root", "--csv-root",
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
) )
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.") @click.option(
"--recursive/--no-recursive",
default=True,
show_default=True,
help="Recurse when discovering segment directories from --dataset-root.",
)
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") @click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option( @click.option(
"--zed-bin", "--zed-bin",
@@ -625,9 +527,12 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
show_default=True, show_default=True,
help="Scale each tile relative to the source resolution.", help="Scale each tile relative to the source resolution.",
) )
@click.pass_context
def main( def main(
input_dir: Path | None, ctx: click.Context,
dataset_root: Path | None,
segment_dirs: tuple[Path, ...], segment_dirs: tuple[Path, ...],
legacy_segment_dirs: tuple[Path, ...],
segments_csv: Path | None, segments_csv: Path | None,
csv_root: Path | None, csv_root: Path | None,
recursive: bool, recursive: bool,
@@ -653,14 +558,26 @@ def main(
tile_scale: float, tile_scale: float,
) -> None: ) -> None:
"""Batch-convert synced four-camera ZED segments into grid MP4 files.""" """Batch-convert synced four-camera ZED segments into grid MP4 files."""
segment_sources.raise_for_legacy_extra_args(ctx.args)
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
if b_frames > gop: if b_frames > gop:
raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames") raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames")
if report_existing and dry_run: if report_existing and dry_run:
raise click.ClickException("--report-existing and --dry-run are mutually exclusive") raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
sources = segment_sources.resolve_sources(
dataset_root,
segment_dirs,
segments_csv,
csv_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=lambda root: f"no complete four-camera segments found under {root}",
)
ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None
binary_path = None if report_existing else locate_binary(zed_bin) binary_path = None if report_existing else locate_binary(zed_bin)
sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
config = BatchConfig( config = BatchConfig(
zed_bin=binary_path, zed_bin=binary_path,
ffprobe_bin=ffprobe_path, ffprobe_bin=ffprobe_path,
+47 -130
View File
@@ -3,7 +3,6 @@
from __future__ import annotations from __future__ import annotations
import concurrent.futures import concurrent.futures
import csv
import importlib import importlib
import os import os
import re import re
@@ -17,6 +16,11 @@ from pathlib import Path
import click import click
from progress_table import ProgressTable from progress_table import ProgressTable
try:
from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
import zed_batch_segment_sources as segment_sources
SCRIPT_PATH = Path(__file__).resolve() SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1] REPO_ROOT = SCRIPT_PATH.parents[1]
@@ -82,13 +86,6 @@ class SegmentScan:
reason: str | None = None reason: str | None = None
@dataclass(slots=True, frozen=True)
class SourceResolution:
mode: str
segment_dirs: tuple[Path, ...]
ignored_partial_dirs: tuple[SegmentScan, ...]
@dataclass(slots=True, frozen=True) @dataclass(slots=True, frozen=True)
class OutputProbeResult: class OutputProbeResult:
output_path: Path output_path: Path
@@ -339,116 +336,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan:
) )
def dedupe_paths(paths: list[Path]) -> list[Path]:
ordered: list[Path] = []
seen: set[Path] = set()
for path in paths:
resolved = path.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
ordered.append(resolved)
return ordered
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
if not root.is_dir():
raise click.ClickException(f"input directory does not exist: {root}")
candidate_dirs = {root.resolve()}
iterator = root.rglob("*") if recursive else root.iterdir()
for path in iterator:
if path.is_dir():
candidate_dirs.add(path.resolve())
valid_dirs: list[Path] = []
ignored_partial_dirs: list[SegmentScan] = []
for segment_dir in sorted(candidate_dirs):
scan = scan_segment_dir(segment_dir)
if scan.is_valid:
valid_dirs.append(segment_dir)
elif scan.matched_files > 0:
ignored_partial_dirs.append(scan)
if not valid_dirs:
raise click.ClickException(f"no multi-camera segments found under {root}")
return SourceResolution(
mode="discovery",
segment_dirs=tuple(valid_dirs),
ignored_partial_dirs=tuple(ignored_partial_dirs),
)
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
def resolve_sources(
input_dir: Path | None,
segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
) -> SourceResolution:
source_count = sum(
(
1 if input_dir is not None else 0,
1 if segment_dirs else 0,
1 if segments_csv is not None else 0,
)
)
if source_count != 1:
raise click.ClickException(
"provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
)
if input_dir is not None:
return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)
if segment_dirs:
ordered_dirs = dedupe_paths(list(segment_dirs))
return SourceResolution(mode="segment-dir", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=())
return SourceResolution(
mode="segments-csv",
segment_dirs=parse_segments_csv(segments_csv, csv_root),
ignored_partial_dirs=(),
)
def output_path_for(segment_dir: Path) -> Path: def output_path_for(segment_dir: Path) -> Path:
return segment_dir / f"{segment_dir.name}.mcap" return segment_dir / f"{segment_dir.name}.mcap"
@@ -469,7 +356,7 @@ def display_name_for_segment(
input_root: Path | None, input_root: Path | None,
common_parent: Path | None, common_parent: Path | None,
) -> str: ) -> str:
if source_mode == "discovery" and input_root is not None: if source_mode == "dataset-root" and input_root is not None:
try: try:
return str(segment_dir.relative_to(input_root)) return str(segment_dir.relative_to(input_root))
except ValueError: except ValueError:
@@ -1071,30 +958,45 @@ def build_worker_slots(
return worker_slots return worker_slots
@click.command() @click.command(context_settings={"allow_extra_args": True})
@click.argument( @click.option(
"input_dir", "--dataset-root",
required=False,
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
)
@click.option(
"--segment",
"segment_dirs",
multiple=True,
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
help=(
"Explicit segment directory. Repeatable. The directory must directly contain "
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
),
) )
@click.option( @click.option(
"--segment-dir", "--segment-dir",
"segment_dirs", "legacy_segment_dirs",
multiple=True, multiple=True,
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.", hidden=True,
) )
@click.option( @click.option(
"--segments-csv", "--segments-csv",
type=click.Path(path_type=Path, dir_okay=False), type=click.Path(path_type=Path, dir_okay=False),
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.", help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
) )
@click.option( @click.option(
"--csv-root", "--csv-root",
type=click.Path(path_type=Path, file_okay=False, dir_okay=True), type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
) )
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.") @click.option(
"--recursive/--no-recursive",
default=True,
show_default=True,
help="Recurse when discovering segment directories from --dataset-root.",
)
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") @click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option( @click.option(
"--hardware-jobs", "--hardware-jobs",
@@ -1231,9 +1133,12 @@ def build_worker_slots(
show_default=True, show_default=True,
help="Progress output mode. Auto uses a table on TTY and text logging otherwise.", help="Progress output mode. Auto uses a table on TTY and text logging otherwise.",
) )
@click.pass_context
def main( def main(
input_dir: Path | None, ctx: click.Context,
dataset_root: Path | None,
segment_dirs: tuple[Path, ...], segment_dirs: tuple[Path, ...],
legacy_segment_dirs: tuple[Path, ...],
segments_csv: Path | None, segments_csv: Path | None,
csv_root: Path | None, csv_root: Path | None,
recursive: bool, recursive: bool,
@@ -1266,6 +1171,10 @@ def main(
progress_ui: str, progress_ui: str,
) -> None: ) -> None:
"""Batch-convert multi-camera ZED segments into grouped MCAP files.""" """Batch-convert multi-camera ZED segments into grouped MCAP files."""
segment_sources.raise_for_legacy_extra_args(ctx.args)
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
if report_existing and dry_run: if report_existing and dry_run:
raise click.ClickException("--report-existing and --dry-run are mutually exclusive") raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
if bundle_policy == "copy": if bundle_policy == "copy":
@@ -1276,8 +1185,16 @@ def main(
if bundle_topic != "/bundle": if bundle_topic != "/bundle":
raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy") raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy")
sources = segment_sources.resolve_sources(
dataset_root,
segment_dirs,
segments_csv,
csv_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=lambda root: f"no multi-camera segments found under {root}",
)
binary_path = None if report_existing else locate_binary(zed_bin) binary_path = None if report_existing else locate_binary(zed_bin)
sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
worker_slots = build_worker_slots( worker_slots = build_worker_slots(
jobs=jobs, jobs=jobs,
encoder_device=encoder_device, encoder_device=encoder_device,
@@ -1307,7 +1224,7 @@ def main(
sync_tolerance_ms=sync_tolerance_ms, sync_tolerance_ms=sync_tolerance_ms,
progress_ui=progress_ui, progress_ui=progress_ui,
) )
input_root = input_dir.expanduser().resolve() if input_dir is not None else None input_root = dataset_root.expanduser().resolve() if dataset_root is not None else None
display_parent = common_segment_parent(sources.segment_dirs) display_parent = common_segment_parent(sources.segment_dirs)
skipped_results: list[JobResult] = [] skipped_results: list[JobResult] = []
+268
View File
@@ -0,0 +1,268 @@
from __future__ import annotations
import dataclasses
import tempfile
import unittest
from pathlib import Path
import click
from click.testing import CliRunner
from scripts import zed_batch_segment_sources as segment_sources
from scripts.zed_batch_svo_grid_to_mp4 import main as grid_main
from scripts.zed_batch_svo_to_mcap import main as mcap_main
@dataclasses.dataclass(slots=True, frozen=True)
class FakeScan:
    """Immutable stand-in for a segment scan result, produced by ``fake_scan``."""

    segment_dir: Path  # directory that was scanned
    matched_files: int  # number of camera files the fake scan reports
    is_valid: bool  # whether the segment counts as convertible
    reason: str | None = None  # human-readable explanation when is_valid is False
def fake_scan(segment_dir: Path) -> FakeScan:
    """Classify *segment_dir* using marker files instead of real camera data.

    A ``valid.segment`` marker yields a valid two-file scan, a
    ``partial.segment`` marker yields an invalid one-file scan, and anything
    else (including a missing directory) yields an invalid empty scan.
    """

    def result(matched: int, ok: bool, why: str | None = None) -> FakeScan:
        # Single construction point keeps every branch consistent.
        return FakeScan(segment_dir=segment_dir, matched_files=matched, is_valid=ok, reason=why)

    if not segment_dir.is_dir():
        return result(0, False, "missing directory")
    if (segment_dir / "valid.segment").is_file():
        return result(2, True)
    if (segment_dir / "partial.segment").is_file():
        return result(1, False, "partial segment")
    return result(0, False, "no camera files")
def create_multicamera_segment(parent: Path, segment_name: str) -> Path:
    """Create *segment_name* under *parent* populated with four empty ZED files.

    Returns the new segment directory; parents are created as needed.
    """
    target = parent / segment_name
    target.mkdir(parents=True)
    for cam in (1, 2, 3, 4):
        # One empty .svo2 per camera, named like real multi-camera recordings.
        camera_file = target / f"{segment_name}_zed{cam}.svo2"
        camera_file.write_bytes(b"")
    return target
class SharedSourceResolutionTests(unittest.TestCase):
    """Unit tests for the shared ``resolve_sources`` source-selection logic."""

    @staticmethod
    def _resolve(dataset_root, segments=(), segments_csv=None, csv_root=None, recursive=True):
        """Call resolve_sources with the fake scanner and a fixed no-match message."""
        return segment_sources.resolve_sources(
            dataset_root,
            segments,
            segments_csv,
            csv_root,
            recursive,
            scan_segment_dir=fake_scan,
            no_matches_message=lambda root: f"no segments under {root}",
        )

    @staticmethod
    def _make_valid_segment(segment_dir: Path) -> None:
        """Create *segment_dir* plus the marker file that fake_scan treats as valid."""
        segment_dir.mkdir(parents=True)
        (segment_dir / "valid.segment").write_text("", encoding="utf-8")

    def test_dataset_root_recursive_discovers_nested_segments(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            nested = root / "run" / "2026-04-08T11-50-32"
            self._make_valid_segment(nested)
            sources = self._resolve(root)
            self.assertEqual(sources.mode, "dataset-root")
            self.assertEqual(sources.segment_dirs, (nested.resolve(),))

    def test_dataset_root_without_recursive_does_not_descend(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            self._make_valid_segment(root / "run" / "2026-04-08T11-50-32")
            with self.assertRaises(click.ClickException) as error:
                self._resolve(root, recursive=False)
            self.assertIn("no segments under", str(error.exception))

    def test_explicit_segments_are_deduped(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = Path(tmp) / "2026-04-08T11-50-32"
            self._make_valid_segment(segment)
            # The same directory listed twice must collapse to one entry.
            sources = self._resolve(None, segments=(segment, segment))
            self.assertEqual(sources.mode, "segments")
            self.assertEqual(sources.segment_dirs, (segment.resolve(),))

    def test_segments_csv_uses_segment_dir_column(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            base = Path(tmp)
            segment = base / "segments" / "2026-04-08T11-50-32"
            self._make_valid_segment(segment)
            csv_path = base / "segments.csv"
            # Relative path in the CSV resolves against the CSV's directory.
            csv_path.write_text("segment_dir\nsegments/2026-04-08T11-50-32\n", encoding="utf-8")
            sources = self._resolve(None, segments_csv=csv_path)
            self.assertEqual(sources.mode, "segments-csv")
            self.assertEqual(sources.segment_dirs, (segment.resolve(),))

    def test_segment_path_like_dataset_root_has_hint(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            self._make_valid_segment(root / "run" / "2026-04-08T11-50-32")
            with self.assertRaises(click.ClickException) as error:
                # Passing a dataset root where a segment is expected should
                # point the user at the --dataset-root flag.
                self._resolve(None, segments=(root,))
            message = str(error.exception)
            self.assertIn("looks like a dataset root", message)
            self.assertIn("--dataset-root", message)
class BatchCliSmokeTests(unittest.TestCase):
    """CLI smoke tests for the batch MP4 and MCAP wrapper entry points."""

    def setUp(self) -> None:
        # A fresh runner per test keeps captured output isolated.
        self.runner = CliRunner()

    def _invoke(self, cli, args):
        """Run *cli* with *args* plus the flags shared by every smoke test."""
        return self.runner.invoke(cli, [*args, "--dry-run", "--zed-bin", "/bin/true"])

    def test_mcap_dataset_root_flag_discovers_segments(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            create_multicamera_segment(root / "run", "2026-04-08T11-50-32")
            result = self._invoke(mcap_main, ("--dataset-root", str(root), "--recursive"))
            self.assertEqual(result.exit_code, 0, result.output)
            self.assertIn("source=dataset-root matched=1 pending=1", result.output)

    def test_mcap_segment_flag_rejects_dataset_root_with_hint(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            create_multicamera_segment(root / "run", "2026-04-08T11-50-32")
            # A dataset root handed to --segment must fail with a redirect hint.
            result = self._invoke(mcap_main, ("--segment", str(root)))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("looks like a dataset root", result.output)
            self.assertIn("--dataset-root", result.output)

    def test_mcap_rejects_legacy_positional_dataset_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp) / "dataset"
            create_multicamera_segment(root / "run", "2026-04-08T11-50-32")
            result = self._invoke(mcap_main, (str(root),))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("positional dataset paths are no longer supported", result.output)
            self.assertIn("--dataset-root", result.output)

    def test_mcap_rejects_recursive_without_dataset_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
            result = self._invoke(mcap_main, ("--segment", str(segment), "--no-recursive"))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("--recursive/--no-recursive can only be used with --dataset-root", result.output)

    def test_grid_segment_flag_discovers_one_segment(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
            result = self._invoke(grid_main, ("--segment", str(segment)))
            self.assertEqual(result.exit_code, 0, result.output)
            self.assertIn("source=segments matched=1 pending=1", result.output)

    def test_grid_rejects_legacy_segment_dir_flag(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            segment = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
            result = self._invoke(grid_main, ("--segment-dir", str(segment)))
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("--segment-dir is no longer supported", result.output)
            self.assertIn("--segment", result.output)
if __name__ == "__main__":
unittest.main()
Generated
+14
View File
@@ -40,6 +40,7 @@ dependencies = [
{ name = "opencv-python-headless" }, { name = "opencv-python-headless" },
{ name = "progress-table" }, { name = "progress-table" },
{ name = "protobuf" }, { name = "protobuf" },
{ name = "tqdm" },
{ name = "zstandard" }, { name = "zstandard" },
] ]
@@ -59,6 +60,7 @@ requires-dist = [
{ name = "progress-table", specifier = ">=3.2" }, { name = "progress-table", specifier = ">=3.2" },
{ name = "protobuf", specifier = ">=5.29" }, { name = "protobuf", specifier = ">=5.29" },
{ name = "rvl-impl", marker = "python_full_version >= '3.12' and extra == 'viewer'", git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9" }, { name = "rvl-impl", marker = "python_full_version >= '3.12' and extra == 'viewer'", git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9" },
{ name = "tqdm", specifier = ">=4.67" },
{ name = "zstandard", specifier = ">=0.23" }, { name = "zstandard", specifier = ">=0.23" },
] ]
provides-extras = ["viewer"] provides-extras = ["viewer"]
@@ -327,6 +329,18 @@ name = "rvl-impl"
version = "0.1.0" version = "0.1.0"
source = { git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9#74308bcaf184cb39428237e8f4f99a67a6de22d9" } source = { git = "https://github.com/crosstyan/rvl-impl.git?rev=74308bcaf184cb39428237e8f4f99a67a6de22d9#74308bcaf184cb39428237e8f4f99a67a6de22d9" }
[[package]]
name = "tqdm"
version = "4.67.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
]
[[package]] [[package]]
name = "wcwidth" name = "wcwidth"
version = "0.6.0" version = "0.6.0"