Redesign batch segment source selection
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import csv
|
||||
import importlib
|
||||
import os
|
||||
import re
|
||||
@@ -17,6 +16,11 @@ from pathlib import Path
|
||||
import click
|
||||
from progress_table import ProgressTable
|
||||
|
||||
try:
|
||||
from scripts import zed_batch_segment_sources as segment_sources
|
||||
except ModuleNotFoundError:
|
||||
import zed_batch_segment_sources as segment_sources
|
||||
|
||||
|
||||
SCRIPT_PATH = Path(__file__).resolve()
|
||||
REPO_ROOT = SCRIPT_PATH.parents[1]
|
||||
@@ -82,13 +86,6 @@ class SegmentScan:
|
||||
reason: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
class SourceResolution:
    """Outcome of selecting segment source directories via one of the three input modes."""

    # Which selection mode produced this result: "discovery", "segment-dir",
    # or "segments-csv" (the values constructed in resolve_sources / discover_segment_dirs).
    mode: str
    # Resolved segment directories to process, already deduplicated and ordered.
    segment_dirs: tuple[Path, ...]
    # Scans of directories that matched some segment files but were not valid
    # segments; only populated by discovery mode, the explicit modes pass ().
    ignored_partial_dirs: tuple[SegmentScan, ...]
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class OutputProbeResult:
|
||||
output_path: Path
|
||||
@@ -339,116 +336,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan:
|
||||
)
|
||||
|
||||
|
||||
def dedupe_paths(paths: list[Path]) -> list[Path]:
    """Expand and resolve each path, dropping duplicates while keeping first-seen order."""
    # dict preserves insertion order, so fromkeys acts as an ordered set of
    # the resolved paths; later duplicates are ignored exactly like the
    # explicit seen-set loop would ignore them.
    return list(dict.fromkeys(path.expanduser().resolve() for path in paths))
|
||||
|
||||
|
||||
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
    """Scan *root* (and optionally its subtree) for valid multi-camera segment dirs.

    Every directory (including *root* itself) is scanned; valid ones become the
    selected segment dirs, while partial matches are reported separately so the
    caller can warn about them. Raises click.ClickException when *root* is not
    a directory or when no valid segment is found.
    """
    if not root.is_dir():
        raise click.ClickException(f"input directory does not exist: {root}")

    walker = root.rglob("*") if recursive else root.iterdir()
    # Root itself is always a candidate; the set collapses duplicates from the walk.
    candidates = {root.resolve()} | {
        entry.resolve() for entry in walker if entry.is_dir()
    }

    accepted: list[Path] = []
    partial_scans: list[SegmentScan] = []
    for candidate in sorted(candidates):
        result = scan_segment_dir(candidate)
        if result.is_valid:
            accepted.append(candidate)
        elif result.matched_files > 0:
            # Matched some segment files but not a complete set — surface it.
            partial_scans.append(result)

    if not accepted:
        raise click.ClickException(f"no multi-camera segments found under {root}")

    return SourceResolution(
        mode="discovery",
        segment_dirs=tuple(accepted),
        ignored_partial_dirs=tuple(partial_scans),
    )
|
||||
|
||||
|
||||
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
    """Load segment directories from the 'segment_dir' column of *csv_path*.

    Relative entries are anchored at *csv_root* when given, otherwise at the
    CSV's own parent directory. Duplicate resolved paths are dropped while
    preserving first-seen order. Raises click.ClickException for a missing
    file, a bad base directory, a missing header, an empty cell, or a CSV
    with no data rows.
    """
    csv_path = csv_path.expanduser().resolve()
    if not csv_path.is_file():
        raise click.ClickException(f"CSV not found: {csv_path}")

    if csv_root is None:
        base_dir = csv_path.parent
    else:
        base_dir = csv_root.expanduser().resolve()
        if not base_dir.is_dir():
            raise click.ClickException(f"CSV root is not a directory: {base_dir}")

    # Insertion-ordered dict doubles as an ordered set of resolved paths.
    collected: dict[Path, None] = {}
    with csv_path.open(newline="") as stream:
        reader = csv.DictReader(stream)
        if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
            raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")

        # Data rows begin at physical line 2; line 1 holds the header.
        for row_number, row in enumerate(reader, start=2):
            cell = (row.get("segment_dir") or "").strip()
            if not cell:
                raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
            entry = Path(cell)
            if not entry.is_absolute():
                entry = base_dir / entry
            collected.setdefault(entry.expanduser().resolve(), None)

    if not collected:
        raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
    return tuple(collected)
|
||||
|
||||
|
||||
def resolve_sources(
    input_dir: Path | None,
    segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
) -> SourceResolution:
    """Validate that exactly one source mode was supplied and dispatch to it.

    The three mutually exclusive modes are: directory discovery (input_dir),
    explicit segment directories, and a segments CSV. Raises
    click.ClickException when zero or more than one mode is given.
    """
    mode_flags = [input_dir is not None, bool(segment_dirs), segments_csv is not None]
    if mode_flags.count(True) != 1:
        raise click.ClickException(
            "provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
        )

    if input_dir is not None:
        return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)

    if segment_dirs:
        return SourceResolution(
            mode="segment-dir",
            segment_dirs=tuple(dedupe_paths(list(segment_dirs))),
            ignored_partial_dirs=(),
        )

    # Only segments_csv remains; the exclusivity check above guarantees it is set.
    return SourceResolution(
        mode="segments-csv",
        segment_dirs=parse_segments_csv(segments_csv, csv_root),
        ignored_partial_dirs=(),
    )
|
||||
|
||||
|
||||
def output_path_for(segment_dir: Path) -> Path:
    """Return the grouped MCAP output path: the file lives inside the segment
    directory and is named after it (<dir>/<dir_name>.mcap)."""
    mcap_name = segment_dir.name + ".mcap"
    return segment_dir.joinpath(mcap_name)
|
||||
|
||||
@@ -469,7 +356,7 @@ def display_name_for_segment(
|
||||
input_root: Path | None,
|
||||
common_parent: Path | None,
|
||||
) -> str:
|
||||
if source_mode == "discovery" and input_root is not None:
|
||||
if source_mode == "dataset-root" and input_root is not None:
|
||||
try:
|
||||
return str(segment_dir.relative_to(input_root))
|
||||
except ValueError:
|
||||
@@ -1071,30 +958,45 @@ def build_worker_slots(
|
||||
return worker_slots
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument(
|
||||
"input_dir",
|
||||
required=False,
|
||||
@click.command(context_settings={"allow_extra_args": True})
|
||||
@click.option(
|
||||
"--dataset-root",
|
||||
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
|
||||
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
|
||||
)
|
||||
@click.option(
|
||||
"--segment",
|
||||
"segment_dirs",
|
||||
multiple=True,
|
||||
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
|
||||
help=(
|
||||
"Explicit segment directory. Repeatable. The directory must directly contain "
|
||||
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
|
||||
),
|
||||
)
|
||||
@click.option(
|
||||
"--segment-dir",
|
||||
"segment_dirs",
|
||||
"legacy_segment_dirs",
|
||||
multiple=True,
|
||||
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
|
||||
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.",
|
||||
hidden=True,
|
||||
)
|
||||
@click.option(
|
||||
"--segments-csv",
|
||||
type=click.Path(path_type=Path, dir_okay=False),
|
||||
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.",
|
||||
help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
|
||||
)
|
||||
@click.option(
|
||||
"--csv-root",
|
||||
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
|
||||
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
|
||||
)
|
||||
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.")
|
||||
@click.option(
|
||||
"--recursive/--no-recursive",
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Recurse when discovering segment directories from --dataset-root.",
|
||||
)
|
||||
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
|
||||
@click.option(
|
||||
"--hardware-jobs",
|
||||
@@ -1231,9 +1133,12 @@ def build_worker_slots(
|
||||
show_default=True,
|
||||
help="Progress output mode. Auto uses a table on TTY and text logging otherwise.",
|
||||
)
|
||||
@click.pass_context
|
||||
def main(
|
||||
input_dir: Path | None,
|
||||
ctx: click.Context,
|
||||
dataset_root: Path | None,
|
||||
segment_dirs: tuple[Path, ...],
|
||||
legacy_segment_dirs: tuple[Path, ...],
|
||||
segments_csv: Path | None,
|
||||
csv_root: Path | None,
|
||||
recursive: bool,
|
||||
@@ -1266,6 +1171,10 @@ def main(
|
||||
progress_ui: str,
|
||||
) -> None:
|
||||
"""Batch-convert multi-camera ZED segments into grouped MCAP files."""
|
||||
segment_sources.raise_for_legacy_extra_args(ctx.args)
|
||||
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
|
||||
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
|
||||
|
||||
if report_existing and dry_run:
|
||||
raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
|
||||
if bundle_policy == "copy":
|
||||
@@ -1276,8 +1185,16 @@ def main(
|
||||
if bundle_topic != "/bundle":
|
||||
raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy")
|
||||
|
||||
sources = segment_sources.resolve_sources(
|
||||
dataset_root,
|
||||
segment_dirs,
|
||||
segments_csv,
|
||||
csv_root,
|
||||
recursive,
|
||||
scan_segment_dir=scan_segment_dir,
|
||||
no_matches_message=lambda root: f"no multi-camera segments found under {root}",
|
||||
)
|
||||
binary_path = None if report_existing else locate_binary(zed_bin)
|
||||
sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
|
||||
worker_slots = build_worker_slots(
|
||||
jobs=jobs,
|
||||
encoder_device=encoder_device,
|
||||
@@ -1307,7 +1224,7 @@ def main(
|
||||
sync_tolerance_ms=sync_tolerance_ms,
|
||||
progress_ui=progress_ui,
|
||||
)
|
||||
input_root = input_dir.expanduser().resolve() if input_dir is not None else None
|
||||
input_root = dataset_root.expanduser().resolve() if dataset_root is not None else None
|
||||
display_parent = common_segment_parent(sources.segment_dirs)
|
||||
|
||||
skipped_results: list[JobResult] = []
|
||||
|
||||
Reference in New Issue
Block a user