Redesign batch segment source selection

This commit is contained in:
2026-04-08 08:07:05 +00:00
parent c320bf01af
commit 0a3da46f19
7 changed files with 642 additions and 268 deletions
+45 -128
View File
@@ -3,7 +3,6 @@
from __future__ import annotations
import concurrent.futures
import csv
import json
import math
import os
@@ -17,6 +16,11 @@ from pathlib import Path
import click
from tqdm import tqdm
try:
from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
import zed_batch_segment_sources as segment_sources
SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1]
@@ -70,13 +74,6 @@ class SegmentScan:
reason: str | None = None
@dataclass(slots=True, frozen=True)
class SourceResolution:
mode: str
segment_dirs: tuple[Path, ...]
ignored_partial_dirs: tuple[SegmentScan, ...]
@dataclass(slots=True, frozen=True)
class OutputProbeResult:
output_path: Path
@@ -157,116 +154,6 @@ def scan_segment_dir(segment_dir: Path) -> SegmentScan:
return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True)
def dedupe_paths(paths: list[Path]) -> list[Path]:
    """Return the resolved form of *paths* with duplicates removed.

    Each path is user-expanded and resolved before comparison, so two
    spellings of the same location collapse into one entry. The first
    occurrence of each location determines its position in the result.
    """
    # dict preserves insertion order, which gives an order-stable dedupe.
    unique = dict.fromkeys(candidate.expanduser().resolve() for candidate in paths)
    return list(unique)
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
    """Scan *root* and its subdirectories for valid segment directories.

    The root itself is always a candidate. With ``recursive`` set, every
    directory below the root is considered; otherwise only its direct
    children are. Candidates are scanned in sorted order with
    ``scan_segment_dir``.

    Returns:
        A ``SourceResolution`` in "discovery" mode holding the valid
        directories plus the scans of directories that matched some files
        but were not valid.

    Raises:
        click.ClickException: if *root* is not a directory, or if no valid
            segment directory was found.
    """
    if not root.is_dir():
        raise click.ClickException(f"input directory does not exist: {root}")

    candidates: set[Path] = {root.resolve()}
    entries = root.rglob("*") if recursive else root.iterdir()
    candidates.update(entry.resolve() for entry in entries if entry.is_dir())

    complete: list[Path] = []
    partial: list[SegmentScan] = []
    for candidate in sorted(candidates):
        result = scan_segment_dir(candidate)
        if result.is_valid:
            complete.append(candidate)
            continue
        # Only directories that matched at least one file are worth reporting.
        if result.matched_files > 0:
            partial.append(result)

    if not complete:
        raise click.ClickException(f"no complete four-camera segments found under {root}")

    return SourceResolution(
        mode="discovery",
        segment_dirs=tuple(complete),
        ignored_partial_dirs=tuple(partial),
    )
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
    """Read segment directories from the ``segment_dir`` column of a CSV file.

    Args:
        csv_path: CSV file to read; it must have a ``segment_dir`` header.
        csv_root: Base directory for relative ``segment_dir`` entries. When
            ``None``, the directory containing the CSV is used.

    Returns:
        The resolved segment directories in file order, with repeated
        locations kept only at their first occurrence.

    Raises:
        click.ClickException: if the CSV is missing, *csv_root* is not a
            directory, the header is absent, a row has an empty
            ``segment_dir`` value, or the file contains no rows.
    """
    csv_path = csv_path.expanduser().resolve()
    if not csv_path.is_file():
        raise click.ClickException(f"CSV not found: {csv_path}")

    if csv_root is not None:
        base_dir = csv_root.expanduser().resolve()
        if not base_dir.is_dir():
            raise click.ClickException(f"CSV root is not a directory: {base_dir}")
    else:
        base_dir = csv_path.parent

    segment_dirs: list[Path] = []
    seen: set[Path] = set()
    with csv_path.open(newline="") as stream:
        reader = csv.DictReader(stream)
        if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
            raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
        # Row numbers start at 2 because the header occupies the first line.
        for row_number, row in enumerate(reader, start=2):
            raw_segment_dir = (row.get("segment_dir") or "").strip()
            if not raw_segment_dir:
                raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
            # Expand "~" BEFORE deciding whether the entry is relative. A
            # "~/..." entry is not absolute, so joining it onto base_dir first
            # would bury the tilde mid-path where expanduser() cannot see it.
            segment_dir = Path(raw_segment_dir).expanduser()
            if not segment_dir.is_absolute():
                segment_dir = base_dir / segment_dir
            resolved = segment_dir.resolve()
            if resolved in seen:
                continue
            seen.add(resolved)
            segment_dirs.append(resolved)

    if not segment_dirs:
        raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
    return tuple(segment_dirs)
def resolve_sources(
    input_dir: Path | None,
    segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
) -> SourceResolution:
    """Turn the command-line source options into a ``SourceResolution``.

    Exactly one of *input_dir*, *segment_dirs* or *segments_csv* must be
    supplied. An input directory is searched with ``discover_segment_dirs``,
    explicit directories are de-duplicated, and a CSV is parsed with
    ``parse_segments_csv`` (using *csv_root* for relative entries).

    Raises:
        click.ClickException: if zero or more than one source was supplied.
    """
    has_input_dir = input_dir is not None
    has_segment_dirs = bool(segment_dirs)
    has_csv = segments_csv is not None
    # Booleans add up as 0/1, so the sum is the number of modes requested.
    if has_input_dir + has_segment_dirs + has_csv != 1:
        raise click.ClickException(
            "provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
        )

    if has_input_dir:
        return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)

    if has_segment_dirs:
        unique_dirs = dedupe_paths(list(segment_dirs))
        return SourceResolution(
            mode="segment-dir",
            segment_dirs=tuple(unique_dirs),
            ignored_partial_dirs=(),
        )

    csv_segment_dirs = parse_segments_csv(segments_csv, csv_root)
    return SourceResolution(
        mode="segments-csv",
        segment_dirs=csv_segment_dirs,
        ignored_partial_dirs=(),
    )
def output_path_for(segment_dir: Path) -> Path:
    """Return the grid MP4 path for a segment: ``<dir>/<dir name>_grid.mp4``."""
    filename = f"{segment_dir.name}_grid.mp4"
    return segment_dir.joinpath(filename)
@@ -514,30 +401,45 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
return results, aborted_count
@click.command()
@click.argument(
"input_dir",
required=False,
@click.command(context_settings={"allow_extra_args": True})
@click.option(
"--dataset-root",
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
)
@click.option(
"--segment",
"segment_dirs",
multiple=True,
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
help=(
"Explicit segment directory. Repeatable. The directory must directly contain "
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
),
)
@click.option(
"--segment-dir",
"segment_dirs",
"legacy_segment_dirs",
multiple=True,
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.",
hidden=True,
)
@click.option(
"--segments-csv",
type=click.Path(path_type=Path, dir_okay=False),
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.",
help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
)
@click.option(
"--csv-root",
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
)
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.")
@click.option(
"--recursive/--no-recursive",
default=True,
show_default=True,
help="Recurse when discovering segment directories from --dataset-root.",
)
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option(
"--zed-bin",
@@ -625,9 +527,12 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
show_default=True,
help="Scale each tile relative to the source resolution.",
)
@click.pass_context
def main(
input_dir: Path | None,
ctx: click.Context,
dataset_root: Path | None,
segment_dirs: tuple[Path, ...],
legacy_segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
@@ -653,14 +558,26 @@ def main(
tile_scale: float,
) -> None:
"""Batch-convert synced four-camera ZED segments into grid MP4 files."""
segment_sources.raise_for_legacy_extra_args(ctx.args)
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
if b_frames > gop:
raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames")
if report_existing and dry_run:
raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
sources = segment_sources.resolve_sources(
dataset_root,
segment_dirs,
segments_csv,
csv_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=lambda root: f"no complete four-camera segments found under {root}",
)
ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None
binary_path = None if report_existing else locate_binary(zed_bin)
sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
config = BatchConfig(
zed_bin=binary_path,
ffprobe_bin=ffprobe_path,