a0b9c95d5b
Default ZED MCAP export to neural_plus depth across the CLI and Python wrappers, and add tail-frame handling plus better corrupted-frame diagnostics in zed_svo_to_mcap. Add mixed hardware/software worker pools to the batch MCAP wrapper, replace tqdm with progress-table on TTYs, keep text event logging and heartbeats for non-TTY runs, and document the NVENC session-limit rationale for mixed mode in the README. Also refresh Python dependencies for the batch tooling and move the OpenSSL lookup in CMake so the local workspace build remains compatible with the vendored cnats setup.
1279 lines
41 KiB
Python
1279 lines
41 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from __future__ import annotations
|
|
|
|
import concurrent.futures
|
|
import csv
|
|
import importlib
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
import click
|
|
from progress_table import ProgressTable
|
|
|
|
|
|
# Absolute path of this script; the repository root is taken to be one
# directory above the folder that contains it.
SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1]
# Directory that contains the repository checkout.
WORKSPACE_ROOT = REPO_ROOT.parent
# Location of the MCAP Python package sources inside the workspace.
MCAP_PYTHON_ROOT = WORKSPACE_ROOT / "mcap" / "python" / "mcap"
# Matches "<prefix>_zed<N>.svo" and "<prefix>_zed<N>.svo2" (case-insensitive);
# group 1 captures the camera index digits.
SEGMENT_FILE_PATTERN = re.compile(r".*_zed([0-9]+)\.svo2?$", re.IGNORECASE)
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class BatchConfig:
|
|
zed_bin: Path | None
|
|
probe_existing: bool
|
|
overwrite: bool
|
|
fail_fast: bool
|
|
codec: str
|
|
mcap_compression: str
|
|
depth_mode: str
|
|
depth_size: str
|
|
with_pose: bool
|
|
pose_config: Path | None
|
|
world_frame_id: str | None
|
|
start_frame: int | None
|
|
end_frame: int | None
|
|
sync_tolerance_ms: float | None
|
|
progress_ui: str
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class ConversionJob:
|
|
segment_dir: Path
|
|
output_path: Path
|
|
camera_labels: tuple[str, ...]
|
|
display_name: str
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class WorkerSlot:
|
|
label: str
|
|
encoder_device: str
|
|
cuda_visible_devices: str | None
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class JobResult:
|
|
status: str
|
|
segment_dir: Path
|
|
output_path: Path
|
|
command: tuple[str, ...]
|
|
return_code: int = 0
|
|
stdout: str = ""
|
|
stderr: str = ""
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class SegmentScan:
|
|
segment_dir: Path
|
|
matched_files: int
|
|
camera_labels: tuple[str, ...]
|
|
is_valid: bool
|
|
reason: str | None = None
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class SourceResolution:
|
|
mode: str
|
|
segment_dirs: tuple[Path, ...]
|
|
ignored_partial_dirs: tuple[SegmentScan, ...]
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class OutputProbeResult:
|
|
output_path: Path
|
|
status: str
|
|
reason: str = ""
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class ActiveJobState:
|
|
submission_index: int
|
|
job: ConversionJob
|
|
slot: WorkerSlot
|
|
started_at_monotonic: float
|
|
row_index: int | None = None
|
|
|
|
|
|
# Cache for the lazily imported mcap.reader module (filled by load_mcap_reader).
_MCAP_READER_MODULE = None
# Heartbeat interval, in seconds, used by the table reporter to refresh elapsed times.
TABLE_REFRESH_SECONDS = 1.0
# Heartbeat interval, in seconds, between "progress:" lines of the text reporter.
TEXT_HEARTBEAT_SECONDS = 30.0
|
|
|
|
|
|
def format_elapsed(seconds: float) -> str:
    """Format a duration as ``MM:SS``, or ``H:MM:SS`` once it reaches an hour.

    The value is rounded to whole seconds and negative durations are clamped
    to zero.
    """
    total_seconds = max(0, int(round(seconds)))
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return f"{hours:d}:{minutes:02d}:{secs:02d}"
    return f"{minutes:02d}:{secs:02d}"
|
|
|
|
|
|
class ProgressReporter:
    """Base progress reporter whose hooks all do nothing.

    Subclasses override the hooks they care about. ``heartbeat_interval_seconds``
    is read by the batch loop to decide how often ``heartbeat`` is called.
    """

    heartbeat_interval_seconds: float

    def __init__(self, total_jobs: int) -> None:
        self.total_jobs = total_jobs
        self.heartbeat_interval_seconds = TEXT_HEARTBEAT_SECONDS

    def job_started(self, state: ActiveJobState) -> None:
        """Hook invoked when a job is handed to a worker slot."""

    def job_finished(self, state: ActiveJobState, result: JobResult) -> None:
        """Hook invoked when a job's subprocess has finished."""

    def heartbeat(
        self,
        *,
        completed_count: int,
        failed_count: int,
        active_states: list[ActiveJobState],
    ) -> None:
        """Periodic hook invoked while jobs are still running."""

    def close(self) -> None:
        """Hook invoked once after the batch loop is done."""
|
|
|
|
|
|
class TextProgressReporter(ProgressReporter):
    """Reporter that writes one plain-text line per event to stderr."""

    def __init__(self, total_jobs: int) -> None:
        super().__init__(total_jobs)
        self.heartbeat_interval_seconds = TEXT_HEARTBEAT_SECONDS

    def job_started(self, state: ActiveJobState) -> None:
        """Log a "started:" line naming the slot, encoder, CUDA assignment and segment."""
        cuda_label = state.slot.cuda_visible_devices or "-"
        message = (
            f"started: [{state.submission_index}/{self.total_jobs}] "
            f"{state.slot.label} encoder={state.slot.encoder_device} cuda={cuda_label} "
            f"segment={state.job.display_name}"
        )
        click.echo(message, err=True)

    def job_finished(self, state: ActiveJobState, result: JobResult) -> None:
        """Log a "completed:" or "failed:" line; failures add an output excerpt."""
        elapsed = format_elapsed(time.monotonic() - state.started_at_monotonic)
        converted = result.status == "converted"
        prefix = "completed" if converted else "failed"
        # The exit code is only reported for jobs that did not convert.
        exit_text = "" if converted else f" exit={result.return_code}"
        message = (
            f"{prefix}: [{state.submission_index}/{self.total_jobs}] "
            f"{state.slot.label} elapsed={elapsed}{exit_text} segment={state.job.display_name}"
        )
        click.echo(message, err=True)
        if result.status == "failed":
            for line in failure_excerpt(result):
                click.echo(f"  {line}", err=True)

    def heartbeat(
        self,
        *,
        completed_count: int,
        failed_count: int,
        active_states: list[ActiveJobState],
    ) -> None:
        """Log a "progress:" summary of completed, failed, active and remaining jobs."""
        active_count = len(active_states)
        # Remaining = jobs that have not been submitted yet.
        remaining_count = self.total_jobs - completed_count - failed_count - active_count
        message = (
            f"progress: completed={completed_count} failed={failed_count} "
            f"active={active_count} remaining={remaining_count}"
        )
        click.echo(message, err=True)
|
|
|
|
|
|
class TableProgressReporter(ProgressReporter):
    """Reporter that renders one progress-table row per job on stderr."""

    def __init__(self, total_jobs: int) -> None:
        super().__init__(total_jobs)
        # A short heartbeat keeps the elapsed column of running rows moving.
        self.heartbeat_interval_seconds = TABLE_REFRESH_SECONDS
        self.table = ProgressTable(
            "#",
            "segment",
            "worker",
            "encoder",
            "cuda",
            "status",
            "elapsed_s",
            interactive=2,
            refresh_rate=10,
            default_column_alignment="left",
            default_column_width=12,
            pbar_show_throughput=False,
            pbar_show_progress=False,
            pbar_show_percents=False,
            pbar_show_eta=False,
            print_header_every_n_rows=30,
            file=sys.stderr,
        )
        # Per-column width and alignment for the columns declared above.
        self.table.add_column("#", width=4, alignment="right")
        self.table.add_column("segment", width=44, alignment="left")
        self.table.add_column("worker", width=8, alignment="left")
        self.table.add_column("encoder", width=10, alignment="left")
        self.table.add_column("cuda", width=6, alignment="left")
        self.table.add_column("status", width=12, alignment="left")
        self.table.add_column("elapsed_s", width=10, alignment="right")

    def job_started(self, state: ActiveJobState) -> None:
        """Append a "running" row for the job and remember its row index."""
        self.table.add_row(
            state.submission_index,
            state.job.display_name,
            state.slot.label,
            state.slot.encoder_device,
            state.slot.cuda_visible_devices or "-",
            "running",
            format_elapsed(0.0),
        )
        # NOTE(review): assumes the row just added is the last one counted by
        # num_rows() — confirm against the progress-table API.
        state.row_index = self.table.num_rows() - 1

    def job_finished(self, state: ActiveJobState, result: JobResult) -> None:
        """Write the final status and elapsed time into the job's row."""
        if state.row_index is None:
            return
        if result.status == "converted":
            status_text = "converted"
        else:
            status_text = f"failed({result.return_code})"
        self.table.update("status", status_text, row=state.row_index)
        self.table.update(
            "elapsed_s",
            format_elapsed(time.monotonic() - state.started_at_monotonic),
            row=state.row_index,
        )

    def heartbeat(
        self,
        *,
        completed_count: int,
        failed_count: int,
        active_states: list[ActiveJobState],
    ) -> None:
        """Refresh the elapsed column of every running job that has a row."""
        for state in active_states:
            if state.row_index is None:
                continue
            self.table.update(
                "elapsed_s",
                format_elapsed(time.monotonic() - state.started_at_monotonic),
                row=state.row_index,
            )

    def close(self) -> None:
        """Close the underlying table."""
        self.table.close()
|
|
|
|
|
|
def locate_binary(override: Path | None) -> Path:
|
|
if override is not None:
|
|
candidate = override.expanduser().resolve()
|
|
if not candidate.is_file():
|
|
raise click.ClickException(f"binary not found: {candidate}")
|
|
return candidate
|
|
|
|
candidates = (
|
|
REPO_ROOT / "build" / "bin" / "zed_svo_to_mcap",
|
|
REPO_ROOT / "build" / "zed_svo_to_mcap",
|
|
)
|
|
for candidate in candidates:
|
|
if candidate.is_file():
|
|
return candidate
|
|
raise click.ClickException(f"could not find zed_svo_to_mcap under {REPO_ROOT / 'build'}")
|
|
|
|
|
|
def sorted_camera_labels(labels: set[str]) -> tuple[str, ...]:
    """Return camera labels ordered by their numeric index.

    Every label is expected to be a three-character prefix followed by an
    integer (for example ``"zed10"``), so ``zed2`` sorts before ``zed10``.
    """
    return tuple(sorted(labels, key=lambda label: int(label[3:])))
|
|
|
|
|
|
def scan_segment_dir(segment_dir: Path) -> SegmentScan:
    """Inspect a directory and decide whether it is a multi-camera segment.

    A directory is valid when its immediate files provide recordings for at
    least two distinct cameras and no camera appears more than once.

    Args:
        segment_dir: Directory to inspect (not searched recursively).

    Returns:
        A ``SegmentScan``; ``reason`` explains a rejection.
    """
    if not segment_dir.is_dir():
        return SegmentScan(
            segment_dir=segment_dir,
            matched_files=0,
            camera_labels=(),
            is_valid=False,
            reason=f"segment directory does not exist: {segment_dir}",
        )

    matched_by_camera: dict[str, list[Path]] = {}
    for child in segment_dir.iterdir():
        if not child.is_file():
            continue
        match = SEGMENT_FILE_PATTERN.fullmatch(child.name)
        if match is None:
            continue
        # int() drops leading zeros, so "zed01" and "zed1" count as one camera.
        label = f"zed{int(match.group(1))}"
        matched_by_camera.setdefault(label, []).append(child)

    matched_files = sum(len(paths) for paths in matched_by_camera.values())
    camera_labels = sorted_camera_labels(set(matched_by_camera))
    duplicate_cameras = [label for label, paths in sorted(matched_by_camera.items()) if len(paths) > 1]

    # Duplicates are reported in preference to "too few cameras".
    reason: str | None = None
    if duplicate_cameras:
        duplicate_text = ", ".join(duplicate_cameras)
        reason = f"duplicate camera inputs under {segment_dir}: {duplicate_text}"
    elif len(camera_labels) < 2:
        reason = f"expected at least 2 camera inputs under {segment_dir}, found {len(camera_labels)}"

    return SegmentScan(
        segment_dir=segment_dir,
        matched_files=matched_files,
        camera_labels=camera_labels,
        is_valid=reason is None,
        reason=reason,
    )
|
|
|
|
|
|
def dedupe_paths(paths: list[Path]) -> list[Path]:
    """Resolve every path and drop repeats, keeping first-seen order.

    Paths are compared after ``expanduser().resolve()``, so different
    spellings of the same location collapse into one entry.
    """
    # dict preserves insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(path.expanduser().resolve() for path in paths))
|
|
|
|
|
|
def discover_segment_dirs(root: Path, recursive: bool) -> SourceResolution:
    """Find multi-camera segment directories at or below ``root``.

    Args:
        root: Directory to search; the directory itself is also a candidate.
        recursive: Search all descendants when true, otherwise only the
            immediate children of ``root``.

    Returns:
        A "discovery" ``SourceResolution`` with the valid segment directories
        in sorted order, plus the scans of directories that had matching files
        but did not qualify.

    Raises:
        click.ClickException: If ``root`` is not a directory or no valid
            segment was found.
    """
    if not root.is_dir():
        raise click.ClickException(f"input directory does not exist: {root}")

    candidate_dirs = {root.resolve()}
    entries = root.rglob("*") if recursive else root.iterdir()
    candidate_dirs.update(path.resolve() for path in entries if path.is_dir())

    valid_dirs: list[Path] = []
    ignored_partial_dirs: list[SegmentScan] = []
    for segment_dir in sorted(candidate_dirs):
        scan = scan_segment_dir(segment_dir)
        if scan.is_valid:
            valid_dirs.append(segment_dir)
        elif scan.matched_files > 0:
            # Directories with no matching files at all are dropped silently.
            ignored_partial_dirs.append(scan)

    if not valid_dirs:
        raise click.ClickException(f"no multi-camera segments found under {root}")

    return SourceResolution(
        mode="discovery",
        segment_dirs=tuple(valid_dirs),
        ignored_partial_dirs=tuple(ignored_partial_dirs),
    )
|
|
|
|
|
|
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
|
|
csv_path = csv_path.expanduser().resolve()
|
|
if not csv_path.is_file():
|
|
raise click.ClickException(f"CSV not found: {csv_path}")
|
|
|
|
if csv_root is not None:
|
|
base_dir = csv_root.expanduser().resolve()
|
|
if not base_dir.is_dir():
|
|
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
|
|
else:
|
|
base_dir = csv_path.parent
|
|
|
|
segment_dirs: list[Path] = []
|
|
seen: set[Path] = set()
|
|
with csv_path.open(newline="") as stream:
|
|
reader = csv.DictReader(stream)
|
|
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
|
|
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
|
|
|
|
for row_number, row in enumerate(reader, start=2):
|
|
raw_segment_dir = (row.get("segment_dir") or "").strip()
|
|
if not raw_segment_dir:
|
|
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
|
|
segment_dir = Path(raw_segment_dir)
|
|
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
|
|
resolved = resolved.expanduser().resolve()
|
|
if resolved in seen:
|
|
continue
|
|
seen.add(resolved)
|
|
segment_dirs.append(resolved)
|
|
|
|
if not segment_dirs:
|
|
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
|
|
return tuple(segment_dirs)
|
|
|
|
|
|
def resolve_sources(
    input_dir: Path | None,
    segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
) -> SourceResolution:
    """Turn the mutually exclusive source options into segment directories.

    Exactly one of ``input_dir``, ``segment_dirs`` or ``segments_csv`` must be
    supplied.

    Raises:
        click.ClickException: If zero or more than one source mode is given,
            or the selected mode fails to resolve.
    """
    source_count = sum(
        (
            input_dir is not None,
            bool(segment_dirs),
            segments_csv is not None,
        )
    )
    if source_count != 1:
        raise click.ClickException(
            "provide exactly one source mode: INPUT_DIR, --segment-dir, or --segments-csv"
        )

    if input_dir is not None:
        return discover_segment_dirs(input_dir.expanduser().resolve(), recursive)

    if segment_dirs:
        ordered_dirs = dedupe_paths(list(segment_dirs))
        return SourceResolution(
            mode="segment-dir",
            segment_dirs=tuple(ordered_dirs),
            ignored_partial_dirs=(),
        )

    return SourceResolution(
        mode="segments-csv",
        segment_dirs=parse_segments_csv(segments_csv, csv_root),
        ignored_partial_dirs=(),
    )
|
|
|
|
|
|
def output_path_for(segment_dir: Path) -> Path:
    """Return the MCAP path for a segment: ``<segment_dir>/<segment name>.mcap``."""
    return segment_dir / f"{segment_dir.name}.mcap"
|
|
|
|
|
|
def common_segment_parent(segment_dirs: tuple[Path, ...]) -> Path | None:
|
|
if len(segment_dirs) <= 1:
|
|
return None
|
|
try:
|
|
return Path(os.path.commonpath([str(path) for path in segment_dirs]))
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
def display_name_for_segment(
|
|
segment_dir: Path,
|
|
*,
|
|
source_mode: str,
|
|
input_root: Path | None,
|
|
common_parent: Path | None,
|
|
) -> str:
|
|
if source_mode == "discovery" and input_root is not None:
|
|
try:
|
|
return str(segment_dir.relative_to(input_root))
|
|
except ValueError:
|
|
pass
|
|
if common_parent is not None:
|
|
try:
|
|
relative = segment_dir.relative_to(common_parent)
|
|
if str(relative) != ".":
|
|
return str(relative)
|
|
except ValueError:
|
|
pass
|
|
parent_name = segment_dir.parent.name
|
|
if parent_name:
|
|
return str(Path(parent_name) / segment_dir.name)
|
|
return segment_dir.name
|
|
|
|
|
|
def command_for_job(job: ConversionJob, config: BatchConfig, encoder_device: str) -> list[str]:
    """Build the ``zed_svo_to_mcap`` argument vector for one job.

    Args:
        job: Job whose ``segment_dir`` is converted.
        config: Batch-wide settings forwarded as command-line flags.
        encoder_device: Value for ``--encoder-device``.

    Returns:
        The argv list, starting with the binary path.

    Raises:
        RuntimeError: If ``config.zed_bin`` is ``None``.
    """
    if config.zed_bin is None:
        raise RuntimeError("zed_svo_to_mcap binary is not configured")

    command = [
        str(config.zed_bin),
        "--segment-dir",
        str(job.segment_dir),
        "--codec",
        config.codec,
        "--encoder-device",
        encoder_device,
        "--mcap-compression",
        config.mcap_compression,
        "--depth-mode",
        config.depth_mode,
        "--depth-size",
        config.depth_size,
    ]
    if config.with_pose:
        command.append("--with-pose")

    # Optional flags are emitted only when set; "is not None" keeps
    # legitimate zero values such as --start-frame 0.
    optional_flags = (
        ("--pose-config", config.pose_config),
        ("--world-frame-id", config.world_frame_id),
        ("--start-frame", config.start_frame),
        ("--end-frame", config.end_frame),
        ("--sync-tolerance-ms", config.sync_tolerance_ms),
    )
    for flag, value in optional_flags:
        if value is not None:
            command.extend([flag, str(value)])
    return command
|
|
|
|
|
|
def env_for_job(config: BatchConfig) -> dict[str, str]:
    """Return the subprocess environment without any CUDA device override.

    ``config`` is accepted but not consulted.
    """
    return env_for_job_with_cuda(None)
|
|
|
|
|
|
def env_for_job_with_cuda(assigned_cuda_visible_devices: str | None) -> dict[str, str]:
|
|
env = dict(os.environ)
|
|
if assigned_cuda_visible_devices is not None:
|
|
env["CUDA_VISIBLE_DEVICES"] = assigned_cuda_visible_devices
|
|
return env
|
|
|
|
|
|
def parse_cuda_device_pool(raw_value: str | None) -> tuple[str, ...]:
|
|
if raw_value is None:
|
|
return ()
|
|
devices = tuple(device.strip() for device in raw_value.split(",") if device.strip())
|
|
return devices
|
|
|
|
|
|
def choose_progress_reporter(progress_ui: str, total_jobs: int) -> ProgressReporter:
    """Create the reporter matching the requested progress UI.

    ``"table"`` and ``"text"`` select their reporter directly; any other
    value picks the table when stderr is a TTY and text logging otherwise.
    """
    if progress_ui == "text":
        return TextProgressReporter(total_jobs)
    if progress_ui == "table" or sys.stderr.isatty():
        return TableProgressReporter(total_jobs)
    return TextProgressReporter(total_jobs)
|
|
|
|
|
|
def load_mcap_reader():
    """Import ``mcap.reader`` once and return the cached module.

    ``MCAP_PYTHON_ROOT`` is put at the front of ``sys.path`` before the
    import so the package is looked up there first.

    Raises:
        click.ClickException: If the module cannot be found.
    """
    global _MCAP_READER_MODULE
    if _MCAP_READER_MODULE is not None:
        return _MCAP_READER_MODULE

    mcap_root = str(MCAP_PYTHON_ROOT)
    if mcap_root not in sys.path:
        sys.path.insert(0, mcap_root)
    try:
        _MCAP_READER_MODULE = importlib.import_module("mcap.reader")
    except ModuleNotFoundError as error:
        raise click.ClickException(
            f"could not import mcap.reader from {MCAP_PYTHON_ROOT}"
        ) from error
    return _MCAP_READER_MODULE
|
|
|
|
|
|
def required_topics_for(camera_labels: tuple[str, ...]) -> set[str]:
    """Return the topics an output must contain for the given cameras.

    Each camera contributes ``/<label>/video``, ``/<label>/depth`` and
    ``/<label>/calibration``.
    """
    return {
        f"/{label}/{stream}"
        for label in camera_labels
        for stream in ("video", "depth", "calibration")
    }
|
|
|
|
|
|
def probe_output(output_path: Path, camera_labels: tuple[str, ...]) -> OutputProbeResult:
    """Check whether an existing MCAP file has messages on every expected topic.

    Args:
        output_path: MCAP file to inspect.
        camera_labels: Cameras whose topics must be present.

    Returns:
        ``"missing"`` when the file does not exist, ``"valid"`` when every
        expected topic carries at least one message, otherwise ``"invalid"``
        with a reason (the read error, or the list of missing topics).
    """
    if not output_path.is_file():
        return OutputProbeResult(output_path=output_path, status="missing")

    reader_module = load_mcap_reader()
    expected_topics = required_topics_for(camera_labels)
    found_topics: set[str] = set()

    try:
        with output_path.open("rb") as stream:
            reader = reader_module.make_reader(stream)
            for _schema, channel, _message in reader.iter_messages():
                if channel.topic in expected_topics:
                    found_topics.add(channel.topic)
                    # Stop reading as soon as every topic has been seen.
                    if found_topics == expected_topics:
                        return OutputProbeResult(output_path=output_path, status="valid")
    except Exception as error:  # noqa: BLE001
        # Deliberately broad: any failure to read the file makes it invalid.
        return OutputProbeResult(output_path=output_path, status="invalid", reason=str(error))

    missing_topics = sorted(expected_topics - found_topics)
    if missing_topics:
        return OutputProbeResult(
            output_path=output_path,
            status="invalid",
            reason="missing expected topics: " + ", ".join(missing_topics),
        )
    return OutputProbeResult(output_path=output_path, status="valid")
|
|
|
|
|
|
def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult:
    """Run one conversion on a default slot (``auto`` encoder, inherited CUDA devices)."""
    default_slot = WorkerSlot(label="job-1", encoder_device="auto", cuda_visible_devices=None)
    return run_conversion_on_slot(job, config, default_slot)
|
|
|
|
|
|
def run_conversion_on_slot(
    job: ConversionJob,
    config: BatchConfig,
    slot: WorkerSlot,
) -> JobResult:
    """Run the converter for ``job`` with the slot's encoder and CUDA settings.

    Blocks until the subprocess exits; its stdout and stderr are captured as
    text. A non-zero exit code does not raise, it yields a ``"failed"`` result.
    """
    command = command_for_job(job, config, slot.encoder_device)
    completed = subprocess.run(
        command,
        check=False,
        capture_output=True,
        text=True,
        env=env_for_job_with_cuda(slot.cuda_visible_devices),
    )
    return JobResult(
        status="converted" if completed.returncode == 0 else "failed",
        segment_dir=job.segment_dir,
        output_path=job.output_path,
        command=tuple(command),
        return_code=completed.returncode,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )
|
|
|
|
|
|
def split_lines_for_excerpt(text: str, max_lines: int = 8) -> list[str]:
    """Return the first non-blank lines of ``text`` for a short excerpt.

    Blank lines are dropped and trailing whitespace is stripped. When more
    than ``max_lines`` lines remain, the list is cut there and a final
    ``"... (N more lines)"`` marker is appended.
    """
    lines = [line.rstrip() for line in text.splitlines() if line.strip()]
    hidden_count = len(lines) - max_lines
    if hidden_count <= 0:
        return lines
    return [*lines[:max_lines], f"... ({hidden_count} more lines)"]
|
|
|
|
|
|
def failure_excerpt(result: JobResult, max_lines: int = 8) -> list[str]:
    """Return a short excerpt of a job's output, preferring stderr over stdout.

    An empty list is returned when both streams are blank.
    """
    for stream_text in (result.stderr, result.stdout):
        if stream_text.strip():
            return split_lines_for_excerpt(stream_text, max_lines=max_lines)
    return []
|
|
|
|
|
|
def summarize_failures(results: list[JobResult]) -> None:
    """Print every failed conversion with its full output to stderr.

    For each failure the captured stderr is printed, or stdout when stderr is
    blank. Nothing is printed when no job failed.
    """
    failed_results = [result for result in results if result.status == "failed"]
    if not failed_results:
        return

    click.echo("\nFailed conversions:", err=True)
    for result in failed_results:
        click.echo(f"- {result.segment_dir} (exit {result.return_code})", err=True)
        if result.stderr.strip():
            click.echo(result.stderr.rstrip(), err=True)
        elif result.stdout.strip():
            click.echo(result.stdout.rstrip(), err=True)
|
|
|
|
|
|
def report_invalid_existing_outputs(
    invalid_existing: list[tuple[ConversionJob, OutputProbeResult]],
) -> None:
    """Print each invalid existing output with its path and reason to stderr.

    Nothing is printed when the list is empty.
    """
    if not invalid_existing:
        return

    click.echo("\nInvalid existing outputs:", err=True)
    for job, probe in invalid_existing:
        click.echo(f"- {job.segment_dir}", err=True)
        click.echo(f"  output: {probe.output_path}", err=True)
        # splitlines() is empty for an empty reason; fall back to the reason
        # itself so there is always a first line to print.
        first_line, *continuation = probe.reason.splitlines() or [probe.reason]
        click.echo(f"  reason: {first_line}", err=True)
        for line in continuation:
            click.echo(f"          {line}", err=True)
|
|
|
|
|
|
def report_dry_run_plan(
    pending_jobs: list[ConversionJob],
    pending_reasons: dict[Path, str],
    pending_details: dict[Path, str],
) -> None:
    """Print to stderr the conversions a dry run would launch.

    Args:
        pending_jobs: Jobs that would be converted.
        pending_reasons: Reason per segment directory; required for every job.
        pending_details: Optional extra detail per segment directory.
    """
    if not pending_jobs:
        click.echo("dry-run: no conversions would be launched", err=True)
        return

    click.echo("\nDry-run plan:", err=True)
    for job in pending_jobs:
        reason = pending_reasons[job.segment_dir]
        line = f"- {job.segment_dir} [{reason}]"
        detail = pending_details.get(job.segment_dir)
        if detail:
            # Keep each job on a single line by flattening multi-line details.
            flattened = detail.replace("\n", " | ")
            line = f"{line}: {flattened}"
        click.echo(line, err=True)
|
|
|
|
|
|
def run_batch(
    jobs: list[ConversionJob],
    config: BatchConfig,
    worker_slots: list[WorkerSlot],
) -> tuple[list[JobResult], int]:
    """Run all jobs, keeping at most one job per worker slot in flight.

    Args:
        jobs: Jobs to run, submitted in list order.
        config: Batch settings; ``fail_fast`` stops new submissions after the
            first failure and ``progress_ui`` selects the reporter.
        worker_slots: Slots that bound concurrency; a slot is reused once its
            job has finished.

    Returns:
        ``(results, aborted_count)``: results in completion order, and the
        number of jobs never submitted because of ``fail_fast``.

    Raises:
        click.ClickException: If there are jobs but no worker slots.
    """
    results: list[JobResult] = []
    aborted_count = 0
    if not jobs:
        return results, aborted_count
    if not worker_slots:
        raise click.ClickException("no worker slots configured")

    available_slots = list(worker_slots)
    max_parallel_jobs = len(worker_slots)
    future_to_job: dict[concurrent.futures.Future[JobResult], ActiveJobState] = {}
    job_iter = iter(jobs)
    stop_submitting = False
    completed_count = 0
    failed_count = 0
    submission_index = 0
    reporter = choose_progress_reporter(config.progress_ui, len(jobs))
    last_heartbeat_at = time.monotonic()

    # The reporter is closed in ``finally`` so it is released even when the
    # loop is interrupted or a worker raises.
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_parallel_jobs) as executor:

            def submit_next() -> bool:
                """Submit the next pending job to a free slot; False if none was submitted."""
                nonlocal submission_index
                # Checked before touching the iterator so no job is consumed
                # without a slot to run it on.
                if stop_submitting or not available_slots:
                    return False
                job = next(job_iter, None)
                if job is None:
                    return False
                slot = available_slots.pop(0)

                submission_index += 1
                state = ActiveJobState(
                    submission_index=submission_index,
                    job=job,
                    slot=slot,
                    started_at_monotonic=time.monotonic(),
                )
                reporter.job_started(state)
                future = executor.submit(run_conversion_on_slot, job, config, slot)
                future_to_job[future] = state
                return True

            # Fill every slot once; later submissions happen as jobs finish.
            for _ in range(min(max_parallel_jobs, len(jobs))):
                submit_next()

            while future_to_job:
                done, _ = concurrent.futures.wait(
                    future_to_job,
                    timeout=reporter.heartbeat_interval_seconds,
                    return_when=concurrent.futures.FIRST_COMPLETED,
                )
                if not done:
                    # Timed out with nothing finished: emit a heartbeat only.
                    reporter.heartbeat(
                        completed_count=completed_count,
                        failed_count=failed_count,
                        active_states=list(future_to_job.values()),
                    )
                    last_heartbeat_at = time.monotonic()
                    continue

                for future in done:
                    state = future_to_job.pop(future)
                    available_slots.append(state.slot)
                    result = future.result()
                    results.append(result)
                    reporter.job_finished(state, result)

                    if result.status == "failed":
                        failed_count += 1
                        if config.fail_fast:
                            stop_submitting = True
                    else:
                        completed_count += 1

                    if not stop_submitting:
                        submit_next()

                # With frequent completions the wait never times out, so the
                # heartbeat is also driven from here.
                now = time.monotonic()
                if now - last_heartbeat_at >= reporter.heartbeat_interval_seconds:
                    reporter.heartbeat(
                        completed_count=completed_count,
                        failed_count=failed_count,
                        active_states=list(future_to_job.values()),
                    )
                    last_heartbeat_at = now

            if stop_submitting:
                # Whatever is left in the iterator was never submitted.
                aborted_count = sum(1 for _ in job_iter)
    finally:
        reporter.close()

    return results, aborted_count
|
|
|
|
|
|
def build_uniform_worker_slots(
    jobs: int,
    encoder_device: str,
    cuda_visible_devices: str | None,
) -> list[WorkerSlot]:
    """Create ``jobs`` slots (``job-1`` ... ``job-N``) sharing one encoder device.

    Without ``cuda_visible_devices`` no slot overrides the inherited value.
    Otherwise the comma-separated list is split and the i-th slot gets the
    i-th entry; entries beyond ``jobs`` are not used.

    Raises:
        click.ClickException: If ``jobs`` is below 1 or the device list has
            fewer entries than ``jobs``.
    """
    if jobs < 1:
        raise click.ClickException("--jobs must be at least 1")

    assignments: list[str | None]
    if cuda_visible_devices is None:
        assignments = [None] * jobs
    else:
        device_pool = parse_cuda_device_pool(cuda_visible_devices)
        if len(device_pool) < jobs:
            raise click.ClickException(
                f"--cuda-visible-devices must provide at least {jobs} entries when --jobs={jobs}"
            )
        assignments = list(device_pool[:jobs])

    return [
        WorkerSlot(
            label=f"job-{number}",
            encoder_device=encoder_device,
            cuda_visible_devices=device,
        )
        for number, device in enumerate(assignments, start=1)
    ]
|
|
|
|
|
|
def parse_required_device_pool(raw_value: str | None, expected_count: int, flag_name: str) -> tuple[str, ...]:
    """Parse a device list that must contain exactly ``expected_count`` entries.

    Args:
        raw_value: Comma-separated device list, or ``None`` when not given.
        expected_count: Number of workers the list has to cover.
        flag_name: Option name used in error messages.

    Returns:
        The parsed entries; an empty tuple when no workers were requested and
        no list was given.

    Raises:
        click.ClickException: If a list is given for zero workers, or the
            number of entries differs from ``expected_count``.
    """
    if expected_count == 0:
        if raw_value is None:
            return ()
        raise click.ClickException(f"{flag_name} cannot be used when the matching job count is 0")

    # A missing list parses to an empty pool and is rejected by the length check.
    device_pool = parse_cuda_device_pool(raw_value)
    if len(device_pool) != expected_count:
        raise click.ClickException(
            f"{flag_name} must provide exactly {expected_count} entries when the matching job count is {expected_count}"
        )
    return device_pool
|
|
|
|
|
|
def build_worker_slots(
    *,
    jobs: int,
    encoder_device: str,
    cuda_visible_devices: str | None,
    hardware_jobs: int,
    hardware_cuda_visible_devices: str | None,
    software_jobs: int,
    software_cuda_visible_devices: str | None,
) -> list[WorkerSlot]:
    """Build the worker slots for uniform mode or mixed hardware/software mode.

    Mixed mode is selected as soon as any hardware/software option is used.
    It creates ``hw-N`` slots with the ``nvidia`` encoder followed by ``sw-N``
    slots with the ``software`` encoder, one per entry of the matching device
    list. Otherwise the uniform ``--jobs`` pool is built.

    Raises:
        click.ClickException: If mixed options are combined with non-default
            uniform options, no mixed worker is requested, or a device list
            does not match its worker count.
    """
    mixed_mode_requested = (
        hardware_jobs > 0
        or software_jobs > 0
        or hardware_cuda_visible_devices is not None
        or software_cuda_visible_devices is not None
    )
    if not mixed_mode_requested:
        return build_uniform_worker_slots(jobs, encoder_device, cuda_visible_devices)

    # Uniform-mode options must be left at their defaults in mixed mode.
    if jobs != 1:
        raise click.ClickException("--jobs cannot be combined with mixed worker pool flags")
    if cuda_visible_devices is not None:
        raise click.ClickException("--cuda-visible-devices cannot be combined with mixed worker pool flags")
    if encoder_device != "auto":
        raise click.ClickException("--encoder-device cannot be combined with mixed worker pool flags")

    total_jobs = hardware_jobs + software_jobs
    if total_jobs < 1:
        raise click.ClickException("mixed worker pool flags require at least one hardware or software job")

    hardware_device_pool = parse_required_device_pool(
        hardware_cuda_visible_devices,
        hardware_jobs,
        "--hardware-cuda-visible-devices",
    )
    software_device_pool = parse_required_device_pool(
        software_cuda_visible_devices,
        software_jobs,
        "--software-cuda-visible-devices",
    )

    hardware_slots = [
        WorkerSlot(label=f"hw-{number}", encoder_device="nvidia", cuda_visible_devices=device)
        for number, device in enumerate(hardware_device_pool, start=1)
    ]
    software_slots = [
        WorkerSlot(label=f"sw-{number}", encoder_device="software", cuda_visible_devices=device)
        for number, device in enumerate(software_device_pool, start=1)
    ]
    return hardware_slots + software_slots
|
|
|
|
|
|
@click.command()
|
|
@click.argument(
|
|
"input_dir",
|
|
required=False,
|
|
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
|
|
)
|
|
@click.option(
|
|
"--segment-dir",
|
|
"segment_dirs",
|
|
multiple=True,
|
|
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
|
|
help="Explicit segment directory. Repeatable. Mutually exclusive with INPUT_DIR and --segments-csv.",
|
|
)
|
|
@click.option(
|
|
"--segments-csv",
|
|
type=click.Path(path_type=Path, dir_okay=False),
|
|
help="CSV file containing a segment_dir column. Mutually exclusive with INPUT_DIR and --segment-dir.",
|
|
)
|
|
@click.option(
|
|
"--csv-root",
|
|
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
|
|
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
|
|
)
|
|
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Recurse when discovering segment directories from INPUT_DIR.")
|
|
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
|
|
@click.option(
|
|
"--hardware-jobs",
|
|
default=0,
|
|
show_default=True,
|
|
type=click.IntRange(min=0),
|
|
help="Mixed mode: number of hardware-encoded workers.",
|
|
)
|
|
@click.option(
|
|
"--hardware-cuda-visible-devices",
|
|
help="Mixed mode: comma-separated CUDA_VISIBLE_DEVICES assignments for hardware workers, one entry per worker.",
|
|
)
|
|
@click.option(
|
|
"--software-jobs",
|
|
default=0,
|
|
show_default=True,
|
|
type=click.IntRange(min=0),
|
|
help="Mixed mode: number of software-encoded workers.",
|
|
)
|
|
@click.option(
|
|
"--software-cuda-visible-devices",
|
|
help="Mixed mode: comma-separated CUDA_VISIBLE_DEVICES assignments for software workers, one entry per worker.",
|
|
)
|
|
@click.option(
|
|
"--zed-bin",
|
|
type=click.Path(path_type=Path, dir_okay=False),
|
|
help="Explicit path to the zed_svo_to_mcap binary.",
|
|
)
|
|
@click.option(
|
|
"--cuda-visible-devices",
|
|
help="Optional CUDA_VISIBLE_DEVICES value. A comma-separated list is distributed across concurrent jobs one GPU per subprocess.",
|
|
)
|
|
@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing MCAP files.")
|
|
@click.option(
|
|
"--probe-existing/--trust-existing",
|
|
default=False,
|
|
show_default=True,
|
|
help="Validate existing MCAP files before skipping them. Invalid outputs are treated as missing.",
|
|
)
|
|
@click.option(
|
|
"--report-existing",
|
|
is_flag=True,
|
|
help="Probe existing MCAP files, report invalid ones, and do not launch conversions.",
|
|
)
|
|
@click.option(
|
|
"--dry-run",
|
|
is_flag=True,
|
|
help="Show which segments would be converted after applying skip/probe logic, without launching conversions.",
|
|
)
|
|
@click.option(
|
|
"--fail-fast/--continue-on-error",
|
|
default=False,
|
|
show_default=True,
|
|
help="Stop submitting new work after the first failed conversion.",
|
|
)
|
|
@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True)
|
|
@click.option(
|
|
"--encoder-device",
|
|
type=click.Choice(("auto", "nvidia", "software")),
|
|
default="auto",
|
|
show_default=True,
|
|
)
|
|
@click.option(
|
|
"--mcap-compression",
|
|
type=click.Choice(("none", "lz4", "zstd")),
|
|
default="zstd",
|
|
show_default=True,
|
|
)
|
|
@click.option(
|
|
"--depth-mode",
|
|
type=click.Choice(("neural_light", "neural", "neural_plus")),
|
|
default="neural_plus",
|
|
show_default=True,
|
|
)
|
|
@click.option(
|
|
"--depth-size",
|
|
type=str,
|
|
default="optimal",
|
|
show_default=True,
|
|
)
|
|
@click.option("--with-pose", is_flag=True, help="Enable per-camera positional tracking export when available.")
|
|
@click.option(
|
|
"--pose-config",
|
|
type=click.Path(path_type=Path, dir_okay=False),
|
|
help="TOML config passed to zed_svo_to_mcap for pose tracking settings.",
|
|
)
|
|
@click.option(
|
|
"--world-frame-id",
|
|
default=None,
|
|
help="Optional pose reference frame id passed through to zed_svo_to_mcap.",
|
|
)
|
|
@click.option(
|
|
"--start-frame",
|
|
type=click.IntRange(min=0),
|
|
default=None,
|
|
help="First synced frame group to export (inclusive) in bundled multi-camera mode.",
|
|
)
|
|
@click.option(
|
|
"--end-frame",
|
|
type=click.IntRange(min=0),
|
|
default=None,
|
|
help="Last synced frame group to export (inclusive) in bundled multi-camera mode.",
|
|
)
|
|
@click.option(
|
|
"--sync-tolerance-ms",
|
|
type=click.FloatRange(min=0.0, min_open=True),
|
|
default=None,
|
|
help="Override the maximum timestamp delta used for bundled multi-camera sync.",
|
|
)
|
|
@click.option(
|
|
"--progress-ui",
|
|
type=click.Choice(("auto", "table", "text")),
|
|
default="auto",
|
|
show_default=True,
|
|
help="Progress output mode. Auto uses a table on TTY and text logging otherwise.",
|
|
)
|
|
def main(
    input_dir: Path | None,
    segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
    jobs: int,
    hardware_jobs: int,
    hardware_cuda_visible_devices: str | None,
    software_jobs: int,
    software_cuda_visible_devices: str | None,
    zed_bin: Path | None,
    cuda_visible_devices: str | None,
    overwrite: bool,
    probe_existing: bool,
    report_existing: bool,
    dry_run: bool,
    fail_fast: bool,
    codec: str,
    encoder_device: str,
    mcap_compression: str,
    depth_mode: str,
    depth_size: str,
    with_pose: bool,
    pose_config: Path | None,
    world_frame_id: str | None,
    start_frame: int | None,
    end_frame: int | None,
    sync_tolerance_ms: float | None,
    progress_ui: str,
) -> None:
    """Batch-convert multi-camera ZED segments into bundled MCAP files."""
    # NOTE(review): the one-line docstring above is kept verbatim because click
    # may surface it as the command's help text; details are documented here.
    #
    # Flow:
    #   1. Resolve the converter binary, the source segments and the worker slots.
    #   2. Classify every segment as failed, skipped or pending.
    #   3. Depending on the flags: print an existing-output report, print a
    #      dry-run plan, or run the pending conversions and print a summary.
    #
    # Exits with status 1 (SystemExit) when a segment is invalid, when a report
    # finds invalid existing outputs, or when a conversion fails or is aborted.
    if dry_run and report_existing:
        raise click.ClickException("--report-existing and --dry-run are mutually exclusive")

    # Report-only runs never build a conversion command, so the binary lookup
    # is skipped for them.
    zed_binary = locate_binary(zed_bin) if not report_existing else None
    sources = resolve_sources(input_dir, segment_dirs, segments_csv, csv_root, recursive)
    worker_slots = build_worker_slots(
        jobs=jobs,
        encoder_device=encoder_device,
        cuda_visible_devices=cuda_visible_devices,
        hardware_jobs=hardware_jobs,
        hardware_cuda_visible_devices=hardware_cuda_visible_devices,
        software_jobs=software_jobs,
        software_cuda_visible_devices=software_cuda_visible_devices,
    )

    resolved_pose_config = None
    if pose_config is not None:
        resolved_pose_config = pose_config.expanduser().resolve()

    config = BatchConfig(
        zed_bin=zed_binary,
        # --report-existing implies probing.
        probe_existing=probe_existing or report_existing,
        overwrite=overwrite,
        fail_fast=fail_fast,
        codec=codec,
        mcap_compression=mcap_compression,
        depth_mode=depth_mode,
        depth_size=depth_size,
        with_pose=with_pose,
        pose_config=resolved_pose_config,
        world_frame_id=world_frame_id,
        start_frame=start_frame,
        end_frame=end_frame,
        sync_tolerance_ms=sync_tolerance_ms,
        progress_ui=progress_ui,
    )

    input_root = None if input_dir is None else input_dir.expanduser().resolve()
    matched_dirs = sources.segment_dirs
    display_parent = common_segment_parent(matched_dirs)

    # The command recorded on each result uses the first worker slot's encoder
    # device, falling back to the CLI value when there are no slots.
    fallback_encoder_device = worker_slots[0].encoder_device if worker_slots else encoder_device

    skipped_results: list[JobResult] = []
    failed_results: list[JobResult] = []
    pending_jobs: list[ConversionJob] = []
    pending_reasons: dict[Path, str] = {}
    pending_details: dict[Path, str] = {}
    valid_existing: list[OutputProbeResult] = []
    invalid_existing: list[tuple[ConversionJob, OutputProbeResult]] = []
    missing_outputs: list[ConversionJob] = []

    def queue_job(job: ConversionJob, reason: str) -> None:
        # Register a job for conversion together with the reason it is pending.
        pending_jobs.append(job)
        pending_reasons[job.segment_dir] = reason

    def record_skip(job: ConversionJob, command: tuple[str, ...]) -> None:
        # Record a segment whose existing output is being kept.
        skipped_results.append(
            JobResult(
                status="skipped",
                segment_dir=job.segment_dir,
                output_path=job.output_path,
                command=command,
            )
        )

    def echo_ignored_partial_dirs() -> None:
        # Only mention ignored incomplete directories when there are any.
        if sources.ignored_partial_dirs:
            click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True)

    for segment_dir in matched_dirs:
        scan = scan_segment_dir(segment_dir)
        output_path = output_path_for(segment_dir)
        job = ConversionJob(
            segment_dir=segment_dir,
            output_path=output_path,
            camera_labels=scan.camera_labels,
            display_name=display_name_for_segment(
                segment_dir,
                source_mode=sources.mode,
                input_root=input_root,
                common_parent=display_parent,
            ),
        )
        if config.zed_bin is None:
            command = ()
        else:
            command = tuple(command_for_job(job, config, fallback_encoder_device))

        if not scan.is_valid:
            failed_results.append(
                JobResult(
                    status="failed",
                    segment_dir=segment_dir,
                    output_path=output_path,
                    command=command,
                    return_code=2,
                    stderr=scan.reason or "",
                )
            )
            continue

        if report_existing:
            # Report mode only buckets the probe outcome; nothing is queued.
            probe = probe_output(output_path, job.camera_labels)
            if probe.status == "valid":
                valid_existing.append(probe)
            elif probe.status == "invalid":
                invalid_existing.append((job, probe))
            else:
                missing_outputs.append(job)
        elif overwrite:
            queue_job(job, "overwrite")
        elif config.probe_existing:
            probe = probe_output(output_path, job.camera_labels)
            if probe.status == "valid":
                valid_existing.append(probe)
                record_skip(job, command)
            elif probe.status == "invalid":
                # Invalid outputs are treated as missing and reconverted.
                invalid_existing.append((job, probe))
                queue_job(job, "invalid-existing-output")
                pending_details[segment_dir] = probe.reason
            else:
                missing_outputs.append(job)
                queue_job(job, "missing-output")
        elif output_path.exists():
            # Without probing, any existing output file is trusted as-is.
            record_skip(job, command)
        else:
            missing_outputs.append(job)
            queue_job(job, "missing-output")

    matched_count = len(matched_dirs)

    if report_existing:
        report_line = (
            f"source={sources.mode} matched={matched_count} valid={len(valid_existing)} "
            f"invalid={len(invalid_existing)} missing={len(missing_outputs)} "
            f"invalid-segments={len(failed_results)}"
        )
        click.echo(report_line, err=True)
        echo_ignored_partial_dirs()
        report_invalid_existing_outputs(invalid_existing)
        summarize_failures(failed_results)
        if failed_results or invalid_existing:
            raise SystemExit(1)
        return

    dry_run_label = "yes" if dry_run else "no"
    plan_line = (
        f"source={sources.mode} matched={matched_count} pending={len(pending_jobs)} "
        f"skipped={len(skipped_results)} invalid={len(failed_results)} jobs={len(worker_slots)} "
        f"dry_run={dry_run_label}"
    )
    click.echo(plan_line, err=True)
    echo_ignored_partial_dirs()
    if config.probe_existing:
        probe_line = (
            f"probed-existing: valid={len(valid_existing)} invalid={len(invalid_existing)} "
            f"missing={len(missing_outputs)}"
        )
        click.echo(probe_line, err=True)

    if dry_run:
        report_dry_run_plan(pending_jobs, pending_reasons, pending_details)
        summarize_failures(failed_results)
        if failed_results:
            raise SystemExit(1)
        return

    # Results are ordered: skipped first, then pre-flight failures, then
    # whatever the batch run produced.
    results = [*skipped_results, *failed_results]
    conversion_results, aborted_count = run_batch(pending_jobs, config, worker_slots)
    results.extend(conversion_results)

    statuses = [result.status for result in results]
    converted_count = statuses.count("converted")
    skipped_count = statuses.count("skipped")
    failed_count = statuses.count("failed")

    summary_line = (
        f"summary: matched={matched_count} converted={converted_count} "
        f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}"
    )
    click.echo(summary_line, err=True)
    summarize_failures(results)

    if failed_count > 0 or aborted_count > 0:
        raise SystemExit(1)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|