1403 lines
48 KiB
Python
1403 lines
48 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from __future__ import annotations
|
|
|
|
import concurrent.futures
|
|
import importlib
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from collections import Counter
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
import click
|
|
from progress_table import ProgressTable
|
|
|
|
# Import the sibling helper module under the alias ``segment_sources``.
# The package-qualified form is tried first; the flat form is the fallback
# (presumably for running this file directly from its own directory --
# TODO confirm against how the script is launched).
try:
    from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
    import zed_batch_segment_sources as segment_sources
|
|
|
|
|
|
# Absolute, symlink-resolved location of this script.
SCRIPT_PATH = Path(__file__).resolve()
# Two levels above the script file: the directory containing the script's
# own directory.
REPO_ROOT = SCRIPT_PATH.parents[1]
# Directory that contains the repository checkout.
WORKSPACE_ROOT = REPO_ROOT.parent
# Directory added to sys.path by load_mcap_reader() before importing
# ``mcap.reader``.
MCAP_PYTHON_ROOT = WORKSPACE_ROOT / "mcap" / "python" / "mcap"
# Matches file names ending in ``_zed<digits>.svo`` or ``_zed<digits>.svo2``
# (case-insensitive); group 1 captures the camera number.
SEGMENT_FILE_PATTERN = re.compile(r".*_zed([0-9]+)\.svo2?$", re.IGNORECASE)
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class BatchConfig:
    """Immutable settings shared by every conversion in one batch run.

    Most fields are forwarded verbatim to the converter command line by
    ``command_for_job``.
    """

    # Converter binary; command_for_job raises RuntimeError when this is None.
    zed_bin: Path | None
    # NOTE(review): not read by the code visible here; presumably mirrors the
    # --probe-existing/--trust-existing CLI switch -- confirm against caller.
    probe_existing: bool
    # NOTE(review): not read by the code visible here; presumably mirrors the
    # --overwrite/--skip-existing CLI switch -- confirm against caller.
    overwrite: bool
    # When true, run_batch stops submitting new jobs after the first failure.
    fail_fast: bool
    # Passed as --codec.
    codec: str
    # Passed as --mcap-compression.
    mcap_compression: str
    # Passed as --depth-mode.
    depth_mode: str
    # Passed as --depth-size.
    depth_size: str
    # Passed as --bundle-policy; the value "copy" suppresses --bundle-topic.
    bundle_policy: str
    # Passed as --copy-range.
    copy_range: str
    # Passed as --bundle-topic when set and bundle_policy is not "copy".
    bundle_topic: str | None
    # Adds the bare --with-pose flag when true.
    with_pose: bool
    # Passed as --pose-config when not None.
    pose_config: Path | None
    # Passed as --world-frame-id when not None.
    world_frame_id: str | None
    # Passed as --start-frame when not None.
    start_frame: int | None
    # Passed as --end-frame when not None.
    end_frame: int | None
    # Passed as --sync-tolerance-ms when not None.
    sync_tolerance_ms: float | None
    # "table" or "text" select a reporter explicitly; any other value lets
    # choose_progress_reporter decide based on whether stderr is a TTY.
    progress_ui: str
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class ConversionJob:
    """One segment directory scheduled for conversion to a single MCAP file."""

    # Directory handed to the converter via --segment-dir.
    segment_dir: Path
    # MCAP path recorded in the JobResult for this job.
    output_path: Path
    # Camera labels for the segment (e.g. "zed1", "zed2").
    camera_labels: tuple[str, ...]
    # Short name shown by the progress reporters.
    display_name: str
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class WorkerSlot:
    """A reusable execution slot that runs one conversion at a time."""

    # Human-readable slot name such as "job-1", "hw-1" or "sw-1".
    label: str
    # Value passed to the converter as --encoder-device.
    encoder_device: str
    # When not None, exported to the subprocess as CUDA_VISIBLE_DEVICES.
    cuda_visible_devices: str | None
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class JobResult:
    """Outcome of one converter invocation."""

    # run_conversion_on_slot sets "converted" (exit code 0) or "failed".
    status: str
    # Segment directory the job was run for.
    segment_dir: Path
    # Intended MCAP output path of the job.
    output_path: Path
    # Full argv that was executed.
    command: tuple[str, ...]
    # Subprocess exit code.
    return_code: int = 0
    # Captured standard output (text mode).
    stdout: str = ""
    # Captured standard error (text mode).
    stderr: str = ""
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class SegmentScan:
    """Result of inspecting a directory for ``*_zedN.svo``/``.svo2`` inputs."""

    # Directory that was scanned.
    segment_dir: Path
    # Total number of files whose names matched SEGMENT_FILE_PATTERN.
    matched_files: int
    # Distinct camera labels found, ordered by camera number.
    camera_labels: tuple[str, ...]
    # False when the directory is missing, a camera has more than one input
    # file, or fewer than two cameras were found.
    is_valid: bool
    # Explanation for an invalid scan; None when the scan is valid.
    reason: str | None = None
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
class OutputProbeResult:
    """Verdict of validating an existing MCAP output file."""

    # File that was probed.
    output_path: Path
    # probe_output produces "missing", "invalid" or "valid".
    status: str
    # Explanation for an "invalid" verdict; empty otherwise.
    reason: str = ""
|
|
|
|
|
|
@dataclass(slots=True)
class ActiveJobState:
    """Mutable bookkeeping for a job that has been submitted to the executor."""

    # 1-based position in submission order.
    submission_index: int
    # The job being executed.
    job: ConversionJob
    # Slot the job occupies until it completes.
    slot: WorkerSlot
    # time.monotonic() reading taken at submission, used for elapsed times.
    started_at_monotonic: float
    # Row of this job in the progress table; set by TableProgressReporter and
    # left as None by reporters that do not use a table.
    row_index: int | None = None
|
|
|
|
|
|
# Lazily imported ``mcap.reader`` module; populated by load_mcap_reader().
_MCAP_READER_MODULE = None
# Cache for load_bundle_manifest_type(), keyed by the raw schema bytes.
_BUNDLE_MANIFEST_CLASS_CACHE: dict[bytes, tuple[object, int | None]] = {}
# Heartbeat interval used by TableProgressReporter.
TABLE_REFRESH_SECONDS = 1.0
# Heartbeat interval used by ProgressReporter and TextProgressReporter.
TEXT_HEARTBEAT_SECONDS = 30.0
|
|
|
|
|
|
def format_elapsed(seconds: float) -> str:
    """Render a duration as ``MM:SS``, or ``H:MM:SS`` once it reaches an hour.

    The value is rounded to the nearest whole second and negative durations
    are clamped to zero.
    """
    total = int(round(seconds))
    if total < 0:
        total = 0
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    clock = f"{minutes:02d}:{secs:02d}"
    if hours > 0:
        return f"{hours:d}:{clock}"
    return clock
|
|
|
|
|
|
class ProgressReporter:
    """No-op base reporter; subclasses override the hooks they care about."""

    heartbeat_interval_seconds: float

    def __init__(self, total_jobs: int) -> None:
        """Remember the batch size and install the default heartbeat interval."""
        self.total_jobs = total_jobs
        self.heartbeat_interval_seconds = TEXT_HEARTBEAT_SECONDS

    def job_started(self, state: ActiveJobState) -> None:
        """Hook invoked when a job is submitted; does nothing here."""

    def job_finished(self, state: ActiveJobState, result: JobResult) -> None:
        """Hook invoked when a job completes; does nothing here."""

    def heartbeat(
        self,
        *,
        completed_count: int,
        failed_count: int,
        active_states: list[ActiveJobState],
    ) -> None:
        """Periodic hook invoked while jobs are running; does nothing here."""

    def close(self) -> None:
        """Hook invoked once the batch is over; does nothing here."""
|
|
|
|
|
|
class TextProgressReporter(ProgressReporter):
    """Line-oriented reporter that writes plain status messages to stderr."""

    def __init__(self, total_jobs: int) -> None:
        """Configure the reporter with the text heartbeat interval."""
        super().__init__(total_jobs)
        self.heartbeat_interval_seconds = TEXT_HEARTBEAT_SECONDS

    def job_started(self, state: ActiveJobState) -> None:
        """Announce which slot picked up which segment."""
        cuda_label = state.slot.cuda_visible_devices or "-"
        message = (
            f"started: [{state.submission_index}/{self.total_jobs}] "
            f"{state.slot.label} encoder={state.slot.encoder_device} cuda={cuda_label} "
            f"segment={state.job.display_name}"
        )
        click.echo(message, err=True)

    def job_finished(self, state: ActiveJobState, result: JobResult) -> None:
        """Report completion; failed jobs also get an excerpt of their output."""
        elapsed = format_elapsed(time.monotonic() - state.started_at_monotonic)
        if result.status == "converted":
            prefix = "completed"
            exit_text = ""
        else:
            prefix = "failed"
            exit_text = f" exit={result.return_code}"
        message = (
            f"{prefix}: [{state.submission_index}/{self.total_jobs}] "
            f"{state.slot.label} elapsed={elapsed}{exit_text} segment={state.job.display_name}"
        )
        click.echo(message, err=True)
        if result.status != "failed":
            return
        for line in failure_excerpt(result):
            click.echo(f" {line}", err=True)

    def heartbeat(
        self,
        *,
        completed_count: int,
        failed_count: int,
        active_states: list[ActiveJobState],
    ) -> None:
        """Print a one-line summary of completed, failed, active and remaining jobs."""
        active_count = len(active_states)
        remaining_count = self.total_jobs - completed_count - failed_count - active_count
        summary = (
            f"progress: completed={completed_count} failed={failed_count} "
            f"active={active_count} remaining={remaining_count}"
        )
        click.echo(summary, err=True)
|
|
|
|
|
|
class TableProgressReporter(ProgressReporter):
    """Reporter that keeps one live ``ProgressTable`` row per submitted job."""

    def __init__(self, total_jobs: int) -> None:
        """Create the table on stderr and register its columns."""
        super().__init__(total_jobs)
        self.heartbeat_interval_seconds = TABLE_REFRESH_SECONDS

        # (column name, width, alignment), in display order.
        column_specs = (
            ("#", 4, "right"),
            ("segment", 44, "left"),
            ("worker", 8, "left"),
            ("encoder", 10, "left"),
            ("cuda", 6, "left"),
            ("status", 12, "left"),
            ("elapsed_s", 10, "right"),
        )
        column_names = [name for name, _, _ in column_specs]
        self.table = ProgressTable(
            *column_names,
            interactive=2,
            refresh_rate=10,
            default_column_alignment="left",
            default_column_width=12,
            pbar_show_throughput=False,
            pbar_show_progress=False,
            pbar_show_percents=False,
            pbar_show_eta=False,
            print_header_every_n_rows=30,
            file=sys.stderr,
        )
        for name, width, alignment in column_specs:
            self.table.add_column(name, width=width, alignment=alignment)

    def job_started(self, state: ActiveJobState) -> None:
        """Append a "running" row and remember its index on the job state."""
        slot = state.slot
        self.table.add_row(
            state.submission_index,
            state.job.display_name,
            slot.label,
            slot.encoder_device,
            slot.cuda_visible_devices or "-",
            "running",
            format_elapsed(0.0),
        )
        state.row_index = self.table.num_rows() - 1

    def job_finished(self, state: ActiveJobState, result: JobResult) -> None:
        """Write the final status and elapsed time into the job's row."""
        row = state.row_index
        if row is None:
            return
        if result.status == "converted":
            status_text = "converted"
        else:
            status_text = f"failed({result.return_code})"
        self.table.update("status", status_text, row=row)
        elapsed = format_elapsed(time.monotonic() - state.started_at_monotonic)
        self.table.update("elapsed_s", elapsed, row=row)

    def heartbeat(
        self,
        *,
        completed_count: int,
        failed_count: int,
        active_states: list[ActiveJobState],
    ) -> None:
        """Refresh the elapsed-time cell of every job that is still running."""
        for state in active_states:
            row = state.row_index
            if row is not None:
                elapsed = format_elapsed(time.monotonic() - state.started_at_monotonic)
                self.table.update("elapsed_s", elapsed, row=row)

    def close(self) -> None:
        """Close the underlying table."""
        self.table.close()
|
|
|
|
|
|
def locate_binary(override: Path | None) -> Path:
    """Return the path of the ``zed_svo_to_mcap`` converter binary.

    An explicit ``override`` is expanded and resolved, and must point at an
    existing file. Without one, ``build/bin`` and then ``build`` under the
    repository root are searched.

    Raises:
        click.ClickException: if no usable binary is found.
    """
    if override is None:
        build_dir = REPO_ROOT / "build"
        for candidate in (build_dir / "bin" / "zed_svo_to_mcap", build_dir / "zed_svo_to_mcap"):
            if candidate.is_file():
                return candidate
        raise click.ClickException(f"could not find zed_svo_to_mcap under {REPO_ROOT / 'build'}")

    resolved = override.expanduser().resolve()
    if resolved.is_file():
        return resolved
    raise click.ClickException(f"binary not found: {resolved}")
|
|
|
|
|
|
def sorted_camera_labels(labels: set[str]) -> tuple[str, ...]:
    """Order camera labels by the integer that follows their 3-character prefix."""

    def camera_number(label: str) -> int:
        return int(label[3:])

    ordered = list(labels)
    ordered.sort(key=camera_number)
    return tuple(ordered)
|
|
|
|
|
|
def scan_segment_dir(segment_dir: Path) -> SegmentScan:
    """Inspect ``segment_dir`` for per-camera SVO inputs.

    Only regular files directly inside the directory whose names match
    ``SEGMENT_FILE_PATTERN`` are considered. The scan is invalid when the
    directory does not exist, when a camera has more than one input file, or
    when fewer than two distinct cameras are present.
    """
    if not segment_dir.is_dir():
        return SegmentScan(
            segment_dir=segment_dir,
            matched_files=0,
            camera_labels=(),
            is_valid=False,
            reason=f"segment directory does not exist: {segment_dir}",
        )

    inputs_by_label: dict[str, list[Path]] = {}
    for entry in segment_dir.iterdir():
        if not entry.is_file():
            continue
        found = SEGMENT_FILE_PATTERN.fullmatch(entry.name)
        if found is None:
            continue
        # int() normalizes zero-padded camera numbers ("zed01" -> "zed1").
        label = f"zed{int(found.group(1))}"
        inputs_by_label.setdefault(label, []).append(entry)

    total_matched = sum(map(len, inputs_by_label.values()))
    labels = sorted_camera_labels(set(inputs_by_label))
    duplicate_cameras = [label for label in sorted(inputs_by_label) if len(inputs_by_label[label]) > 1]

    # Duplicates are reported in preference to the "too few cameras" problem.
    failure: str | None = None
    if duplicate_cameras:
        duplicate_text = ", ".join(duplicate_cameras)
        failure = f"duplicate camera inputs under {segment_dir}: {duplicate_text}"
    elif len(labels) < 2:
        failure = f"expected at least 2 camera inputs under {segment_dir}, found {len(labels)}"

    return SegmentScan(
        segment_dir=segment_dir,
        matched_files=total_matched,
        camera_labels=labels,
        is_valid=failure is None,
        reason=failure,
    )
|
|
|
|
|
|
def output_path_for(segment_dir: Path) -> Path:
    """Return ``<segment_dir>/<segment_dir name>.mcap``."""
    filename = segment_dir.name + ".mcap"
    return segment_dir.joinpath(filename)
|
|
|
|
|
|
def common_segment_parent(segment_dirs: tuple[Path, ...]) -> Path | None:
    """Return the longest path shared by all segment directories.

    Returns None for fewer than two directories, or when
    ``os.path.commonpath`` rejects the inputs with ``ValueError``.
    """
    if len(segment_dirs) < 2:
        return None
    try:
        shared = os.path.commonpath(list(map(str, segment_dirs)))
    except ValueError:
        return None
    return Path(shared)
|
|
|
|
|
|
def display_name_for_segment(
    segment_dir: Path,
    *,
    source_mode: str,
    input_root: Path | None,
    common_parent: Path | None,
) -> str:
    """Choose a short, human-readable name for a segment directory.

    Candidates are tried in order:

    1. the path relative to ``input_root`` (only in ``"dataset-root"`` mode),
    2. the path relative to ``common_parent``,
    3. ``<parent dir name>/<segment dir name>``,
    4. the bare directory name.

    A relative path of ``"."`` (the segment *is* the base directory) carries
    no information, so it is skipped for both bases and the next candidate is
    used instead.
    """
    bases: list[Path] = []
    if source_mode == "dataset-root" and input_root is not None:
        bases.append(input_root)
    if common_parent is not None:
        bases.append(common_parent)

    for base in bases:
        try:
            relative = str(segment_dir.relative_to(base))
        except ValueError:
            # segment_dir is not located under this base; try the next one.
            continue
        if relative != ".":
            return relative

    parent_name = segment_dir.parent.name
    if parent_name:
        return str(Path(parent_name) / segment_dir.name)
    return segment_dir.name
|
|
|
|
|
|
def command_for_job(job: ConversionJob, config: BatchConfig, encoder_device: str) -> list[str]:
    """Build the converter argv for one job.

    Raises:
        RuntimeError: if ``config.zed_bin`` is None.
    """
    if config.zed_bin is None:
        raise RuntimeError("zed_svo_to_mcap binary is not configured")

    # Options that are always present, in the order they are emitted.
    always = (
        ("--segment-dir", str(job.segment_dir)),
        ("--codec", config.codec),
        ("--encoder-device", encoder_device),
        ("--mcap-compression", config.mcap_compression),
        ("--depth-mode", config.depth_mode),
        ("--depth-size", config.depth_size),
        ("--bundle-policy", config.bundle_policy),
        ("--copy-range", config.copy_range),
    )
    command = [str(config.zed_bin)]
    for flag, value in always:
        command += [flag, value]

    # The bundle topic is not forwarded when the bundle policy is "copy".
    if config.bundle_policy != "copy" and config.bundle_topic:
        command += ["--bundle-topic", config.bundle_topic]
    if config.with_pose:
        command.append("--with-pose")

    # Options emitted only when a value was supplied (None means "unset").
    when_set = (
        ("--pose-config", config.pose_config),
        ("--world-frame-id", config.world_frame_id),
        ("--start-frame", config.start_frame),
        ("--end-frame", config.end_frame),
        ("--sync-tolerance-ms", config.sync_tolerance_ms),
    )
    for flag, value in when_set:
        if value is not None:
            command += [flag, str(value)]
    return command
|
|
|
|
|
|
def env_for_job(config: BatchConfig) -> dict[str, str]:
    """Return a subprocess environment without any CUDA device override.

    ``config`` is accepted but not consulted.
    """
    no_cuda_override = None
    return env_for_job_with_cuda(no_cuda_override)
|
|
|
|
|
|
def env_for_job_with_cuda(assigned_cuda_visible_devices: str | None) -> dict[str, str]:
    """Copy the current environment, optionally pinning ``CUDA_VISIBLE_DEVICES``.

    The variable is overwritten only when a value (including the empty
    string) is supplied; ``None`` leaves the inherited setting untouched.
    """
    env = os.environ.copy()
    if assigned_cuda_visible_devices is None:
        return env
    env["CUDA_VISIBLE_DEVICES"] = assigned_cuda_visible_devices
    return env
|
|
|
|
|
|
def parse_cuda_device_pool(raw_value: str | None) -> tuple[str, ...]:
    """Split a comma-separated device list into trimmed, non-empty entries.

    ``None`` yields an empty tuple.
    """
    if raw_value is None:
        return ()
    trimmed = (token.strip() for token in raw_value.split(","))
    return tuple(token for token in trimmed if token)
|
|
|
|
|
|
def choose_progress_reporter(progress_ui: str, total_jobs: int) -> ProgressReporter:
    """Instantiate the reporter matching ``progress_ui``.

    ``"table"`` and ``"text"`` select a reporter explicitly. Any other value
    picks the table when stderr is a TTY and the text reporter otherwise.
    """
    # The TTY check is only performed when no explicit choice was made.
    use_table = progress_ui == "table" or (progress_ui != "text" and sys.stderr.isatty())
    reporter_class = TableProgressReporter if use_table else TextProgressReporter
    return reporter_class(total_jobs)
|
|
|
|
|
|
def load_mcap_reader():
    """Import ``mcap.reader`` once and return the cached module.

    ``MCAP_PYTHON_ROOT`` is put at the front of ``sys.path`` (if not already
    listed) before the first import attempt.

    Raises:
        click.ClickException: if the module cannot be found.
    """
    global _MCAP_READER_MODULE
    if _MCAP_READER_MODULE is None:
        search_root = str(MCAP_PYTHON_ROOT)
        if search_root not in sys.path:
            sys.path.insert(0, search_root)
        try:
            _MCAP_READER_MODULE = importlib.import_module("mcap.reader")
        except ModuleNotFoundError as error:
            raise click.ClickException(
                f"could not import mcap.reader from {MCAP_PYTHON_ROOT}"
            ) from error
    return _MCAP_READER_MODULE
|
|
|
|
|
|
def required_topics_for(camera_labels: tuple[str, ...]) -> set[str]:
    """Return the video, depth and calibration topic names for each camera."""
    topics: set[str] = set()
    for label in camera_labels:
        topics.update((f"/{label}/video", f"/{label}/depth", f"/{label}/calibration"))
    return topics
|
|
|
|
|
|
def load_bundle_manifest_type(schema_data: bytes) -> tuple[object, int | None]:
    """Build (and cache) the ``BundleManifest`` message class for a schema.

    ``schema_data`` is parsed as a serialized ``FileDescriptorSet``. The
    result is cached per distinct ``schema_data`` value.

    Returns:
        ``(message_class, present_value)`` where ``present_value`` is the
        number of ``BUNDLE_MEMBER_STATUS_PRESENT`` when the message declares a
        nested ``BundleMemberStatus`` enum, and None otherwise.
    """
    hit = _BUNDLE_MANIFEST_CLASS_CACHE.get(schema_data)
    if hit is not None:
        return hit

    from google.protobuf import descriptor_pb2, descriptor_pool, message_factory, timestamp_pb2

    timestamp_proto = "google/protobuf/timestamp.proto"
    descriptor_set = descriptor_pb2.FileDescriptorSet()
    descriptor_set.ParseFromString(schema_data)
    pool = descriptor_pool.DescriptorPool()

    # Register the timestamp definition first: the copy embedded in the
    # schema if there is one, otherwise the one bundled with protobuf.
    embedded_timestamp = next(
        (entry for entry in descriptor_set.file if entry.name == timestamp_proto),
        None,
    )
    if embedded_timestamp is not None:
        pool.Add(embedded_timestamp)
    else:
        pool.AddSerializedFile(timestamp_pb2.DESCRIPTOR.serialized_pb)

    # Then every remaining file, in the order the schema lists them.
    for entry in descriptor_set.file:
        if entry.name != timestamp_proto:
            pool.Add(entry)

    message_descriptor = pool.FindMessageTypeByName("cvmmap_streamer.BundleManifest")
    message_class = message_factory.GetMessageClass(message_descriptor)

    present_value = None
    nested_enums = message_descriptor.enum_types_by_name
    if "BundleMemberStatus" in nested_enums:
        status_enum = nested_enums["BundleMemberStatus"]
        present_value = status_enum.values_by_name["BUNDLE_MEMBER_STATUS_PRESENT"].number

    resolved = (message_class, present_value)
    _BUNDLE_MANIFEST_CLASS_CACHE[schema_data] = resolved
    return resolved
|
|
|
|
|
|
def probe_output(
    output_path: Path,
    camera_labels: tuple[str, ...],
    *,
    layout: str,
    bundle_topic: str | None,
) -> OutputProbeResult:
    """Validate an existing MCAP file against what a conversion should produce.

    The file is read message by message and checked for:

    * no ``/bundle`` topic when ``layout`` is ``"copy"``;
    * presence of every per-camera video/depth/calibration topic;
    * when ``layout`` is ``"bundled"``, more than one camera is expected and a
      bundle topic is configured: well-formed bundle manifests covering
      exactly the expected cameras, and per-camera video and depth message
      counts equal to the number of "present" manifest entries;
    * otherwise: equal video and depth message counts per camera.

    Returns:
        A result with status ``"missing"`` (no such file), ``"invalid"``
        (with a reason) or ``"valid"``. Any exception raised while reading
        the file is reported as ``"invalid"`` with the exception text.
    """

    def invalid(reason: str) -> OutputProbeResult:
        return OutputProbeResult(output_path=output_path, status="invalid", reason=reason)

    if not output_path.is_file():
        return OutputProbeResult(output_path=output_path, status="missing")

    reader_module = load_mcap_reader()
    expected_topics = required_topics_for(camera_labels)
    require_bundle = layout == "bundled" and len(camera_labels) > 1 and bool(bundle_topic)
    if require_bundle:
        expected_topics.add(bundle_topic or "/bundle")

    expected_camera_labels = set(camera_labels)
    found_topics: set[str] = set()
    video_counts: Counter[str] = Counter()
    depth_counts: Counter[str] = Counter()
    bundle_present_counts: Counter[str] = Counter()

    try:
        with output_path.open("rb") as stream:
            reader = reader_module.make_reader(stream)
            for schema, channel, message in reader.iter_messages():
                topic = channel.topic
                if layout == "copy" and topic == "/bundle":
                    return invalid("copy-layout MCAP must not contain /bundle")
                if topic in expected_topics:
                    found_topics.add(topic)

                # Per-camera counters are keyed by the label between the
                # leading "/" and the stream suffix.
                if topic.endswith("/video"):
                    video_counts[topic.removeprefix("/").removesuffix("/video")] += 1
                    continue
                if topic.endswith("/depth"):
                    depth_counts[topic.removeprefix("/").removesuffix("/depth")] += 1
                    continue
                if not require_bundle or topic != bundle_topic:
                    continue

                # Everything below validates a single bundle manifest message.
                if schema is None or schema.name != "cvmmap_streamer.BundleManifest":
                    return invalid(f"bundle topic '{bundle_topic}' is missing the BundleManifest schema")
                try:
                    bundle_class, present_value = load_bundle_manifest_type(schema.data)
                    bundle = bundle_class()
                    bundle.ParseFromString(message.data)
                except Exception as error:  # noqa: BLE001
                    return invalid(f"failed to parse bundle manifest: {error}")

                bundle_labels: set[str] = set()
                for member in bundle.members:
                    label = str(member.camera_label)
                    if label not in expected_camera_labels:
                        return invalid(f"bundle manifest referenced unknown camera label '{label}'")
                    if label in bundle_labels:
                        return invalid(f"bundle manifest duplicated camera label '{label}'")
                    bundle_labels.add(label)

                    # Without a status enum in the schema, presence is
                    # inferred from the timestamp field being set.
                    has_timestamp = member.HasField("timestamp")
                    if present_value is None:
                        is_present = has_timestamp
                    else:
                        is_present = member.status == present_value
                    if not is_present:
                        continue
                    if not has_timestamp:
                        return invalid(f"bundle member '{label}' is present but missing a timestamp")
                    bundle_present_counts[label] += 1

                if bundle_labels != expected_camera_labels:
                    missing_labels = sorted(expected_camera_labels - bundle_labels)
                    extra_labels = sorted(bundle_labels - expected_camera_labels)
                    details = []
                    if missing_labels:
                        details.append("missing=" + ",".join(missing_labels))
                    if extra_labels:
                        details.append("extra=" + ",".join(extra_labels))
                    return invalid("bundle manifest camera coverage mismatch: " + " ".join(details))
    except Exception as error:  # noqa: BLE001
        return invalid(str(error))

    missing_topics = sorted(expected_topics - found_topics)
    if missing_topics:
        return invalid("missing expected topics: " + ", ".join(missing_topics))

    for label in camera_labels:
        videos = video_counts[label]
        depths = depth_counts[label]
        if require_bundle:
            present_count = bundle_present_counts[label]
            if videos != present_count:
                return invalid(
                    f"video count mismatch for {label}: "
                    f"bundle_present={present_count} video_messages={videos}"
                )
            if depths != present_count:
                return invalid(
                    f"depth count mismatch for {label}: "
                    f"bundle_present={present_count} depth_messages={depths}"
                )
        elif videos != depths:
            return invalid(
                f"video/depth count mismatch for {label}: "
                f"video_messages={videos} depth_messages={depths}"
            )
    return OutputProbeResult(output_path=output_path, status="valid")
|
|
|
|
|
|
def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult:
    """Run one conversion on a default slot.

    The default slot is labelled ``job-1``, uses encoder device ``auto`` and
    applies no CUDA device override.
    """
    default_slot = WorkerSlot(label="job-1", encoder_device="auto", cuda_visible_devices=None)
    return run_conversion_on_slot(job, config, default_slot)
|
|
|
|
|
|
def run_conversion_on_slot(
    job: ConversionJob,
    config: BatchConfig,
    slot: WorkerSlot,
) -> JobResult:
    """Execute the converter for ``job`` using the settings of ``slot``.

    The subprocess runs with captured text output and the slot's CUDA device
    assignment. A non-zero exit code is reported through the returned
    ``JobResult`` rather than raised.
    """
    argv = command_for_job(job, config, slot.encoder_device)
    process = subprocess.run(
        argv,
        env=env_for_job_with_cuda(slot.cuda_visible_devices),
        capture_output=True,
        text=True,
        check=False,
    )
    exit_code = process.returncode
    return JobResult(
        status="failed" if exit_code != 0 else "converted",
        segment_dir=job.segment_dir,
        output_path=job.output_path,
        command=tuple(argv),
        return_code=exit_code,
        stdout=process.stdout,
        stderr=process.stderr,
    )
|
|
|
|
|
|
def split_lines_for_excerpt(text: str, max_lines: int = 8) -> list[str]:
    """Condense ``text`` to at most ``max_lines`` content lines plus a marker.

    Blank lines are dropped and trailing whitespace is stripped. When more
    than ``max_lines`` lines remain, the first half and the last half are
    kept, with a ``"... (N omitted line(s))"`` marker between them. At least
    one leading line is always kept, even for ``max_lines`` below 1.

    Args:
        text: Raw multi-line text, e.g. captured subprocess output.
        max_lines: Maximum number of content lines to keep.

    Returns:
        The condensed list of lines.
    """
    lines = [line.rstrip() for line in text.splitlines() if line.strip()]
    if len(lines) <= max_lines:
        return lines

    head_count = max(1, max_lines // 2)
    # Clamp at zero: for max_lines <= 1 there is no room for a tail.
    tail_count = max(0, max_lines - head_count)
    omitted = len(lines) - head_count - tail_count

    excerpt = lines[:head_count]
    if omitted > 0:
        excerpt.append(f"... ({omitted} omitted line(s))")
    # Guard the slice: lines[-0:] would select the whole list, not nothing.
    if tail_count > 0:
        excerpt.extend(lines[-tail_count:])
    return excerpt
|
|
|
|
|
|
def failure_excerpt(result: JobResult, max_lines: int = 8) -> list[str]:
    """Return a short excerpt of a job's output, preferring stderr over stdout.

    An empty list is returned when both streams are blank.
    """
    for stream_text in (result.stderr, result.stdout):
        if stream_text.strip():
            return split_lines_for_excerpt(stream_text, max_lines=max_lines)
    return []
|
|
|
|
|
|
def summarize_failures(results: list[JobResult]) -> None:
    """Print every failed job with its exit code and captured output to stderr.

    For each failure, stderr is printed if it has content, otherwise stdout.
    Nothing is printed when no job failed.
    """
    failures = [result for result in results if result.status == "failed"]
    if not failures:
        return

    click.echo("\nFailed conversions:", err=True)
    for failure in failures:
        click.echo(f"- {failure.segment_dir} (exit {failure.return_code})", err=True)
        output = failure.stderr if failure.stderr.strip() else failure.stdout
        if output.strip():
            click.echo(output.rstrip(), err=True)
|
|
|
|
|
|
def report_invalid_existing_outputs(
    invalid_existing: list[tuple[ConversionJob, OutputProbeResult]],
) -> None:
    """List existing outputs that failed probing, with their reasons, on stderr.

    The first line of a multi-line reason is printed after the ``reason:``
    tag; the remaining lines follow on their own lines.
    """
    if not invalid_existing:
        return

    click.echo("\nInvalid existing outputs:", err=True)
    for job, probe in invalid_existing:
        click.echo(f"- {job.segment_dir}", err=True)
        click.echo(f" output: {probe.output_path}", err=True)
        # An empty reason has no lines; fall back to the reason itself.
        first_line, *continuation = probe.reason.splitlines() or [probe.reason]
        click.echo(f" reason: {first_line}", err=True)
        for line in continuation:
            click.echo(f" {line}", err=True)
|
|
|
|
|
|
def report_dry_run_plan(
    pending_jobs: list[ConversionJob],
    pending_reasons: dict[Path, str],
    pending_details: dict[Path, str],
) -> None:
    """Print the conversions a real run would launch, one per line, to stderr.

    Every pending job must have an entry in ``pending_reasons``; an entry in
    ``pending_details`` is optional, and its newlines are flattened to
    ``" | "``.
    """
    if not pending_jobs:
        click.echo("dry-run: no conversions would be launched", err=True)
        return

    click.echo("\nDry-run plan:", err=True)
    for job in pending_jobs:
        key = job.segment_dir
        reason = pending_reasons[key]
        detail = pending_details.get(key)
        entry = f"- {key} [{reason}]"
        if detail:
            flattened = detail.replace("\n", " | ")
            entry = f"{entry}: {flattened}"
        click.echo(entry, err=True)
|
|
|
|
|
|
def run_batch(
    jobs: list[ConversionJob],
    config: BatchConfig,
    worker_slots: list[WorkerSlot],
) -> tuple[list[JobResult], int]:
    """Run all jobs on a thread pool with one worker per slot.

    Each running job occupies one slot; when it finishes, the slot is handed
    to the next pending job. The progress reporter is notified on start,
    finish and at its heartbeat interval, and is always closed, even when the
    batch is interrupted by an exception.

    Args:
        jobs: Conversions to run, in submission order.
        config: Batch settings; ``fail_fast`` stops submitting new jobs after
            the first failure, while already running jobs are left to finish.
        worker_slots: Slots defining the concurrency and per-worker settings.

    Returns:
        ``(results, aborted_count)``: results in completion order, and the
        number of jobs that were never submitted because of ``fail_fast``.

    Raises:
        click.ClickException: if there are jobs but no worker slots.
        Exception: anything raised inside a worker is re-raised here by
            ``future.result()``.
    """
    results: list[JobResult] = []
    aborted_count = 0
    if not jobs:
        return results, aborted_count
    if not worker_slots:
        raise click.ClickException("no worker slots configured")

    available_slots = list(worker_slots)
    max_parallel_jobs = len(worker_slots)
    future_to_job: dict[concurrent.futures.Future[JobResult], ActiveJobState] = {}
    job_iter = iter(jobs)
    stop_submitting = False
    completed_count = 0
    failed_count = 0
    submission_index = 0
    reporter = choose_progress_reporter(config.progress_ui, len(jobs))
    last_heartbeat_at = time.monotonic()

    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_parallel_jobs) as executor:

            def submit_next() -> bool:
                """Start the next pending job on a free slot; False if none was started."""
                nonlocal submission_index
                if stop_submitting or not available_slots:
                    return False
                slot = available_slots.pop(0)
                try:
                    job = next(job_iter)
                except StopIteration:
                    # Nothing left to run: give the slot back untouched.
                    available_slots.insert(0, slot)
                    return False

                submission_index += 1
                state = ActiveJobState(
                    submission_index=submission_index,
                    job=job,
                    slot=slot,
                    started_at_monotonic=time.monotonic(),
                )
                reporter.job_started(state)
                future = executor.submit(run_conversion_on_slot, job, config, slot)
                future_to_job[future] = state
                return True

            # Fill every slot before starting to wait.
            for _ in range(min(max_parallel_jobs, len(jobs))):
                submit_next()

            while future_to_job:
                done, _ = concurrent.futures.wait(
                    future_to_job,
                    timeout=reporter.heartbeat_interval_seconds,
                    return_when=concurrent.futures.FIRST_COMPLETED,
                )
                if not done:
                    # Timed out with nothing finished: emit a heartbeat only.
                    reporter.heartbeat(
                        completed_count=completed_count,
                        failed_count=failed_count,
                        active_states=list(future_to_job.values()),
                    )
                    last_heartbeat_at = time.monotonic()
                    continue

                for future in done:
                    state = future_to_job.pop(future)
                    available_slots.append(state.slot)
                    result = future.result()
                    results.append(result)
                    reporter.job_finished(state, result)

                    if result.status == "failed":
                        failed_count += 1
                        if config.fail_fast:
                            stop_submitting = True
                    else:
                        completed_count += 1

                    # Each finished job frees one slot, so refill one.
                    if not stop_submitting:
                        submit_next()

                # Jobs finishing back-to-back would otherwise starve the
                # timeout-driven heartbeat above.
                now = time.monotonic()
                if now - last_heartbeat_at >= reporter.heartbeat_interval_seconds:
                    reporter.heartbeat(
                        completed_count=completed_count,
                        failed_count=failed_count,
                        active_states=list(future_to_job.values()),
                    )
                    last_heartbeat_at = now

        if stop_submitting:
            # Whatever is still in the iterator was never submitted.
            aborted_count = sum(1 for _ in job_iter)
    finally:
        # Close the reporter on every exit path so an interactive table is
        # not left open when a worker raises or the user interrupts the run.
        reporter.close()

    return results, aborted_count
|
|
|
|
|
|
def build_uniform_worker_slots(
    jobs: int,
    encoder_device: str,
    cuda_visible_devices: str | None,
) -> list[WorkerSlot]:
    """Create ``jobs`` identical slots labelled ``job-1`` .. ``job-N``.

    When ``cuda_visible_devices`` is given, it is split on commas and the
    first ``jobs`` entries are assigned to the slots one by one; otherwise no
    slot overrides the CUDA device selection.

    Raises:
        click.ClickException: if ``jobs`` is below 1, or the device list has
            fewer entries than ``jobs``.
    """
    if jobs < 1:
        raise click.ClickException("--jobs must be at least 1")

    assignments: list[str | None]
    if cuda_visible_devices is None:
        assignments = [None] * jobs
    else:
        device_pool = parse_cuda_device_pool(cuda_visible_devices)
        if len(device_pool) < jobs:
            raise click.ClickException(
                f"--cuda-visible-devices must provide at least {jobs} entries when --jobs={jobs}"
            )
        # Surplus entries beyond the job count are ignored.
        assignments = list(device_pool[:jobs])

    return [
        WorkerSlot(
            label=f"job-{position}",
            encoder_device=encoder_device,
            cuda_visible_devices=device,
        )
        for position, device in enumerate(assignments, start=1)
    ]
|
|
|
|
|
|
def parse_required_device_pool(raw_value: str | None, expected_count: int, flag_name: str) -> tuple[str, ...]:
    """Parse a device list that must contain exactly ``expected_count`` entries.

    With ``expected_count`` of 0 the flag must be absent altogether.

    Raises:
        click.ClickException: if the flag is given for a zero job count, or
            the number of entries differs from ``expected_count``.
    """
    if expected_count == 0:
        if raw_value is not None:
            raise click.ClickException(f"{flag_name} cannot be used when the matching job count is 0")
        return ()

    device_pool = parse_cuda_device_pool(raw_value)
    if len(device_pool) == expected_count:
        return device_pool
    raise click.ClickException(
        f"{flag_name} must provide exactly {expected_count} entries when the matching job count is {expected_count}"
    )
|
|
|
|
|
|
def build_worker_slots(
    *,
    jobs: int,
    encoder_device: str,
    cuda_visible_devices: str | None,
    hardware_jobs: int,
    hardware_cuda_visible_devices: str | None,
    software_jobs: int,
    software_cuda_visible_devices: str | None,
) -> list[WorkerSlot]:
    """Build the worker slots for either uniform or mixed mode.

    Mixed mode is active as soon as any hardware/software flag is used. It
    creates ``hw-N`` slots (encoder ``nvidia``) followed by ``sw-N`` slots
    (encoder ``software``), one per device entry. It cannot be combined with
    a non-default ``jobs``, ``cuda_visible_devices`` or ``encoder_device``.
    Without mixed-mode flags the uniform builder is used.

    Raises:
        click.ClickException: on conflicting flags, a mixed pool without any
            job, or device lists that do not match the job counts.
    """
    mixed_mode_requested = (
        hardware_jobs > 0
        or software_jobs > 0
        or hardware_cuda_visible_devices is not None
        or software_cuda_visible_devices is not None
    )
    if not mixed_mode_requested:
        return build_uniform_worker_slots(jobs, encoder_device, cuda_visible_devices)

    # The uniform-mode options must be left at their defaults in mixed mode.
    if jobs != 1:
        raise click.ClickException("--jobs cannot be combined with mixed worker pool flags")
    if cuda_visible_devices is not None:
        raise click.ClickException("--cuda-visible-devices cannot be combined with mixed worker pool flags")
    if encoder_device != "auto":
        raise click.ClickException("--encoder-device cannot be combined with mixed worker pool flags")

    if hardware_jobs + software_jobs < 1:
        raise click.ClickException("mixed worker pool flags require at least one hardware or software job")

    hardware_device_pool = parse_required_device_pool(
        hardware_cuda_visible_devices,
        hardware_jobs,
        "--hardware-cuda-visible-devices",
    )
    software_device_pool = parse_required_device_pool(
        software_cuda_visible_devices,
        software_jobs,
        "--software-cuda-visible-devices",
    )

    hardware_slots = [
        WorkerSlot(label=f"hw-{position}", encoder_device="nvidia", cuda_visible_devices=device)
        for position, device in enumerate(hardware_device_pool, start=1)
    ]
    software_slots = [
        WorkerSlot(label=f"sw-{position}", encoder_device="software", cuda_visible_devices=device)
        for position, device in enumerate(software_device_pool, start=1)
    ]
    return hardware_slots + software_slots
|
|
|
|
|
|
@click.command(context_settings={"allow_extra_args": True})
|
|
@click.option(
|
|
"--dataset-root",
|
|
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
|
|
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
|
|
)
|
|
# --- Segment source selection -------------------------------------------------
@click.option(
    "--segment",
    "segment_dirs",
    multiple=True,
    type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
    help=(
        "Explicit segment directory. Repeatable. The directory must directly contain "
        "*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
    ),
)
# Hidden legacy spelling; its values are only handed to
# segment_sources.raise_for_legacy_source_args() inside main().
@click.option(
    "--segment-dir",
    "legacy_segment_dirs",
    multiple=True,
    type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
    hidden=True,
)
@click.option(
    "--segments-csv",
    type=click.Path(path_type=Path, dir_okay=False),
    help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
)
@click.option(
    "--csv-root",
    type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
    help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
)
@click.option(
    "--recursive/--no-recursive",
    default=True,
    show_default=True,
    help="Recurse when discovering segment directories from --dataset-root.",
)
# --- Worker / GPU layout ------------------------------------------------------
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option(
    "--hardware-jobs",
    default=0,
    show_default=True,
    type=click.IntRange(min=0),
    help="Mixed mode: number of hardware-encoded workers.",
)
@click.option(
    "--hardware-cuda-visible-devices",
    help="Mixed mode: comma-separated CUDA_VISIBLE_DEVICES assignments for hardware workers, one entry per worker.",
)
@click.option(
    "--software-jobs",
    default=0,
    show_default=True,
    type=click.IntRange(min=0),
    help="Mixed mode: number of software-encoded workers.",
)
@click.option(
    "--software-cuda-visible-devices",
    help="Mixed mode: comma-separated CUDA_VISIBLE_DEVICES assignments for software workers, one entry per worker.",
)
@click.option(
    "--zed-bin",
    type=click.Path(path_type=Path, dir_okay=False),
    help="Explicit path to the zed_svo_to_mcap binary.",
)
@click.option(
    "--cuda-visible-devices",
    help="Optional CUDA_VISIBLE_DEVICES value. A comma-separated list is distributed across concurrent jobs one GPU per subprocess.",
)
# --- Existing-output handling and run modes -----------------------------------
@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing MCAP files.")
@click.option(
    "--probe-existing/--trust-existing",
    default=False,
    show_default=True,
    help="Validate existing MCAP files before skipping them. Invalid outputs are treated as missing.",
)
@click.option(
    "--report-existing",
    is_flag=True,
    help="Probe existing MCAP files, report invalid ones, and do not launch conversions.",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show which segments would be converted after applying skip/probe logic, without launching conversions.",
)
@click.option(
    "--fail-fast/--continue-on-error",
    default=False,
    show_default=True,
    help="Stop submitting new work after the first failed conversion.",
)
# --- Conversion settings stored on BatchConfig --------------------------------
@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True)
@click.option(
    "--encoder-device",
    type=click.Choice(("auto", "nvidia", "software")),
    default="auto",
    show_default=True,
)
@click.option(
    "--mcap-compression",
    type=click.Choice(("none", "lz4", "zstd")),
    default="zstd",
    show_default=True,
)
@click.option(
    "--depth-mode",
    type=click.Choice(("neural_light", "neural", "neural_plus")),
    default="neural_plus",
    show_default=True,
)
@click.option(
    "--depth-size",
    type=str,
    default="optimal",
    show_default=True,
)
@click.option(
    "--bundle-policy",
    type=click.Choice(("nearest", "strict", "copy")),
    default="nearest",
    show_default=True,
    help="Bundling policy for multi-camera MCAP export.",
)
@click.option(
    "--copy-range",
    type=click.Choice(("common", "full")),
    default="common",
    show_default=True,
    help="Timestamp range used when --bundle-policy copy is selected.",
)
@click.option(
    "--bundle-topic",
    default="/bundle",
    show_default=True,
    help="Topic used for bundled multi-camera manifest messages.",
)
@click.option("--with-pose", is_flag=True, help="Enable per-camera positional tracking export when available.")
@click.option(
    "--pose-config",
    type=click.Path(path_type=Path, dir_okay=False),
    help="TOML config passed to zed_svo_to_mcap for pose tracking settings.",
)
@click.option(
    "--world-frame-id",
    default=None,
    help="Optional pose reference frame id passed through to zed_svo_to_mcap.",
)
@click.option(
    "--start-frame",
    type=click.IntRange(min=0),
    default=None,
    help="First bundle index to export (inclusive) in bundled multi-camera mode.",
)
@click.option(
    "--end-frame",
    type=click.IntRange(min=0),
    default=None,
    help="Last bundle index to export (inclusive) in bundled multi-camera mode.",
)
@click.option(
    "--sync-tolerance-ms",
    type=click.FloatRange(min=0.0, min_open=True),
    default=None,
    help="Override the maximum timestamp delta used by strict bundled sync.",
)
@click.option(
    "--progress-ui",
    type=click.Choice(("auto", "table", "text")),
    default="auto",
    show_default=True,
    help="Progress output mode. Auto uses a table on TTY and text logging otherwise.",
)
@click.pass_context
def main(
    ctx: click.Context,
    dataset_root: Path | None,
    segment_dirs: tuple[Path, ...],
    legacy_segment_dirs: tuple[Path, ...],
    segments_csv: Path | None,
    csv_root: Path | None,
    recursive: bool,
    jobs: int,
    hardware_jobs: int,
    hardware_cuda_visible_devices: str | None,
    software_jobs: int,
    software_cuda_visible_devices: str | None,
    zed_bin: Path | None,
    cuda_visible_devices: str | None,
    overwrite: bool,
    probe_existing: bool,
    report_existing: bool,
    dry_run: bool,
    fail_fast: bool,
    codec: str,
    encoder_device: str,
    mcap_compression: str,
    depth_mode: str,
    depth_size: str,
    bundle_policy: str,
    copy_range: str,
    bundle_topic: str,
    with_pose: bool,
    pose_config: Path | None,
    world_frame_id: str | None,
    start_frame: int | None,
    end_frame: int | None,
    sync_tolerance_ms: float | None,
    progress_ui: str,
) -> None:
    """Batch-convert multi-camera ZED segments into grouped MCAP files."""
    # NOTE: the docstring above is deliberately left as a single line. click
    # uses a command callback's docstring as its --help text unless an explicit
    # help= is supplied, so the detailed documentation lives in comments.
    #
    # Flow:
    #   1. validate option combinations;
    #   2. resolve the segment directories and build the worker/config objects;
    #   3. classify every segment as failed / skipped / pending (or, with
    #      --report-existing, as valid / invalid / missing);
    #   4. either print a report (--report-existing), print the plan
    #      (--dry-run), or run the pending conversions via run_batch().
    #
    # Exits with status 1 (SystemExit) when any segment failed its scan, when
    # --report-existing found invalid outputs, or when a conversion failed or
    # was aborted. Option conflicts raise click.ClickException.

    # Legacy-argument checks are delegated to the segment_sources helpers.
    segment_sources.raise_for_legacy_extra_args(ctx.args)
    segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
    segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)

    if report_existing and dry_run:
        raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
    if bundle_policy == "copy":
        if start_frame is not None or end_frame is not None:
            raise click.ClickException("--start-frame/--end-frame cannot be used with --bundle-policy copy")
        if sync_tolerance_ms is not None:
            raise click.ClickException("--sync-tolerance-ms cannot be used with --bundle-policy copy")
        if bundle_topic != "/bundle":
            raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy")
    # Both bounds are documented as inclusive, so an inverted range selects
    # nothing; reject it once here instead of launching every subprocess with it.
    if start_frame is not None and end_frame is not None and start_frame > end_frame:
        raise click.ClickException("--start-frame must not be greater than --end-frame")

    sources = segment_sources.resolve_sources(
        dataset_root,
        segment_dirs,
        segments_csv,
        csv_root,
        recursive,
        scan_segment_dir=scan_segment_dir,
        no_matches_message=lambda root: f"no multi-camera segments found under {root}",
    )
    # Report-only mode never launches the converter, so the binary is not looked up.
    binary_path = None if report_existing else locate_binary(zed_bin)
    worker_slots = build_worker_slots(
        jobs=jobs,
        encoder_device=encoder_device,
        cuda_visible_devices=cuda_visible_devices,
        hardware_jobs=hardware_jobs,
        hardware_cuda_visible_devices=hardware_cuda_visible_devices,
        software_jobs=software_jobs,
        software_cuda_visible_devices=software_cuda_visible_devices,
    )
    config = BatchConfig(
        zed_bin=binary_path,
        # --report-existing implies probing.
        probe_existing=probe_existing or report_existing,
        overwrite=overwrite,
        fail_fast=fail_fast,
        codec=codec,
        mcap_compression=mcap_compression,
        depth_mode=depth_mode,
        depth_size=depth_size,
        bundle_policy=bundle_policy,
        copy_range=copy_range,
        # Copy mode carries no bundle topic (a custom one was rejected above).
        bundle_topic=None if bundle_policy == "copy" else bundle_topic,
        with_pose=with_pose,
        pose_config=pose_config.expanduser().resolve() if pose_config is not None else None,
        world_frame_id=world_frame_id,
        start_frame=start_frame,
        end_frame=end_frame,
        sync_tolerance_ms=sync_tolerance_ms,
        progress_ui=progress_ui,
    )
    input_root = dataset_root.expanduser().resolve() if dataset_root is not None else None
    display_parent = common_segment_parent(sources.segment_dirs)

    # Values that are identical for every segment; computed once, not per iteration.
    default_encoder_device = worker_slots[0].encoder_device if worker_slots else encoder_device
    probe_layout = "copy" if config.bundle_policy == "copy" else "bundled"

    skipped_results: list[JobResult] = []
    failed_results: list[JobResult] = []
    pending_jobs: list[ConversionJob] = []
    pending_reasons: dict[Path, str] = {}
    pending_details: dict[Path, str] = {}
    valid_existing: list[OutputProbeResult] = []
    invalid_existing: list[tuple[ConversionJob, OutputProbeResult]] = []
    missing_outputs: list[ConversionJob] = []

    def probe_job_output(job: ConversionJob) -> OutputProbeResult:
        # Single place that knows how an existing output is probed.
        return probe_output(
            job.output_path,
            job.camera_labels,
            layout=probe_layout,
            bundle_topic=config.bundle_topic,
        )

    def make_skipped_result(job: ConversionJob, command: tuple[str, ...]) -> JobResult:
        return JobResult(
            status="skipped",
            segment_dir=job.segment_dir,
            output_path=job.output_path,
            command=command,
        )

    def enqueue(job: ConversionJob, reason: str) -> None:
        # Queue the job and remember why, for the dry-run plan.
        pending_jobs.append(job)
        pending_reasons[job.segment_dir] = reason

    for segment_dir in sources.segment_dirs:
        scan = scan_segment_dir(segment_dir)
        output_path = output_path_for(segment_dir)
        job = ConversionJob(
            segment_dir=segment_dir,
            output_path=output_path,
            camera_labels=scan.camera_labels,
            display_name=display_name_for_segment(
                segment_dir,
                source_mode=sources.mode,
                input_root=input_root,
                common_parent=display_parent,
            ),
        )
        # The command line is recorded on failed/skipped results; it is empty
        # when no binary was located (report-only mode).
        command = (
            tuple(command_for_job(job, config, default_encoder_device))
            if config.zed_bin is not None
            else ()
        )
        if not scan.is_valid:
            failed_results.append(
                JobResult(
                    status="failed",
                    segment_dir=segment_dir,
                    output_path=output_path,
                    command=command,
                    return_code=2,
                    stderr=scan.reason or "",
                )
            )
            continue

        if report_existing:
            # Report mode only classifies outputs; nothing is queued.
            probe_result = probe_job_output(job)
            if probe_result.status == "valid":
                valid_existing.append(probe_result)
            elif probe_result.status == "invalid":
                invalid_existing.append((job, probe_result))
            else:
                missing_outputs.append(job)
            continue

        if overwrite:
            # Overwrite wins over probing: the output is regenerated regardless.
            enqueue(job, "overwrite")
            continue

        if config.probe_existing:
            probe_result = probe_job_output(job)
            if probe_result.status == "valid":
                valid_existing.append(probe_result)
                skipped_results.append(make_skipped_result(job, command))
                continue
            if probe_result.status == "invalid":
                invalid_existing.append((job, probe_result))
                enqueue(job, "invalid-existing-output")
                pending_details[segment_dir] = probe_result.reason
                continue
            # Any other probe status is handled as a missing output below.
        elif output_path.exists():
            # Without probing, an existing file is trusted as-is.
            skipped_results.append(make_skipped_result(job, command))
            continue

        missing_outputs.append(job)
        enqueue(job, "missing-output")

    if report_existing:
        click.echo(
            (
                f"source={sources.mode} matched={len(sources.segment_dirs)} valid={len(valid_existing)} "
                f"invalid={len(invalid_existing)} missing={len(missing_outputs)} "
                f"invalid-segments={len(failed_results)}"
            ),
            err=True,
        )
        if sources.ignored_partial_dirs:
            click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True)
        report_invalid_existing_outputs(invalid_existing)
        summarize_failures(failed_results)
        if failed_results or invalid_existing:
            raise SystemExit(1)
        return

    click.echo(
        (
            f"source={sources.mode} matched={len(sources.segment_dirs)} pending={len(pending_jobs)} "
            f"skipped={len(skipped_results)} invalid={len(failed_results)} jobs={len(worker_slots)} "
            f"dry_run={'yes' if dry_run else 'no'}"
        ),
        err=True,
    )
    if sources.ignored_partial_dirs:
        click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True)
    if config.probe_existing:
        click.echo(
            (
                f"probed-existing: valid={len(valid_existing)} invalid={len(invalid_existing)} "
                f"missing={len(missing_outputs)}"
            ),
            err=True,
        )

    if dry_run:
        report_dry_run_plan(pending_jobs, pending_reasons, pending_details)
        summarize_failures(failed_results)
        if failed_results:
            raise SystemExit(1)
        return

    results = list(skipped_results)
    results.extend(failed_results)
    conversion_results, aborted_count = run_batch(pending_jobs, config, worker_slots)
    results.extend(conversion_results)

    # One pass over the results; Counter yields 0 for statuses that never occurred.
    status_counts = Counter(result.status for result in results)
    converted_count = status_counts["converted"]
    skipped_count = status_counts["skipped"]
    failed_count = status_counts["failed"]

    click.echo(
        (
            f"summary: matched={len(sources.segment_dirs)} converted={converted_count} "
            f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}"
        ),
        err=True,
    )
    summarize_failures(results)

    if failed_count > 0 or aborted_count > 0:
        raise SystemExit(1)
|
|
|
|
|
|
# Script entry point: invoke the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|