feat: support bundled frame-window offsets in zed_svo_to_mcap

Implement bundled multi-camera --start-frame semantics as synced-group
indices from the common synced start, inclusive with --end-frame.

Update zed_svo_to_mcap to skip synced groups before writing, keep
single-camera raw SVO frame semantics unchanged, and adjust exact
progress totals for selected bundled windows.

Replace the expensive bundled exact pre-count path with approximate
time-window progress when --end-frame is not set, and update the
shared TTY progress bar helper to support fraction-based rendering.

Expose --start-frame in the batch MCAP wrapper and document the
bundled frame-window semantics and approximate progress behavior in
the README.
This commit is contained in:
2026-03-20 10:56:40 +00:00
parent 1369f5235d
commit 7808b89a03
5 changed files with 219 additions and 92 deletions
+4 -2
View File
@@ -240,6 +240,7 @@ uv run python scripts/zed_batch_svo_to_mcap.py \
--recursive \
--jobs 2 \
--cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
--start-frame 10 \
--end-frame 29
```
@@ -249,6 +250,7 @@ You can also preserve the precomputed kindergarten CSV ordering:
uv run python scripts/zed_batch_svo_to_mcap.py \
--segments-csv <SEGMENTS_CSV> \
--jobs 2 \
--start-frame 10 \
--end-frame 29
```
@@ -263,8 +265,8 @@ uv run python scripts/zed_batch_svo_to_mcap.py \
The batch MCAP wrapper writes `<segment>/<segment>.mcap` by default, skips existing outputs unless told otherwise, and returns a nonzero exit code if any segment fails.
The repo includes a minimal pose config at `config/zed_pose_config.toml` so MCAP conversion does not depend on a separate `cv-mmap` checkout.
In bundled multi-camera mode, `--end-frame` means the last emitted synced frame-group index from the common start timestamp.
When stderr is attached to a TTY, `zed_svo_to_mcap` shows a tqdm-like progress bar. In bundled mode without `--end-frame`, it first counts synced groups to make that progress total exact.
In bundled multi-camera mode, `--start-frame` and `--end-frame` mean the first and last emitted synced frame-group indices from the common start timestamp, inclusive.
When stderr is attached to a TTY, `zed_svo_to_mcap` shows a tqdm-like progress bar. In bundled mode without `--end-frame`, it uses approximate progress over the common synchronized time window so export starts immediately without a full pre-count pass.
Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid outputs are treated as missing and requeued:
@@ -2,6 +2,7 @@
#include <cstdint>
#include <memory>
#include <string_view>
namespace cvmmap_streamer::zed_tools {
@@ -17,7 +18,9 @@ public:
bool enabled() const;
void update(std::uint64_t completed_frames);
void update_fraction(double fraction, std::string_view detail = {});
void finish(std::uint64_t completed_frames, bool success);
void finish_fraction(double fraction, bool success, std::string_view detail = {});
private:
struct Impl;
+11
View File
@@ -37,6 +37,7 @@ class BatchConfig:
with_pose: bool
pose_config: Path | None
world_frame_id: str | None
start_frame: int | None
end_frame: int | None
sync_tolerance_ms: float | None
@@ -293,6 +294,8 @@ def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]:
command.extend(["--pose-config", str(config.pose_config)])
if config.world_frame_id is not None:
command.extend(["--world-frame-id", config.world_frame_id])
if config.start_frame is not None:
command.extend(["--start-frame", str(config.start_frame)])
if config.end_frame is not None:
command.extend(["--end-frame", str(config.end_frame)])
if config.sync_tolerance_ms is not None:
@@ -576,6 +579,12 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
default=None,
help="Optional pose reference frame id passed through to zed_svo_to_mcap.",
)
@click.option(
"--start-frame",
type=click.IntRange(min=0),
default=None,
help="First synced frame group to export (inclusive) in bundled multi-camera mode.",
)
@click.option(
"--end-frame",
type=click.IntRange(min=0),
@@ -609,6 +618,7 @@ def main(
with_pose: bool,
pose_config: Path | None,
world_frame_id: str | None,
start_frame: int | None,
end_frame: int | None,
sync_tolerance_ms: float | None,
) -> None:
@@ -631,6 +641,7 @@ def main(
with_pose=with_pose,
pose_config=pose_config.expanduser().resolve() if pose_config is not None else None,
world_frame_id=world_frame_id,
start_frame=start_frame,
end_frame=end_frame,
sync_tolerance_ms=sync_tolerance_ms,
)
+73 -16
View File
@@ -1,5 +1,6 @@
#include "cvmmap_streamer/tools/zed_progress_bar.hpp"
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdio>
@@ -41,6 +42,26 @@ struct ProgressBar::Impl {
started_at(Clock::now()),
last_render_at(started_at) {}
void render_prefix(const double ratio, const Clock::time_point now, char *line, const std::size_t line_size) {
  // Shared "[####----] pct | elapsed | ETA" prefix used by both the
  // frame-count and fraction-based rendering paths.
  const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
  std::string bar(24, '-');
  // Original loop appended '#' while index < filled; capping at the bar
  // width reproduces that exactly even if ratio ever exceeds 1.0.
  std::fill_n(bar.begin(), std::min<std::size_t>(filled, bar.size()), '#');
  const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
  // ETA extrapolated from the completed fraction; 0 until any progress is made.
  const auto eta_seconds = ratio > 0.0 ? elapsed_seconds * (1.0 - ratio) / ratio : 0.0;
  std::snprintf(
      line,
      line_size,
      "\r[%s] %6.2f%% | %s elapsed | %s ETA",
      bar.c_str(),
      ratio * 100.0,
      format_duration(elapsed_seconds).c_str(),
      format_duration(eta_seconds).c_str());
}
void render(const std::uint64_t completed_frames, const bool force) {
if (!enabled || total_frames == 0) {
return;
@@ -55,29 +76,45 @@ struct ProgressBar::Impl {
const auto bounded_completed = completed_frames > total_frames ? total_frames : completed_frames;
const double ratio = static_cast<double>(bounded_completed) / static_cast<double>(total_frames);
const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
std::string bar{};
bar.reserve(24);
for (std::size_t index = 0; index < 24; ++index) {
bar.push_back(index < filled ? '#' : '-');
}
const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
const auto fps = elapsed_seconds > 0.0 ? static_cast<double>(bounded_completed) / elapsed_seconds : 0.0;
const auto eta_seconds = fps > 0.0 ? static_cast<double>(total_frames - bounded_completed) / fps : 0.0;
char line[256]{};
render_prefix(ratio, now, line, sizeof(line));
const auto written = std::char_traits<char>::length(line);
std::snprintf(
line,
sizeof(line),
"\r[%s] %6.2f%% %llu/%llu | %5.1f fps | %s elapsed | %s ETA\x1b[K",
bar.c_str(),
ratio * 100.0,
line + written,
sizeof(line) - written,
" | %llu/%llu | %5.1f fps\x1b[K",
static_cast<unsigned long long>(bounded_completed),
static_cast<unsigned long long>(total_frames),
fps,
format_duration(elapsed_seconds).c_str(),
format_duration(eta_seconds).c_str());
fps);
std::fprintf(stderr, "%s", line);
std::fflush(stderr);
}
void render_fraction(const double fraction, const std::string_view detail, const bool force) {
  // Fraction-based rendering for modes without an exact frame total
  // (e.g. bundled export over an approximate time window).
  if (!enabled) {
    return;
  }
  const auto now = Clock::now();
  // Throttle unforced redraws to ~8 Hz once something has been drawn.
  const bool throttled = !force && rendered && now - last_render_at < std::chrono::milliseconds(125);
  if (throttled) {
    return;
  }
  last_render_at = now;
  rendered = true;
  char line[256]{};
  render_prefix(std::clamp(fraction, 0.0, 1.0), now, line, sizeof(line));
  const auto written = std::char_traits<char>::length(line);
  if (detail.empty()) {
    // \x1b[K clears any leftover characters from a longer previous line.
    std::snprintf(line + written, sizeof(line) - written, "\x1b[K");
  } else {
    std::snprintf(line + written, sizeof(line) - written, " | %.*s\x1b[K", static_cast<int>(detail.size()), detail.data());
  }
  std::fprintf(stderr, "%s", line);
  std::fflush(stderr);
}
@@ -102,6 +139,10 @@ void ProgressBar::update(const std::uint64_t completed_frames) {
impl_->render(completed_frames, false);
}
void ProgressBar::update_fraction(const double fraction, const std::string_view detail) {
  // Guard against a moved-from/empty ProgressBar; finish() and
  // finish_fraction() already null-check impl_, this path previously did not.
  if (impl_ == nullptr) {
    return;
  }
  impl_->render_fraction(fraction, detail, false);
}
void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) {
if (impl_ == nullptr || !impl_->enabled) {
return;
@@ -118,4 +159,20 @@ void ProgressBar::finish(const std::uint64_t completed_frames, const bool succes
std::fflush(stderr);
}
void ProgressBar::finish_fraction(const double fraction, const bool success, const std::string_view detail) {
  if (impl_ == nullptr || !impl_->enabled) {
    return;
  }
  // If the bar already shows 100% on success there is nothing to redraw;
  // otherwise force one final render (failure states included).
  const bool already_at_full = success && impl_->rendered && fraction >= 1.0;
  if (!already_at_full) {
    impl_->render_fraction(fraction, detail, true);
    if (!impl_->rendered) {
      // render_fraction drew nothing (bar disabled mid-run); no line to close.
      return;
    }
  }
  std::fprintf(stderr, "%s", success ? "\n" : " [failed]\n");
  std::fflush(stderr);
}
} // namespace cvmmap_streamer::zed_tools
+119 -65
View File
@@ -1155,55 +1155,71 @@ std::expected<void, std::string> advance_after_emit(std::vector<CameraStream> &s
}
[[nodiscard]]
// Fraction of the common synchronized time window covered by the given
// synced-group timestamp, clamped to [0, 1]. Used for approximate progress
// when --end-frame is not set. NOTE(review): this span in the diff view
// interleaved the removed count_synced_groups fragments with the new
// definition; this is the coherent new function.
double bundled_progress_fraction(
    const std::uint64_t common_start_ts,
    const std::uint64_t common_end_ts,
    const std::uint64_t group_timestamp_ns) {
  // Degenerate or inverted window: report complete rather than dividing by
  // zero (and avoid std::clamp with lo > hi, which is undefined behavior).
  if (common_end_ts <= common_start_ts) {
    return 1.0;
  }
  const auto bounded_timestamp = std::clamp(group_timestamp_ns, common_start_ts, common_end_ts);
  return static_cast<double>(bounded_timestamp - common_start_ts) /
         static_cast<double>(common_end_ts - common_start_ts);
}
[[nodiscard]]
// Human-readable "elapsed/total overlap" suffix for the approximate bundled
// progress bar, e.g. "12.5s/30.0s overlap".
std::string bundled_progress_detail(
    const std::uint64_t common_start_ts,
    const std::uint64_t common_end_ts,
    const std::uint64_t group_timestamp_ns) {
  // Guard the degenerate/inverted window: std::clamp(ts, lo, hi) is UB when
  // hi < lo (the sibling bundled_progress_fraction already guards this case).
  const std::uint64_t bounded_timestamp = common_end_ts > common_start_ts
      ? std::clamp(group_timestamp_ns, common_start_ts, common_end_ts)
      : common_start_ts;
  const double elapsed_seconds = static_cast<double>(bounded_timestamp - common_start_ts) / 1'000'000'000.0;
  const double total_seconds = common_end_ts > common_start_ts
      ? static_cast<double>(common_end_ts - common_start_ts) / 1'000'000'000.0
      : 0.0;
  char buffer[96]{};
  std::snprintf(buffer, sizeof(buffer), "%.1fs/%.1fs overlap", elapsed_seconds, total_seconds);
  return std::string(buffer);
}
[[nodiscard]]
std::expected<std::uint64_t, std::string> skip_bundled_start_groups(
std::vector<CameraStream> &streams,
const std::uint32_t start_group_index,
const std::uint64_t tolerance_ns,
const std::uint64_t common_end_ts) {
for (std::uint32_t skipped_groups = 0; skipped_groups < start_group_index; ++skipped_groups) {
auto group_timestamp = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
if (!group_timestamp) {
return std::unexpected(group_timestamp.error());
}
if (!*group_timestamp) {
break;
}
counted_groups += 1;
if (max_groups && counted_groups >= *max_groups) {
break;
return std::unexpected(
"start-frame " + std::to_string(start_group_index) +
" is out of range for bundled multi-camera mode");
}
auto advance = advance_after_emit(streams);
if (!advance) {
if (advance.error() == "end-of-svo") {
break;
return std::unexpected(
"start-frame " + std::to_string(start_group_index) +
" is out of range for bundled multi-camera mode");
}
return std::unexpected(advance.error());
}
}
return counted_groups;
auto first_selected_group = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
if (!first_selected_group) {
return std::unexpected(first_selected_group.error());
}
void reset_streams_after_count(std::vector<CameraStream> &streams) {
for (auto &stream : streams) {
stream.current_tracking = {};
stream.next_tracking = {};
stream.current_timestamp_ns = 0;
stream.next_timestamp_ns = 0;
stream.dropped_frames = 0;
stream.sync_position = -1;
stream.has_next = false;
stream.calibration_written = false;
stream.last_tracking_state.reset();
if (!*first_selected_group) {
return std::unexpected(
"start-frame " + std::to_string(start_group_index) +
" is out of range for bundled multi-camera mode");
}
return **first_selected_group;
}
[[nodiscard]]
@@ -1648,8 +1664,11 @@ int run_multi_source(
const cvmmap_streamer::McapCompression compression,
const sl::DEPTH_MODE depth_mode,
const PoseTrackingOptions &pose_tracking) {
if (options.start_frame != 0) {
spdlog::error("multi-camera mode does not support --start-frame");
if (options.has_end_frame && options.end_frame < options.start_frame) {
spdlog::error(
"invalid bundled group range: start-frame={} end-frame={}",
options.start_frame,
options.end_frame);
return exit_code(ToolExitCode::UsageError);
}
bool interrupted{false};
@@ -1707,35 +1726,11 @@ int run_multi_source(
common_end_ts,
tolerance_ns);
const auto render_progress = stderr_supports_progress_bar();
std::uint64_t total_groups_to_emit{0};
if (render_progress) {
if (auto synced = sync_streams_to_timestamp(streams, common_start_ts, false); !synced) {
close_camera_streams(streams);
if (synced.error() == "interrupted") {
return interrupted_exit_code();
}
spdlog::error("{}", synced.error());
return exit_code(ToolExitCode::RuntimeError);
}
if (!options.has_end_frame) {
std::fprintf(stderr, "counting synced groups for exact progress...\n");
std::fflush(stderr);
}
const auto max_groups = options.has_end_frame
? std::optional<std::uint64_t>{static_cast<std::uint64_t>(options.end_frame) + 1}
: std::nullopt;
auto counted_groups = count_synced_groups(streams, tolerance_ns, common_end_ts, max_groups);
if (!counted_groups) {
close_camera_streams(streams);
if (counted_groups.error() == "interrupted") {
return interrupted_exit_code();
}
spdlog::error("failed to count synced groups: {}", counted_groups.error());
return exit_code(ToolExitCode::RuntimeError);
}
total_groups_to_emit = *counted_groups;
reset_streams_after_count(streams);
}
const std::uint64_t selected_total_groups = options.has_end_frame
? static_cast<std::uint64_t>(options.end_frame - options.start_frame) + 1
: 0;
const bool exact_group_progress = render_progress && options.has_end_frame;
const bool approximate_time_progress = render_progress && !options.has_end_frame;
if (auto synced = sync_streams_to_timestamp(streams, common_start_ts); !synced) {
close_camera_streams(streams);
if (synced.error() == "interrupted") {
@@ -1744,17 +1739,40 @@ int run_multi_source(
spdlog::error("{}", synced.error());
return exit_code(ToolExitCode::RuntimeError);
}
ProgressBar progress{total_groups_to_emit};
auto effective_progress_start_ts = common_start_ts;
if (options.start_frame > 0) {
if (auto skipped_to = skip_bundled_start_groups(streams, options.start_frame, tolerance_ns, common_end_ts); !skipped_to) {
close_camera_streams(streams);
if (skipped_to.error() == "interrupted") {
return interrupted_exit_code();
}
spdlog::error("{}", skipped_to.error());
return exit_code(ToolExitCode::UsageError);
} else {
effective_progress_start_ts = *skipped_to;
}
}
ProgressBar progress{exact_group_progress ? selected_total_groups : 0};
double last_progress_fraction = 0.0;
std::string last_progress_detail{};
auto sink = cvmmap_streamer::record::MultiMcapRecordSink::create(output_path.string(), compression);
if (!sink) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(0, false);
}
close_camera_streams(streams);
spdlog::error("failed to create MCAP sink: {}", sink.error());
return exit_code(ToolExitCode::RuntimeError);
}
if (auto registered = register_mcap_streams(*sink, streams, options); !registered) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(0, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("failed to register MCAP streams: {}", registered.error());
@@ -1769,7 +1787,11 @@ int run_multi_source(
}
auto group_timestamp = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
if (!group_timestamp) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
if (group_timestamp.error() == "interrupted") {
@@ -1783,15 +1805,25 @@ int run_multi_source(
}
if (auto write = encode_and_write_group(*sink, streams, options, **group_timestamp); !write) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("{}", write.error());
return exit_code(ToolExitCode::RuntimeError);
}
emitted_groups += 1;
if (approximate_time_progress) {
last_progress_fraction = bundled_progress_fraction(effective_progress_start_ts, common_end_ts, **group_timestamp);
last_progress_detail = bundled_progress_detail(effective_progress_start_ts, common_end_ts, **group_timestamp);
progress.update_fraction(last_progress_fraction, last_progress_detail);
} else {
progress.update(emitted_groups);
if (options.has_end_frame && emitted_groups > options.end_frame) {
}
if (options.has_end_frame && emitted_groups >= selected_total_groups) {
break;
}
@@ -1800,7 +1832,11 @@ int run_multi_source(
if (advance.error() == "end-of-svo") {
break;
}
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("{}", advance.error());
@@ -1810,7 +1846,11 @@ int run_multi_source(
for (auto &stream : streams) {
if (auto flushed = flush_and_write(*sink, stream.mcap_stream_id, *stream.backend); !flushed) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("failed to finalize encoded video for {}: {}", stream.source.label, flushed.error());
@@ -1819,7 +1859,15 @@ int run_multi_source(
}
sink->close();
if (approximate_time_progress) {
if (!interrupted) {
last_progress_fraction = 1.0;
last_progress_detail = bundled_progress_detail(effective_progress_start_ts, common_end_ts, common_end_ts);
}
progress.finish_fraction(last_progress_fraction, !interrupted, last_progress_detail);
} else {
progress.finish(emitted_groups, !interrupted);
}
for (const auto &stream : streams) {
spdlog::info(
"bundled {} dropped_frame(s) for {}",
@@ -1864,9 +1912,15 @@ int main(int argc, char **argv) {
->check(CLI::IsMember({"none", "lz4", "zstd"}));
app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)")
->check(CLI::IsMember({"neural", "quality", "performance", "ultra"}));
app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
app.add_option(
"--start-frame",
options.start_frame,
"First frame/group to export (inclusive): raw SVO frame in single-camera mode, synced group index in bundled multi-camera mode")
->check(CLI::NonNegativeNumber);
auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
auto *end_frame_option = app.add_option(
"--end-frame",
options.end_frame,
"Last frame/group to export (inclusive): raw SVO frame in single-camera mode, synced group index in bundled multi-camera mode")
->check(CLI::NonNegativeNumber);
app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics");
app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");