feat: support bundled frame-window offsets in zed_svo_to_mcap

Implement bundled multi-camera --start-frame semantics as synced-group
indices from the common synced start, inclusive with --end-frame.

Update zed_svo_to_mcap to skip synced groups before writing, keep
single-camera raw SVO frame semantics unchanged, and adjust exact
progress totals for selected bundled windows.

Replace the expensive bundled exact pre-count path with approximate
time-window progress when --end-frame is not set, and update the
shared TTY progress bar helper to support fraction-based rendering.

Expose --start-frame in the batch MCAP wrapper and document the
bundled frame-window semantics and approximate progress behavior in
the README.
This commit is contained in:
2026-03-20 10:56:40 +00:00
parent 1369f5235d
commit 7808b89a03
5 changed files with 219 additions and 92 deletions
+4 -2
View File
@@ -240,6 +240,7 @@ uv run python scripts/zed_batch_svo_to_mcap.py \
--recursive \
--jobs 2 \
--cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
--start-frame 10 \
--end-frame 29
```
@@ -249,6 +250,7 @@ You can also preserve the precomputed kindergarten CSV ordering:
uv run python scripts/zed_batch_svo_to_mcap.py \
--segments-csv <SEGMENTS_CSV> \
--jobs 2 \
--start-frame 10 \
--end-frame 29
```
@@ -263,8 +265,8 @@ uv run python scripts/zed_batch_svo_to_mcap.py \
The batch MCAP wrapper writes `<segment>/<segment>.mcap` by default, skips existing outputs unless told otherwise, and returns a nonzero exit code if any segment fails.
The repo includes a minimal pose config at `config/zed_pose_config.toml` so MCAP conversion does not depend on a separate `cv-mmap` checkout.
In bundled multi-camera mode, `--end-frame` means the last emitted synced frame-group index from the common start timestamp.
When stderr is attached to a TTY, `zed_svo_to_mcap` shows a tqdm-like progress bar. In bundled mode without `--end-frame`, it first counts synced groups to make that progress total exact.
In bundled multi-camera mode, `--start-frame` and `--end-frame` mean the first and last emitted synced frame-group indices from the common start timestamp, inclusive.
When stderr is attached to a TTY, `zed_svo_to_mcap` shows a tqdm-like progress bar. In bundled mode without `--end-frame`, it uses approximate progress over the common synchronized time window so export starts immediately without a full pre-count pass.
Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid outputs are treated as missing and requeued:
@@ -2,6 +2,7 @@
#include <cstdint>
#include <memory>
#include <string_view>
namespace cvmmap_streamer::zed_tools {
@@ -17,7 +18,9 @@ public:
bool enabled() const;
void update(std::uint64_t completed_frames);
void update_fraction(double fraction, std::string_view detail = {});
void finish(std::uint64_t completed_frames, bool success);
void finish_fraction(double fraction, bool success, std::string_view detail = {});
private:
struct Impl;
+11
View File
@@ -37,6 +37,7 @@ class BatchConfig:
with_pose: bool
pose_config: Path | None
world_frame_id: str | None
start_frame: int | None
end_frame: int | None
sync_tolerance_ms: float | None
@@ -293,6 +294,8 @@ def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]:
command.extend(["--pose-config", str(config.pose_config)])
if config.world_frame_id is not None:
command.extend(["--world-frame-id", config.world_frame_id])
if config.start_frame is not None:
command.extend(["--start-frame", str(config.start_frame)])
if config.end_frame is not None:
command.extend(["--end-frame", str(config.end_frame)])
if config.sync_tolerance_ms is not None:
@@ -576,6 +579,12 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
default=None,
help="Optional pose reference frame id passed through to zed_svo_to_mcap.",
)
@click.option(
"--start-frame",
type=click.IntRange(min=0),
default=None,
help="First synced frame group to export (inclusive) in bundled multi-camera mode.",
)
@click.option(
"--end-frame",
type=click.IntRange(min=0),
@@ -609,6 +618,7 @@ def main(
with_pose: bool,
pose_config: Path | None,
world_frame_id: str | None,
start_frame: int | None,
end_frame: int | None,
sync_tolerance_ms: float | None,
) -> None:
@@ -631,6 +641,7 @@ def main(
with_pose=with_pose,
pose_config=pose_config.expanduser().resolve() if pose_config is not None else None,
world_frame_id=world_frame_id,
start_frame=start_frame,
end_frame=end_frame,
sync_tolerance_ms=sync_tolerance_ms,
)
+73 -16
View File
@@ -1,5 +1,6 @@
#include "cvmmap_streamer/tools/zed_progress_bar.hpp"
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdio>
@@ -41,6 +42,26 @@ struct ProgressBar::Impl {
started_at(Clock::now()),
last_render_at(started_at) {}
void render_prefix(const double ratio, const Clock::time_point now, char *line, const std::size_t line_size) {
  // Shared "[####----] pct | elapsed | ETA" prefix used by both the
  // frame-count and fraction-based rendering paths.
  const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
  std::string bar(24, '-');
  // Original loop appended '#' while index < filled; capping at the bar
  // width reproduces that exactly even if ratio ever exceeds 1.0.
  std::fill_n(bar.begin(), std::min<std::size_t>(filled, bar.size()), '#');
  const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
  // ETA extrapolated from the completed fraction; 0 until any progress is made.
  const auto eta_seconds = ratio > 0.0 ? elapsed_seconds * (1.0 - ratio) / ratio : 0.0;
  std::snprintf(
      line,
      line_size,
      "\r[%s] %6.2f%% | %s elapsed | %s ETA",
      bar.c_str(),
      ratio * 100.0,
      format_duration(elapsed_seconds).c_str(),
      format_duration(eta_seconds).c_str());
}
void render(const std::uint64_t completed_frames, const bool force) {
if (!enabled || total_frames == 0) {
return;
@@ -55,29 +76,45 @@ struct ProgressBar::Impl {
const auto bounded_completed = completed_frames > total_frames ? total_frames : completed_frames;
const double ratio = static_cast<double>(bounded_completed) / static_cast<double>(total_frames);
const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
std::string bar{};
bar.reserve(24);
for (std::size_t index = 0; index < 24; ++index) {
bar.push_back(index < filled ? '#' : '-');
}
const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
const auto fps = elapsed_seconds > 0.0 ? static_cast<double>(bounded_completed) / elapsed_seconds : 0.0;
const auto eta_seconds = fps > 0.0 ? static_cast<double>(total_frames - bounded_completed) / fps : 0.0;
char line[256]{};
render_prefix(ratio, now, line, sizeof(line));
const auto written = std::char_traits<char>::length(line);
std::snprintf(
line,
sizeof(line),
"\r[%s] %6.2f%% %llu/%llu | %5.1f fps | %s elapsed | %s ETA\x1b[K",
bar.c_str(),
ratio * 100.0,
line + written,
sizeof(line) - written,
" | %llu/%llu | %5.1f fps\x1b[K",
static_cast<unsigned long long>(bounded_completed),
static_cast<unsigned long long>(total_frames),
fps,
format_duration(elapsed_seconds).c_str(),
format_duration(eta_seconds).c_str());
fps);
std::fprintf(stderr, "%s", line);
std::fflush(stderr);
}
void render_fraction(const double fraction, const std::string_view detail, const bool force) {
  // Fraction-based rendering for modes without an exact frame total
  // (e.g. bundled export over an approximate time window).
  if (!enabled) {
    return;
  }
  const auto now = Clock::now();
  // Throttle unforced redraws to ~8 Hz once something has been drawn.
  const bool throttled = !force && rendered && now - last_render_at < std::chrono::milliseconds(125);
  if (throttled) {
    return;
  }
  last_render_at = now;
  rendered = true;
  char line[256]{};
  render_prefix(std::clamp(fraction, 0.0, 1.0), now, line, sizeof(line));
  const auto written = std::char_traits<char>::length(line);
  if (detail.empty()) {
    // \x1b[K clears any leftover characters from a longer previous line.
    std::snprintf(line + written, sizeof(line) - written, "\x1b[K");
  } else {
    std::snprintf(line + written, sizeof(line) - written, " | %.*s\x1b[K", static_cast<int>(detail.size()), detail.data());
  }
  std::fprintf(stderr, "%s", line);
  std::fflush(stderr);
}
@@ -102,6 +139,10 @@ void ProgressBar::update(const std::uint64_t completed_frames) {
impl_->render(completed_frames, false);
}
void ProgressBar::update_fraction(const double fraction, const std::string_view detail) {
  // Guard against a moved-from/empty ProgressBar; finish() and
  // finish_fraction() already null-check impl_, this path previously did not.
  if (impl_ == nullptr) {
    return;
  }
  impl_->render_fraction(fraction, detail, false);
}
void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) {
if (impl_ == nullptr || !impl_->enabled) {
return;
@@ -118,4 +159,20 @@ void ProgressBar::finish(const std::uint64_t completed_frames, const bool succes
std::fflush(stderr);
}
void ProgressBar::finish_fraction(const double fraction, const bool success, const std::string_view detail) {
  if (impl_ == nullptr || !impl_->enabled) {
    return;
  }
  // If the bar already shows 100% on success there is nothing to redraw;
  // otherwise force one final render (failure states included).
  const bool already_at_full = success && impl_->rendered && fraction >= 1.0;
  if (!already_at_full) {
    impl_->render_fraction(fraction, detail, true);
    if (!impl_->rendered) {
      // render_fraction drew nothing (bar disabled mid-run); no line to close.
      return;
    }
  }
  std::fprintf(stderr, "%s", success ? "\n" : " [failed]\n");
  std::fflush(stderr);
}
} // namespace cvmmap_streamer::zed_tools
+119 -65
View File
@@ -1155,55 +1155,71 @@ std::expected<void, std::string> advance_after_emit(std::vector<CameraStream> &s
}
[[nodiscard]]
// Fraction of the common synchronized time window covered by the given
// synced-group timestamp, clamped to [0, 1]. Used for approximate progress
// when --end-frame is not set. NOTE(review): this span in the diff view
// interleaved the removed count_synced_groups fragments with the new
// definition; this is the coherent new function.
double bundled_progress_fraction(
    const std::uint64_t common_start_ts,
    const std::uint64_t common_end_ts,
    const std::uint64_t group_timestamp_ns) {
  // Degenerate or inverted window: report complete rather than dividing by
  // zero (and avoid std::clamp with lo > hi, which is undefined behavior).
  if (common_end_ts <= common_start_ts) {
    return 1.0;
  }
  const auto bounded_timestamp = std::clamp(group_timestamp_ns, common_start_ts, common_end_ts);
  return static_cast<double>(bounded_timestamp - common_start_ts) /
         static_cast<double>(common_end_ts - common_start_ts);
}
[[nodiscard]]
// Human-readable "elapsed/total overlap" suffix for the approximate bundled
// progress bar, e.g. "12.5s/30.0s overlap".
std::string bundled_progress_detail(
    const std::uint64_t common_start_ts,
    const std::uint64_t common_end_ts,
    const std::uint64_t group_timestamp_ns) {
  // Guard the degenerate/inverted window: std::clamp(ts, lo, hi) is UB when
  // hi < lo (the sibling bundled_progress_fraction already guards this case).
  const std::uint64_t bounded_timestamp = common_end_ts > common_start_ts
      ? std::clamp(group_timestamp_ns, common_start_ts, common_end_ts)
      : common_start_ts;
  const double elapsed_seconds = static_cast<double>(bounded_timestamp - common_start_ts) / 1'000'000'000.0;
  const double total_seconds = common_end_ts > common_start_ts
      ? static_cast<double>(common_end_ts - common_start_ts) / 1'000'000'000.0
      : 0.0;
  char buffer[96]{};
  std::snprintf(buffer, sizeof(buffer), "%.1fs/%.1fs overlap", elapsed_seconds, total_seconds);
  return std::string(buffer);
}
[[nodiscard]]
std::expected<std::uint64_t, std::string> skip_bundled_start_groups(
std::vector<CameraStream> &streams,
const std::uint32_t start_group_index,
const std::uint64_t tolerance_ns,
const std::uint64_t common_end_ts) {
for (std::uint32_t skipped_groups = 0; skipped_groups < start_group_index; ++skipped_groups) {
auto group_timestamp = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
if (!group_timestamp) {
return std::unexpected(group_timestamp.error());
}
if (!*group_timestamp) {
break;
}
counted_groups += 1;
if (max_groups && counted_groups >= *max_groups) {
break;
return std::unexpected(
"start-frame " + std::to_string(start_group_index) +
" is out of range for bundled multi-camera mode");
}
auto advance = advance_after_emit(streams);
if (!advance) {
if (advance.error() == "end-of-svo") {
break;
return std::unexpected(
"start-frame " + std::to_string(start_group_index) +
" is out of range for bundled multi-camera mode");
}
return std::unexpected(advance.error());
}
}
return counted_groups;
auto first_selected_group = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
if (!first_selected_group) {
return std::unexpected(first_selected_group.error());
}
void reset_streams_after_count(std::vector<CameraStream> &streams) {
for (auto &stream : streams) {
stream.current_tracking = {};
stream.next_tracking = {};
stream.current_timestamp_ns = 0;
stream.next_timestamp_ns = 0;
stream.dropped_frames = 0;
stream.sync_position = -1;
stream.has_next = false;
stream.calibration_written = false;
stream.last_tracking_state.reset();
if (!*first_selected_group) {
return std::unexpected(
"start-frame " + std::to_string(start_group_index) +
" is out of range for bundled multi-camera mode");
}
return **first_selected_group;
}
[[nodiscard]]
@@ -1648,8 +1664,11 @@ int run_multi_source(
const cvmmap_streamer::McapCompression compression,
const sl::DEPTH_MODE depth_mode,
const PoseTrackingOptions &pose_tracking) {
if (options.start_frame != 0) {
spdlog::error("multi-camera mode does not support --start-frame");
if (options.has_end_frame && options.end_frame < options.start_frame) {
spdlog::error(
"invalid bundled group range: start-frame={} end-frame={}",
options.start_frame,
options.end_frame);
return exit_code(ToolExitCode::UsageError);
}
bool interrupted{false};
@@ -1707,35 +1726,11 @@ int run_multi_source(
common_end_ts,
tolerance_ns);
const auto render_progress = stderr_supports_progress_bar();
std::uint64_t total_groups_to_emit{0};
if (render_progress) {
if (auto synced = sync_streams_to_timestamp(streams, common_start_ts, false); !synced) {
close_camera_streams(streams);
if (synced.error() == "interrupted") {
return interrupted_exit_code();
}
spdlog::error("{}", synced.error());
return exit_code(ToolExitCode::RuntimeError);
}
if (!options.has_end_frame) {
std::fprintf(stderr, "counting synced groups for exact progress...\n");
std::fflush(stderr);
}
const auto max_groups = options.has_end_frame
? std::optional<std::uint64_t>{static_cast<std::uint64_t>(options.end_frame) + 1}
: std::nullopt;
auto counted_groups = count_synced_groups(streams, tolerance_ns, common_end_ts, max_groups);
if (!counted_groups) {
close_camera_streams(streams);
if (counted_groups.error() == "interrupted") {
return interrupted_exit_code();
}
spdlog::error("failed to count synced groups: {}", counted_groups.error());
return exit_code(ToolExitCode::RuntimeError);
}
total_groups_to_emit = *counted_groups;
reset_streams_after_count(streams);
}
const std::uint64_t selected_total_groups = options.has_end_frame
? static_cast<std::uint64_t>(options.end_frame - options.start_frame) + 1
: 0;
const bool exact_group_progress = render_progress && options.has_end_frame;
const bool approximate_time_progress = render_progress && !options.has_end_frame;
if (auto synced = sync_streams_to_timestamp(streams, common_start_ts); !synced) {
close_camera_streams(streams);
if (synced.error() == "interrupted") {
@@ -1744,17 +1739,40 @@ int run_multi_source(
spdlog::error("{}", synced.error());
return exit_code(ToolExitCode::RuntimeError);
}
ProgressBar progress{total_groups_to_emit};
auto effective_progress_start_ts = common_start_ts;
if (options.start_frame > 0) {
if (auto skipped_to = skip_bundled_start_groups(streams, options.start_frame, tolerance_ns, common_end_ts); !skipped_to) {
close_camera_streams(streams);
if (skipped_to.error() == "interrupted") {
return interrupted_exit_code();
}
spdlog::error("{}", skipped_to.error());
return exit_code(ToolExitCode::UsageError);
} else {
effective_progress_start_ts = *skipped_to;
}
}
ProgressBar progress{exact_group_progress ? selected_total_groups : 0};
double last_progress_fraction = 0.0;
std::string last_progress_detail{};
auto sink = cvmmap_streamer::record::MultiMcapRecordSink::create(output_path.string(), compression);
if (!sink) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(0, false);
}
close_camera_streams(streams);
spdlog::error("failed to create MCAP sink: {}", sink.error());
return exit_code(ToolExitCode::RuntimeError);
}
if (auto registered = register_mcap_streams(*sink, streams, options); !registered) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(0, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("failed to register MCAP streams: {}", registered.error());
@@ -1769,7 +1787,11 @@ int run_multi_source(
}
auto group_timestamp = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
if (!group_timestamp) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
if (group_timestamp.error() == "interrupted") {
@@ -1783,15 +1805,25 @@ int run_multi_source(
}
if (auto write = encode_and_write_group(*sink, streams, options, **group_timestamp); !write) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("{}", write.error());
return exit_code(ToolExitCode::RuntimeError);
}
emitted_groups += 1;
if (approximate_time_progress) {
last_progress_fraction = bundled_progress_fraction(effective_progress_start_ts, common_end_ts, **group_timestamp);
last_progress_detail = bundled_progress_detail(effective_progress_start_ts, common_end_ts, **group_timestamp);
progress.update_fraction(last_progress_fraction, last_progress_detail);
} else {
progress.update(emitted_groups);
if (options.has_end_frame && emitted_groups > options.end_frame) {
}
if (options.has_end_frame && emitted_groups >= selected_total_groups) {
break;
}
@@ -1800,7 +1832,11 @@ int run_multi_source(
if (advance.error() == "end-of-svo") {
break;
}
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("{}", advance.error());
@@ -1810,7 +1846,11 @@ int run_multi_source(
for (auto &stream : streams) {
if (auto flushed = flush_and_write(*sink, stream.mcap_stream_id, *stream.backend); !flushed) {
if (approximate_time_progress) {
progress.finish_fraction(last_progress_fraction, false, last_progress_detail);
} else {
progress.finish(emitted_groups, false);
}
sink->close();
close_camera_streams(streams);
spdlog::error("failed to finalize encoded video for {}: {}", stream.source.label, flushed.error());
@@ -1819,7 +1859,15 @@ int run_multi_source(
}
sink->close();
if (approximate_time_progress) {
if (!interrupted) {
last_progress_fraction = 1.0;
last_progress_detail = bundled_progress_detail(effective_progress_start_ts, common_end_ts, common_end_ts);
}
progress.finish_fraction(last_progress_fraction, !interrupted, last_progress_detail);
} else {
progress.finish(emitted_groups, !interrupted);
}
for (const auto &stream : streams) {
spdlog::info(
"bundled {} dropped_frame(s) for {}",
@@ -1864,9 +1912,15 @@ int main(int argc, char **argv) {
->check(CLI::IsMember({"none", "lz4", "zstd"}));
app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)")
->check(CLI::IsMember({"neural", "quality", "performance", "ultra"}));
app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
app.add_option(
"--start-frame",
options.start_frame,
"First frame/group to export (inclusive): raw SVO frame in single-camera mode, synced group index in bundled multi-camera mode")
->check(CLI::NonNegativeNumber);
auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
auto *end_frame_option = app.add_option(
"--end-frame",
options.end_frame,
"Last frame/group to export (inclusive): raw SVO frame in single-camera mode, synced group index in bundled multi-camera mode")
->check(CLI::NonNegativeNumber);
app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics");
app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");