Add synced ZED SVO grid exporter
This commit is contained in:
@@ -0,0 +1,702 @@
|
||||
#include <CLI/CLI.hpp>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <sl/Camera.hpp>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <expected>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace {

// Shorthand aliases for the shared SVO->MP4 helper utilities.
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;

// The grid layout is fixed at 2x2, so exactly four SVO inputs are required.
constexpr std::size_t kExpectedInputCount = 4;
|
||||
|
||||
// Process exit codes for this tool: 0 on success, 2 for command-line /
// configuration errors, 3 for runtime failures (SDK, decode, encode, mux).
enum class ToolExitCode : int {
  Success = 0,
  UsageError = 2,
  RuntimeError = 3,
};
|
||||
|
||||
// Parsed command-line options. Encoder defaults come from the shared
// zed_tools constants; the has_* flags record whether the corresponding
// optional flag was actually supplied (CLI11 cannot distinguish "absent"
// from "explicit default" in the value alone).
struct CliOptions {
  std::vector<std::string> input_paths{};  // --input, repeated exactly 4 times (row-major)
  std::string segment_dir{};               // --segment-dir, alternative to --input
  std::string output_path{};               // --output; empty means "derive from inputs"
  std::string codec{"h265"};               // --codec (h264|h265)
  std::string encoder_device{"auto"};      // --encoder-device (auto|nvidia|software)
  std::string preset{"fast"};              // --preset (fast|balanced|quality)
  std::string tune{"low-latency"};         // --tune (low-latency|balanced)
  int quality{cvmmap_streamer::zed_tools::kDefaultQuality};        // --quality, 0-51 lower is better
  std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};  // --gop, keyframe interval in frames
  std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames};  // --b-frames
  double start_offset_seconds{0.0};        // --start-offset-seconds after the synced start
  double duration_seconds{0.0};            // --duration-seconds; meaningful only when has_duration
  bool has_duration{false};                // whether --duration-seconds was given
  double output_fps{0.0};                  // --output-fps; meaningful only when has_output_fps
  bool has_output_fps{false};              // whether --output-fps was given
  double tile_scale{0.5};                  // --tile-scale, per-tile scale in (0.1, 1.0]
};
|
||||
|
||||
// One input SVO file plus the human-readable label ("zed1".."zed4" or
// "view1".."view4") used in log messages.
struct SourceSpec {
  std::filesystem::path path{};
  std::string label{};
};
|
||||
|
||||
// One opened SVO input plus its double-buffered playback state. The export
// loop keeps the frame at-or-before the target timestamp in current_frame
// and a one-frame look-ahead in next_frame (valid only while has_next).
struct CameraStream {
  SourceSpec source{};                       // originating file and grid label
  std::unique_ptr<sl::Camera> camera{};      // owning handle to the ZED playback camera
  sl::RuntimeParameters runtime{};           // per-grab runtime parameters
  sl::Mat current_frame{};                   // frame currently composited into the grid
  sl::Mat next_frame{};                      // buffered look-ahead frame
  std::uint64_t current_timestamp_ns{0};     // timestamp of current_frame
  std::uint64_t next_timestamp_ns{0};        // timestamp of next_frame (valid iff has_next)
  std::uint64_t first_timestamp_ns{0};       // timestamp of the recording's first frame
  std::uint64_t last_timestamp_ns{0};        // timestamp of the recording's last frame
  std::uint64_t total_frames{0};             // frame count reported by the SDK
  std::uint64_t nominal_frame_period_ns{0};  // derived from the reported fps
  float fps{0.0f};                           // fps reported by the camera configuration
  std::uint32_t width{0};                    // source frame width in pixels
  std::uint32_t height{0};                   // source frame height in pixels
  int sync_position{-1};                     // SVO index of the synced start; -1 until computed
  bool has_next{false};                      // whether next_frame/next_timestamp_ns are valid
};
|
||||
|
||||
[[nodiscard]]
|
||||
constexpr int exit_code(const ToolExitCode code) {
|
||||
return static_cast<int>(code);
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::string zed_string(const sl::String &value) {
|
||||
return std::string(value.c_str() == nullptr ? "" : value.c_str());
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::string zed_status_string(const sl::ERROR_CODE code) {
|
||||
return zed_string(sl::toString(code));
|
||||
}
|
||||
|
||||
/// Verifies that a retrieved image is a non-empty U8_C3 (BGR) mat with a
/// valid CPU buffer; returns a descriptive error string otherwise.
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
  // Prefix every failure with the stream label so logs identify the input.
  const auto fail = [&label](const char *reason) {
    return std::unexpected(std::string(label) + reason);
  };

  if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
    return fail(" must be U8_C3");
  }
  if (mat.getWidth() == 0 || mat.getHeight() == 0) {
    return fail(" dimensions must be non-zero");
  }
  if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
    return fail(" CPU buffer is null");
  }
  return {};
}
|
||||
|
||||
/// Scans `segment_dir` for files named *_zed1..*_zed4 (.svo or .svo2,
/// case-insensitive) and returns them ordered by camera index, which is
/// also the row-major grid order.
///
/// Fails when the directory is missing, when the number of matching files
/// is not exactly four, or when the same camera index appears more than
/// once (e.g. both foo_zed1.svo and foo_zed1.svo2 exist) — previously a
/// duplicate could satisfy the count check while silently dropping a view.
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) {
  if (!std::filesystem::is_directory(segment_dir)) {
    return std::unexpected("segment directory does not exist: " + segment_dir.string());
  }

  const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase};
  std::vector<std::pair<int, std::filesystem::path>> ordered_paths{};
  for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) {
    if (!entry.is_regular_file()) {
      continue;
    }

    std::smatch match{};
    const auto filename = entry.path().filename().string();
    if (!std::regex_match(filename, match, pattern)) {
      continue;
    }
    // match[1] is a single digit 1-4 by construction of the pattern.
    ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path());
  }

  // Sort by camera index so zed1..zed4 map to the grid tiles row-major.
  std::sort(
      ordered_paths.begin(),
      ordered_paths.end(),
      [](const auto &left, const auto &right) {
        return left.first < right.first;
      });

  if (ordered_paths.size() != kExpectedInputCount) {
    return std::unexpected(
        "expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size()));
  }

  // Reject duplicate camera indices: with the count already fixed at four,
  // a duplicate necessarily means another camera is missing.
  const auto duplicate = std::adjacent_find(
      ordered_paths.begin(),
      ordered_paths.end(),
      [](const auto &left, const auto &right) {
        return left.first == right.first;
      });
  if (duplicate != ordered_paths.end()) {
    return std::unexpected(
        "duplicate camera index zed" + std::to_string(duplicate->first) + " under '" + segment_dir.string() + "'");
  }

  std::vector<SourceSpec> sources{};
  sources.reserve(ordered_paths.size());
  for (const auto &[camera_index, path] : ordered_paths) {
    sources.push_back(SourceSpec{
        .path = path,
        .label = "zed" + std::to_string(camera_index),
    });
  }
  return sources;
}
|
||||
|
||||
/// Resolves the four input files: directory discovery when --segment-dir
/// was given, otherwise the explicitly repeated --input paths in row-major
/// order. Explicit inputs are labelled "view1".."view4".
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> resolve_sources(const CliOptions &options) {
  if (!options.segment_dir.empty()) {
    return discover_segment_inputs(std::filesystem::path{options.segment_dir});
  }

  if (options.input_paths.size() != kExpectedInputCount) {
    return std::unexpected("repeat --input exactly 4 times");
  }

  std::vector<SourceSpec> sources{};
  sources.reserve(options.input_paths.size());
  std::size_t view_number = 0;
  for (const auto &input : options.input_paths) {
    ++view_number;
    std::filesystem::path path{input};
    if (!std::filesystem::is_regular_file(path)) {
      return std::unexpected("input file does not exist: " + path.string());
    }
    sources.push_back(SourceSpec{
        .path = std::move(path),
        .label = "view" + std::to_string(view_number),
    });
  }
  return sources;
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::filesystem::path derive_grid_output_path(const CliOptions &options, const std::vector<SourceSpec> &sources) {
|
||||
if (!options.output_path.empty()) {
|
||||
return std::filesystem::path{options.output_path};
|
||||
}
|
||||
|
||||
if (!options.segment_dir.empty()) {
|
||||
const auto segment_dir = std::filesystem::path{options.segment_dir};
|
||||
return segment_dir / (segment_dir.filename().string() + "_grid.mp4");
|
||||
}
|
||||
|
||||
auto output_path = sources.front().path;
|
||||
output_path.replace_extension("");
|
||||
output_path += "_grid.mp4";
|
||||
return output_path;
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::string format_unix_timestamp(const std::uint64_t timestamp_ns) {
|
||||
const auto seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond;
|
||||
const auto milliseconds = (timestamp_ns % cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull;
|
||||
return std::to_string(seconds) + "." + (milliseconds < 100 ? (milliseconds < 10 ? "00" : "0") : "") + std::to_string(milliseconds);
|
||||
}
|
||||
|
||||
/// Paints the frame timestamp in the top-left corner of the composite:
/// white text over a solid black box so it stays readable on any content.
void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns) {
  constexpr auto kFontFace = cv::FONT_HERSHEY_SIMPLEX;
  constexpr double kFontScale = 0.8;
  constexpr int kThickness = 2;

  const auto label = format_unix_timestamp(timestamp_ns);
  int baseline = 0;
  const auto label_size = cv::getTextSize(label, kFontFace, kFontScale, kThickness, &baseline);

  // Black backing rectangle with an 8 px margin around the text box.
  const cv::Rect backing{8, 8, label_size.width + 16, label_size.height + baseline + 16};
  cv::rectangle(canvas, backing, cv::Scalar(0, 0, 0), cv::FILLED);

  // Text baseline sits 16 px in from the left and below the top margin.
  const cv::Point text_origin{16, 16 + label_size.height};
  cv::putText(canvas, label, text_origin, kFontFace, kFontScale, cv::Scalar(255, 255, 255), kThickness, cv::LINE_AA);
}
|
||||
|
||||
/// Reads the SDK-reported image timestamp for the frame just grabbed.
/// A zero timestamp is treated as missing: it is replaced by extrapolating
/// one nominal frame period past `fallback_timestamp_ns`. On the very
/// first frame there is nothing to extrapolate from, so zero is an error.
[[nodiscard]]
std::expected<std::uint64_t, std::string> read_image_timestamp_ns(
    sl::Camera &camera,
    const std::optional<std::uint64_t> fallback_timestamp_ns,
    const std::uint64_t nominal_frame_period_ns) {
  const auto reported_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
  if (reported_ns != 0) {
    return reported_ns;
  }
  if (fallback_timestamp_ns.has_value()) {
    return *fallback_timestamp_ns + nominal_frame_period_ns;
  }
  return std::unexpected("ZED SDK returned a zero image timestamp for the first frame");
}
|
||||
|
||||
// Grabs the next frame from `camera` and retrieves its left BGR image into
// `target`, writing the frame's timestamp to `timestamp_ns_out`.
//
// End of file is reported through the sentinel error string "end-of-svo"
// so fill_next_frame() can distinguish a clean EOF from a real failure;
// every other error string is a genuine failure suitable for logging.
[[nodiscard]]
std::expected<void, std::string> read_into_mat(
    sl::Camera &camera,
    sl::RuntimeParameters &runtime,
    sl::Mat &target,
    std::optional<std::uint64_t> fallback_timestamp_ns,
    std::uint64_t nominal_frame_period_ns,
    std::uint64_t &timestamp_ns_out,
    const std::string_view label) {
  // Advance the decoder by one frame; EOF maps onto the sentinel value.
  const auto grab_status = camera.grab(runtime);
  if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
    return std::unexpected("end-of-svo");
  }
  if (grab_status != sl::ERROR_CODE::SUCCESS) {
    return std::unexpected("failed to grab frame for " + std::string(label) + ": " + zed_status_string(grab_status));
  }

  // Pull the decoded left view as a BGR CPU image and sanity-check it.
  const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
  if (image_status != sl::ERROR_CODE::SUCCESS) {
    return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " + zed_status_string(image_status));
  }
  if (auto valid = validate_u8c3_mat(target, label); !valid) {
    return std::unexpected(valid.error());
  }

  // The timestamp may be extrapolated from fallback_timestamp_ns when the
  // SDK reports zero; see read_image_timestamp_ns.
  auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns);
  if (!timestamp_ns) {
    return std::unexpected(timestamp_ns.error());
  }
  timestamp_ns_out = *timestamp_ns;
  return {};
}
|
||||
|
||||
// Reads one frame ahead into stream.next_frame. Reaching the end of the
// SVO is not an error: it merely clears has_next, after which the export
// loop keeps re-using the final current_frame. The previous current
// timestamp is passed as the fallback for zero-timestamp extrapolation.
[[nodiscard]]
std::expected<void, std::string> fill_next_frame(CameraStream &stream) {
  std::uint64_t timestamp_ns = 0;
  auto next = read_into_mat(
      *stream.camera,
      stream.runtime,
      stream.next_frame,
      stream.current_timestamp_ns,
      stream.nominal_frame_period_ns,
      timestamp_ns,
      stream.source.label);
  if (!next) {
    // "end-of-svo" is the sentinel read_into_mat uses for a clean EOF.
    if (next.error() == "end-of-svo") {
      stream.has_next = false;
      return {};
    }
    return std::unexpected(next.error());
  }

  stream.next_timestamp_ns = timestamp_ns;
  stream.has_next = true;
  return {};
}
|
||||
|
||||
/// Makes the buffered look-ahead frame current, then immediately pre-reads
/// the following frame so the one-frame look-ahead invariant is restored.
/// Requires that a buffered frame exists (has_next).
[[nodiscard]]
std::expected<void, std::string> promote_next_frame(CameraStream &stream) {
  if (!stream.has_next) {
    return std::unexpected("no buffered next frame is available for " + stream.source.label);
  }

  // Swap rather than copy: sl::Mat buffers are reused between the slots.
  using std::swap;
  swap(stream.current_frame, stream.next_frame);
  swap(stream.current_timestamp_ns, stream.next_timestamp_ns);
  stream.has_next = false;
  return fill_next_frame(stream);
}
|
||||
|
||||
// Opens one SVO input in non-realtime playback mode (depth disabled) and
// probes its metadata: resolution, fps, frame count, and the first/last
// frame timestamps. The probe grabs real frames, so the camera is left
// positioned at the end of the file; main() re-seeks with setSVOPosition()
// before any frames are read for export.
[[nodiscard]]
std::expected<CameraStream, std::string> open_camera_stream(const SourceSpec &source) {
  CameraStream stream{};
  stream.source = source;
  stream.camera = std::make_unique<sl::Camera>();

  sl::InitParameters init{};
  init.input.setFromSVOFile(source.path.c_str());
  init.svo_real_time_mode = false;  // decode as fast as possible, no pacing
  init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
  init.coordinate_units = sl::UNIT::METER;
  init.depth_mode = sl::DEPTH_MODE::NONE;  // images only — skip depth compute
  init.sdk_verbose = false;

  const auto open_status = stream.camera->open(init);
  if (open_status != sl::ERROR_CODE::SUCCESS) {
    return std::unexpected("failed to open SVO '" + source.path.string() + "': " + zed_status_string(open_status));
  }

  const auto total_frames = stream.camera->getSVONumberOfFrames();
  if (total_frames <= 0) {
    return std::unexpected("input SVO has no frames: " + source.path.string());
  }
  stream.total_frames = static_cast<std::uint64_t>(total_frames);

  const auto camera_info = stream.camera->getCameraInformation().camera_configuration;
  stream.width = static_cast<std::uint32_t>(camera_info.resolution.width);
  stream.height = static_cast<std::uint32_t>(camera_info.resolution.height);
  stream.fps = camera_info.fps;
  stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps);
  if (stream.width == 0 || stream.height == 0) {
    return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
  }

  // Grab the first frame to learn the recording's start timestamp.
  std::uint64_t first_timestamp_ns = 0;
  auto first_frame = read_into_mat(
      *stream.camera,
      stream.runtime,
      stream.current_frame,
      std::nullopt,
      stream.nominal_frame_period_ns,
      first_timestamp_ns,
      source.label);
  if (!first_frame) {
    return std::unexpected(first_frame.error());
  }
  stream.first_timestamp_ns = first_timestamp_ns;

  // Seek to the final frame and grab it to learn the end timestamp.
  // current_frame is clobbered here, which is fine: main() re-seeks and
  // re-reads it before any compositing happens.
  stream.camera->setSVOPosition(static_cast<int>(stream.total_frames - 1));
  std::uint64_t last_timestamp_ns = 0;
  auto last_frame = read_into_mat(
      *stream.camera,
      stream.runtime,
      stream.current_frame,
      std::nullopt,
      stream.nominal_frame_period_ns,
      last_timestamp_ns,
      source.label);
  if (!last_frame) {
    return std::unexpected(last_frame.error());
  }
  stream.last_timestamp_ns = last_timestamp_ns;

  return stream;
}
|
||||
|
||||
void close_camera_streams(std::vector<CameraStream> &streams) {
|
||||
for (auto &stream : streams) {
|
||||
if (stream.camera != nullptr && stream.camera->isOpened()) {
|
||||
stream.camera->close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Entry point: parse/validate the CLI, open and time-align the four SVO
// inputs, then decode, composite into a 2x2 grid, and encode to MP4.
int main(int argc, char **argv) {
  CliOptions options{};

  // ---- CLI definition --------------------------------------------------
  CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"};
  auto *input_option = app.add_option("--input", options.input_paths, "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)");
  auto *segment_dir_option = app.add_option("--segment-dir", options.segment_dir, "Segment directory containing *_zed[1-4].svo or *_zed[1-4].svo2 files");
  // The two input modes are mutually exclusive.
  input_option->excludes(segment_dir_option);
  segment_dir_option->excludes(input_option);
  app.add_option("--output", options.output_path, "Output MP4 file");
  app.add_option("--codec", options.codec, "Video codec (h264|h265)")
      ->check(CLI::IsMember({"h264", "h265"}));
  app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
      ->check(CLI::IsMember({"auto", "nvidia", "software"}));
  app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
      ->check(CLI::IsMember({"fast", "balanced", "quality"}));
  app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
      ->check(CLI::IsMember({"low-latency", "balanced"}));
  app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
      ->check(CLI::Range(0, 51));
  app.add_option("--gop", options.gop, "Encoder GOP length in frames")
      ->check(CLI::PositiveNumber);
  app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
      ->check(CLI::NonNegativeNumber);
  app.add_option("--start-offset-seconds", options.start_offset_seconds, "Offset to apply after the synced common start time in seconds")
      ->check(CLI::NonNegativeNumber);
  auto *duration_option = app.add_option("--duration-seconds", options.duration_seconds, "Limit export duration in seconds after sync")
      ->check(CLI::PositiveNumber);
  auto *output_fps_option = app.add_option("--output-fps", options.output_fps, "Composite output frame rate (default: max input fps)")
      ->check(CLI::PositiveNumber);
  app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution")
      ->check(CLI::Range(0.1, 1.0));

  try {
    app.parse(argc, argv);
  } catch (const CLI::ParseError &error) {
    return app.exit(error);
  }
  // Record whether the optional numeric flags were explicitly supplied.
  options.has_duration = duration_option->count() > 0;
  options.has_output_fps = output_fps_option->count() > 0;

  // ---- Option validation -----------------------------------------------
  if (options.input_paths.empty() && options.segment_dir.empty()) {
    spdlog::error("provide either --segment-dir or repeat --input exactly 4 times");
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.b_frames > options.gop) {
    spdlog::error(
        "invalid encoder config: b-frames {} must be <= gop {}",
        options.b_frames,
        options.gop);
    return exit_code(ToolExitCode::UsageError);
  }

  // Parse the string-valued encoder options into their enum forms.
  auto codec = parse_codec(options.codec);
  if (!codec) {
    spdlog::error("{}", codec.error());
    return exit_code(ToolExitCode::UsageError);
  }

  auto encoder_device = parse_encoder_device(options.encoder_device);
  if (!encoder_device) {
    spdlog::error("{}", encoder_device.error());
    return exit_code(ToolExitCode::UsageError);
  }

  auto preset = parse_preset(options.preset);
  if (!preset) {
    spdlog::error("{}", preset.error());
    return exit_code(ToolExitCode::UsageError);
  }

  auto tune = parse_tune(options.tune);
  if (!tune) {
    spdlog::error("{}", tune.error());
    return exit_code(ToolExitCode::UsageError);
  }

  // ---- Input resolution and output path ----------------------------------
  auto sources = resolve_sources(options);
  if (!sources) {
    spdlog::error("{}", sources.error());
    return exit_code(ToolExitCode::UsageError);
  }

  const auto output_path = derive_grid_output_path(options, *sources);
  if (output_path.has_parent_path()) {
    std::filesystem::create_directories(output_path.parent_path());
  }

  const EncodeTuning tuning{
      .preset = *preset,
      .tune = *tune,
      .quality = options.quality,
      .gop = options.gop,
      .b_frames = options.b_frames,
  };

  // ---- Open all four inputs (closing any already-open on failure) --------
  std::vector<CameraStream> streams{};
  streams.reserve(sources->size());
  for (const auto &source : *sources) {
    auto stream = open_camera_stream(source);
    if (!stream) {
      close_camera_streams(streams);
      spdlog::error("{}", stream.error());
      return exit_code(ToolExitCode::RuntimeError);
    }
    streams.push_back(std::move(*stream));
  }

  // ---- Synced time window ------------------------------------------------
  // Sync start: the latest first-frame timestamp across the inputs (so all
  // four have content), plus the user-requested offset.
  const auto sync_start_ts = std::max_element(
      streams.begin(),
      streams.end(),
      [](const auto &left, const auto &right) {
        return left.first_timestamp_ns < right.first_timestamp_ns;
      })->first_timestamp_ns;
  const auto start_offset_ns = static_cast<std::uint64_t>(std::llround(options.start_offset_seconds * 1'000'000'000.0));
  const auto effective_start_ts = sync_start_ts + start_offset_ns;

  // Sync end: the earliest last-frame timestamp, optionally capped further
  // by --duration-seconds. All end bounds here are exclusive.
  const auto common_end_ts = std::min_element(
      streams.begin(),
      streams.end(),
      [](const auto &left, const auto &right) {
        return left.last_timestamp_ns < right.last_timestamp_ns;
      })->last_timestamp_ns;
  const auto requested_end_exclusive_ts = options.has_duration
      ? effective_start_ts + static_cast<std::uint64_t>(std::llround(options.duration_seconds * 1'000'000'000.0))
      : common_end_ts + 1;
  const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1);
  if (effective_start_ts >= output_end_exclusive_ts) {
    close_camera_streams(streams);
    spdlog::error(
        "synced time window is empty: start_ts={} end_ts={}",
        effective_start_ts,
        output_end_exclusive_ts);
    return exit_code(ToolExitCode::UsageError);
  }

  // ---- Resolution / fps checks -------------------------------------------
  // All four inputs must share a resolution; the output fps defaults to the
  // fastest input unless overridden.
  std::uint32_t source_width = streams.front().width;
  std::uint32_t source_height = streams.front().height;
  float max_input_fps = streams.front().fps;
  for (const auto &stream : streams) {
    if (stream.width != source_width || stream.height != source_height) {
      close_camera_streams(streams);
      spdlog::error(
          "all inputs must share the same resolution: expected {}x{}, got {}x{} for {}",
          source_width,
          source_height,
          stream.width,
          stream.height,
          stream.source.path.string());
      return exit_code(ToolExitCode::UsageError);
    }
    max_input_fps = std::max(max_input_fps, stream.fps);
  }

  const auto output_fps = options.has_output_fps ? static_cast<float>(options.output_fps) : max_input_fps;
  const auto output_period_ns = frame_period_ns(output_fps);
  // Ceiling division: the number of output ticks covering the window.
  const auto total_frames_to_emit =
      static_cast<std::uint64_t>((output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns);

  // ---- Seek each stream to the synced start ------------------------------
  for (auto &stream : streams) {
    stream.sync_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts});
    if (stream.sync_position < 0) {
      close_camera_streams(streams);
      spdlog::error(
          "failed to compute synced start frame for {} at timestamp {}",
          stream.source.path.string(),
          effective_start_ts);
      return exit_code(ToolExitCode::RuntimeError);
    }

    // Re-read current_frame (open_camera_stream left the camera at EOF)
    // and prime the one-frame look-ahead.
    stream.camera->setSVOPosition(stream.sync_position);
    std::uint64_t current_timestamp_ns = 0;
    auto current = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.current_frame,
        std::nullopt,
        stream.nominal_frame_period_ns,
        current_timestamp_ns,
        stream.source.label);
    if (!current) {
      close_camera_streams(streams);
      spdlog::error("{}", current.error());
      return exit_code(ToolExitCode::RuntimeError);
    }
    stream.current_timestamp_ns = current_timestamp_ns;

    auto next = fill_next_frame(stream);
    if (!next) {
      close_camera_streams(streams);
      spdlog::error("{}", next.error());
      return exit_code(ToolExitCode::RuntimeError);
    }

    // Advance past any frames that precede the effective start timestamp.
    while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) {
      auto promote = promote_next_frame(stream);
      if (!promote) {
        close_camera_streams(streams);
        spdlog::error("{}", promote.error());
        return exit_code(ToolExitCode::RuntimeError);
      }
    }

    spdlog::info(
        "ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}",
        stream.source.path.string(),
        stream.source.label,
        stream.sync_position,
        stream.first_timestamp_ns,
        stream.current_timestamp_ns,
        stream.has_next ? stream.next_timestamp_ns : 0);
  }

  // ---- Composite geometry and encoder ------------------------------------
  const auto tile_width = static_cast<int>(std::llround(static_cast<double>(source_width) * options.tile_scale));
  const auto tile_height = static_cast<int>(std::llround(static_cast<double>(source_height) * options.tile_scale));
  if (tile_width <= 0 || tile_height <= 0) {
    close_camera_streams(streams);
    spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale);
    return exit_code(ToolExitCode::UsageError);
  }

  // 2x2 grid: the composite is twice the tile size in each dimension.
  const auto composite_width = tile_width * 2;
  const auto composite_height = tile_height * 2;

  Mp4Writer writer{};
  if (auto open_writer = writer.open(
          output_path,
          *codec,
          *encoder_device,
          static_cast<std::uint32_t>(composite_width),
          static_cast<std::uint32_t>(composite_height),
          output_fps,
          tuning);
      !open_writer) {
    close_camera_streams(streams);
    spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
    return exit_code(ToolExitCode::RuntimeError);
  }

  // ---- Render/encode loop -------------------------------------------------
  cv::Mat composite(composite_height, composite_width, CV_8UC3);
  std::vector<cv::Mat> resized_tiles(streams.size());
  ProgressBar progress{total_frames_to_emit};

  for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) {
    const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns;
    if (target_timestamp_ns >= output_end_exclusive_ts) {
      break;
    }

    // Advance each stream until its buffered frame would overshoot the
    // target tick — current_frame then holds the newest frame at-or-before
    // the target timestamp.
    for (auto &stream : streams) {
      while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) {
        auto promote = promote_next_frame(stream);
        if (!promote) {
          progress.finish(emitted_frames, false);
          close_camera_streams(streams);
          spdlog::error("{}", promote.error());
          return exit_code(ToolExitCode::RuntimeError);
        }
      }
    }

    // Build the 2x2 grid: wrap each SDK buffer as a cv::Mat view (no copy),
    // resize into the per-stream scratch tile, and blit into the grid slot.
    composite.setTo(cv::Scalar(0, 0, 0));
    for (std::size_t index = 0; index < streams.size(); ++index) {
      auto &stream = streams[index];
      cv::Mat source_view(
          static_cast<int>(stream.current_frame.getHeight()),
          static_cast<int>(stream.current_frame.getWidth()),
          CV_8UC3,
          stream.current_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
          stream.current_frame.getStepBytes(sl::MEM::CPU));
      cv::resize(source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0, cv::INTER_AREA);

      // Row-major placement: index 0,1 on the top row, 2,3 on the bottom.
      const int row = static_cast<int>(index / 2);
      const int col = static_cast<int>(index % 2);
      const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height};
      resized_tiles[index].copyTo(composite(roi));
    }

    // Burn in the timestamp, then encode with a PTS relative to the start.
    draw_timestamp_overlay(composite, target_timestamp_ns);
    if (auto write = writer.write_bgr_frame(
            composite.data,
            static_cast<std::size_t>(composite.step),
            target_timestamp_ns - effective_start_ts);
        !write) {
      progress.finish(emitted_frames, false);
      close_camera_streams(streams);
      spdlog::error("failed to encode or mux frame: {}", write.error());
      return exit_code(ToolExitCode::RuntimeError);
    }

    progress.update(emitted_frames + 1);
  }

  // ---- Finalize ------------------------------------------------------------
  if (auto flush = writer.flush(); !flush) {
    progress.finish(total_frames_to_emit, false);
    close_camera_streams(streams);
    spdlog::error("failed to finalize MP4 output: {}", flush.error());
    return exit_code(ToolExitCode::RuntimeError);
  }

  progress.finish(total_frames_to_emit, true);
  close_camera_streams(streams);
  spdlog::info(
      "converted {} synced frames to '{}' using codec={} hardware={}",
      total_frames_to_emit,
      output_path.string(),
      cvmmap_streamer::zed_tools::codec_name(*codec),
      writer.using_hardware());
  return exit_code(ToolExitCode::Success);
}
|
||||
Reference in New Issue
Block a user