Add synced ZED SVO grid exporter

This commit is contained in:
2026-03-19 08:26:38 +00:00
parent 83171b415f
commit 2671ac7ba9
6 changed files with 2066 additions and 0 deletions
+702
View File
@@ -0,0 +1,702 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <sl/Camera.hpp>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <memory>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace {
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;
// The output grid is always 2x2, so exactly four inputs are required.
constexpr std::size_t kExpectedInputCount = 4;
// Process exit codes returned by main(). CLI parse errors are reported via
// CLI11's own app.exit() value instead of these.
enum class ToolExitCode : int {
    Success = 0,
    UsageError = 2,
    RuntimeError = 3,
};
// Parsed command-line options. Defaults here mirror the option definitions
// registered on the CLI::App in main().
struct CliOptions {
    std::vector<std::string> input_paths{};  // --input, repeated; tiles in row-major order
    std::string segment_dir{};               // --segment-dir; mutually exclusive with --input
    std::string output_path{};               // --output; empty means "derive from inputs"
    std::string codec{"h265"};               // --codec (h264|h265)
    std::string encoder_device{"auto"};      // --encoder-device (auto|nvidia|software)
    std::string preset{"fast"};              // --preset (fast|balanced|quality)
    std::string tune{"low-latency"};         // --tune (low-latency|balanced)
    int quality{cvmmap_streamer::zed_tools::kDefaultQuality};            // --quality (0-51, lower is better)
    std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};      // --gop, GOP length in frames
    std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames}; // --b-frames count
    double start_offset_seconds{0.0};  // --start-offset-seconds, applied after the synced start
    double duration_seconds{0.0};      // --duration-seconds; meaningful only when has_duration
    bool has_duration{false};          // whether --duration-seconds was supplied
    double output_fps{0.0};            // --output-fps; meaningful only when has_output_fps
    bool has_output_fps{false};        // whether --output-fps was supplied
    double tile_scale{0.5};            // --tile-scale, per-tile scale factor (0.1-1.0)
};
// One input recording: the SVO path plus a short label used in logs and
// error messages ("zed1".."zed4" in segment mode, "view1".."view4" otherwise).
struct SourceSpec {
    std::filesystem::path path{};
    std::string label{};
};
// Per-input playback state: the opened ZED handle plus a two-slot frame
// buffer (current + lookahead) used to step each stream along the shared
// composite timeline.
struct CameraStream {
    SourceSpec source{};                   // originating file + log label
    std::unique_ptr<sl::Camera> camera{};  // opened SVO playback handle
    sl::RuntimeParameters runtime{};
    sl::Mat current_frame{};               // frame displayed at the current output timestamp
    sl::Mat next_frame{};                  // buffered lookahead frame (valid when has_next)
    std::uint64_t current_timestamp_ns{0}; // image timestamp of current_frame
    std::uint64_t next_timestamp_ns{0};    // image timestamp of next_frame
    std::uint64_t first_timestamp_ns{0};   // timestamp of the recording's first frame
    std::uint64_t last_timestamp_ns{0};    // timestamp of the recording's last frame
    std::uint64_t total_frames{0};         // frame count reported by the SDK
    std::uint64_t nominal_frame_period_ns{0}; // derived from fps; substitute when the SDK reports a zero timestamp
    float fps{0.0f};                       // nominal capture rate
    std::uint32_t width{0};
    std::uint32_t height{0};
    int sync_position{-1};                 // SVO frame index at the synced start (-1 = not yet computed)
    bool has_next{false};                  // next_frame / next_timestamp_ns are valid
};
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
return static_cast<int>(code);
}
// Convert an sl::String to std::string, mapping a null payload to "".
[[nodiscard]]
std::string zed_string(const sl::String &value) {
    const char *raw = value.c_str();
    if (raw == nullptr) {
        return {};
    }
    return std::string{raw};
}
// Human-readable rendering of a ZED SDK error code.
[[nodiscard]]
std::string zed_status_string(const sl::ERROR_CODE code) {
    const auto text = sl::toString(code);
    return zed_string(text);
}
// Verify that `mat` is a non-empty U8_C3 image with a valid CPU buffer —
// the exact shape the export loop relies on when wrapping the buffer as a
// cv::Mat without copying.
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
    if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
        return std::unexpected(std::string(label) + " must be U8_C3");
    }
    if (mat.getWidth() == 0 || mat.getHeight() == 0) {
        return std::unexpected(std::string(label) + " dimensions must be non-zero");
    }
    if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
        return std::unexpected(std::string(label) + " CPU buffer is null");
    }
    return {};
}
// Scan `segment_dir` for the four per-camera recordings named
// "*_zed[1-4].svo" / "*_zed[1-4].svo2" and return them ordered by camera
// index (zed1..zed4).
//
// Fails when the directory is missing, when the number of matching files is
// not exactly kExpectedInputCount, or when two files claim the same camera
// index — previously four files such as {zed1, zed1, zed2, zed3} would pass
// the count check and silently export a duplicated view.
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) {
    if (!std::filesystem::is_directory(segment_dir)) {
        return std::unexpected("segment directory does not exist: " + segment_dir.string());
    }
    // Case-insensitive suffix match; accepts both .svo and .svo2.
    const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase};
    std::vector<std::pair<int, std::filesystem::path>> ordered_paths{};
    for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) {
        if (!entry.is_regular_file()) {
            continue;
        }
        std::smatch match{};
        const auto filename = entry.path().filename().string();
        if (!std::regex_match(filename, match, pattern)) {
            continue;
        }
        // Capture group is constrained to [1-4], so stoi cannot throw here.
        ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path());
    }
    std::sort(
        ordered_paths.begin(),
        ordered_paths.end(),
        [](const auto &left, const auto &right) {
            return left.first < right.first;
        });
    if (ordered_paths.size() != kExpectedInputCount) {
        return std::unexpected(
            "expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size()));
    }
    // With exactly four entries, a duplicate index implies a missing camera.
    for (std::size_t index = 1; index < ordered_paths.size(); ++index) {
        if (ordered_paths[index].first == ordered_paths[index - 1].first) {
            return std::unexpected(
                "duplicate camera index zed" + std::to_string(ordered_paths[index].first) +
                " under '" + segment_dir.string() + "'");
        }
    }
    std::vector<SourceSpec> sources{};
    sources.reserve(ordered_paths.size());
    for (const auto &[camera_index, path] : ordered_paths) {
        sources.push_back(SourceSpec{
            .path = path,
            .label = "zed" + std::to_string(camera_index),
        });
    }
    return sources;
}
// Turn CLI options into the ordered list of four input recordings, either
// by scanning --segment-dir or by validating the explicit --input paths.
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> resolve_sources(const CliOptions &options) {
    // Segment mode takes precedence when supplied (the CLI makes the two
    // modes mutually exclusive).
    if (!options.segment_dir.empty()) {
        return discover_segment_inputs(std::filesystem::path{options.segment_dir});
    }
    if (options.input_paths.size() != kExpectedInputCount) {
        return std::unexpected("repeat --input exactly 4 times");
    }
    std::vector<SourceSpec> sources{};
    sources.reserve(options.input_paths.size());
    std::size_t view_number = 0;
    for (const auto &raw_path : options.input_paths) {
        ++view_number;
        std::filesystem::path candidate{raw_path};
        if (!std::filesystem::is_regular_file(candidate)) {
            return std::unexpected("input file does not exist: " + candidate.string());
        }
        sources.push_back(SourceSpec{
            .path = candidate,
            .label = "view" + std::to_string(view_number),
        });
    }
    return sources;
}
// Choose where the composite MP4 is written: an explicit --output wins,
// segment mode writes "<dirname>_grid.mp4" inside the segment directory,
// and manual mode derives the name from the first input file.
[[nodiscard]]
std::filesystem::path derive_grid_output_path(const CliOptions &options, const std::vector<SourceSpec> &sources) {
    if (!options.output_path.empty()) {
        return std::filesystem::path{options.output_path};
    }
    if (!options.segment_dir.empty()) {
        const std::filesystem::path segment_dir{options.segment_dir};
        auto name = segment_dir.filename().string();
        name += "_grid.mp4";
        return segment_dir / name;
    }
    auto derived = sources.front().path;
    derived.replace_extension("");
    derived += "_grid.mp4";
    return derived;
}
// Render a nanosecond Unix timestamp as "seconds.mmm" with the millisecond
// part always zero-padded to three digits.
[[nodiscard]]
std::string format_unix_timestamp(const std::uint64_t timestamp_ns) {
    const auto whole_seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond;
    const auto millis = (timestamp_ns % cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull;
    auto fraction = std::to_string(millis);
    fraction.insert(0, 3 - fraction.size(), '0');
    return std::to_string(whole_seconds) + "." + fraction;
}
// Paint the frame's wall-clock timestamp in the top-left corner: white text
// over a filled black box so it stays legible on any background.
void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns) {
    const auto label = format_unix_timestamp(timestamp_ns);
    constexpr auto kFont = cv::FONT_HERSHEY_SIMPLEX;
    constexpr double kScale = 0.8;
    constexpr int kThickness = 2;
    int baseline = 0;
    const auto extent = cv::getTextSize(label, kFont, kScale, kThickness, &baseline);
    // Backdrop is padded 8 px around the text on every side.
    const cv::Rect backdrop{8, 8, extent.width + 16, extent.height + baseline + 16};
    cv::rectangle(canvas, backdrop, cv::Scalar(0, 0, 0), cv::FILLED);
    const cv::Point anchor{16, 16 + extent.height};
    cv::putText(canvas, label, anchor, kFont, kScale, cv::Scalar(255, 255, 255), kThickness, cv::LINE_AA);
}
// Fetch the grabbed frame's image timestamp. A zero value from the SDK is
// treated as "missing": the previous frame's timestamp plus the nominal
// frame period is substituted, or an error is reported when there is no
// previous frame to extrapolate from.
[[nodiscard]]
std::expected<std::uint64_t, std::string> read_image_timestamp_ns(
    sl::Camera &camera,
    const std::optional<std::uint64_t> fallback_timestamp_ns,
    const std::uint64_t nominal_frame_period_ns) {
    const auto reported_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
    if (reported_ns != 0) {
        return reported_ns;
    }
    if (fallback_timestamp_ns.has_value()) {
        return *fallback_timestamp_ns + nominal_frame_period_ns;
    }
    return std::unexpected("ZED SDK returned a zero image timestamp for the first frame");
}
// Grab the next frame from `camera` and copy its left BGR view into
// `target`, writing the frame's image timestamp to `timestamp_ns_out`.
//
// A zero SDK timestamp is replaced by `fallback_timestamp_ns` plus
// `nominal_frame_period_ns` (see read_image_timestamp_ns). Reaching the end
// of the SVO is reported via the sentinel error string "end-of-svo", which
// fill_next_frame treats as normal termination rather than a failure.
[[nodiscard]]
std::expected<void, std::string> read_into_mat(
    sl::Camera &camera,
    sl::RuntimeParameters &runtime,
    sl::Mat &target,
    std::optional<std::uint64_t> fallback_timestamp_ns,
    std::uint64_t nominal_frame_period_ns,
    std::uint64_t &timestamp_ns_out,
    const std::string_view label) {
    const auto grab_status = camera.grab(runtime);
    // NOTE: "end-of-svo" is a sentinel consumed by fill_next_frame — keep the
    // exact spelling in sync with that caller.
    if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
        return std::unexpected("end-of-svo");
    }
    if (grab_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to grab frame for " + std::string(label) + ": " + zed_status_string(grab_status));
    }
    // Retrieve the left eye as 8-bit BGR into CPU memory so the export loop
    // can wrap it as a cv::Mat without copying.
    const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
    if (image_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " + zed_status_string(image_status));
    }
    if (auto valid = validate_u8c3_mat(target, label); !valid) {
        return std::unexpected(valid.error());
    }
    auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns);
    if (!timestamp_ns) {
        return std::unexpected(timestamp_ns.error());
    }
    timestamp_ns_out = *timestamp_ns;
    return {};
}
// Read one more frame from the stream into the lookahead slot
// (next_frame / next_timestamp_ns) and mark it valid. Reaching the end of
// the SVO is not an error: has_next is simply left false.
[[nodiscard]]
std::expected<void, std::string> fill_next_frame(CameraStream &stream) {
    std::uint64_t timestamp_ns = 0;
    auto next = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.next_frame,
        stream.current_timestamp_ns,  // fallback when the SDK reports a zero timestamp
        stream.nominal_frame_period_ns,
        timestamp_ns,
        stream.source.label);
    if (!next) {
        // "end-of-svo" is the sentinel emitted by read_into_mat when grab()
        // returns END_OF_SVOFILE_REACHED.
        if (next.error() == "end-of-svo") {
            stream.has_next = false;
            return {};
        }
        return std::unexpected(next.error());
    }
    stream.next_timestamp_ns = timestamp_ns;
    stream.has_next = true;
    return {};
}
// Advance the stream by one frame: the buffered lookahead becomes the
// current frame, then a fresh lookahead is read (end-of-file simply leaves
// has_next false).
[[nodiscard]]
std::expected<void, std::string> promote_next_frame(CameraStream &stream) {
    if (!stream.has_next) {
        return std::unexpected("no buffered next frame is available for " + stream.source.label);
    }
    // Swap instead of copy: sl::Mat buffers are reused across reads.
    using std::swap;
    swap(stream.current_timestamp_ns, stream.next_timestamp_ns);
    swap(stream.current_frame, stream.next_frame);
    stream.has_next = false;
    return fill_next_frame(stream);
}
// Open one SVO recording and probe its metadata: resolution, fps, frame
// count, and the timestamps of the first and last frames (used later to
// compute the common synced window across all four inputs).
//
// On success the camera's SVO position is left at the end of the file —
// main() re-seeks each stream to its synced start before exporting.
[[nodiscard]]
std::expected<CameraStream, std::string> open_camera_stream(const SourceSpec &source) {
    CameraStream stream{};
    stream.source = source;
    stream.camera = std::make_unique<sl::Camera>();
    sl::InitParameters init{};
    init.input.setFromSVOFile(source.path.c_str());
    init.svo_real_time_mode = false;  // decode as fast as possible, not at capture rate
    init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
    init.coordinate_units = sl::UNIT::METER;
    init.depth_mode = sl::DEPTH_MODE::NONE;  // image-only export; skip depth computation
    init.sdk_verbose = false;
    const auto open_status = stream.camera->open(init);
    if (open_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to open SVO '" + source.path.string() + "': " + zed_status_string(open_status));
    }
    const auto total_frames = stream.camera->getSVONumberOfFrames();
    if (total_frames <= 0) {
        return std::unexpected("input SVO has no frames: " + source.path.string());
    }
    stream.total_frames = static_cast<std::uint64_t>(total_frames);
    const auto camera_info = stream.camera->getCameraInformation().camera_configuration;
    stream.width = static_cast<std::uint32_t>(camera_info.resolution.width);
    stream.height = static_cast<std::uint32_t>(camera_info.resolution.height);
    stream.fps = camera_info.fps;
    stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps);
    if (stream.width == 0 || stream.height == 0) {
        return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
    }
    // Probe the first frame's timestamp (no fallback: a zero timestamp on the
    // very first frame is a hard error).
    std::uint64_t first_timestamp_ns = 0;
    auto first_frame = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.current_frame,
        std::nullopt,
        stream.nominal_frame_period_ns,
        first_timestamp_ns,
        source.label);
    if (!first_frame) {
        return std::unexpected(first_frame.error());
    }
    stream.first_timestamp_ns = first_timestamp_ns;
    // Seek to and probe the last frame's timestamp. This clobbers
    // current_frame, which is fine: main() seeks back before exporting.
    stream.camera->setSVOPosition(static_cast<int>(stream.total_frames - 1));
    std::uint64_t last_timestamp_ns = 0;
    auto last_frame = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.current_frame,
        std::nullopt,
        stream.nominal_frame_period_ns,
        last_timestamp_ns,
        source.label);
    if (!last_frame) {
        return std::unexpected(last_frame.error());
    }
    stream.last_timestamp_ns = last_timestamp_ns;
    return stream;
}
// Close every still-open ZED handle. Safe on partially-initialized lists
// (null or never-opened cameras are skipped).
void close_camera_streams(std::vector<CameraStream> &streams) {
    for (auto &stream : streams) {
        auto *camera = stream.camera.get();
        if (camera == nullptr) {
            continue;
        }
        if (camera->isOpened()) {
            camera->close();
        }
    }
}
} // namespace
// Entry point: parse CLI options, open the four inputs, compute the common
// synced time window across them, then resample every stream onto a shared
// timeline and emit a 2x2 grid MP4.
int main(int argc, char **argv) {
    // ---- CLI definition -------------------------------------------------
    CliOptions options{};
    CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"};
    auto *input_option = app.add_option("--input", options.input_paths, "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)");
    auto *segment_dir_option = app.add_option("--segment-dir", options.segment_dir, "Segment directory containing *_zed[1-4].svo or *_zed[1-4].svo2 files");
    // The two input modes are mutually exclusive.
    input_option->excludes(segment_dir_option);
    segment_dir_option->excludes(input_option);
    app.add_option("--output", options.output_path, "Output MP4 file");
    app.add_option("--codec", options.codec, "Video codec (h264|h265)")
        ->check(CLI::IsMember({"h264", "h265"}));
    app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
        ->check(CLI::IsMember({"auto", "nvidia", "software"}));
    app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
        ->check(CLI::IsMember({"fast", "balanced", "quality"}));
    app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
        ->check(CLI::IsMember({"low-latency", "balanced"}));
    app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
        ->check(CLI::Range(0, 51));
    app.add_option("--gop", options.gop, "Encoder GOP length in frames")
        ->check(CLI::PositiveNumber);
    app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
        ->check(CLI::NonNegativeNumber);
    app.add_option("--start-offset-seconds", options.start_offset_seconds, "Offset to apply after the synced common start time in seconds")
        ->check(CLI::NonNegativeNumber);
    auto *duration_option = app.add_option("--duration-seconds", options.duration_seconds, "Limit export duration in seconds after sync")
        ->check(CLI::PositiveNumber);
    auto *output_fps_option = app.add_option("--output-fps", options.output_fps, "Composite output frame rate (default: max input fps)")
        ->check(CLI::PositiveNumber);
    app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution")
        ->check(CLI::Range(0.1, 1.0));
    try {
        app.parse(argc, argv);
    } catch (const CLI::ParseError &error) {
        // CLI11 prints the usage message and supplies its own exit status.
        return app.exit(error);
    }
    // Distinguish "defaulted" from "explicitly passed" for optional limits.
    options.has_duration = duration_option->count() > 0;
    options.has_output_fps = output_fps_option->count() > 0;

    // ---- Option validation ----------------------------------------------
    if (options.input_paths.empty() && options.segment_dir.empty()) {
        spdlog::error("provide either --segment-dir or repeat --input exactly 4 times");
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.b_frames > options.gop) {
        spdlog::error(
            "invalid encoder config: b-frames {} must be <= gop {}",
            options.b_frames,
            options.gop);
        return exit_code(ToolExitCode::UsageError);
    }
    // Translate CLI strings into typed enums; any failure is a usage error.
    auto codec = parse_codec(options.codec);
    if (!codec) {
        spdlog::error("{}", codec.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto encoder_device = parse_encoder_device(options.encoder_device);
    if (!encoder_device) {
        spdlog::error("{}", encoder_device.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto preset = parse_preset(options.preset);
    if (!preset) {
        spdlog::error("{}", preset.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto tune = parse_tune(options.tune);
    if (!tune) {
        spdlog::error("{}", tune.error());
        return exit_code(ToolExitCode::UsageError);
    }

    // ---- Input resolution and output path -------------------------------
    auto sources = resolve_sources(options);
    if (!sources) {
        spdlog::error("{}", sources.error());
        return exit_code(ToolExitCode::UsageError);
    }
    const auto output_path = derive_grid_output_path(options, *sources);
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    const EncodeTuning tuning{
        .preset = *preset,
        .tune = *tune,
        .quality = options.quality,
        .gop = options.gop,
        .b_frames = options.b_frames,
    };

    // ---- Open all four streams ------------------------------------------
    std::vector<CameraStream> streams{};
    streams.reserve(sources->size());
    for (const auto &source : *sources) {
        auto stream = open_camera_stream(source);
        if (!stream) {
            // Close any cameras opened so far before bailing out.
            close_camera_streams(streams);
            spdlog::error("{}", stream.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        streams.push_back(std::move(*stream));
    }

    // ---- Synced time window ---------------------------------------------
    // Start: the latest first-frame timestamp across streams (plus any user
    // offset). End: the earliest last-frame timestamp. Only this window is
    // covered by all four recordings.
    const auto sync_start_ts = std::max_element(
        streams.begin(),
        streams.end(),
        [](const auto &left, const auto &right) {
            return left.first_timestamp_ns < right.first_timestamp_ns;
        })->first_timestamp_ns;
    const auto start_offset_ns = static_cast<std::uint64_t>(std::llround(options.start_offset_seconds * 1'000'000'000.0));
    const auto effective_start_ts = sync_start_ts + start_offset_ns;
    const auto common_end_ts = std::min_element(
        streams.begin(),
        streams.end(),
        [](const auto &left, const auto &right) {
            return left.last_timestamp_ns < right.last_timestamp_ns;
        })->last_timestamp_ns;
    // Exclusive end: +1 so the frame exactly at common_end_ts is included.
    const auto requested_end_exclusive_ts = options.has_duration
        ? effective_start_ts + static_cast<std::uint64_t>(std::llround(options.duration_seconds * 1'000'000'000.0))
        : common_end_ts + 1;
    const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1);
    if (effective_start_ts >= output_end_exclusive_ts) {
        close_camera_streams(streams);
        spdlog::error(
            "synced time window is empty: start_ts={} end_ts={}",
            effective_start_ts,
            output_end_exclusive_ts);
        return exit_code(ToolExitCode::UsageError);
    }

    // ---- Geometry / fps checks ------------------------------------------
    // All tiles must share one resolution; the composite runs at the fastest
    // input rate unless --output-fps overrides it.
    std::uint32_t source_width = streams.front().width;
    std::uint32_t source_height = streams.front().height;
    float max_input_fps = streams.front().fps;
    for (const auto &stream : streams) {
        if (stream.width != source_width || stream.height != source_height) {
            close_camera_streams(streams);
            spdlog::error(
                "all inputs must share the same resolution: expected {}x{}, got {}x{} for {}",
                source_width,
                source_height,
                stream.width,
                stream.height,
                stream.source.path.string());
            return exit_code(ToolExitCode::UsageError);
        }
        max_input_fps = std::max(max_input_fps, stream.fps);
    }
    const auto output_fps = options.has_output_fps ? static_cast<float>(options.output_fps) : max_input_fps;
    const auto output_period_ns = frame_period_ns(output_fps);
    // Ceiling division so a partial trailing period still emits a frame.
    const auto total_frames_to_emit =
        static_cast<std::uint64_t>((output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns);

    // ---- Seek every stream to the synced start ---------------------------
    for (auto &stream : streams) {
        stream.sync_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts});
        if (stream.sync_position < 0) {
            close_camera_streams(streams);
            spdlog::error(
                "failed to compute synced start frame for {} at timestamp {}",
                stream.source.path.string(),
                effective_start_ts);
            return exit_code(ToolExitCode::RuntimeError);
        }
        stream.camera->setSVOPosition(stream.sync_position);
        // Prime the two-slot buffer: read the current frame...
        std::uint64_t current_timestamp_ns = 0;
        auto current = read_into_mat(
            *stream.camera,
            stream.runtime,
            stream.current_frame,
            std::nullopt,
            stream.nominal_frame_period_ns,
            current_timestamp_ns,
            stream.source.label);
        if (!current) {
            close_camera_streams(streams);
            spdlog::error("{}", current.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        stream.current_timestamp_ns = current_timestamp_ns;
        // ...then the lookahead frame.
        auto next = fill_next_frame(stream);
        if (!next) {
            close_camera_streams(streams);
            spdlog::error("{}", next.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Step forward until current_frame is at or past the synced start (the
        // SDK seek may land slightly before it).
        while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) {
            auto promote = promote_next_frame(stream);
            if (!promote) {
                close_camera_streams(streams);
                spdlog::error("{}", promote.error());
                return exit_code(ToolExitCode::RuntimeError);
            }
        }
        spdlog::info(
            "ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}",
            stream.source.path.string(),
            stream.source.label,
            stream.sync_position,
            stream.first_timestamp_ns,
            stream.current_timestamp_ns,
            stream.has_next ? stream.next_timestamp_ns : 0);
    }

    // ---- Composite geometry and encoder setup ----------------------------
    const auto tile_width = static_cast<int>(std::llround(static_cast<double>(source_width) * options.tile_scale));
    const auto tile_height = static_cast<int>(std::llround(static_cast<double>(source_height) * options.tile_scale));
    if (tile_width <= 0 || tile_height <= 0) {
        close_camera_streams(streams);
        spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale);
        return exit_code(ToolExitCode::UsageError);
    }
    // 2x2 grid.
    const auto composite_width = tile_width * 2;
    const auto composite_height = tile_height * 2;
    Mp4Writer writer{};
    if (auto open_writer = writer.open(
            output_path,
            *codec,
            *encoder_device,
            static_cast<std::uint32_t>(composite_width),
            static_cast<std::uint32_t>(composite_height),
            output_fps,
            tuning);
        !open_writer) {
        close_camera_streams(streams);
        spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
        return exit_code(ToolExitCode::RuntimeError);
    }

    // ---- Export loop ------------------------------------------------------
    cv::Mat composite(composite_height, composite_width, CV_8UC3);
    std::vector<cv::Mat> resized_tiles(streams.size());
    ProgressBar progress{total_frames_to_emit};
    for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) {
        const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns;
        if (target_timestamp_ns >= output_end_exclusive_ts) {
            break;
        }
        // Advance each stream until its lookahead is past the target, leaving
        // current_frame as the latest frame at-or-before the target timestamp.
        for (auto &stream : streams) {
            while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) {
                auto promote = promote_next_frame(stream);
                if (!promote) {
                    progress.finish(emitted_frames, false);
                    close_camera_streams(streams);
                    spdlog::error("{}", promote.error());
                    return exit_code(ToolExitCode::RuntimeError);
                }
            }
        }
        composite.setTo(cv::Scalar(0, 0, 0));
        for (std::size_t index = 0; index < streams.size(); ++index) {
            auto &stream = streams[index];
            // Wrap the ZED CPU buffer as an OpenCV view (no copy), then
            // downscale into the stream's grid cell (row-major order).
            cv::Mat source_view(
                static_cast<int>(stream.current_frame.getHeight()),
                static_cast<int>(stream.current_frame.getWidth()),
                CV_8UC3,
                stream.current_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
                stream.current_frame.getStepBytes(sl::MEM::CPU));
            cv::resize(source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0, cv::INTER_AREA);
            const int row = static_cast<int>(index / 2);
            const int col = static_cast<int>(index % 2);
            const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height};
            resized_tiles[index].copyTo(composite(roi));
        }
        draw_timestamp_overlay(composite, target_timestamp_ns);
        // PTS is relative to the start of the export window.
        if (auto write = writer.write_bgr_frame(
                composite.data,
                static_cast<std::size_t>(composite.step),
                target_timestamp_ns - effective_start_ts);
            !write) {
            progress.finish(emitted_frames, false);
            close_camera_streams(streams);
            spdlog::error("failed to encode or mux frame: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        progress.update(emitted_frames + 1);
    }

    // ---- Finalize ----------------------------------------------------------
    if (auto flush = writer.flush(); !flush) {
        progress.finish(total_frames_to_emit, false);
        close_camera_streams(streams);
        spdlog::error("failed to finalize MP4 output: {}", flush.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    progress.finish(total_frames_to_emit, true);
    close_camera_streams(streams);
    spdlog::info(
        "converted {} synced frames to '{}' using codec={} hardware={}",
        total_frames_to_emit,
        output_path.string(),
        cvmmap_streamer::zed_tools::codec_name(*codec),
        writer.using_hardware());
    return exit_code(ToolExitCode::Success);
}
+785
View File
@@ -0,0 +1,785 @@
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <spdlog/spdlog.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>
#include <libswscale/swscale.h>
}
#include <chrono>
#include <cmath>
#include <cstdio>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include <unistd.h>
namespace cvmmap_streamer::zed_tools {
namespace {
// One FFmpeg encoder to try: its avcodec name, whether it is a hardware
// (NVENC) encoder, and the pixel format frames must be converted to.
struct EncoderCandidate {
    std::string name{};
    bool using_hardware{false};
    AVPixelFormat pixel_format{AV_PIX_FMT_NONE};
};
// The encoder configuration that was actually applied, kept alongside the
// originally requested preset/tune names for diagnostics.
struct ResolvedEncoderSettings {
    std::string requested_preset{};                  // preset name as requested
    std::string requested_tune{};                    // tune name as requested
    std::string mapped_preset{};                     // encoder-specific preset token (e.g. "p4", "medium")
    std::optional<std::string> mapped_tune{};        // encoder-specific tune token, when one applies
    std::optional<std::string> rate_control_mode{};  // set to "vbr" for NVENC; unset for software encoders
    std::string quality_key{};                       // "cq" (NVENC) or "crf" (x264/x265)
    int quality_value{kDefaultQuality};
    std::uint32_t gop{kDefaultGopSize};
    std::uint32_t b_frames{kDefaultBFrames};
};
[[nodiscard]]
std::string av_error_string(const int error_code) {
char buffer[AV_ERROR_MAX_STRING_SIZE]{};
av_strerror(error_code, buffer, sizeof(buffer));
return std::string(buffer);
}
// Map the tool's codec enum onto the corresponding FFmpeg codec id.
[[nodiscard]]
AVCodecID codec_id(const CodecType codec) {
    if (codec == CodecType::H265) {
        return AV_CODEC_ID_HEVC;
    }
    return AV_CODEC_ID_H264;
}
// Express fps as an AVRational with millihertz precision, falling back to
// 30/1 for non-positive (or NaN) inputs and degenerate rounding results.
[[nodiscard]]
AVRational frame_rate_rational(const float fps) {
    constexpr AVRational kFallback{30, 1};
    // Negated comparison also rejects NaN.
    if (!(fps > 0.0f)) {
        return kFallback;
    }
    const auto millihertz = static_cast<int>(std::llround(static_cast<double>(fps) * 1000.0));
    if (millihertz <= 0) {
        return kFallback;
    }
    return AVRational{millihertz, 1000};
}
// Format a duration in seconds as "MM:SS", or "HH:MM:SS" once it reaches an
// hour. Negative inputs clamp to zero; fractions round to the nearest second.
[[nodiscard]]
std::string format_duration(const double seconds_raw) {
    long long total = 0;
    if (seconds_raw > 0.0) {
        total = static_cast<long long>(std::llround(seconds_raw));
    }
    const long long hours = total / 3600;
    const long long minutes = (total / 60) % 60;
    const long long secs = total % 60;
    char text[32] = {};
    if (hours > 0) {
        std::snprintf(text, sizeof(text), "%02lld:%02lld:%02lld", hours, minutes, secs);
    } else {
        std::snprintf(text, sizeof(text), "%02lld:%02lld", minutes, secs);
    }
    return std::string{text};
}
[[nodiscard]]
std::vector<EncoderCandidate> encoder_candidates(const CodecType codec, const EncoderDeviceType device) {
const std::string hardware_name = codec == CodecType::H265 ? "hevc_nvenc" : "h264_nvenc";
const std::string software_name = codec == CodecType::H265 ? "libx265" : "libx264";
switch (device) {
case EncoderDeviceType::Auto:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
case EncoderDeviceType::Nvidia:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
};
case EncoderDeviceType::Software:
return {
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
}
return {};
}
// Map the abstract preset onto the token the selected encoder understands:
// NVENC uses p1/p4/p7, the software encoders use x264/x265 preset names.
[[nodiscard]]
std::string mapped_preset_value(const EncoderCandidate &candidate, const PresetKind preset) {
    if (candidate.using_hardware) {
        switch (preset) {
        case PresetKind::Fast: return "p1";
        case PresetKind::Balanced: return "p4";
        case PresetKind::Quality: return "p7";
        }
    }
    switch (preset) {
    case PresetKind::Fast: return "veryfast";
    case PresetKind::Balanced: return "medium";
    case PresetKind::Quality: return "slow";
    }
    // Unreachable for valid enum values; keeps control flow well-defined.
    return "veryfast";
}
// Map the abstract tune onto the encoder's tune token. NVENC always has a
// mapping (ull/hq); among the software encoders only libx264 takes a tune,
// and only for low-latency ("zerolatency").
[[nodiscard]]
std::optional<std::string> mapped_tune_value(const EncoderCandidate &candidate, const TuneKind tune) {
    const bool low_latency = tune == TuneKind::LowLatency;
    if (candidate.using_hardware) {
        return std::optional<std::string>{low_latency ? "ull" : "hq"};
    }
    if (low_latency && candidate.name == "libx264") {
        return std::optional<std::string>{"zerolatency"};
    }
    return std::nullopt;
}
// Extra libx265-only parameter string; other encoders get no value.
// Headers are always repeated; low-latency tuning also disables scene-cut.
[[nodiscard]]
std::optional<std::string> x265_params_value(const EncoderCandidate &candidate, const TuneKind tune) {
    if (candidate.name != "libx265") {
        return std::nullopt;
    }
    std::string params{"repeat-headers=1"};
    if (tune == TuneKind::LowLatency) {
        params += ":scenecut=0";
    }
    return params;
}
// Set a string-valued private option on the encoder context, translating an
// AVOptions failure into a descriptive error message.
[[nodiscard]]
std::expected<void, std::string> set_string_option(AVCodecContext *context, const char *key, const std::string &value) {
    const auto status = av_opt_set(context->priv_data, key, value.c_str(), 0);
    if (status >= 0) {
        return {};
    }
    return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + value + "': " + av_error_string(status));
}
// Set an integer-valued private option on the encoder context, translating
// an AVOptions failure into a descriptive error message.
[[nodiscard]]
std::expected<void, std::string> set_int_option(AVCodecContext *context, const char *key, const std::int64_t value) {
    const auto status = av_opt_set_int(context->priv_data, key, value, 0);
    if (status >= 0) {
        return {};
    }
    return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + std::to_string(value) + "': " + av_error_string(status));
}
// Fill `context` with stream geometry and encoder-private options for the
// chosen candidate, returning the settings that were actually applied.
//
// Timestamps are fed to the encoder in nanoseconds (time_base =
// 1/kNanosPerSecond). Hardware (NVENC) candidates use rc=vbr with a cq
// quality target; software candidates use crf. Low-latency tuning disables
// lookahead (NVENC) / scene-cut (x265) on top of the mapped tune token.
[[nodiscard]]
std::expected<ResolvedEncoderSettings, std::string> configure_codec_context(
    AVCodecContext *context,
    const EncoderCandidate &candidate,
    const CodecType codec,
    const std::uint32_t width,
    const std::uint32_t height,
    const AVRational framerate,
    const EncodeTuning &tuning) {
    context->codec_type = AVMEDIA_TYPE_VIDEO;
    context->codec_id = codec_id(codec);
    context->width = static_cast<int>(width);
    context->height = static_cast<int>(height);
    context->pix_fmt = candidate.pixel_format;
    // Emit codec parameters as global extradata (needed by MP4-style muxers).
    context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    context->time_base = AVRational{1, static_cast<int>(kNanosPerSecond)};
    context->framerate = framerate;
    context->gop_size = static_cast<int>(tuning.gop);
    context->max_b_frames = static_cast<int>(tuning.b_frames);
    context->thread_count = 1;
    ResolvedEncoderSettings resolved{
        .requested_preset = std::string(preset_name(tuning.preset)),
        .requested_tune = std::string(tune_name(tuning.tune)),
        .mapped_preset = mapped_preset_value(candidate, tuning.preset),
        .mapped_tune = mapped_tune_value(candidate, tuning.tune),
        .quality_value = tuning.quality,
        .gop = tuning.gop,
        .b_frames = tuning.b_frames,
    };
    if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) {
        return std::unexpected(set.error());
    }
    if (resolved.mapped_tune) {
        if (auto set = set_string_option(context, "tune", *resolved.mapped_tune); !set) {
            return std::unexpected(set.error());
        }
    }
    if (candidate.using_hardware) {
        // NVENC: VBR rate control with a constant-quality (cq) target.
        resolved.rate_control_mode = "vbr";
        resolved.quality_key = "cq";
        if (auto set = set_string_option(context, "rc", *resolved.rate_control_mode); !set) {
            return std::unexpected(set.error());
        }
        if (auto set = set_int_option(context, "cq", resolved.quality_value); !set) {
            return std::unexpected(set.error());
        }
        if (tuning.tune == TuneKind::LowLatency) {
            if (auto set = set_string_option(context, "zerolatency", "1"); !set) {
                return std::unexpected(set.error());
            }
            if (auto set = set_string_option(context, "rc-lookahead", "0"); !set) {
                return std::unexpected(set.error());
            }
        }
    } else {
        // x264/x265: CRF quality; x265 additionally takes an explicit
        // parameter string (see x265_params_value).
        resolved.quality_key = "crf";
        if (auto set = set_int_option(context, "crf", resolved.quality_value); !set) {
            return std::unexpected(set.error());
        }
        if (const auto x265_params = x265_params_value(candidate, tuning.tune); x265_params) {
            if (auto set = set_string_option(context, "x265-params", *x265_params); !set) {
                return std::unexpected(set.error());
            }
        }
    }
    if (auto set = set_int_option(context, "forced-idr", 1); !set) {
        return std::unexpected(set.error());
    }
    return resolved;
}
// Result of open_encoder: an opened codec context (ownership passes to the
// caller, who must eventually avcodec_free_context it) plus the candidate
// that won and the settings that were applied.
struct OpenedEncoder {
    AVCodecContext *context{nullptr};
    EncoderCandidate candidate{};
    ResolvedEncoderSettings resolved{};
};
// Try each encoder candidate in priority order until one opens.
//
// In Auto mode an unavailable or failing candidate logs a warning and the
// loop continues to the next one; for an explicit device selection the
// first failure is returned immediately. The last failure message is kept
// so the final error is specific rather than generic.
[[nodiscard]]
std::expected<OpenedEncoder, std::string> open_encoder(
    const CodecType codec,
    const EncoderDeviceType device,
    const std::uint32_t width,
    const std::uint32_t height,
    const AVRational framerate,
    const EncodeTuning &tuning) {
    std::string last_error{};
    for (const auto &candidate : encoder_candidates(codec, device)) {
        const auto *encoder = avcodec_find_encoder_by_name(candidate.name.c_str());
        if (encoder == nullptr) {
            last_error = "FFmpeg encoder '" + candidate.name + "' is unavailable";
            if (device == EncoderDeviceType::Auto) {
                spdlog::warn(
                    "encoder '{}' unavailable for codec={} in auto mode, trying next candidate",
                    candidate.name,
                    codec_name(codec));
                continue;
            }
            return std::unexpected(last_error);
        }
        auto *context = avcodec_alloc_context3(encoder);
        if (context == nullptr) {
            return std::unexpected("failed to allocate FFmpeg encoder context");
        }
        auto resolved = configure_codec_context(context, candidate, codec, width, height, framerate, tuning);
        if (!resolved) {
            // Configuration errors are not retried — they indicate bad inputs,
            // not an unavailable device.
            avcodec_free_context(&context);
            return std::unexpected(resolved.error());
        }
        const auto open_result = avcodec_open2(context, encoder, nullptr);
        if (open_result < 0) {
            last_error = "failed to open FFmpeg encoder '" + candidate.name + "': " + av_error_string(open_result);
            avcodec_free_context(&context);
            if (device == EncoderDeviceType::Auto) {
                spdlog::warn(
                    "encoder '{}' failed to open in auto mode: {}. trying software fallback",
                    candidate.name,
                    av_error_string(open_result));
                continue;
            }
            return std::unexpected(last_error);
        }
        // Success: hand the opened context to the caller.
        return OpenedEncoder{
            .context = context,
            .candidate = candidate,
            .resolved = std::move(*resolved),
        };
    }
    if (last_error.empty()) {
        last_error = "no usable FFmpeg encoder candidates were configured";
    }
    return std::unexpected(last_error);
}
} // namespace
// Terminal progress renderer behind ProgressBar. Draws a single carriage-
// return-refreshed status line on stderr; inert when stderr is not a TTY or
// the expected frame count is zero.
struct ProgressBar::Impl {
  using Clock = std::chrono::steady_clock;

  explicit Impl(const std::uint64_t total_frames_arg)
      : total_frames(total_frames_arg),
        enabled(::isatty(STDERR_FILENO) == 1),
        started_at(Clock::now()),
        last_render_at(started_at) {}

  // Redraw the status line. Unless `force` is set, redraws after the first
  // one are throttled to at most one every 125 ms.
  void render(const std::uint64_t completed_frames, const bool force) {
    if (!enabled || total_frames == 0) {
      return;
    }
    const auto now = Clock::now();
    const bool throttled = rendered && now - last_render_at < std::chrono::milliseconds(125);
    if (throttled && !force) {
      return;
    }
    last_render_at = now;
    rendered = true;
    // Clamp so a caller reporting more frames than expected cannot push the
    // bar past 100%.
    const auto done = std::min(completed_frames, total_frames);
    const double ratio = static_cast<double>(done) / static_cast<double>(total_frames);
    const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
    std::string bar(filled, '#');
    bar.append(24 - filled, '-');
    const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
    const double fps = elapsed_seconds > 0.0 ? static_cast<double>(done) / elapsed_seconds : 0.0;
    const double eta_seconds = fps > 0.0 ? static_cast<double>(total_frames - done) / fps : 0.0;
    char line[256]{};
    std::snprintf(
        line,
        sizeof(line),
        "\r[%s] %6.2f%% %llu/%llu | %5.1f fps | %s elapsed | %s ETA\x1b[K",
        bar.c_str(),
        ratio * 100.0,
        static_cast<unsigned long long>(done),
        static_cast<unsigned long long>(total_frames),
        fps,
        format_duration(elapsed_seconds).c_str(),
        format_duration(eta_seconds).c_str());
    std::fprintf(stderr, "%s", line);
    std::fflush(stderr);
  }

  std::uint64_t total_frames{0};
  bool enabled{false};        // true only when stderr is an interactive terminal
  bool rendered{false};       // at least one line has been drawn
  Clock::time_point started_at{};
  Clock::time_point last_render_at{};
};
// Pimpl backend for Mp4Writer. Owns every FFmpeg object involved in encoding
// and muxing (encoder context, format/muxer context, swscale converter,
// scratch frame and packet) and releases them all in close(); the destructor
// guarantees cleanup even on early-error paths.
struct Mp4Writer::Impl {
  // Initialize the encoder, the BGR24->encoder-format scaler, and the MP4
  // muxer for a stream of width x height at `fps`. Any previously-open output
  // is torn down first. On an error return, partially-built state remains in
  // the members and is reclaimed by the next close() / the destructor.
  [[nodiscard]]
  std::expected<void, std::string> open(
      const std::filesystem::path &output_path,
      const CodecType codec_arg,
      const EncoderDeviceType encoder_device,
      const std::uint32_t width,
      const std::uint32_t height,
      const float fps,
      const EncodeTuning &tuning) {
    close();
    codec = codec_arg;
    frame_rate = frame_rate_rational(fps);
    auto encoder = open_encoder(codec, encoder_device, width, height, frame_rate, tuning);
    if (!encoder) {
      return std::unexpected(encoder.error());
    }
    encoder_context = encoder->context;
    encoder_name = encoder->candidate.name;
    using_hardware = encoder->candidate.using_hardware;
    encoder_pixel_format = encoder->candidate.pixel_format;
    resolved_settings = std::move(encoder->resolved);
    // Same resolution in and out — the scaler only converts pixel format.
    scaler = sws_getCachedContext(
        nullptr,
        static_cast<int>(width),
        static_cast<int>(height),
        AV_PIX_FMT_BGR24,
        static_cast<int>(width),
        static_cast<int>(height),
        encoder_pixel_format,
        SWS_BILINEAR,
        nullptr,
        nullptr,
        nullptr);
    if (scaler == nullptr) {
      return std::unexpected("failed to create swscale conversion context");
    }
    frame = av_frame_alloc();
    if (frame == nullptr) {
      return std::unexpected("failed to allocate FFmpeg frame");
    }
    frame->format = encoder_pixel_format;
    frame->width = encoder_context->width;
    frame->height = encoder_context->height;
    // 32-byte alignment for SIMD-friendly plane strides.
    const auto frame_buffer_result = av_frame_get_buffer(frame, 32);
    if (frame_buffer_result < 0) {
      return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer_result));
    }
    packet = av_packet_alloc();
    if (packet == nullptr) {
      return std::unexpected("failed to allocate FFmpeg packet");
    }
    const auto alloc_result = avformat_alloc_output_context2(
        &format_context,
        nullptr,
        "mp4",
        output_path.string().c_str());
    if (alloc_result < 0 || format_context == nullptr) {
      return std::unexpected("failed to allocate MP4 output context: " + av_error_string(alloc_result));
    }
    video_stream = avformat_new_stream(format_context, nullptr);
    if (video_stream == nullptr) {
      return std::unexpected("failed to allocate MP4 video stream");
    }
    video_stream->time_base = encoder_context->time_base;
    video_stream->avg_frame_rate = frame_rate;
    const auto params_result = avcodec_parameters_from_context(video_stream->codecpar, encoder_context);
    if (params_result < 0) {
      return std::unexpected("failed to copy encoder parameters into MP4 stream: " + av_error_string(params_result));
    }
    // mp4 is a file-backed muxer, so AVFMT_NOFILE is normally unset and we
    // open the output I/O context ourselves.
    if ((format_context->oformat->flags & AVFMT_NOFILE) == 0) {
      const auto open_result = avio_open2(
          &format_context->pb,
          output_path.string().c_str(),
          AVIO_FLAG_WRITE,
          nullptr,
          nullptr);
      if (open_result < 0) {
        return std::unexpected("failed to open output MP4 '" + output_path.string() + "': " + av_error_string(open_result));
      }
    }
    // +faststart relocates the moov atom to the front for streamable playback.
    AVDictionary *muxer_options = nullptr;
    av_dict_set(&muxer_options, "movflags", "+faststart", 0);
    const auto header_result = avformat_write_header(format_context, &muxer_options);
    av_dict_free(&muxer_options);
    if (header_result < 0) {
      return std::unexpected("failed to write MP4 header: " + av_error_string(header_result));
    }
    // One-line machine-greppable summary of the negotiated encoder setup.
    spdlog::info(
        "ZED_SVO_MP4_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} requested_preset={} requested_tune={} mapped_preset={} mapped_tune={} rc={} {}={} gop={} b_frames={} output={}",
        codec_name(codec),
        encoder_name,
        using_hardware,
        width,
        height,
        frame_rate.num,
        frame_rate.den,
        resolved_settings.requested_preset,
        resolved_settings.requested_tune,
        resolved_settings.mapped_preset,
        resolved_settings.mapped_tune.value_or("none"),
        resolved_settings.rate_control_mode.value_or("auto"),
        resolved_settings.quality_key,
        resolved_settings.quality_value,
        resolved_settings.gop,
        resolved_settings.b_frames,
        output_path.string());
    return {};
  }
  // Convert one BGR24 frame to the encoder pixel format, submit it, and mux
  // any packets the encoder emits. `relative_timestamp_ns` is used directly
  // as the frame pts — assumes the encoder time_base is nanoseconds, as set
  // up in configure_codec_context — TODO confirm.
  [[nodiscard]]
  std::expected<void, std::string> write_bgr_frame(
      const std::uint8_t *data,
      const std::size_t row_stride_bytes,
      const std::uint64_t relative_timestamp_ns) {
    if (encoder_context == nullptr || frame == nullptr || scaler == nullptr || packet == nullptr || video_stream == nullptr) {
      return std::unexpected("MP4 writer is not initialized");
    }
    // The frame buffer may still be referenced by the encoder; ensure we own
    // a writable copy before scaling into it.
    const auto writable_result = av_frame_make_writable(frame);
    if (writable_result < 0) {
      return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(writable_result));
    }
    // BGR24 is a single packed plane; remaining plane slots stay null.
    const std::uint8_t *source_planes[4]{data, nullptr, nullptr, nullptr};
    const int source_strides[4]{static_cast<int>(row_stride_bytes), 0, 0, 0};
    sws_scale(
        scaler,
        source_planes,
        source_strides,
        0,
        encoder_context->height,
        frame->data,
        frame->linesize);
    frame->pts = static_cast<std::int64_t>(relative_timestamp_ns);
    const auto send_result = avcodec_send_frame(encoder_context, frame);
    if (send_result < 0) {
      return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result));
    }
    return drain_packets();
  }
  // Flush the encoder's delayed frames, mux them, and finalize the MP4
  // trailer. Safe to call on a writer that was never opened.
  [[nodiscard]]
  std::expected<void, std::string> flush() {
    if (encoder_context == nullptr) {
      return {};
    }
    // Sending a null frame switches the encoder into drain mode.
    const auto flush_result = avcodec_send_frame(encoder_context, nullptr);
    if (flush_result < 0 && flush_result != AVERROR_EOF) {
      return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result));
    }
    auto drained = drain_packets();
    if (!drained) {
      return drained;
    }
    return close_output();
  }
  // Pull every pending packet from the encoder and interleave it into the
  // muxer, rescaling timestamps from encoder to stream time base.
  [[nodiscard]]
  std::expected<void, std::string> drain_packets() {
    while (true) {
      const auto receive_result = avcodec_receive_packet(encoder_context, packet);
      if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) {
        break;  // encoder needs more input, or drain is complete
      }
      if (receive_result < 0) {
        return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result));
      }
      packet->stream_index = video_stream->index;
      av_packet_rescale_ts(packet, encoder_context->time_base, video_stream->time_base);
      const auto write_result = av_interleaved_write_frame(format_context, packet);
      av_packet_unref(packet);
      if (write_result < 0) {
        return std::unexpected("failed to write MP4 packet: " + av_error_string(write_result));
      }
    }
    return {};
  }
  // Write the MP4 trailer exactly once; idempotent and a no-op when no
  // output was ever opened.
  [[nodiscard]]
  std::expected<void, std::string> close_output() {
    if (format_context == nullptr || trailer_written) {
      return {};
    }
    const auto trailer_result = av_write_trailer(format_context);
    if (trailer_result < 0) {
      return std::unexpected("failed to write MP4 trailer: " + av_error_string(trailer_result));
    }
    trailer_written = true;
    return {};
  }
  // Release every FFmpeg resource and reset the writer to its pristine
  // state. A best-effort trailer write happens first; its error is ignored
  // because close() must not fail (it runs from the destructor).
  void close() {
    (void)close_output();
    if (packet != nullptr) {
      av_packet_free(&packet);
    }
    if (frame != nullptr) {
      av_frame_free(&frame);
    }
    if (encoder_context != nullptr) {
      avcodec_free_context(&encoder_context);
    }
    if (scaler != nullptr) {
      sws_freeContext(scaler);
      scaler = nullptr;
    }
    if (format_context != nullptr) {
      if ((format_context->oformat->flags & AVFMT_NOFILE) == 0 && format_context->pb != nullptr) {
        avio_closep(&format_context->pb);
      }
      avformat_free_context(format_context);
      format_context = nullptr;
    }
    // video_stream is owned by format_context; just drop the pointer.
    video_stream = nullptr;
    encoder_name.clear();
    using_hardware = false;
    trailer_written = false;
    resolved_settings = ResolvedEncoderSettings{};
  }
  ~Impl() {
    close();
  }
  CodecType codec{CodecType::H265};
  AVCodecContext *encoder_context{nullptr};  // owned
  AVFormatContext *format_context{nullptr};  // owned; also owns video_stream
  AVStream *video_stream{nullptr};           // non-owning view into format_context
  AVFrame *frame{nullptr};                   // owned scratch frame for conversion
  AVPacket *packet{nullptr};                 // owned scratch packet for draining
  SwsContext *scaler{nullptr};               // owned BGR24->encoder-format converter
  AVPixelFormat encoder_pixel_format{AV_PIX_FMT_NONE};
  AVRational frame_rate{30, 1};
  std::string encoder_name{};
  ResolvedEncoderSettings resolved_settings{};
  bool using_hardware{false};
  bool trailer_written{false};  // guards against double av_write_trailer
};
// Map a CLI codec token onto CodecType; unknown tokens yield a usage error.
std::expected<CodecType, std::string> parse_codec(const std::string_view raw) {
  constexpr std::pair<std::string_view, CodecType> kCodecs[]{
      {"h264", CodecType::H264},
      {"h265", CodecType::H265},
  };
  for (const auto &[token, value] : kCodecs) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
}
// Map a CLI encoder-device token onto EncoderDeviceType.
std::expected<EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
  constexpr std::pair<std::string_view, EncoderDeviceType> kDevices[]{
      {"auto", EncoderDeviceType::Auto},
      {"nvidia", EncoderDeviceType::Nvidia},
      {"software", EncoderDeviceType::Software},
  };
  for (const auto &[token, value] : kDevices) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
}
// Map a CLI preset token onto PresetKind.
std::expected<PresetKind, std::string> parse_preset(const std::string_view raw) {
  constexpr std::pair<std::string_view, PresetKind> kPresets[]{
      {"fast", PresetKind::Fast},
      {"balanced", PresetKind::Balanced},
      {"quality", PresetKind::Quality},
  };
  for (const auto &[token, value] : kPresets) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid preset: '" + std::string(raw) + "' (expected: fast|balanced|quality)");
}
// Map a CLI tune token onto TuneKind.
std::expected<TuneKind, std::string> parse_tune(const std::string_view raw) {
  constexpr std::pair<std::string_view, TuneKind> kTunes[]{
      {"low-latency", TuneKind::LowLatency},
      {"balanced", TuneKind::Balanced},
  };
  for (const auto &[token, value] : kTunes) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid tune: '" + std::string(raw) + "' (expected: low-latency|balanced)");
}
// Human-readable codec token as used in logs and CLI help.
std::string_view codec_name(const CodecType codec) {
  if (codec == CodecType::H265) {
    return "h265";
  }
  return "h264";
}
// Human-readable preset token; out-of-range enum values fall back to "fast".
std::string_view preset_name(const PresetKind preset) {
  if (preset == PresetKind::Balanced) {
    return "balanced";
  }
  if (preset == PresetKind::Quality) {
    return "quality";
  }
  // PresetKind::Fast, plus the defensive default.
  return "fast";
}
// Human-readable tune token; out-of-range enum values fall back to
// "low-latency".
std::string_view tune_name(const TuneKind tune) {
  if (tune == TuneKind::Balanced) {
    return "balanced";
  }
  // TuneKind::LowLatency, plus the defensive default.
  return "low-latency";
}
// Nanoseconds between frames at the given rate. Non-positive or NaN fps
// falls back to the ~30 fps default period (33,333,333 ns).
std::uint64_t frame_period_ns(const float fps) {
  if (fps > 0.0f) {
    const double period = 1'000'000'000.0 / static_cast<double>(fps);
    return static_cast<std::uint64_t>(std::llround(period));
  }
  return 33'333'333ull;
}
// Default MP4 destination: same directory and stem as the input, with the
// extension swapped for ".mp4".
std::filesystem::path derive_output_path(const std::filesystem::path &input_path) {
  std::filesystem::path result{input_path};
  result.replace_extension(".mp4");
  return result;
}
// Construct a progress bar that expects `total_frames` updates; whether it
// actually renders is decided inside Impl (stderr must be a TTY).
ProgressBar::ProgressBar(const std::uint64_t total_frames)
    : impl_(std::make_unique<Impl>(total_frames)) {}
// Out-of-line so ~Impl is instantiated where Impl is a complete type.
ProgressBar::~ProgressBar() = default;
// Throttled redraw; cheap enough to call once per processed frame.
void ProgressBar::update(const std::uint64_t completed_frames) {
  impl_->render(completed_frames, false);
}
// Force a final render and terminate the status line — newline on success,
// a " [failed]" marker otherwise. Silent when the bar never rendered, when
// stderr is not a TTY, or on a moved-from object (null impl_).
void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) {
  if (impl_ == nullptr) {
    return;
  }
  if (!impl_->enabled) {
    return;
  }
  impl_->render(completed_frames, true);
  if (impl_->rendered) {
    const char *tail = success ? "\n" : " [failed]\n";
    std::fprintf(stderr, "%s", tail);
    std::fflush(stderr);
  }
}
Mp4Writer::Mp4Writer()
    : impl_(std::make_unique<Impl>()) {}
// Move-only: a moved-from writer holds a null impl_ (see using_hardware()).
Mp4Writer::Mp4Writer(Mp4Writer &&) noexcept = default;
Mp4Writer &Mp4Writer::operator=(Mp4Writer &&) noexcept = default;
Mp4Writer::~Mp4Writer() = default;
// Thin forwarding wrappers — see Mp4Writer::Impl for the actual semantics.
std::expected<void, std::string> Mp4Writer::open(
    const std::filesystem::path &output_path,
    const CodecType codec,
    const EncoderDeviceType encoder_device,
    const std::uint32_t width,
    const std::uint32_t height,
    const float fps,
    const EncodeTuning &tuning) {
  return impl_->open(output_path, codec, encoder_device, width, height, fps, tuning);
}
std::expected<void, std::string> Mp4Writer::write_bgr_frame(
    const std::uint8_t *data,
    const std::size_t row_stride_bytes,
    const std::uint64_t relative_timestamp_ns) {
  return impl_->write_bgr_frame(data, row_stride_bytes, relative_timestamp_ns);
}
std::expected<void, std::string> Mp4Writer::flush() {
  return impl_->flush();
}
// False for a moved-from writer as well as for software encoders.
bool Mp4Writer::using_hardware() const {
  return impl_ != nullptr && impl_->using_hardware;
}
} // namespace cvmmap_streamer::zed_tools
+319
View File
@@ -0,0 +1,319 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <sl/Camera.hpp>
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <cstdint>
#include <expected>
#include <filesystem>
#include <optional>
#include <string>
#include <utility>
namespace {
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::derive_output_path;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;
// Process exit statuses reported by this tool: 0 on success, 2 for bad CLI
// usage, 3 for runtime failures (SDK, encoder, or I/O errors).
enum class ToolExitCode : int {
  Success = 0,
  UsageError = 2,
  RuntimeError = 3,
};
// Raw values collected from the command line before validation/parsing into
// typed settings.
struct CliOptions {
  std::string input_path{};     // required SVO/SVO2 source file
  std::string output_path{};    // empty -> derived from input_path with .mp4
  std::string codec{"h265"};
  std::string encoder_device{"auto"};
  std::string preset{"fast"};
  std::string tune{"low-latency"};
  int quality{cvmmap_streamer::zed_tools::kDefaultQuality};
  std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};
  std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames};
  std::uint32_t start_frame{0};  // first SVO frame to export (inclusive)
  std::uint32_t end_frame{0};    // last SVO frame (inclusive); only valid if has_end_frame
  bool has_end_frame{false};     // true when --end-frame was passed explicitly
};
// Convert a ToolExitCode to the int returned from main().
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
  return static_cast<int>(code);
}
// Copy an sl::String into std::string; a null c_str() maps to "".
[[nodiscard]]
std::string zed_string(const sl::String &value) {
  const char *raw = value.c_str();
  if (raw == nullptr) {
    return {};
  }
  return std::string{raw};
}
// Render a ZED SDK error code as a printable string for log messages.
[[nodiscard]]
std::string zed_status_string(const sl::ERROR_CODE code) {
  return zed_string(sl::toString(code));
}
// Sanity-check that a retrieved image is something the BGR encoder path can
// consume: packed 3-channel 8-bit, non-empty, with a valid CPU buffer.
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
  const std::string name{label};
  if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
    return std::unexpected(name + " must be U8_C3");
  }
  const bool empty_dims = mat.getWidth() == 0 || mat.getHeight() == 0;
  if (empty_dims) {
    return std::unexpected(name + " dimensions must be non-zero");
  }
  if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
    return std::unexpected(name + " CPU buffer is null");
  }
  return {};
}
} // namespace
// Entry point: parse CLI options, open the SVO via the ZED SDK, and encode
// the left view of each frame in the requested range into an MP4 file.
// Returns ToolExitCode values (0 success, 2 usage error, 3 runtime error).
int main(int argc, char **argv) {
  CliOptions options{};
  // --- CLI definition -------------------------------------------------------
  CLI::App app{"zed_svo_to_mp4 - convert ZED SVO/SVO2 playback to MP4"};
  app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required();
  app.add_option("--output", options.output_path, "Output MP4 file (default: input path with .mp4 suffix)");
  app.add_option("--codec", options.codec, "Video codec (h264|h265)")
      ->check(CLI::IsMember({"h264", "h265"}));
  app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
      ->check(CLI::IsMember({"auto", "nvidia", "software"}));
  app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
      ->check(CLI::IsMember({"fast", "balanced", "quality"}));
  app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
      ->check(CLI::IsMember({"low-latency", "balanced"}));
  app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
      ->check(CLI::Range(0, 51));
  app.add_option("--gop", options.gop, "Encoder GOP length in frames")
      ->check(CLI::PositiveNumber);
  app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
      ->check(CLI::NonNegativeNumber);
  app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
      ->check(CLI::NonNegativeNumber);
  // Kept as a pointer so we can later distinguish "not passed" from "0".
  auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
      ->check(CLI::NonNegativeNumber);
  try {
    app.parse(argc, argv);
  } catch (const CLI::ParseError &error) {
    return app.exit(error);
  }
  options.has_end_frame = end_frame_option->count() > 0;
  // --- Option parsing and validation ---------------------------------------
  auto codec = parse_codec(options.codec);
  if (!codec) {
    spdlog::error("{}", codec.error());
    return exit_code(ToolExitCode::UsageError);
  }
  auto encoder_device = parse_encoder_device(options.encoder_device);
  if (!encoder_device) {
    spdlog::error("{}", encoder_device.error());
    return exit_code(ToolExitCode::UsageError);
  }
  auto preset = parse_preset(options.preset);
  if (!preset) {
    spdlog::error("{}", preset.error());
    return exit_code(ToolExitCode::UsageError);
  }
  auto tune = parse_tune(options.tune);
  if (!tune) {
    spdlog::error("{}", tune.error());
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.has_end_frame && options.end_frame < options.start_frame) {
    spdlog::error(
        "invalid frame range: start-frame={} end-frame={}",
        options.start_frame,
        options.end_frame);
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.b_frames > options.gop) {
    spdlog::error(
        "invalid encoder config: b-frames {} must be <= gop {}",
        options.b_frames,
        options.gop);
    return exit_code(ToolExitCode::UsageError);
  }
  // --- Output path ----------------------------------------------------------
  const auto output_path = options.output_path.empty()
      ? derive_output_path(std::filesystem::path{options.input_path})
      : std::filesystem::path{options.output_path};
  if (output_path.empty()) {
    spdlog::error("output path must not be empty");
    return exit_code(ToolExitCode::UsageError);
  }
  if (output_path.has_parent_path()) {
    std::filesystem::create_directories(output_path.parent_path());
  }
  const EncodeTuning tuning{
      .preset = *preset,
      .tune = *tune,
      .quality = options.quality,
      .gop = options.gop,
      .b_frames = options.b_frames,
  };
  // --- SVO playback setup ---------------------------------------------------
  sl::Camera camera{};
  // Shared cleanup for every exit path below.
  auto close_camera = [&]() {
    if (camera.isOpened()) {
      camera.close();
    }
  };
  sl::InitParameters init{};
  init.input.setFromSVOFile(options.input_path.c_str());
  // Decode as fast as possible; depth is disabled since only the left image
  // is exported.
  init.svo_real_time_mode = false;
  init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
  init.coordinate_units = sl::UNIT::METER;
  init.depth_mode = sl::DEPTH_MODE::NONE;
  init.sdk_verbose = false;
  const auto open_status = camera.open(init);
  if (open_status != sl::ERROR_CODE::SUCCESS) {
    spdlog::error(
        "failed to open SVO '{}': {}",
        options.input_path,
        zed_status_string(open_status));
    return exit_code(ToolExitCode::RuntimeError);
  }
  // --- Frame-range validation against the actual recording ------------------
  const auto total_frames = camera.getSVONumberOfFrames();
  if (total_frames <= 0) {
    close_camera();
    spdlog::error("input SVO has no frames");
    return exit_code(ToolExitCode::RuntimeError);
  }
  if (options.start_frame >= static_cast<std::uint32_t>(total_frames)) {
    close_camera();
    spdlog::error(
        "start-frame {} is out of range for {} frames",
        options.start_frame,
        total_frames);
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.has_end_frame && options.end_frame >= static_cast<std::uint32_t>(total_frames)) {
    close_camera();
    spdlog::error(
        "end-frame {} is out of range for {} frames",
        options.end_frame,
        total_frames);
    return exit_code(ToolExitCode::UsageError);
  }
  camera.setSVOPosition(static_cast<int>(options.start_frame));
  const auto camera_info = camera.getCameraInformation();
  const auto &camera_config = camera_info.camera_configuration;
  const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
  const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
  if (width == 0 || height == 0) {
    close_camera();
    spdlog::error("camera resolution reported by the ZED SDK is invalid");
    return exit_code(ToolExitCode::RuntimeError);
  }
  // --- Encoder / muxer setup ------------------------------------------------
  Mp4Writer writer{};
  if (auto open_writer = writer.open(output_path, *codec, *encoder_device, width, height, camera_config.fps, tuning); !open_writer) {
    close_camera();
    spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
    return exit_code(ToolExitCode::RuntimeError);
  }
  // --- Export loop ----------------------------------------------------------
  sl::RuntimeParameters runtime_parameters{};
  sl::Mat left_frame{};
  std::optional<std::uint64_t> first_timestamp_ns{};
  std::optional<std::uint64_t> last_timestamp_ns{};
  std::uint64_t emitted_frames{0};
  const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps);
  const auto last_frame = options.has_end_frame
      ? options.end_frame
      : static_cast<std::uint32_t>(total_frames - 1);
  const auto total_frames_to_emit = static_cast<std::uint64_t>(last_frame - options.start_frame + 1);
  ProgressBar progress{total_frames_to_emit};
  while (options.start_frame + emitted_frames <= last_frame) {
    const auto grab_status = camera.grab(runtime_parameters);
    if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
      // The recording may be shorter than advertised; stop gracefully.
      break;
    }
    if (grab_status != sl::ERROR_CODE::SUCCESS) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status));
      return exit_code(ToolExitCode::RuntimeError);
    }
    const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
    if (image_status != sl::ERROR_CODE::SUCCESS) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status));
      return exit_code(ToolExitCode::RuntimeError);
    }
    if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("{}", valid.error());
      return exit_code(ToolExitCode::RuntimeError);
    }
    // Build a strictly-increasing timestamp sequence: synthesize from the
    // nominal frame period when the SDK reports 0, and bump duplicates or
    // regressions by 1 ns so encoder pts stay monotonic.
    auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
    if (timestamp_ns == 0) {
      timestamp_ns = emitted_frames * nominal_frame_period_ns;
    }
    if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) {
      timestamp_ns = *last_timestamp_ns + 1;
    }
    last_timestamp_ns = timestamp_ns;
    if (!first_timestamp_ns) {
      first_timestamp_ns = timestamp_ns;
    }
    // pts are written relative to the first exported frame.
    const auto relative_timestamp_ns = timestamp_ns - *first_timestamp_ns;
    if (auto write = writer.write_bgr_frame(
            left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
            left_frame.getStepBytes(sl::MEM::CPU),
            relative_timestamp_ns);
        !write) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("failed to encode or mux frame: {}", write.error());
      return exit_code(ToolExitCode::RuntimeError);
    }
    emitted_frames += 1;
    progress.update(emitted_frames);
  }
  // --- Finalize -------------------------------------------------------------
  if (auto flush = writer.flush(); !flush) {
    progress.finish(emitted_frames, false);
    close_camera();
    spdlog::error("failed to finalize MP4 output: {}", flush.error());
    return exit_code(ToolExitCode::RuntimeError);
  }
  progress.finish(emitted_frames, true);
  close_camera();
  spdlog::info(
      "converted {} frames from '{}' to '{}' using codec={} hardware={}",
      emitted_frames,
      options.input_path,
      output_path.string(),
      cvmmap_streamer::zed_tools::codec_name(*codec),
      writer.using_hardware());
  return exit_code(ToolExitCode::Success);
}