Add ZED SVO to MCAP conversion tool
This commit is contained in:
@@ -365,6 +365,14 @@ std::expected<void, std::string> apply_toml_file(RuntimeConfig &config, const st
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.depth_topic = *value;
|
||||
}
|
||||
if (auto value = toml_value<std::string>(table, "record.mcap.calibration_topic")) {
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.calibration_topic = *value;
|
||||
}
|
||||
if (auto value = toml_value<std::string>(table, "record.mcap.pose_topic")) {
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.pose_topic = *value;
|
||||
}
|
||||
if (auto value = toml_value<std::string>(table, "record.mcap.body_topic")) {
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.body_topic = *value;
|
||||
@@ -568,6 +576,8 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
|
||||
std::string mcap_path_raw{};
|
||||
std::string mcap_topic_raw{};
|
||||
std::string mcap_depth_topic_raw{};
|
||||
std::string mcap_calibration_topic_raw{};
|
||||
std::string mcap_pose_topic_raw{};
|
||||
std::string mcap_body_topic_raw{};
|
||||
std::string mcap_frame_id_raw{};
|
||||
std::string mcap_compression_raw{};
|
||||
@@ -610,6 +620,8 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
|
||||
app.add_option("--mcap-path", mcap_path_raw);
|
||||
app.add_option("--mcap-topic", mcap_topic_raw);
|
||||
app.add_option("--mcap-depth-topic", mcap_depth_topic_raw);
|
||||
app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw);
|
||||
app.add_option("--mcap-pose-topic", mcap_pose_topic_raw);
|
||||
app.add_option("--mcap-body-topic", mcap_body_topic_raw);
|
||||
app.add_option("--mcap-frame-id", mcap_frame_id_raw);
|
||||
app.add_option("--mcap-compression", mcap_compression_raw);
|
||||
@@ -726,6 +738,14 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.depth_topic = mcap_depth_topic_raw;
|
||||
}
|
||||
if (!mcap_calibration_topic_raw.empty()) {
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.calibration_topic = mcap_calibration_topic_raw;
|
||||
}
|
||||
if (!mcap_pose_topic_raw.empty()) {
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.pose_topic = mcap_pose_topic_raw;
|
||||
}
|
||||
if (!mcap_body_topic_raw.empty()) {
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.body_topic = mcap_body_topic_raw;
|
||||
@@ -867,6 +887,12 @@ std::expected<void, std::string> validate_runtime_config(const RuntimeConfig &co
|
||||
if (config.record.mcap.depth_topic.empty()) {
|
||||
return std::unexpected("invalid MCAP config: depth_topic must not be empty");
|
||||
}
|
||||
if (config.record.mcap.calibration_topic.empty()) {
|
||||
return std::unexpected("invalid MCAP config: calibration_topic must not be empty");
|
||||
}
|
||||
if (config.record.mcap.pose_topic.empty()) {
|
||||
return std::unexpected("invalid MCAP config: pose_topic must not be empty");
|
||||
}
|
||||
if (config.record.mcap.body_topic.empty()) {
|
||||
return std::unexpected("invalid MCAP config: body_topic must not be empty");
|
||||
}
|
||||
@@ -910,6 +936,8 @@ std::string summarize_runtime_config(const RuntimeConfig &config) {
|
||||
ss << ", mcap.path=" << config.record.mcap.path;
|
||||
ss << ", mcap.topic=" << config.record.mcap.topic;
|
||||
ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic;
|
||||
ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic;
|
||||
ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic;
|
||||
ss << ", mcap.body_topic=" << config.record.mcap.body_topic;
|
||||
ss << ", mcap.frame_id=" << config.record.mcap.frame_id;
|
||||
ss << ", mcap.compression=" << to_string(config.record.mcap.compression);
|
||||
|
||||
@@ -200,6 +200,9 @@ public:
|
||||
1) < 0) {
|
||||
return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into FFmpeg image arrays");
|
||||
}
|
||||
if (frame.row_stride_bytes != 0) {
|
||||
input_frame.linesize[0] = static_cast<int>(frame.row_stride_bytes);
|
||||
}
|
||||
|
||||
sws_scale(
|
||||
scaler_,
|
||||
|
||||
+243
-26
@@ -3,8 +3,10 @@
|
||||
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
|
||||
|
||||
#include "protobuf_descriptor.hpp"
|
||||
#include "cvmmap_streamer/DepthMap.pb.h"
|
||||
#include "foxglove/CompressedVideo.pb.h"
|
||||
#include "proto/cvmmap_streamer/DepthMap.pb.h"
|
||||
#include "proto/foxglove/CameraCalibration.pb.h"
|
||||
#include "proto/foxglove/CompressedVideo.pb.h"
|
||||
#include "proto/foxglove/PoseInFrame.pb.h"
|
||||
#include <rvl/rvl.hpp>
|
||||
|
||||
#include <google/protobuf/timestamp.pb.h>
|
||||
@@ -293,14 +295,14 @@ std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDe
|
||||
};
|
||||
}
|
||||
|
||||
std::vector<float> depth_m(pixel_count, std::numeric_limits<float>::quiet_NaN());
|
||||
float finite_max_m = 0.0f;
|
||||
for (std::size_t index = 0; index < pixel_count; ++index) {
|
||||
float sample = depth_map.pixels[index];
|
||||
if (source_unit == ipc::DepthUnit::Millimeter && std::isfinite(sample)) {
|
||||
sample *= 0.001f;
|
||||
}
|
||||
if (!std::isfinite(sample) || sample <= 0.0f) {
|
||||
std::vector<float> depth_m(pixel_count, std::numeric_limits<float>::quiet_NaN());
|
||||
float finite_max_m = 0.0f;
|
||||
for (std::size_t index = 0; index < pixel_count; ++index) {
|
||||
float sample = depth_map.pixels[index];
|
||||
if (source_unit == ipc::DepthUnit::Millimeter && std::isfinite(sample)) {
|
||||
sample *= 0.001f;
|
||||
}
|
||||
if (!std::isfinite(sample) || sample <= 0.0f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -321,18 +323,108 @@ std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDe
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDepthMapU16View &depth_map) {
|
||||
const auto pixel_count = static_cast<std::size_t>(depth_map.width) * static_cast<std::size_t>(depth_map.height);
|
||||
if (depth_map.width == 0 || depth_map.height == 0) {
|
||||
return std::unexpected("depth map dimensions must be non-zero");
|
||||
}
|
||||
if (pixel_count != depth_map.pixels.size()) {
|
||||
return std::unexpected("depth map dimensions do not match the pixel buffer");
|
||||
}
|
||||
|
||||
try {
|
||||
std::vector<std::uint16_t> pixels(depth_map.pixels.begin(), depth_map.pixels.end());
|
||||
return EncodedDepthPayload{
|
||||
.encoding = DepthEncoding::RvlU16Lossless,
|
||||
.storage_unit = ipc::DepthUnit::Millimeter,
|
||||
.bytes = rvl::compress_image(pixels, depth_map.height, depth_map.width),
|
||||
};
|
||||
} catch (const std::exception &error) {
|
||||
return std::unexpected(std::string("failed to RVL-encode depth map: ") + error.what());
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::expected<void, std::string> append_repeated_double(
|
||||
google::protobuf::RepeatedField<double> *field,
|
||||
std::span<const double> values,
|
||||
std::size_t expected_size,
|
||||
std::string_view field_name) {
|
||||
if (values.size() != expected_size) {
|
||||
return std::unexpected(
|
||||
"expected " + std::to_string(expected_size) + " values for " + std::string(field_name));
|
||||
}
|
||||
field->Reserve(static_cast<int>(values.size()));
|
||||
for (const double value : values) {
|
||||
field->Add(value);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::expected<void, std::string> write_depth_message(
|
||||
mcap::McapWriter &writer,
|
||||
std::uint64_t timestamp_ns,
|
||||
std::string_view frame_id,
|
||||
std::uint32_t width,
|
||||
std::uint32_t height,
|
||||
ipc::DepthUnit source_unit,
|
||||
mcap::ChannelId channel_id,
|
||||
std::uint32_t &sequence,
|
||||
const EncodedDepthPayload &encoded) {
|
||||
cvmmap_streamer::DepthMap message{};
|
||||
*message.mutable_timestamp() = to_proto_timestamp(timestamp_ns);
|
||||
message.set_frame_id(std::string(frame_id));
|
||||
message.set_width(width);
|
||||
message.set_height(height);
|
||||
message.set_source_unit(to_proto_depth_unit(source_unit));
|
||||
message.set_storage_unit(to_proto_storage_unit(encoded.storage_unit));
|
||||
message.set_encoding(to_proto_depth_encoding(encoded.encoding));
|
||||
message.set_data(
|
||||
reinterpret_cast<const char *>(encoded.bytes.data()),
|
||||
static_cast<int>(encoded.bytes.size()));
|
||||
|
||||
std::string serialized{};
|
||||
if (!message.SerializeToString(&serialized)) {
|
||||
return std::unexpected("failed to serialize cvmmap_streamer.DepthMap");
|
||||
}
|
||||
|
||||
mcap::Message record{};
|
||||
record.channelId = channel_id;
|
||||
record.sequence = sequence++;
|
||||
record.logTime = timestamp_ns;
|
||||
record.publishTime = timestamp_ns;
|
||||
record.data = reinterpret_cast<const std::byte *>(serialized.data());
|
||||
record.dataSize = serialized.size();
|
||||
|
||||
const auto write_status = writer.write(record);
|
||||
if (!write_status.ok()) {
|
||||
return std::unexpected("failed to write MCAP depth message: " + write_status.message);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
struct McapRecordSink::State {
|
||||
mcap::McapWriter writer{};
|
||||
std::string path{};
|
||||
std::string frame_id{};
|
||||
std::string calibration_topic{};
|
||||
std::string pose_topic{};
|
||||
std::string body_topic{};
|
||||
mcap::SchemaId calibration_schema_id{0};
|
||||
mcap::SchemaId pose_schema_id{0};
|
||||
mcap::ChannelId video_channel_id{0};
|
||||
mcap::ChannelId depth_channel_id{0};
|
||||
mcap::ChannelId calibration_channel_id{0};
|
||||
mcap::ChannelId pose_channel_id{0};
|
||||
mcap::ChannelId body_channel_id{0};
|
||||
std::uint32_t video_sequence{0};
|
||||
std::uint32_t depth_sequence{0};
|
||||
std::uint32_t calibration_sequence{0};
|
||||
std::uint32_t pose_sequence{0};
|
||||
std::uint32_t body_sequence{0};
|
||||
CodecType codec{CodecType::H264};
|
||||
std::vector<std::uint8_t> keyframe_preamble{};
|
||||
@@ -363,6 +455,8 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
|
||||
auto state = std::make_unique<State>();
|
||||
state->path = config.record.mcap.path;
|
||||
state->frame_id = config.record.mcap.frame_id;
|
||||
state->calibration_topic = config.record.mcap.calibration_topic;
|
||||
state->pose_topic = config.record.mcap.pose_topic;
|
||||
state->body_topic = config.record.mcap.body_topic;
|
||||
|
||||
mcap::McapWriterOptions options("");
|
||||
@@ -398,6 +492,26 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
|
||||
state->writer.addChannel(depth_channel);
|
||||
state->depth_channel_id = depth_channel.id;
|
||||
|
||||
const auto calibration_descriptor_set = build_file_descriptor_set(foxglove::CameraCalibration::descriptor());
|
||||
std::string calibration_schema_bytes{};
|
||||
if (!calibration_descriptor_set.SerializeToString(&calibration_schema_bytes)) {
|
||||
return std::unexpected("failed to serialize foxglove.CameraCalibration descriptor set");
|
||||
}
|
||||
|
||||
mcap::Schema calibration_schema("foxglove.CameraCalibration", "protobuf", calibration_schema_bytes);
|
||||
state->writer.addSchema(calibration_schema);
|
||||
state->calibration_schema_id = calibration_schema.id;
|
||||
|
||||
const auto pose_descriptor_set = build_file_descriptor_set(foxglove::PoseInFrame::descriptor());
|
||||
std::string pose_schema_bytes{};
|
||||
if (!pose_descriptor_set.SerializeToString(&pose_schema_bytes)) {
|
||||
return std::unexpected("failed to serialize foxglove.PoseInFrame descriptor set");
|
||||
}
|
||||
|
||||
mcap::Schema pose_schema("foxglove.PoseInFrame", "protobuf", pose_schema_bytes);
|
||||
state->writer.addSchema(pose_schema);
|
||||
state->pose_schema_id = pose_schema.id;
|
||||
|
||||
sink.state_ = state.release();
|
||||
auto update = sink.update_stream_info(stream_info);
|
||||
if (!update) {
|
||||
@@ -470,34 +584,137 @@ std::expected<void, std::string> McapRecordSink::write_depth_map(const RawDepthM
|
||||
return std::unexpected(encoded.error());
|
||||
}
|
||||
|
||||
cvmmap_streamer::DepthMap message{};
|
||||
*message.mutable_timestamp() = to_proto_timestamp(depth_map.timestamp_ns);
|
||||
return write_depth_message(
|
||||
state_->writer,
|
||||
depth_map.timestamp_ns,
|
||||
state_->frame_id,
|
||||
depth_map.width,
|
||||
depth_map.height,
|
||||
source_unit,
|
||||
state_->depth_channel_id,
|
||||
state_->depth_sequence,
|
||||
*encoded);
|
||||
}
|
||||
|
||||
std::expected<void, std::string> McapRecordSink::write_depth_map_u16(const RawDepthMapU16View &depth_map) {
|
||||
if (state_ == nullptr) {
|
||||
return std::unexpected("MCAP sink is not open");
|
||||
}
|
||||
|
||||
auto encoded = encode_depth_payload(depth_map);
|
||||
if (!encoded) {
|
||||
return std::unexpected(encoded.error());
|
||||
}
|
||||
|
||||
return write_depth_message(
|
||||
state_->writer,
|
||||
depth_map.timestamp_ns,
|
||||
state_->frame_id,
|
||||
depth_map.width,
|
||||
depth_map.height,
|
||||
ipc::DepthUnit::Millimeter,
|
||||
state_->depth_channel_id,
|
||||
state_->depth_sequence,
|
||||
*encoded);
|
||||
}
|
||||
|
||||
std::expected<void, std::string> McapRecordSink::write_camera_calibration(const RawCameraCalibrationView &calibration) {
|
||||
if (state_ == nullptr) {
|
||||
return std::unexpected("MCAP sink is not open");
|
||||
}
|
||||
if (state_->calibration_topic.empty()) {
|
||||
return std::unexpected("calibration topic is empty");
|
||||
}
|
||||
|
||||
if (state_->calibration_channel_id == 0) {
|
||||
mcap::Channel calibration_channel(state_->calibration_topic, "protobuf", state_->calibration_schema_id);
|
||||
state_->writer.addChannel(calibration_channel);
|
||||
state_->calibration_channel_id = calibration_channel.id;
|
||||
}
|
||||
|
||||
foxglove::CameraCalibration message{};
|
||||
*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns);
|
||||
message.set_frame_id(state_->frame_id);
|
||||
message.set_width(depth_map.width);
|
||||
message.set_height(depth_map.height);
|
||||
message.set_source_unit(to_proto_depth_unit(source_unit));
|
||||
message.set_storage_unit(to_proto_storage_unit(encoded->storage_unit));
|
||||
message.set_encoding(to_proto_depth_encoding(encoded->encoding));
|
||||
message.set_data(
|
||||
reinterpret_cast<const char *>(encoded->bytes.data()),
|
||||
static_cast<int>(encoded->bytes.size()));
|
||||
message.set_width(calibration.width);
|
||||
message.set_height(calibration.height);
|
||||
message.set_distortion_model(std::string(calibration.distortion_model));
|
||||
for (const double value : calibration.distortion) {
|
||||
message.add_d(value);
|
||||
}
|
||||
if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
|
||||
return append;
|
||||
}
|
||||
if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
|
||||
return append;
|
||||
}
|
||||
if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
|
||||
return append;
|
||||
}
|
||||
|
||||
std::string serialized{};
|
||||
if (!message.SerializeToString(&serialized)) {
|
||||
return std::unexpected("failed to serialize cvmmap_streamer.DepthMap");
|
||||
return std::unexpected("failed to serialize foxglove.CameraCalibration");
|
||||
}
|
||||
|
||||
mcap::Message record{};
|
||||
record.channelId = state_->depth_channel_id;
|
||||
record.sequence = state_->depth_sequence++;
|
||||
record.logTime = depth_map.timestamp_ns;
|
||||
record.publishTime = depth_map.timestamp_ns;
|
||||
record.channelId = state_->calibration_channel_id;
|
||||
record.sequence = state_->calibration_sequence++;
|
||||
record.logTime = calibration.timestamp_ns;
|
||||
record.publishTime = calibration.timestamp_ns;
|
||||
record.data = reinterpret_cast<const std::byte *>(serialized.data());
|
||||
record.dataSize = serialized.size();
|
||||
|
||||
const auto write_status = state_->writer.write(record);
|
||||
if (!write_status.ok()) {
|
||||
return std::unexpected("failed to write MCAP depth message: " + write_status.message);
|
||||
return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::expected<void, std::string> McapRecordSink::write_pose(const RawPoseView &pose) {
|
||||
if (state_ == nullptr) {
|
||||
return std::unexpected("MCAP sink is not open");
|
||||
}
|
||||
if (state_->pose_topic.empty()) {
|
||||
return std::unexpected("pose topic is empty");
|
||||
}
|
||||
|
||||
if (state_->pose_channel_id == 0) {
|
||||
mcap::Channel pose_channel(state_->pose_topic, "protobuf", state_->pose_schema_id);
|
||||
state_->writer.addChannel(pose_channel);
|
||||
state_->pose_channel_id = pose_channel.id;
|
||||
}
|
||||
|
||||
foxglove::PoseInFrame message{};
|
||||
*message.mutable_timestamp() = to_proto_timestamp(pose.timestamp_ns);
|
||||
message.set_frame_id(std::string(pose.reference_frame_id));
|
||||
auto *pose_message = message.mutable_pose();
|
||||
auto *position = pose_message->mutable_position();
|
||||
position->set_x(pose.position[0]);
|
||||
position->set_y(pose.position[1]);
|
||||
position->set_z(pose.position[2]);
|
||||
auto *orientation = pose_message->mutable_orientation();
|
||||
orientation->set_x(pose.orientation[0]);
|
||||
orientation->set_y(pose.orientation[1]);
|
||||
orientation->set_z(pose.orientation[2]);
|
||||
orientation->set_w(pose.orientation[3]);
|
||||
|
||||
std::string serialized{};
|
||||
if (!message.SerializeToString(&serialized)) {
|
||||
return std::unexpected("failed to serialize foxglove.PoseInFrame");
|
||||
}
|
||||
|
||||
mcap::Message record{};
|
||||
record.channelId = state_->pose_channel_id;
|
||||
record.sequence = state_->pose_sequence++;
|
||||
record.logTime = pose.timestamp_ns;
|
||||
record.publishTime = pose.timestamp_ns;
|
||||
record.data = reinterpret_cast<const std::byte *>(serialized.data());
|
||||
record.dataSize = serialized.size();
|
||||
|
||||
const auto write_status = state_->writer.write(record);
|
||||
if (!write_status.ok()) {
|
||||
return std::unexpected("failed to write MCAP pose message: " + write_status.message);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
#include "cvmmap_streamer/common.h"
|
||||
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
|
||||
#include "foxglove/CompressedVideo.pb.h"
|
||||
#include "proto/foxglove/CompressedVideo.pb.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#include <mcap/reader.hpp>
|
||||
|
||||
#include "cvmmap_streamer/DepthMap.pb.h"
|
||||
#include "proto/cvmmap_streamer/DepthMap.pb.h"
|
||||
#include "cvmmap_streamer/common.h"
|
||||
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
|
||||
#include "foxglove/CompressedVideo.pb.h"
|
||||
#include "proto/foxglove/CompressedVideo.pb.h"
|
||||
|
||||
#include <rvl/rvl.hpp>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
@@ -0,0 +1,279 @@
|
||||
#include <mcap/reader.hpp>
|
||||
|
||||
#include "proto/cvmmap_streamer/DepthMap.pb.h"
|
||||
#include "cvmmap_streamer/common.h"
|
||||
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
|
||||
#include "proto/foxglove/CameraCalibration.pb.h"
|
||||
#include "proto/foxglove/CompressedVideo.pb.h"
|
||||
#include "proto/foxglove/PoseInFrame.pb.h"
|
||||
|
||||
#include <rvl/rvl.hpp>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
enum class TesterExitCode : int {
|
||||
Success = 0,
|
||||
CreateError = 2,
|
||||
WriteError = 3,
|
||||
OpenError = 4,
|
||||
VerificationError = 5,
|
||||
};
|
||||
|
||||
[[nodiscard]]
|
||||
constexpr int exit_code(const TesterExitCode code) {
|
||||
return static_cast<int>(code);
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
bool approx_equal(const double lhs, const double rhs, const double tolerance = 1e-9) {
|
||||
return std::fabs(lhs - rhs) <= tolerance;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (cvmmap_streamer::has_help_flag(argc, argv)) {
|
||||
cvmmap_streamer::print_help("mcap_pose_record_tester");
|
||||
return exit_code(TesterExitCode::Success);
|
||||
}
|
||||
|
||||
const std::filesystem::path output_path =
|
||||
argc > 1
|
||||
? std::filesystem::path(argv[1])
|
||||
: std::filesystem::temp_directory_path() / "cvmmap_streamer_pose_record_test.mcap";
|
||||
if (output_path.has_parent_path()) {
|
||||
std::filesystem::create_directories(output_path.parent_path());
|
||||
}
|
||||
|
||||
cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
|
||||
config.record.mcap.enabled = true;
|
||||
config.record.mcap.path = output_path.string();
|
||||
config.record.mcap.topic = "/camera/video";
|
||||
config.record.mcap.depth_topic = "/camera/depth";
|
||||
config.record.mcap.calibration_topic = "/camera/calibration";
|
||||
config.record.mcap.pose_topic = "/camera/pose";
|
||||
config.record.mcap.frame_id = "camera";
|
||||
config.record.mcap.compression = cvmmap_streamer::McapCompression::None;
|
||||
|
||||
cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
|
||||
stream_info.codec = cvmmap_streamer::CodecType::H264;
|
||||
|
||||
auto sink = cvmmap_streamer::record::McapRecordSink::create(config, stream_info);
|
||||
if (!sink) {
|
||||
spdlog::error("failed to create MCAP sink: {}", sink.error());
|
||||
return exit_code(TesterExitCode::CreateError);
|
||||
}
|
||||
|
||||
cvmmap_streamer::encode::EncodedAccessUnit access_unit{};
|
||||
access_unit.codec = cvmmap_streamer::CodecType::H264;
|
||||
access_unit.source_timestamp_ns = 100;
|
||||
access_unit.stream_pts_ns = 100;
|
||||
access_unit.keyframe = false;
|
||||
access_unit.annexb_bytes = {0x00, 0x00, 0x00, 0x01, 0x09, 0x10};
|
||||
if (auto write = sink->write_access_unit(access_unit); !write) {
|
||||
spdlog::error("failed to write video access unit: {}", write.error());
|
||||
return exit_code(TesterExitCode::WriteError);
|
||||
}
|
||||
|
||||
const std::vector<std::uint16_t> depth_pixels{
|
||||
1000, 2000,
|
||||
0, 1500,
|
||||
};
|
||||
if (auto write = sink->write_depth_map_u16(cvmmap_streamer::record::RawDepthMapU16View{
|
||||
.timestamp_ns = 101,
|
||||
.width = 2,
|
||||
.height = 2,
|
||||
.pixels = depth_pixels,
|
||||
}); !write) {
|
||||
spdlog::error("failed to write depth map: {}", write.error());
|
||||
return exit_code(TesterExitCode::WriteError);
|
||||
}
|
||||
|
||||
const std::vector<double> distortion{0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
const std::vector<double> intrinsic_matrix{
|
||||
500.0, 0.0, 320.0,
|
||||
0.0, 500.0, 240.0,
|
||||
0.0, 0.0, 1.0,
|
||||
};
|
||||
const std::vector<double> rectification_matrix{
|
||||
1.0, 0.0, 0.0,
|
||||
0.0, 1.0, 0.0,
|
||||
0.0, 0.0, 1.0,
|
||||
};
|
||||
const std::vector<double> projection_matrix{
|
||||
500.0, 0.0, 320.0, 0.0,
|
||||
0.0, 500.0, 240.0, 0.0,
|
||||
0.0, 0.0, 1.0, 0.0,
|
||||
};
|
||||
if (auto write = sink->write_camera_calibration(cvmmap_streamer::record::RawCameraCalibrationView{
|
||||
.timestamp_ns = 102,
|
||||
.width = 640,
|
||||
.height = 480,
|
||||
.distortion_model = "plumb_bob",
|
||||
.distortion = distortion,
|
||||
.intrinsic_matrix = intrinsic_matrix,
|
||||
.rectification_matrix = rectification_matrix,
|
||||
.projection_matrix = projection_matrix,
|
||||
}); !write) {
|
||||
spdlog::error("failed to write calibration: {}", write.error());
|
||||
return exit_code(TesterExitCode::WriteError);
|
||||
}
|
||||
|
||||
if (auto write = sink->write_pose(cvmmap_streamer::record::RawPoseView{
|
||||
.timestamp_ns = 103,
|
||||
.reference_frame_id = "world",
|
||||
.position = {1.0, 2.0, 3.0},
|
||||
.orientation = {0.1, 0.2, 0.3, 0.9},
|
||||
}); !write) {
|
||||
spdlog::error("failed to write pose: {}", write.error());
|
||||
return exit_code(TesterExitCode::WriteError);
|
||||
}
|
||||
|
||||
sink->close();
|
||||
|
||||
mcap::McapReader reader{};
|
||||
const auto open_status = reader.open(output_path.string());
|
||||
if (!open_status.ok()) {
|
||||
spdlog::error("failed to open MCAP file '{}': {}", output_path.string(), open_status.message);
|
||||
return exit_code(TesterExitCode::OpenError);
|
||||
}
|
||||
|
||||
std::uint64_t video_messages{0};
|
||||
std::uint64_t depth_messages{0};
|
||||
std::uint64_t calibration_messages{0};
|
||||
std::uint64_t pose_messages{0};
|
||||
|
||||
auto messages = reader.readMessages();
|
||||
for (auto it = messages.begin(); it != messages.end(); ++it) {
|
||||
if (it->schema == nullptr || it->channel == nullptr) {
|
||||
spdlog::error("MCAP message missing schema or channel");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
if (it->schema->encoding != "protobuf" || it->channel->messageEncoding != "protobuf") {
|
||||
spdlog::error("unexpected message encoding in MCAP");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
|
||||
if (it->schema->name == "foxglove.CompressedVideo") {
|
||||
foxglove::CompressedVideo video{};
|
||||
if (!video.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
|
||||
spdlog::error("failed to parse foxglove.CompressedVideo");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
if (it->channel->topic != "/camera/video" || video.frame_id() != "camera" || video.data().empty()) {
|
||||
spdlog::error("video message verification failed");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
video_messages += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->schema->name == "cvmmap_streamer.DepthMap") {
|
||||
cvmmap_streamer::DepthMap depth{};
|
||||
if (!depth.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
|
||||
spdlog::error("failed to parse cvmmap_streamer.DepthMap");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
if (it->channel->topic != "/camera/depth" ||
|
||||
depth.frame_id() != "camera" ||
|
||||
depth.encoding() != cvmmap_streamer::DepthMap::RVL_U16_LOSSLESS) {
|
||||
spdlog::error("depth message metadata verification failed");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
const auto decoded = rvl::decompress_image(std::span<const std::uint8_t>(
|
||||
reinterpret_cast<const std::uint8_t *>(depth.data().data()),
|
||||
depth.data().size()));
|
||||
if (decoded.pixels.size() != depth_pixels.size() ||
|
||||
decoded.pixels[0] != 1000 ||
|
||||
decoded.pixels[1] != 2000 ||
|
||||
decoded.pixels[2] != 0 ||
|
||||
decoded.pixels[3] != 1500) {
|
||||
spdlog::error("depth RVL round-trip verification failed");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
depth_messages += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->schema->name == "foxglove.CameraCalibration") {
|
||||
foxglove::CameraCalibration calibration{};
|
||||
if (!calibration.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
|
||||
spdlog::error("failed to parse foxglove.CameraCalibration");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
if (it->channel->topic != "/camera/calibration" ||
|
||||
calibration.frame_id() != "camera" ||
|
||||
calibration.width() != 640 ||
|
||||
calibration.height() != 480 ||
|
||||
calibration.distortion_model() != "plumb_bob" ||
|
||||
calibration.d_size() != 5 ||
|
||||
calibration.k_size() != 9 ||
|
||||
calibration.r_size() != 9 ||
|
||||
calibration.p_size() != 12 ||
|
||||
!approx_equal(calibration.k(0), 500.0) ||
|
||||
!approx_equal(calibration.k(2), 320.0) ||
|
||||
!approx_equal(calibration.p(5), 500.0)) {
|
||||
spdlog::error("calibration message verification failed");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
calibration_messages += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->schema->name == "foxglove.PoseInFrame") {
|
||||
foxglove::PoseInFrame pose{};
|
||||
if (!pose.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
|
||||
spdlog::error("failed to parse foxglove.PoseInFrame");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
if (it->channel->topic != "/camera/pose" ||
|
||||
pose.frame_id() != "world" ||
|
||||
!approx_equal(pose.pose().position().x(), 1.0) ||
|
||||
!approx_equal(pose.pose().position().y(), 2.0) ||
|
||||
!approx_equal(pose.pose().position().z(), 3.0) ||
|
||||
!approx_equal(pose.pose().orientation().x(), 0.1) ||
|
||||
!approx_equal(pose.pose().orientation().y(), 0.2) ||
|
||||
!approx_equal(pose.pose().orientation().z(), 0.3) ||
|
||||
!approx_equal(pose.pose().orientation().w(), 0.9)) {
|
||||
spdlog::error("pose message verification failed");
|
||||
reader.close();
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
pose_messages += 1;
|
||||
}
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
||||
if (video_messages != 1 || depth_messages != 1 || calibration_messages != 1 || pose_messages != 1) {
|
||||
spdlog::error(
|
||||
"unexpected message counts: video={} depth={} calibration={} pose={}",
|
||||
video_messages,
|
||||
depth_messages,
|
||||
calibration_messages,
|
||||
pose_messages);
|
||||
return exit_code(TesterExitCode::VerificationError);
|
||||
}
|
||||
|
||||
spdlog::info(
|
||||
"validated same-file MCAP video+depth+calibration+pose recording at '{}'",
|
||||
output_path.string());
|
||||
return exit_code(TesterExitCode::Success);
|
||||
}
|
||||
@@ -0,0 +1,645 @@
|
||||
#include <CLI/CLI.hpp>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <sl/Camera.hpp>
|
||||
|
||||
#include "cvmmap_streamer/config/runtime_config.hpp"
|
||||
#include "cvmmap_streamer/core/status.hpp"
|
||||
#include "cvmmap_streamer/encode/encoder_backend.hpp"
|
||||
#include "cvmmap_streamer/ipc/contracts.hpp"
|
||||
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
enum class ToolExitCode : int {
|
||||
Success = 0,
|
||||
UsageError = 2,
|
||||
RuntimeError = 3,
|
||||
};
|
||||
|
||||
// Parsed command-line options, with the defaults used when a flag is omitted.
struct CliOptions {
    std::string input_path{};                              // required: source SVO/SVO2 file
    std::string output_path{};                             // required: destination MCAP file
    std::string codec{"h264"};                             // video codec name (h264|h265)
    std::string encoder_device{"auto"};                    // encoder device (auto|nvidia|software)
    std::string mcap_compression{"zstd"};                  // MCAP chunk compression (none|lz4|zstd)
    std::string depth_mode{"neural"};                      // ZED depth mode (neural|quality|performance|ultra)
    bool with_pose{false};                                 // emit pose messages when tracking is available
    std::uint32_t start_frame{0};                          // first SVO frame to export (inclusive)
    std::uint32_t end_frame{0};                            // last SVO frame to export (inclusive)
    bool has_end_frame{false};                             // set after parsing if --end-frame was supplied
    std::string frame_id{"camera"};                        // frame id stamped on image/depth topics
    std::string video_topic{"/camera/video"};              // foxglove.CompressedVideo topic
    std::string depth_topic{"/camera/depth"};              // cvmmap_streamer.DepthMap topic
    std::string calibration_topic{"/camera/calibration"};  // foxglove.CameraCalibration topic
    std::string pose_topic{"/camera/pose"};                // foxglove.PoseInFrame topic
};
|
||||
|
||||
[[nodiscard]]
|
||||
constexpr int exit_code(const ToolExitCode code) {
|
||||
return static_cast<int>(code);
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::string zed_string(const sl::String &value) {
|
||||
return std::string(value.c_str() == nullptr ? "" : value.c_str());
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::string zed_status_string(const sl::ERROR_CODE code) {
|
||||
return zed_string(sl::toString(code));
|
||||
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state) {
|
||||
return zed_string(sl::toString(state));
|
||||
}
|
||||
|
||||
// Map the CLI codec name onto the streamer's codec enum.
// Anything other than "h264"/"h265" yields a usage-style error string.
[[nodiscard]]
std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) {
    using cvmmap_streamer::CodecType;
    if (raw == "h265") {
        return CodecType::H265;
    }
    if (raw == "h264") {
        return CodecType::H264;
    }
    return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
}
|
||||
|
||||
// Map the CLI encoder device name onto the streamer's device enum.
[[nodiscard]]
std::expected<cvmmap_streamer::EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
    using cvmmap_streamer::EncoderDeviceType;
    if (raw == "software") {
        return EncoderDeviceType::Software;
    }
    if (raw == "nvidia") {
        return EncoderDeviceType::Nvidia;
    }
    if (raw == "auto") {
        return EncoderDeviceType::Auto;
    }
    return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
}
|
||||
|
||||
// Map the CLI compression name onto the MCAP chunk-compression enum.
[[nodiscard]]
std::expected<cvmmap_streamer::McapCompression, std::string> parse_mcap_compression(const std::string_view raw) {
    using cvmmap_streamer::McapCompression;
    if (raw == "zstd") {
        return McapCompression::Zstd;
    }
    if (raw == "lz4") {
        return McapCompression::Lz4;
    }
    if (raw == "none") {
        return McapCompression::None;
    }
    return std::unexpected("invalid mcap compression: '" + std::string(raw) + "' (expected: none|lz4|zstd)");
}
|
||||
|
||||
// Map the CLI depth-mode name onto the ZED SDK's depth mode enum.
[[nodiscard]]
std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) {
    if (raw == "ultra") {
        return sl::DEPTH_MODE::ULTRA;
    }
    if (raw == "performance") {
        return sl::DEPTH_MODE::PERFORMANCE;
    }
    if (raw == "quality") {
        return sl::DEPTH_MODE::QUALITY;
    }
    if (raw == "neural") {
        return sl::DEPTH_MODE::NEURAL;
    }
    return std::unexpected("invalid depth mode: '" + std::string(raw) + "' (expected: neural|quality|performance|ultra)");
}
|
||||
|
||||
// Nominal per-frame period in nanoseconds for a given frame rate.
// Non-positive (or NaN) fps falls back to the ~30 fps period.
[[nodiscard]]
std::uint64_t frame_period_ns(const float fps) {
    constexpr std::uint64_t fallback_period_ns = 33'333'333ull;  // ~30 fps
    if (!(fps > 0.0f)) {
        return fallback_period_ns;
    }
    const double period = 1'000'000'000.0 / static_cast<double>(fps);
    return static_cast<std::uint64_t>(std::llround(period));
}
|
||||
|
||||
// Check that a retrieved image is a non-empty U8_C3 mat with a valid CPU
// buffer; on failure the error names the offending property via `label`.
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
    const std::string name(label);
    if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
        return std::unexpected(name + " must be U8_C3");
    }
    const bool degenerate = mat.getWidth() == 0 || mat.getHeight() == 0;
    if (degenerate) {
        return std::unexpected(name + " dimensions must be non-zero");
    }
    if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
        return std::unexpected(name + " CPU buffer is null");
    }
    return {};
}
|
||||
|
||||
// Check that a retrieved measure is a non-empty U16_C1 mat with a valid CPU
// buffer; on failure the error names the offending property via `label`.
[[nodiscard]]
std::expected<void, std::string> validate_u16c1_mat(const sl::Mat &mat, const std::string_view label) {
    const std::string name(label);
    if (mat.getDataType() != sl::MAT_TYPE::U16_C1) {
        return std::unexpected(name + " must be U16_C1");
    }
    const bool degenerate = mat.getWidth() == 0 || mat.getHeight() == 0;
    if (degenerate) {
        return std::unexpected(name + " dimensions must be non-zero");
    }
    if (mat.getPtr<sl::ushort1>(sl::MEM::CPU) == nullptr) {
        return std::unexpected(name + " CPU buffer is null");
    }
    return {};
}
|
||||
|
||||
[[nodiscard]]
|
||||
std::vector<std::uint16_t> copy_compact_u16_plane(const sl::Mat &mat) {
|
||||
const auto width = static_cast<std::size_t>(mat.getWidth());
|
||||
const auto height = static_cast<std::size_t>(mat.getHeight());
|
||||
const auto row_bytes = width * sizeof(std::uint16_t);
|
||||
const auto step_bytes = mat.getStepBytes(sl::MEM::CPU);
|
||||
auto *src = mat.getPtr<sl::uchar1>(sl::MEM::CPU);
|
||||
|
||||
std::vector<std::uint16_t> compact(width * height, 0);
|
||||
auto *dst = reinterpret_cast<std::uint8_t *>(compact.data());
|
||||
for (std::size_t row = 0; row < height; ++row) {
|
||||
std::memcpy(
|
||||
dst + row * row_bytes,
|
||||
src + row * step_bytes,
|
||||
row_bytes);
|
||||
}
|
||||
return compact;
|
||||
}
|
||||
|
||||
// Write every encoded access unit to the MCAP sink, stopping at (and
// propagating) the first write error.
[[nodiscard]]
std::expected<void, std::string> write_access_units(
    cvmmap_streamer::record::McapRecordSink &sink,
    const std::vector<cvmmap_streamer::encode::EncodedAccessUnit> &access_units) {
    for (const auto &unit : access_units) {
        auto written = sink.write_access_unit(unit);
        if (!written) {
            return std::unexpected(written.error());
        }
    }
    return {};
}
|
||||
|
||||
// Flush any access units still buffered inside the encoder and write them to
// the sink; a flush failure is reported via the streamer's error formatter.
[[nodiscard]]
std::expected<void, std::string> flush_and_write(
    cvmmap_streamer::record::McapRecordSink &sink,
    cvmmap_streamer::encode::EncoderBackend &backend) {
    if (auto flushed = backend->flush(); flushed) {
        return write_access_units(sink, *flushed);
    } else {
        return std::unexpected(cvmmap_streamer::format_error(flushed.error()));
    }
}
|
||||
|
||||
}
|
||||
|
||||
// Tool entry point: replays a ZED SVO/SVO2 file offline and writes an MCAP
// file containing encoded video, tightly packed U16 depth maps, a single
// camera-calibration message, and (optionally) pose messages.
// Exit codes follow ToolExitCode: 0 success, 2 usage error, 3 runtime error.
int main(int argc, char **argv) {
    CliOptions options{};

    // ---- CLI definition (IsMember pre-filters enum-valued options) ----
    CLI::App app{"zed_svo_to_mcap - convert ZED SVO/SVO2 playback to MCAP"};
    app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required();
    app.add_option("--output", options.output_path, "Output MCAP file")->required();
    app.add_option("--codec", options.codec, "Video codec (h264|h265)")
        ->check(CLI::IsMember({"h264", "h265"}));
    app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
        ->check(CLI::IsMember({"auto", "nvidia", "software"}));
    app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available");
    app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)")
        ->check(CLI::IsMember({"none", "lz4", "zstd"}));
    app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)")
        ->check(CLI::IsMember({"neural", "quality", "performance", "ultra"}));
    app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
        ->check(CLI::NonNegativeNumber);
    // Kept as a pointer so we can detect below whether --end-frame was given.
    auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
        ->check(CLI::NonNegativeNumber);
    app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics");
    app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");
    app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap");
    app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration");
    app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame");

    try {
        app.parse(argc, argv);
    } catch (const CLI::ParseError &error) {
        return app.exit(error);
    }
    // --end-frame is optional; remember whether the user supplied it at all.
    options.has_end_frame = end_frame_option->count() > 0;

    // ---- Translate string options into typed enums ----
    auto codec = parse_codec(options.codec);
    if (!codec) {
        spdlog::error("{}", codec.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto encoder_device = parse_encoder_device(options.encoder_device);
    if (!encoder_device) {
        spdlog::error("{}", encoder_device.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto compression = parse_mcap_compression(options.mcap_compression);
    if (!compression) {
        spdlog::error("{}", compression.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto depth_mode = parse_depth_mode(options.depth_mode);
    if (!depth_mode) {
        spdlog::error("{}", depth_mode.error());
        return exit_code(ToolExitCode::UsageError);
    }

    if (options.output_path.empty()) {
        spdlog::error("output path must not be empty");
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.has_end_frame && options.end_frame < options.start_frame) {
        spdlog::error(
            "invalid frame range: start-frame={} end-frame={}",
            options.start_frame,
            options.end_frame);
        return exit_code(ToolExitCode::UsageError);
    }

    const std::filesystem::path output_path{options.output_path};
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }

    sl::Camera camera{};
    bool tracking_enabled{false};

    // Shared cleanup for every exit path after the camera is opened; tracking
    // is disabled before closing, and the lambda is safe to call repeatedly
    // because it checks state first.
    auto close_camera = [&]() {
        if (tracking_enabled) {
            camera.disablePositionalTracking();
            tracking_enabled = false;
        }
        if (camera.isOpened()) {
            camera.close();
        }
    };

    // ---- Open the SVO for offline (non-realtime) playback ----
    sl::InitParameters init{};
    init.input.setFromSVOFile(options.input_path.c_str());
    init.svo_real_time_mode = false;
    init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
    init.coordinate_units = sl::UNIT::METER;
    init.depth_mode = *depth_mode;
    init.sdk_verbose = false;

    const auto open_status = camera.open(init);
    if (open_status != sl::ERROR_CODE::SUCCESS) {
        spdlog::error(
            "failed to open SVO '{}': {}",
            options.input_path,
            zed_status_string(open_status));
        return exit_code(ToolExitCode::RuntimeError);
    }

    // ---- Validate the requested frame range against the recording length ----
    const auto total_frames = camera.getSVONumberOfFrames();
    if (total_frames <= 0) {
        close_camera();
        spdlog::error("input SVO has no frames");
        return exit_code(ToolExitCode::RuntimeError);
    }
    if (options.start_frame >= static_cast<std::uint32_t>(total_frames)) {
        close_camera();
        spdlog::error(
            "start-frame {} is out of range for {} frames",
            options.start_frame,
            total_frames);
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.has_end_frame && options.end_frame >= static_cast<std::uint32_t>(total_frames)) {
        close_camera();
        spdlog::error(
            "end-frame {} is out of range for {} frames",
            options.end_frame,
            total_frames);
        return exit_code(ToolExitCode::UsageError);
    }

    camera.setSVOPosition(static_cast<int>(options.start_frame));

    // Pose output is best-effort: if tracking cannot be enabled we warn and
    // continue without pose messages rather than failing the conversion.
    if (options.with_pose) {
        sl::PositionalTrackingParameters tracking_parameters{};
        const auto tracking_status = camera.enablePositionalTracking(tracking_parameters);
        if (tracking_status == sl::ERROR_CODE::SUCCESS) {
            tracking_enabled = true;
        } else {
            spdlog::warn(
                "positional tracking unavailable for '{}': {}. continuing without pose output",
                options.input_path,
                zed_status_string(tracking_status));
        }
    }

    // ---- Query resolution and left-camera intrinsics from the SDK ----
    const auto camera_info = camera.getCameraInformation();
    const auto &camera_config = camera_info.camera_configuration;
    const auto &left_camera = camera_config.calibration_parameters.left_cam;
    const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
    const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
    if (width == 0 || height == 0) {
        close_camera();
        spdlog::error("camera resolution reported by the ZED SDK is invalid");
        return exit_code(ToolExitCode::RuntimeError);
    }

    // ---- Build a RuntimeConfig shared by the encoder backend and MCAP sink ----
    cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
    config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
    config.encoder.device = *encoder_device;
    config.encoder.codec = *codec;
    config.encoder.gop = 30;
    config.encoder.b_frames = 0;
    config.record.mcap.enabled = true;
    config.record.mcap.path = output_path.string();
    config.record.mcap.topic = options.video_topic;
    config.record.mcap.depth_topic = options.depth_topic;
    config.record.mcap.calibration_topic = options.calibration_topic;
    config.record.mcap.pose_topic = options.pose_topic;
    config.record.mcap.frame_id = options.frame_id;
    config.record.mcap.compression = *compression;

    // LEFT_BGR retrieval below yields 3-channel U8 BGR frames of this shape.
    cvmmap_streamer::ipc::FrameInfo frame_info{
        .width = static_cast<std::uint16_t>(width),
        .height = static_cast<std::uint16_t>(height),
        .channels = 3,
        .depth = cvmmap_streamer::ipc::Depth::U8,
        .pixel_format = cvmmap_streamer::ipc::PixelFormat::BGR,
        .buffer_size = width * height * 3u,
    };

    auto backend_result = cvmmap_streamer::encode::make_encoder_backend(config);
    if (!backend_result) {
        close_camera();
        spdlog::error("failed to create encoder backend: {}", cvmmap_streamer::format_error(backend_result.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto backend = std::move(*backend_result);

    if (auto init_status = backend->init(config, frame_info); !init_status) {
        close_camera();
        spdlog::error("failed to initialize encoder backend: {}", cvmmap_streamer::format_error(init_status.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }

    // The sink needs the encoder's stream description before any writes.
    auto stream_info = backend->stream_info();
    if (!stream_info) {
        backend->shutdown();
        close_camera();
        spdlog::error("encoder backend did not provide stream info: {}", cvmmap_streamer::format_error(stream_info.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }

    auto sink = cvmmap_streamer::record::McapRecordSink::create(config, *stream_info);
    if (!sink) {
        backend->shutdown();
        close_camera();
        spdlog::error("failed to create MCAP sink: {}", sink.error());
        return exit_code(ToolExitCode::RuntimeError);
    }

    // ---- Calibration payload: zero distortion and identity rectification
    // (rectified output), intrinsics/projection from the left camera ----
    const std::array<double, 5> distortion{
        0.0, 0.0, 0.0, 0.0, 0.0,
    };
    const std::array<double, 9> intrinsic_matrix{
        static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
        0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
        0.0, 0.0, 1.0,
    };
    const std::array<double, 9> rectification_matrix{
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    };
    const std::array<double, 12> projection_matrix{
        static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
        0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
        0.0, 0.0, 1.0, 0.0,
    };

    // ---- Per-frame loop state ----
    sl::RuntimeParameters runtime_parameters{};
    sl::Mat left_frame{};
    sl::Mat depth_frame{};
    sl::Pose pose{};
    bool calibration_written{false};
    std::uint64_t emitted_frames{0};
    std::optional<std::uint64_t> last_timestamp_ns{};  // for monotonicity fix-up
    std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
    const auto last_frame = options.has_end_frame
        ? options.end_frame
        : static_cast<std::uint32_t>(total_frames - 1);
    const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps);

    while (options.start_frame + emitted_frames <= last_frame) {
        const auto grab_status = camera.grab(runtime_parameters);
        // The recording may end before the requested range does.
        if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
            break;
        }
        if (grab_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status));
            return exit_code(ToolExitCode::RuntimeError);
        }

        const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
        if (image_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("{}", valid.error());
            return exit_code(ToolExitCode::RuntimeError);
        }

        const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU);
        if (depth_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to retrieve depth map: {}", zed_status_string(depth_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto valid = validate_u16c1_mat(depth_frame, "depth map"); !valid) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("{}", valid.error());
            return exit_code(ToolExitCode::RuntimeError);
        }

        // Timestamps: use the recorded image timestamp; substitute a synthetic
        // clock when the SVO reports 0, and force strictly increasing values
        // so MCAP log time never stalls or goes backwards.
        auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
        if (timestamp_ns == 0) {
            timestamp_ns = emitted_frames * nominal_frame_period_ns;
        }
        if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) {
            timestamp_ns = *last_timestamp_ns + 1;
        }
        last_timestamp_ns = timestamp_ns;

        // Hand the BGR frame to the encoder; the SDK stride may include
        // per-row padding, which the encoder handles via row_stride_bytes.
        const auto video_step_bytes = left_frame.getStepBytes(sl::MEM::CPU);
        const auto video_bytes = std::span<const std::uint8_t>(
            left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
            video_step_bytes * left_frame.getHeight());
        cvmmap_streamer::encode::RawVideoFrame raw_video{
            .info = frame_info,
            .source_timestamp_ns = timestamp_ns,
            .row_stride_bytes = video_step_bytes,
            .bytes = video_bytes,
        };

        if (auto push = backend->push_frame(raw_video); !push) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to encode frame: {}", cvmmap_streamer::format_error(push.error()));
            return exit_code(ToolExitCode::RuntimeError);
        }

        auto drained = backend->drain();
        if (!drained) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to drain encoded access units: {}", cvmmap_streamer::format_error(drained.error()));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto write = write_access_units(*sink, *drained); !write) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to write video access unit: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }

        // Calibration is written exactly once, stamped with the first frame's
        // timestamp.
        if (!calibration_written) {
            cvmmap_streamer::record::RawCameraCalibrationView calibration{
                .timestamp_ns = timestamp_ns,
                .width = width,
                .height = height,
                .distortion_model = "plumb_bob",
                .distortion = distortion,
                .intrinsic_matrix = intrinsic_matrix,
                .rectification_matrix = rectification_matrix,
                .projection_matrix = projection_matrix,
            };
            if (auto write = sink->write_camera_calibration(calibration); !write) {
                sink->close();
                backend->shutdown();
                close_camera();
                spdlog::error("failed to write calibration: {}", write.error());
                return exit_code(ToolExitCode::RuntimeError);
            }
            calibration_written = true;
        }

        // The depth sink expects tightly packed pixels; SDK rows may carry
        // trailing padding, so compact only when the stride requires it.
        const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU);
        const auto packed_depth_bytes = static_cast<std::size_t>(width) * sizeof(std::uint16_t);
        if (depth_step_bytes < packed_depth_bytes) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("depth stride {} is smaller than packed row size {}", depth_step_bytes, packed_depth_bytes);
            return exit_code(ToolExitCode::RuntimeError);
        }

        std::optional<std::vector<std::uint16_t>> compact_depth{};
        std::span<const std::uint16_t> depth_pixels{};
        if (depth_step_bytes == packed_depth_bytes) {
            // Already tightly packed: view the SDK buffer directly, no copy.
            depth_pixels = std::span<const std::uint16_t>(
                depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
                static_cast<std::size_t>(width) * static_cast<std::size_t>(height));
        } else {
            compact_depth = copy_compact_u16_plane(depth_frame);
            depth_pixels = *compact_depth;
        }

        cvmmap_streamer::record::RawDepthMapU16View depth_map{
            .timestamp_ns = timestamp_ns,
            .width = width,
            .height = height,
            .pixels = depth_pixels,
        };
        if (auto write = sink->write_depth_map_u16(depth_map); !write) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to write depth map: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }

        if (tracking_enabled) {
            const auto tracking_state = camera.getPosition(pose, sl::REFERENCE_FRAME::WORLD);
            // Warn once per state transition rather than once per frame.
            if (!last_tracking_state || *last_tracking_state != tracking_state) {
                last_tracking_state = tracking_state;
                if (tracking_state != sl::POSITIONAL_TRACKING_STATE::OK) {
                    spdlog::warn(
                        "pose tracking state changed to {} at frame {}",
                        zed_tracking_state_string(tracking_state),
                        options.start_frame + emitted_frames);
                }
            }
            // Poses are emitted only while tracking reports OK.
            if (tracking_state == sl::POSITIONAL_TRACKING_STATE::OK) {
                const auto translation = pose.getTranslation();
                const auto orientation = pose.getOrientation();
                cvmmap_streamer::record::RawPoseView pose_view{
                    .timestamp_ns = timestamp_ns,
                    .reference_frame_id = "world",
                    .position = {
                        static_cast<double>(translation.x),
                        static_cast<double>(translation.y),
                        static_cast<double>(translation.z),
                    },
                    .orientation = {
                        static_cast<double>(orientation.x),
                        static_cast<double>(orientation.y),
                        static_cast<double>(orientation.z),
                        static_cast<double>(orientation.w),
                    },
                };
                if (auto write = sink->write_pose(pose_view); !write) {
                    sink->close();
                    backend->shutdown();
                    close_camera();
                    spdlog::error("failed to write pose: {}", write.error());
                    return exit_code(ToolExitCode::RuntimeError);
                }
            }
        }

        emitted_frames += 1;
    }

    // Flush frames still buffered inside the encoder before finalizing.
    if (auto flushed = flush_and_write(*sink, backend); !flushed) {
        sink->close();
        backend->shutdown();
        close_camera();
        spdlog::error("failed to finalize encoded video: {}", flushed.error());
        return exit_code(ToolExitCode::RuntimeError);
    }

    sink->close();
    backend->shutdown();
    close_camera();

    spdlog::info(
        "wrote {} frame(s) from '{}' to '{}'",
        emitted_frames,
        options.input_path,
        output_path.string());
    return exit_code(ToolExitCode::Success);
}
|
||||
Reference in New Issue
Block a user