Add ZED SVO to MCAP conversion tool

This commit is contained in:
2026-03-17 02:03:59 +00:00
parent ee53d1958e
commit 0fef0595fb
16 changed files with 1430 additions and 49 deletions
+79 -19
View File
@@ -10,12 +10,6 @@ find_package(Threads REQUIRED)
find_package(cppzmq QUIET) find_package(cppzmq QUIET)
set(CVMMAP_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../cv-mmap" CACHE PATH "Path to a local cv-mmap checkout") set(CVMMAP_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../cv-mmap" CACHE PATH "Path to a local cv-mmap checkout")
set(CVMMAP_LOCAL_BUILD "${CVMMAP_LOCAL_ROOT}/build/core" CACHE PATH "Path to local cv-mmap build artifacts") set(CVMMAP_LOCAL_BUILD "${CVMMAP_LOCAL_ROOT}/build/core" CACHE PATH "Path to local cv-mmap build artifacts")
if (
NOT cvmmap-core_DIR
AND EXISTS "${CVMMAP_LOCAL_BUILD}/cvmmap-coreConfig.cmake"
AND EXISTS "${CVMMAP_LOCAL_BUILD}/cvmmap-coreTargets.cmake")
set(cvmmap-core_DIR "${CVMMAP_LOCAL_BUILD}")
endif()
if (cvmmap-core_DIR) if (cvmmap-core_DIR)
find_package(cvmmap-core CONFIG QUIET) find_package(cvmmap-core CONFIG QUIET)
endif() endif()
@@ -24,6 +18,9 @@ find_package(spdlog REQUIRED)
find_package(Protobuf REQUIRED) find_package(Protobuf REQUIRED)
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
find_package(rvl CONFIG QUIET) find_package(rvl CONFIG QUIET)
set(ZED_DIR "/usr/local/zed" CACHE PATH "Path to the local ZED SDK")
find_package(ZED REQUIRED)
find_package(CUDA ${ZED_CUDA_VERSION} REQUIRED)
add_subdirectory(third_party) add_subdirectory(third_party)
@@ -48,7 +45,7 @@ if (NOT TARGET cvmmap::client)
endif() endif()
if (NOT TARGET rvl::rvl) if (NOT TARGET rvl::rvl)
set(RVL_LOCAL_ROOT "/home/crosstyan/Code/rvl_impl") set(RVL_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../rvl_impl" CACHE PATH "Path to a local rvl_impl checkout")
set(RVL_LOCAL_BUILD "${RVL_LOCAL_ROOT}/build/core") set(RVL_LOCAL_BUILD "${RVL_LOCAL_ROOT}/build/core")
if ( if (
EXISTS "${RVL_LOCAL_ROOT}/core/include/rvl/rvl.hpp" EXISTS "${RVL_LOCAL_ROOT}/core/include/rvl/rvl.hpp"
@@ -66,7 +63,13 @@ add_library(cvmmap_streamer_foxglove_proto STATIC)
protobuf_generate( protobuf_generate(
TARGET cvmmap_streamer_foxglove_proto TARGET cvmmap_streamer_foxglove_proto
LANGUAGE cpp LANGUAGE cpp
PROTOS "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CompressedVideo.proto" PROTOS
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CompressedVideo.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CameraCalibration.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/PoseInFrame.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Pose.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Quaternion.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Vector3.proto"
IMPORT_DIRS "${CMAKE_CURRENT_LIST_DIR}/proto") IMPORT_DIRS "${CMAKE_CURRENT_LIST_DIR}/proto")
add_library(cvmmap_streamer_depth_proto STATIC) add_library(cvmmap_streamer_depth_proto STATIC)
protobuf_generate( protobuf_generate(
@@ -78,13 +81,16 @@ add_library(cvmmap_streamer_protobuf INTERFACE)
target_include_directories(cvmmap_streamer_foxglove_proto target_include_directories(cvmmap_streamer_foxglove_proto
PUBLIC PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_CURRENT_BINARY_DIR}/proto"
${Protobuf_INCLUDE_DIRS}) ${Protobuf_INCLUDE_DIRS})
target_include_directories(cvmmap_streamer_depth_proto target_include_directories(cvmmap_streamer_depth_proto
PUBLIC PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_CURRENT_BINARY_DIR}/proto"
${Protobuf_INCLUDE_DIRS}) ${Protobuf_INCLUDE_DIRS})
target_include_directories(cvmmap_streamer_protobuf target_include_directories(cvmmap_streamer_protobuf
INTERFACE INTERFACE
"${CMAKE_CURRENT_BINARY_DIR}/proto"
${Protobuf_INCLUDE_DIRS}) ${Protobuf_INCLUDE_DIRS})
if (TARGET protobuf::libprotobuf) if (TARGET protobuf::libprotobuf)
target_link_libraries(cvmmap_streamer_protobuf INTERFACE protobuf::libprotobuf) target_link_libraries(cvmmap_streamer_protobuf INTERFACE protobuf::libprotobuf)
@@ -109,6 +115,36 @@ target_link_libraries(cvmmap_streamer_mcap_runtime
PkgConfig::ZSTD PkgConfig::ZSTD
PkgConfig::LZ4) PkgConfig::LZ4)
add_library(cvmmap_streamer_record_support STATIC
src/encode/encoder_backend.cpp
src/encode/ffmpeg_encoder_backend.cpp
src/record/protobuf_descriptor.cpp
src/record/mcap_record_sink.cpp)
target_include_directories(cvmmap_streamer_record_support
PUBLIC
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(cvmmap_streamer_record_support
PUBLIC
cvmmap_streamer_foxglove_proto
cvmmap_streamer_depth_proto
cvmmap_streamer_mcap_runtime
PkgConfig::FFMPEG
PkgConfig::ZSTD
PkgConfig::LZ4
rvl::rvl
mcap::mcap
msft_proxy4::proxy
cvmmap_streamer_protobuf)
if (TARGET spdlog::spdlog)
target_link_libraries(cvmmap_streamer_record_support PUBLIC spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(cvmmap_streamer_record_support PUBLIC spdlog)
endif()
if (TARGET PkgConfig::PROTOBUF_PKG)
target_link_libraries(cvmmap_streamer_record_support PUBLIC PkgConfig::PROTOBUF_PKG)
endif()
add_library(cvmmap_streamer_common STATIC add_library(cvmmap_streamer_common STATIC
src/ipc/help.cpp src/ipc/help.cpp
src/config/runtime_config.cpp src/config/runtime_config.cpp
@@ -119,11 +155,7 @@ add_library(cvmmap_streamer_common STATIC
src/metrics/latency_tracker.cpp src/metrics/latency_tracker.cpp
src/pipeline/pipeline_runtime.cpp src/pipeline/pipeline_runtime.cpp
src/protocol/rtmp_output.cpp src/protocol/rtmp_output.cpp
src/protocol/rtp_publisher.cpp src/protocol/rtp_publisher.cpp)
src/encode/encoder_backend.cpp
src/encode/ffmpeg_encoder_backend.cpp
src/record/protobuf_descriptor.cpp
src/record/mcap_record_sink.cpp)
target_include_directories(cvmmap_streamer_common target_include_directories(cvmmap_streamer_common
PUBLIC PUBLIC
@@ -132,18 +164,14 @@ target_include_directories(cvmmap_streamer_common
set(CVMMAP_STREAMER_LINK_DEPS set(CVMMAP_STREAMER_LINK_DEPS
Threads::Threads Threads::Threads
cvmmap_streamer_foxglove_proto cvmmap_streamer_record_support
cvmmap_streamer_depth_proto
cvmmap_streamer_mcap_runtime
PkgConfig::FFMPEG PkgConfig::FFMPEG
PkgConfig::ZSTD PkgConfig::ZSTD
PkgConfig::LZ4 PkgConfig::LZ4
rvl::rvl
cvmmap::client cvmmap::client
CLI11::CLI11 CLI11::CLI11
tomlplusplus::tomlplusplus tomlplusplus::tomlplusplus
mcap::mcap mcap::mcap)
msft_proxy4::proxy)
if (TARGET cppzmq::cppzmq) if (TARGET cppzmq::cppzmq)
list(APPEND CVMMAP_STREAMER_LINK_DEPS cppzmq::cppzmq) list(APPEND CVMMAP_STREAMER_LINK_DEPS cppzmq::cppzmq)
@@ -193,6 +221,7 @@ add_cvmmap_binary(ipc_snapshot_tester src/testers/ipc_snapshot_tester.cpp)
add_cvmmap_binary(mcap_depth_record_tester src/testers/mcap_depth_record_tester.cpp) add_cvmmap_binary(mcap_depth_record_tester src/testers/mcap_depth_record_tester.cpp)
add_cvmmap_binary(mcap_body_record_tester src/testers/mcap_body_record_tester.cpp) add_cvmmap_binary(mcap_body_record_tester src/testers/mcap_body_record_tester.cpp)
add_cvmmap_binary(mcap_body_inspector src/testers/mcap_body_inspector.cpp) add_cvmmap_binary(mcap_body_inspector src/testers/mcap_body_inspector.cpp)
add_cvmmap_binary(mcap_pose_record_tester src/testers/mcap_pose_record_tester.cpp)
add_executable(mcap_reader_tester src/testers/mcap_reader_tester.cpp) add_executable(mcap_reader_tester src/testers/mcap_reader_tester.cpp)
target_include_directories(mcap_reader_tester target_include_directories(mcap_reader_tester
@@ -247,3 +276,34 @@ endif()
set_target_properties(mcap_replay_tester PROPERTIES set_target_properties(mcap_replay_tester PROPERTIES
OUTPUT_NAME "mcap_replay_tester" OUTPUT_NAME "mcap_replay_tester"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
add_executable(
zed_svo_to_mcap
src/tools/zed_svo_to_mcap.cpp
src/config/runtime_config.cpp)
target_include_directories(zed_svo_to_mcap
PRIVATE
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}"
${ZED_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS})
target_link_directories(zed_svo_to_mcap
PRIVATE
${ZED_LIBRARY_DIR}
${CUDA_LIBRARY_DIRS})
target_link_libraries(zed_svo_to_mcap
PRIVATE
cvmmap_streamer_record_support
CLI11::CLI11
tomlplusplus::tomlplusplus
${ZED_LIBRARIES}
${CUDA_CUDA_LIBRARY}
${CUDA_CUDART_LIBRARY})
if (TARGET spdlog::spdlog)
target_link_libraries(zed_svo_to_mcap PRIVATE spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(zed_svo_to_mcap PRIVATE spdlog)
endif()
set_target_properties(zed_svo_to_mcap PROPERTIES
OUTPUT_NAME "zed_svo_to_mcap"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
@@ -85,6 +85,8 @@ struct McapRecordConfig {
std::string path{"capture.mcap"}; std::string path{"capture.mcap"};
std::string topic{"/camera/video"}; std::string topic{"/camera/video"};
std::string depth_topic{"/camera/depth"}; std::string depth_topic{"/camera/depth"};
std::string calibration_topic{"/camera/calibration"};
std::string pose_topic{"/camera/pose"};
std::string body_topic{"/camera/body"}; std::string body_topic{"/camera/body"};
std::string frame_id{"camera"}; std::string frame_id{"camera"};
McapCompression compression{McapCompression::Zstd}; McapCompression compression{McapCompression::Zstd};
@@ -17,6 +17,7 @@ namespace cvmmap_streamer::encode {
struct RawVideoFrame { struct RawVideoFrame {
ipc::FrameInfo info{}; ipc::FrameInfo info{};
std::uint64_t source_timestamp_ns{0}; std::uint64_t source_timestamp_ns{0};
std::size_t row_stride_bytes{0};
std::span<const std::uint8_t> bytes{}; std::span<const std::uint8_t> bytes{};
}; };
@@ -4,6 +4,7 @@
#include "cvmmap_streamer/encode/encoded_access_unit.hpp" #include "cvmmap_streamer/encode/encoded_access_unit.hpp"
#include "cvmmap_streamer/ipc/contracts.hpp" #include "cvmmap_streamer/ipc/contracts.hpp"
#include <array>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <expected> #include <expected>
@@ -26,6 +27,31 @@ struct RawDepthMapView {
std::span<const float> pixels{}; std::span<const float> pixels{};
}; };
struct RawDepthMapU16View {
std::uint64_t timestamp_ns{0};
std::uint32_t width{0};
std::uint32_t height{0};
std::span<const std::uint16_t> pixels{};
};
struct RawCameraCalibrationView {
std::uint64_t timestamp_ns{0};
std::uint32_t width{0};
std::uint32_t height{0};
std::string_view distortion_model{};
std::span<const double> distortion{};
std::span<const double> intrinsic_matrix{};
std::span<const double> rectification_matrix{};
std::span<const double> projection_matrix{};
};
struct RawPoseView {
std::uint64_t timestamp_ns{0};
std::string_view reference_frame_id{};
std::array<double, 3> position{};
std::array<double, 4> orientation{};
};
struct RawBodyTrackingMessageView { struct RawBodyTrackingMessageView {
std::uint64_t timestamp_ns{0}; std::uint64_t timestamp_ns{0};
std::span<const std::uint8_t> bytes{}; std::span<const std::uint8_t> bytes{};
@@ -56,6 +82,15 @@ public:
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_depth_map(const RawDepthMapView &depth_map); std::expected<void, std::string> write_depth_map(const RawDepthMapView &depth_map);
[[nodiscard]]
std::expected<void, std::string> write_depth_map_u16(const RawDepthMapU16View &depth_map);
[[nodiscard]]
std::expected<void, std::string> write_camera_calibration(const RawCameraCalibrationView &calibration);
[[nodiscard]]
std::expected<void, std::string> write_pose(const RawPoseView &pose);
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_body_tracking_message(const RawBodyTrackingMessageView &body_message); std::expected<void, std::string> write_body_tracking_message(const RawBodyTrackingMessageView &body_message);
+37
View File
@@ -0,0 +1,37 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
import "google/protobuf/timestamp.proto";
package foxglove;
// Camera calibration parameters
message CameraCalibration {
// Timestamp of calibration data
google.protobuf.Timestamp timestamp = 1;
// Frame of reference for the camera. The origin of the frame is the optical center of the camera. +x points to the right in the image, +y points down, and +z points into the plane of the image.
string frame_id = 9;
// Image width
fixed32 width = 2;
// Image height
fixed32 height = 3;
// Name of distortion model
string distortion_model = 4;
// Distortion parameters
repeated double D = 5;
// Intrinsic camera matrix (3x3 row-major matrix)
repeated double K = 6;
// Rectification matrix (3x3 row-major matrix)
repeated double R = 7;
// Projection/camera matrix (3x4 row-major matrix)
repeated double P = 8;
}
+17
View File
@@ -0,0 +1,17 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
import "proto/foxglove/Quaternion.proto";
import "proto/foxglove/Vector3.proto";
package foxglove;
// A position and orientation for an object or reference frame in 3D space
message Pose {
// Point denoting position in 3D space
foxglove.Vector3 position = 1;
// Quaternion denoting orientation in 3D space
foxglove.Quaternion orientation = 2;
}
+20
View File
@@ -0,0 +1,20 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
import "proto/foxglove/Pose.proto";
import "google/protobuf/timestamp.proto";
package foxglove;
// A timestamped pose for an object or reference frame in 3D space
message PoseInFrame {
// Timestamp of pose
google.protobuf.Timestamp timestamp = 1;
// Frame of reference for pose position and orientation
string frame_id = 2;
// Pose in 3D space
foxglove.Pose pose = 3;
}
+20
View File
@@ -0,0 +1,20 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
package foxglove;
// A quaternion representing a rotation in 3D space
message Quaternion {
// x value
double x = 1;
// y value
double y = 2;
// z value
double z = 3;
// w value
double w = 4;
}
+17
View File
@@ -0,0 +1,17 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
package foxglove;
// A vector in 3D space that represents a direction only
message Vector3 {
// x coordinate length
double x = 1;
// y coordinate length
double y = 2;
// z coordinate length
double z = 3;
}
+28
View File
@@ -365,6 +365,14 @@ std::expected<void, std::string> apply_toml_file(RuntimeConfig &config, const st
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.depth_topic = *value; config.record.mcap.depth_topic = *value;
} }
if (auto value = toml_value<std::string>(table, "record.mcap.calibration_topic")) {
config.record.mcap.enabled = true;
config.record.mcap.calibration_topic = *value;
}
if (auto value = toml_value<std::string>(table, "record.mcap.pose_topic")) {
config.record.mcap.enabled = true;
config.record.mcap.pose_topic = *value;
}
if (auto value = toml_value<std::string>(table, "record.mcap.body_topic")) { if (auto value = toml_value<std::string>(table, "record.mcap.body_topic")) {
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.body_topic = *value; config.record.mcap.body_topic = *value;
@@ -568,6 +576,8 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
std::string mcap_path_raw{}; std::string mcap_path_raw{};
std::string mcap_topic_raw{}; std::string mcap_topic_raw{};
std::string mcap_depth_topic_raw{}; std::string mcap_depth_topic_raw{};
std::string mcap_calibration_topic_raw{};
std::string mcap_pose_topic_raw{};
std::string mcap_body_topic_raw{}; std::string mcap_body_topic_raw{};
std::string mcap_frame_id_raw{}; std::string mcap_frame_id_raw{};
std::string mcap_compression_raw{}; std::string mcap_compression_raw{};
@@ -610,6 +620,8 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
app.add_option("--mcap-path", mcap_path_raw); app.add_option("--mcap-path", mcap_path_raw);
app.add_option("--mcap-topic", mcap_topic_raw); app.add_option("--mcap-topic", mcap_topic_raw);
app.add_option("--mcap-depth-topic", mcap_depth_topic_raw); app.add_option("--mcap-depth-topic", mcap_depth_topic_raw);
app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw);
app.add_option("--mcap-pose-topic", mcap_pose_topic_raw);
app.add_option("--mcap-body-topic", mcap_body_topic_raw); app.add_option("--mcap-body-topic", mcap_body_topic_raw);
app.add_option("--mcap-frame-id", mcap_frame_id_raw); app.add_option("--mcap-frame-id", mcap_frame_id_raw);
app.add_option("--mcap-compression", mcap_compression_raw); app.add_option("--mcap-compression", mcap_compression_raw);
@@ -726,6 +738,14 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.depth_topic = mcap_depth_topic_raw; config.record.mcap.depth_topic = mcap_depth_topic_raw;
} }
if (!mcap_calibration_topic_raw.empty()) {
config.record.mcap.enabled = true;
config.record.mcap.calibration_topic = mcap_calibration_topic_raw;
}
if (!mcap_pose_topic_raw.empty()) {
config.record.mcap.enabled = true;
config.record.mcap.pose_topic = mcap_pose_topic_raw;
}
if (!mcap_body_topic_raw.empty()) { if (!mcap_body_topic_raw.empty()) {
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.body_topic = mcap_body_topic_raw; config.record.mcap.body_topic = mcap_body_topic_raw;
@@ -867,6 +887,12 @@ std::expected<void, std::string> validate_runtime_config(const RuntimeConfig &co
if (config.record.mcap.depth_topic.empty()) { if (config.record.mcap.depth_topic.empty()) {
return std::unexpected("invalid MCAP config: depth_topic must not be empty"); return std::unexpected("invalid MCAP config: depth_topic must not be empty");
} }
if (config.record.mcap.calibration_topic.empty()) {
return std::unexpected("invalid MCAP config: calibration_topic must not be empty");
}
if (config.record.mcap.pose_topic.empty()) {
return std::unexpected("invalid MCAP config: pose_topic must not be empty");
}
if (config.record.mcap.body_topic.empty()) { if (config.record.mcap.body_topic.empty()) {
return std::unexpected("invalid MCAP config: body_topic must not be empty"); return std::unexpected("invalid MCAP config: body_topic must not be empty");
} }
@@ -910,6 +936,8 @@ std::string summarize_runtime_config(const RuntimeConfig &config) {
ss << ", mcap.path=" << config.record.mcap.path; ss << ", mcap.path=" << config.record.mcap.path;
ss << ", mcap.topic=" << config.record.mcap.topic; ss << ", mcap.topic=" << config.record.mcap.topic;
ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic; ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic;
ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic;
ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic;
ss << ", mcap.body_topic=" << config.record.mcap.body_topic; ss << ", mcap.body_topic=" << config.record.mcap.body_topic;
ss << ", mcap.frame_id=" << config.record.mcap.frame_id; ss << ", mcap.frame_id=" << config.record.mcap.frame_id;
ss << ", mcap.compression=" << to_string(config.record.mcap.compression); ss << ", mcap.compression=" << to_string(config.record.mcap.compression);
+3
View File
@@ -200,6 +200,9 @@ public:
1) < 0) { 1) < 0) {
return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into FFmpeg image arrays"); return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into FFmpeg image arrays");
} }
if (frame.row_stride_bytes != 0) {
input_frame.linesize[0] = static_cast<int>(frame.row_stride_bytes);
}
sws_scale( sws_scale(
scaler_, scaler_,
+235 -18
View File
@@ -3,8 +3,10 @@
#include "cvmmap_streamer/record/mcap_record_sink.hpp" #include "cvmmap_streamer/record/mcap_record_sink.hpp"
#include "protobuf_descriptor.hpp" #include "protobuf_descriptor.hpp"
#include "cvmmap_streamer/DepthMap.pb.h" #include "proto/cvmmap_streamer/DepthMap.pb.h"
#include "foxglove/CompressedVideo.pb.h" #include "proto/foxglove/CameraCalibration.pb.h"
#include "proto/foxglove/CompressedVideo.pb.h"
#include "proto/foxglove/PoseInFrame.pb.h"
#include <rvl/rvl.hpp> #include <rvl/rvl.hpp>
#include <google/protobuf/timestamp.pb.h> #include <google/protobuf/timestamp.pb.h>
@@ -321,18 +323,108 @@ std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDe
} }
} }
[[nodiscard]]
std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDepthMapU16View &depth_map) {
const auto pixel_count = static_cast<std::size_t>(depth_map.width) * static_cast<std::size_t>(depth_map.height);
if (depth_map.width == 0 || depth_map.height == 0) {
return std::unexpected("depth map dimensions must be non-zero");
}
if (pixel_count != depth_map.pixels.size()) {
return std::unexpected("depth map dimensions do not match the pixel buffer");
}
try {
std::vector<std::uint16_t> pixels(depth_map.pixels.begin(), depth_map.pixels.end());
return EncodedDepthPayload{
.encoding = DepthEncoding::RvlU16Lossless,
.storage_unit = ipc::DepthUnit::Millimeter,
.bytes = rvl::compress_image(pixels, depth_map.height, depth_map.width),
};
} catch (const std::exception &error) {
return std::unexpected(std::string("failed to RVL-encode depth map: ") + error.what());
}
}
[[nodiscard]]
std::expected<void, std::string> append_repeated_double(
google::protobuf::RepeatedField<double> *field,
std::span<const double> values,
std::size_t expected_size,
std::string_view field_name) {
if (values.size() != expected_size) {
return std::unexpected(
"expected " + std::to_string(expected_size) + " values for " + std::string(field_name));
}
field->Reserve(static_cast<int>(values.size()));
for (const double value : values) {
field->Add(value);
}
return {};
}
[[nodiscard]]
std::expected<void, std::string> write_depth_message(
mcap::McapWriter &writer,
std::uint64_t timestamp_ns,
std::string_view frame_id,
std::uint32_t width,
std::uint32_t height,
ipc::DepthUnit source_unit,
mcap::ChannelId channel_id,
std::uint32_t &sequence,
const EncodedDepthPayload &encoded) {
cvmmap_streamer::DepthMap message{};
*message.mutable_timestamp() = to_proto_timestamp(timestamp_ns);
message.set_frame_id(std::string(frame_id));
message.set_width(width);
message.set_height(height);
message.set_source_unit(to_proto_depth_unit(source_unit));
message.set_storage_unit(to_proto_storage_unit(encoded.storage_unit));
message.set_encoding(to_proto_depth_encoding(encoded.encoding));
message.set_data(
reinterpret_cast<const char *>(encoded.bytes.data()),
static_cast<int>(encoded.bytes.size()));
std::string serialized{};
if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize cvmmap_streamer.DepthMap");
}
mcap::Message record{};
record.channelId = channel_id;
record.sequence = sequence++;
record.logTime = timestamp_ns;
record.publishTime = timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size();
const auto write_status = writer.write(record);
if (!write_status.ok()) {
return std::unexpected("failed to write MCAP depth message: " + write_status.message);
}
return {};
}
} }
struct McapRecordSink::State { struct McapRecordSink::State {
mcap::McapWriter writer{}; mcap::McapWriter writer{};
std::string path{}; std::string path{};
std::string frame_id{}; std::string frame_id{};
std::string calibration_topic{};
std::string pose_topic{};
std::string body_topic{}; std::string body_topic{};
mcap::SchemaId calibration_schema_id{0};
mcap::SchemaId pose_schema_id{0};
mcap::ChannelId video_channel_id{0}; mcap::ChannelId video_channel_id{0};
mcap::ChannelId depth_channel_id{0}; mcap::ChannelId depth_channel_id{0};
mcap::ChannelId calibration_channel_id{0};
mcap::ChannelId pose_channel_id{0};
mcap::ChannelId body_channel_id{0}; mcap::ChannelId body_channel_id{0};
std::uint32_t video_sequence{0}; std::uint32_t video_sequence{0};
std::uint32_t depth_sequence{0}; std::uint32_t depth_sequence{0};
std::uint32_t calibration_sequence{0};
std::uint32_t pose_sequence{0};
std::uint32_t body_sequence{0}; std::uint32_t body_sequence{0};
CodecType codec{CodecType::H264}; CodecType codec{CodecType::H264};
std::vector<std::uint8_t> keyframe_preamble{}; std::vector<std::uint8_t> keyframe_preamble{};
@@ -363,6 +455,8 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
auto state = std::make_unique<State>(); auto state = std::make_unique<State>();
state->path = config.record.mcap.path; state->path = config.record.mcap.path;
state->frame_id = config.record.mcap.frame_id; state->frame_id = config.record.mcap.frame_id;
state->calibration_topic = config.record.mcap.calibration_topic;
state->pose_topic = config.record.mcap.pose_topic;
state->body_topic = config.record.mcap.body_topic; state->body_topic = config.record.mcap.body_topic;
mcap::McapWriterOptions options(""); mcap::McapWriterOptions options("");
@@ -398,6 +492,26 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
state->writer.addChannel(depth_channel); state->writer.addChannel(depth_channel);
state->depth_channel_id = depth_channel.id; state->depth_channel_id = depth_channel.id;
const auto calibration_descriptor_set = build_file_descriptor_set(foxglove::CameraCalibration::descriptor());
std::string calibration_schema_bytes{};
if (!calibration_descriptor_set.SerializeToString(&calibration_schema_bytes)) {
return std::unexpected("failed to serialize foxglove.CameraCalibration descriptor set");
}
mcap::Schema calibration_schema("foxglove.CameraCalibration", "protobuf", calibration_schema_bytes);
state->writer.addSchema(calibration_schema);
state->calibration_schema_id = calibration_schema.id;
const auto pose_descriptor_set = build_file_descriptor_set(foxglove::PoseInFrame::descriptor());
std::string pose_schema_bytes{};
if (!pose_descriptor_set.SerializeToString(&pose_schema_bytes)) {
return std::unexpected("failed to serialize foxglove.PoseInFrame descriptor set");
}
mcap::Schema pose_schema("foxglove.PoseInFrame", "protobuf", pose_schema_bytes);
state->writer.addSchema(pose_schema);
state->pose_schema_id = pose_schema.id;
sink.state_ = state.release(); sink.state_ = state.release();
auto update = sink.update_stream_info(stream_info); auto update = sink.update_stream_info(stream_info);
if (!update) { if (!update) {
@@ -470,34 +584,137 @@ std::expected<void, std::string> McapRecordSink::write_depth_map(const RawDepthM
return std::unexpected(encoded.error()); return std::unexpected(encoded.error());
} }
cvmmap_streamer::DepthMap message{}; return write_depth_message(
*message.mutable_timestamp() = to_proto_timestamp(depth_map.timestamp_ns); state_->writer,
depth_map.timestamp_ns,
state_->frame_id,
depth_map.width,
depth_map.height,
source_unit,
state_->depth_channel_id,
state_->depth_sequence,
*encoded);
}
std::expected<void, std::string> McapRecordSink::write_depth_map_u16(const RawDepthMapU16View &depth_map) {
if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open");
}
auto encoded = encode_depth_payload(depth_map);
if (!encoded) {
return std::unexpected(encoded.error());
}
return write_depth_message(
state_->writer,
depth_map.timestamp_ns,
state_->frame_id,
depth_map.width,
depth_map.height,
ipc::DepthUnit::Millimeter,
state_->depth_channel_id,
state_->depth_sequence,
*encoded);
}
std::expected<void, std::string> McapRecordSink::write_camera_calibration(const RawCameraCalibrationView &calibration) {
if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open");
}
if (state_->calibration_topic.empty()) {
return std::unexpected("calibration topic is empty");
}
if (state_->calibration_channel_id == 0) {
mcap::Channel calibration_channel(state_->calibration_topic, "protobuf", state_->calibration_schema_id);
state_->writer.addChannel(calibration_channel);
state_->calibration_channel_id = calibration_channel.id;
}
foxglove::CameraCalibration message{};
*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns);
message.set_frame_id(state_->frame_id); message.set_frame_id(state_->frame_id);
message.set_width(depth_map.width); message.set_width(calibration.width);
message.set_height(depth_map.height); message.set_height(calibration.height);
message.set_source_unit(to_proto_depth_unit(source_unit)); message.set_distortion_model(std::string(calibration.distortion_model));
message.set_storage_unit(to_proto_storage_unit(encoded->storage_unit)); for (const double value : calibration.distortion) {
message.set_encoding(to_proto_depth_encoding(encoded->encoding)); message.add_d(value);
message.set_data( }
reinterpret_cast<const char *>(encoded->bytes.data()), if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
static_cast<int>(encoded->bytes.size())); return append;
}
if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
return append;
}
if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
return append;
}
std::string serialized{}; std::string serialized{};
if (!message.SerializeToString(&serialized)) { if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize cvmmap_streamer.DepthMap"); return std::unexpected("failed to serialize foxglove.CameraCalibration");
} }
mcap::Message record{}; mcap::Message record{};
record.channelId = state_->depth_channel_id; record.channelId = state_->calibration_channel_id;
record.sequence = state_->depth_sequence++; record.sequence = state_->calibration_sequence++;
record.logTime = depth_map.timestamp_ns; record.logTime = calibration.timestamp_ns;
record.publishTime = depth_map.timestamp_ns; record.publishTime = calibration.timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data()); record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size(); record.dataSize = serialized.size();
const auto write_status = state_->writer.write(record); const auto write_status = state_->writer.write(record);
if (!write_status.ok()) { if (!write_status.ok()) {
return std::unexpected("failed to write MCAP depth message: " + write_status.message); return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
}
return {};
}
std::expected<void, std::string> McapRecordSink::write_pose(const RawPoseView &pose) {
if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open");
}
if (state_->pose_topic.empty()) {
return std::unexpected("pose topic is empty");
}
if (state_->pose_channel_id == 0) {
mcap::Channel pose_channel(state_->pose_topic, "protobuf", state_->pose_schema_id);
state_->writer.addChannel(pose_channel);
state_->pose_channel_id = pose_channel.id;
}
foxglove::PoseInFrame message{};
*message.mutable_timestamp() = to_proto_timestamp(pose.timestamp_ns);
message.set_frame_id(std::string(pose.reference_frame_id));
auto *pose_message = message.mutable_pose();
auto *position = pose_message->mutable_position();
position->set_x(pose.position[0]);
position->set_y(pose.position[1]);
position->set_z(pose.position[2]);
auto *orientation = pose_message->mutable_orientation();
orientation->set_x(pose.orientation[0]);
orientation->set_y(pose.orientation[1]);
orientation->set_z(pose.orientation[2]);
orientation->set_w(pose.orientation[3]);
std::string serialized{};
if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize foxglove.PoseInFrame");
}
mcap::Message record{};
record.channelId = state_->pose_channel_id;
record.sequence = state_->pose_sequence++;
record.logTime = pose.timestamp_ns;
record.publishTime = pose.timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size();
const auto write_status = state_->writer.write(record);
if (!write_status.ok()) {
return std::unexpected("failed to write MCAP pose message: " + write_status.message);
} }
return {}; return {};
} }
+1 -1
View File
@@ -5,7 +5,7 @@
 #include "cvmmap_streamer/common.h"
 #include "cvmmap_streamer/record/mcap_record_sink.hpp"
-#include "foxglove/CompressedVideo.pb.h"
+#include "proto/foxglove/CompressedVideo.pb.h"
 #include <spdlog/spdlog.h>
+2 -2
View File
@@ -1,9 +1,9 @@
 #include <mcap/reader.hpp>
-#include "cvmmap_streamer/DepthMap.pb.h"
+#include "proto/cvmmap_streamer/DepthMap.pb.h"
 #include "cvmmap_streamer/common.h"
 #include "cvmmap_streamer/record/mcap_record_sink.hpp"
-#include "foxglove/CompressedVideo.pb.h"
+#include "proto/foxglove/CompressedVideo.pb.h"
 #include <rvl/rvl.hpp>
 #include <spdlog/spdlog.h>
+279
View File
@@ -0,0 +1,279 @@
#include <mcap/reader.hpp>
#include <rvl/rvl.hpp>
#include <spdlog/spdlog.h>
#include "cvmmap_streamer/common.h"
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
#include "proto/cvmmap_streamer/DepthMap.pb.h"
#include "proto/foxglove/CameraCalibration.pb.h"
#include "proto/foxglove/CompressedVideo.pb.h"
#include "proto/foxglove/PoseInFrame.pb.h"
#include <cmath>
#include <cstdint>
#include <filesystem>
#include <span>
#include <string>
#include <vector>
namespace {
    // Process exit codes reported by this tester binary.
    enum class TesterExitCode : int {
        Success = 0,
        CreateError = 2,
        WriteError = 3,
        OpenError = 4,
        VerificationError = 5,
    };

    // Narrows the strongly typed exit code to the int that main() returns.
    [[nodiscard]]
    constexpr int exit_code(const TesterExitCode code) {
        return static_cast<int>(code);
    }

    // True when lhs and rhs differ by at most `tolerance` (absolute error).
    [[nodiscard]]
    bool approx_equal(const double lhs, const double rhs, const double tolerance = 1e-9) {
        const double delta = std::fabs(lhs - rhs);
        return delta <= tolerance;
    }
} // namespace
// Self-test binary: records one message on each of the four topics (video,
// depth, calibration, pose) through McapRecordSink into a single MCAP file,
// then reads the file back with mcap::McapReader and verifies every message
// round-trips intact. Returns a TesterExitCode value.
int main(int argc, char **argv) {
    if (cvmmap_streamer::has_help_flag(argc, argv)) {
        cvmmap_streamer::print_help("mcap_pose_record_tester");
        return exit_code(TesterExitCode::Success);
    }
    // Optional argv[1] overrides the default output path in the temp directory.
    const std::filesystem::path output_path =
        argc > 1
            ? std::filesystem::path(argv[1])
            : std::filesystem::temp_directory_path() / "cvmmap_streamer_pose_record_test.mcap";
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    // Enable MCAP recording on all four topics; compression is disabled so the
    // test exercises the raw write path.
    cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
    config.record.mcap.enabled = true;
    config.record.mcap.path = output_path.string();
    config.record.mcap.topic = "/camera/video";
    config.record.mcap.depth_topic = "/camera/depth";
    config.record.mcap.calibration_topic = "/camera/calibration";
    config.record.mcap.pose_topic = "/camera/pose";
    config.record.mcap.frame_id = "camera";
    config.record.mcap.compression = cvmmap_streamer::McapCompression::None;
    cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
    stream_info.codec = cvmmap_streamer::CodecType::H264;
    auto sink = cvmmap_streamer::record::McapRecordSink::create(config, stream_info);
    if (!sink) {
        spdlog::error("failed to create MCAP sink: {}", sink.error());
        return exit_code(TesterExitCode::CreateError);
    }
    // Minimal Annex-B payload (4-byte start code + one NAL) stamped at t=100ns.
    cvmmap_streamer::encode::EncodedAccessUnit access_unit{};
    access_unit.codec = cvmmap_streamer::CodecType::H264;
    access_unit.source_timestamp_ns = 100;
    access_unit.stream_pts_ns = 100;
    access_unit.keyframe = false;
    access_unit.annexb_bytes = {0x00, 0x00, 0x00, 0x01, 0x09, 0x10};
    if (auto write = sink->write_access_unit(access_unit); !write) {
        spdlog::error("failed to write video access unit: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // 2x2 depth plane in row-major order; the 0 entry exercises the
    // "no-measurement" pixel case through the RVL round trip below.
    const std::vector<std::uint16_t> depth_pixels{
        1000, 2000,
        0, 1500,
    };
    if (auto write = sink->write_depth_map_u16(cvmmap_streamer::record::RawDepthMapU16View{
            .timestamp_ns = 101,
            .width = 2,
            .height = 2,
            .pixels = depth_pixels,
        }); !write) {
        spdlog::error("failed to write depth map: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // Synthetic pinhole calibration (no distortion, identity rectification).
    const std::vector<double> distortion{0.0, 0.0, 0.0, 0.0, 0.0};
    const std::vector<double> intrinsic_matrix{
        500.0, 0.0, 320.0,
        0.0, 500.0, 240.0,
        0.0, 0.0, 1.0,
    };
    const std::vector<double> rectification_matrix{
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    };
    const std::vector<double> projection_matrix{
        500.0, 0.0, 320.0, 0.0,
        0.0, 500.0, 240.0, 0.0,
        0.0, 0.0, 1.0, 0.0,
    };
    if (auto write = sink->write_camera_calibration(cvmmap_streamer::record::RawCameraCalibrationView{
            .timestamp_ns = 102,
            .width = 640,
            .height = 480,
            .distortion_model = "plumb_bob",
            .distortion = distortion,
            .intrinsic_matrix = intrinsic_matrix,
            .rectification_matrix = rectification_matrix,
            .projection_matrix = projection_matrix,
        }); !write) {
        spdlog::error("failed to write calibration: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // One pose with distinct component values so axis mix-ups are detectable.
    if (auto write = sink->write_pose(cvmmap_streamer::record::RawPoseView{
            .timestamp_ns = 103,
            .reference_frame_id = "world",
            .position = {1.0, 2.0, 3.0},
            .orientation = {0.1, 0.2, 0.3, 0.9},
        }); !write) {
        spdlog::error("failed to write pose: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // Finalize the file before re-opening it for verification.
    sink->close();
    mcap::McapReader reader{};
    const auto open_status = reader.open(output_path.string());
    if (!open_status.ok()) {
        spdlog::error("failed to open MCAP file '{}': {}", output_path.string(), open_status.message);
        return exit_code(TesterExitCode::OpenError);
    }
    // Per-schema message counters; each must end at exactly 1.
    std::uint64_t video_messages{0};
    std::uint64_t depth_messages{0};
    std::uint64_t calibration_messages{0};
    std::uint64_t pose_messages{0};
    auto messages = reader.readMessages();
    for (auto it = messages.begin(); it != messages.end(); ++it) {
        if (it->schema == nullptr || it->channel == nullptr) {
            spdlog::error("MCAP message missing schema or channel");
            reader.close();
            return exit_code(TesterExitCode::VerificationError);
        }
        // Every topic in this file is expected to be protobuf-encoded.
        if (it->schema->encoding != "protobuf" || it->channel->messageEncoding != "protobuf") {
            spdlog::error("unexpected message encoding in MCAP");
            reader.close();
            return exit_code(TesterExitCode::VerificationError);
        }
        // Dispatch on the fully qualified protobuf schema name.
        if (it->schema->name == "foxglove.CompressedVideo") {
            foxglove::CompressedVideo video{};
            if (!video.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse foxglove.CompressedVideo");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            if (it->channel->topic != "/camera/video" || video.frame_id() != "camera" || video.data().empty()) {
                spdlog::error("video message verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            video_messages += 1;
            continue;
        }
        if (it->schema->name == "cvmmap_streamer.DepthMap") {
            cvmmap_streamer::DepthMap depth{};
            if (!depth.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse cvmmap_streamer.DepthMap");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            if (it->channel->topic != "/camera/depth" ||
                depth.frame_id() != "camera" ||
                depth.encoding() != cvmmap_streamer::DepthMap::RVL_U16_LOSSLESS) {
                spdlog::error("depth message metadata verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            // Decompress the RVL payload and compare pixel-for-pixel with the
            // plane written above — proves the encode/decode pair is lossless.
            const auto decoded = rvl::decompress_image(std::span<const std::uint8_t>(
                reinterpret_cast<const std::uint8_t *>(depth.data().data()),
                depth.data().size()));
            if (decoded.pixels.size() != depth_pixels.size() ||
                decoded.pixels[0] != 1000 ||
                decoded.pixels[1] != 2000 ||
                decoded.pixels[2] != 0 ||
                decoded.pixels[3] != 1500) {
                spdlog::error("depth RVL round-trip verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            depth_messages += 1;
            continue;
        }
        if (it->schema->name == "foxglove.CameraCalibration") {
            foxglove::CameraCalibration calibration{};
            if (!calibration.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse foxglove.CameraCalibration");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            // Spot-check matrix sizes plus representative K and P entries.
            if (it->channel->topic != "/camera/calibration" ||
                calibration.frame_id() != "camera" ||
                calibration.width() != 640 ||
                calibration.height() != 480 ||
                calibration.distortion_model() != "plumb_bob" ||
                calibration.d_size() != 5 ||
                calibration.k_size() != 9 ||
                calibration.r_size() != 9 ||
                calibration.p_size() != 12 ||
                !approx_equal(calibration.k(0), 500.0) ||
                !approx_equal(calibration.k(2), 320.0) ||
                !approx_equal(calibration.p(5), 500.0)) {
                spdlog::error("calibration message verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            calibration_messages += 1;
            continue;
        }
        if (it->schema->name == "foxglove.PoseInFrame") {
            foxglove::PoseInFrame pose{};
            if (!pose.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse foxglove.PoseInFrame");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            // All seven pose components must match what was written.
            if (it->channel->topic != "/camera/pose" ||
                pose.frame_id() != "world" ||
                !approx_equal(pose.pose().position().x(), 1.0) ||
                !approx_equal(pose.pose().position().y(), 2.0) ||
                !approx_equal(pose.pose().position().z(), 3.0) ||
                !approx_equal(pose.pose().orientation().x(), 0.1) ||
                !approx_equal(pose.pose().orientation().y(), 0.2) ||
                !approx_equal(pose.pose().orientation().z(), 0.3) ||
                !approx_equal(pose.pose().orientation().w(), 0.9)) {
                spdlog::error("pose message verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            pose_messages += 1;
        }
    }
    reader.close();
    // Exactly one message of each kind, no more and no fewer.
    if (video_messages != 1 || depth_messages != 1 || calibration_messages != 1 || pose_messages != 1) {
        spdlog::error(
            "unexpected message counts: video={} depth={} calibration={} pose={}",
            video_messages,
            depth_messages,
            calibration_messages,
            pose_messages);
        return exit_code(TesterExitCode::VerificationError);
    }
    spdlog::info(
        "validated same-file MCAP video+depth+calibration+pose recording at '{}'",
        output_path.string());
    return exit_code(TesterExitCode::Success);
}
+645
View File
@@ -0,0 +1,645 @@
#include <CLI/CLI.hpp>
#include <sl/Camera.hpp>
#include <spdlog/spdlog.h>
#include "cvmmap_streamer/config/runtime_config.hpp"
#include "cvmmap_streamer/core/status.hpp"
#include "cvmmap_streamer/encode/encoder_backend.hpp"
#include "cvmmap_streamer/ipc/contracts.hpp"
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <expected>
#include <filesystem>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace {
    // Process exit codes for the conversion tool.
    enum class ToolExitCode : int {
        Success = 0,
        UsageError = 2,
        RuntimeError = 3,
    };

    // Every command-line option, pre-populated with its default value.
    struct CliOptions {
        std::string input_path{};
        std::string output_path{};
        std::string codec{"h264"};
        std::string encoder_device{"auto"};
        std::string mcap_compression{"zstd"};
        std::string depth_mode{"neural"};
        bool with_pose{false};
        std::uint32_t start_frame{0};
        std::uint32_t end_frame{0};
        bool has_end_frame{false};
        std::string frame_id{"camera"};
        std::string video_topic{"/camera/video"};
        std::string depth_topic{"/camera/depth"};
        std::string calibration_topic{"/camera/calibration"};
        std::string pose_topic{"/camera/pose"};
    };

    // Narrows the strongly typed exit code to the int main() must return.
    [[nodiscard]]
    constexpr int exit_code(const ToolExitCode code) {
        return static_cast<int>(code);
    }

    // Converts an sl::String to std::string; a null c_str() becomes "".
    [[nodiscard]]
    std::string zed_string(const sl::String &value) {
        const char *text = value.c_str();
        if (text == nullptr) {
            return std::string("");
        }
        return std::string(text);
    }

    // Human-readable name of a ZED error code.
    [[nodiscard]]
    std::string zed_status_string(const sl::ERROR_CODE code) {
        return zed_string(sl::toString(code));
    }

    // Human-readable name of a positional-tracking state.
    [[nodiscard]]
    std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state) {
        return zed_string(sl::toString(state));
    }

    // Maps a --codec string onto the streamer codec enum.
    [[nodiscard]]
    std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) {
        using cvmmap_streamer::CodecType;
        if (raw == "h265") {
            return CodecType::H265;
        }
        if (raw == "h264") {
            return CodecType::H264;
        }
        return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
    }

    // Maps an --encoder-device string onto the encoder device enum.
    [[nodiscard]]
    std::expected<cvmmap_streamer::EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
        using cvmmap_streamer::EncoderDeviceType;
        if (raw == "software") {
            return EncoderDeviceType::Software;
        }
        if (raw == "nvidia") {
            return EncoderDeviceType::Nvidia;
        }
        if (raw == "auto") {
            return EncoderDeviceType::Auto;
        }
        return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
    }

    // Maps a --mcap-compression string onto the chunk-compression enum.
    [[nodiscard]]
    std::expected<cvmmap_streamer::McapCompression, std::string> parse_mcap_compression(const std::string_view raw) {
        using cvmmap_streamer::McapCompression;
        if (raw == "zstd") {
            return McapCompression::Zstd;
        }
        if (raw == "lz4") {
            return McapCompression::Lz4;
        }
        if (raw == "none") {
            return McapCompression::None;
        }
        return std::unexpected("invalid mcap compression: '" + std::string(raw) + "' (expected: none|lz4|zstd)");
    }

    // Maps a --depth-mode string onto the ZED SDK depth-mode enum.
    [[nodiscard]]
    std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) {
        if (raw == "ultra") {
            return sl::DEPTH_MODE::ULTRA;
        }
        if (raw == "performance") {
            return sl::DEPTH_MODE::PERFORMANCE;
        }
        if (raw == "quality") {
            return sl::DEPTH_MODE::QUALITY;
        }
        if (raw == "neural") {
            return sl::DEPTH_MODE::NEURAL;
        }
        return std::unexpected("invalid depth mode: '" + std::string(raw) + "' (expected: neural|quality|performance|ultra)");
    }

    // Nanoseconds between frames at the given fps. A non-positive (or NaN)
    // fps falls back to the ~30 fps default period.
    [[nodiscard]]
    std::uint64_t frame_period_ns(const float fps) {
        constexpr std::uint64_t default_period_ns = 33'333'333ull;
        if (!(fps > 0.0f)) {
            return default_period_ns;
        }
        const double period = 1'000'000'000.0 / static_cast<double>(fps);
        return static_cast<std::uint64_t>(std::llround(period));
    }

    // Checks that `mat` is a non-empty 3-channel U8 image with a valid CPU
    // buffer; `label` names the image in any error message.
    [[nodiscard]]
    std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
        if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
            return std::unexpected(std::string(label) + " must be U8_C3");
        }
        const bool has_area = mat.getWidth() != 0 && mat.getHeight() != 0;
        if (!has_area) {
            return std::unexpected(std::string(label) + " dimensions must be non-zero");
        }
        if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
            return std::unexpected(std::string(label) + " CPU buffer is null");
        }
        return {};
    }

    // Checks that `mat` is a non-empty single-channel U16 plane with a valid
    // CPU buffer; `label` names the plane in any error message.
    [[nodiscard]]
    std::expected<void, std::string> validate_u16c1_mat(const sl::Mat &mat, const std::string_view label) {
        if (mat.getDataType() != sl::MAT_TYPE::U16_C1) {
            return std::unexpected(std::string(label) + " must be U16_C1");
        }
        const bool has_area = mat.getWidth() != 0 && mat.getHeight() != 0;
        if (!has_area) {
            return std::unexpected(std::string(label) + " dimensions must be non-zero");
        }
        if (mat.getPtr<sl::ushort1>(sl::MEM::CPU) == nullptr) {
            return std::unexpected(std::string(label) + " CPU buffer is null");
        }
        return {};
    }

    // Copies a possibly row-padded U16 plane into a tightly packed buffer,
    // dropping any per-row stride padding.
    [[nodiscard]]
    std::vector<std::uint16_t> copy_compact_u16_plane(const sl::Mat &mat) {
        const auto cols = static_cast<std::size_t>(mat.getWidth());
        const auto rows = static_cast<std::size_t>(mat.getHeight());
        const auto packed_row_bytes = cols * sizeof(std::uint16_t);
        const auto src_stride = mat.getStepBytes(sl::MEM::CPU);
        const auto *src_row = mat.getPtr<sl::uchar1>(sl::MEM::CPU);
        std::vector<std::uint16_t> packed(cols * rows, 0);
        auto *dst_row = reinterpret_cast<std::uint8_t *>(packed.data());
        for (std::size_t row = 0; row < rows; ++row) {
            std::memcpy(dst_row, src_row, packed_row_bytes);
            dst_row += packed_row_bytes;
            src_row += src_stride;
        }
        return packed;
    }

    // Writes every encoded access unit to the sink, stopping at the first failure.
    [[nodiscard]]
    std::expected<void, std::string> write_access_units(
        cvmmap_streamer::record::McapRecordSink &sink,
        const std::vector<cvmmap_streamer::encode::EncodedAccessUnit> &access_units) {
        for (const auto &unit : access_units) {
            const auto write = sink.write_access_unit(unit);
            if (!write) {
                return std::unexpected(write.error());
            }
        }
        return {};
    }

    // Flushes the encoder and forwards any remaining access units to the sink.
    [[nodiscard]]
    std::expected<void, std::string> flush_and_write(
        cvmmap_streamer::record::McapRecordSink &sink,
        cvmmap_streamer::encode::EncoderBackend &backend) {
        const auto flushed = backend->flush();
        if (!flushed) {
            return std::unexpected(cvmmap_streamer::format_error(flushed.error()));
        }
        return write_access_units(sink, *flushed);
    }
} // namespace
// Converts a recorded ZED SVO/SVO2 file into an MCAP file containing
// compressed video, RVL depth maps, one camera-calibration message, and
// (optionally) positional-tracking poses. Exit codes follow ToolExitCode.
int main(int argc, char **argv) {
    // --- CLI definition and parsing -------------------------------------
    CliOptions options{};
    CLI::App app{"zed_svo_to_mcap - convert ZED SVO/SVO2 playback to MCAP"};
    app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required();
    app.add_option("--output", options.output_path, "Output MCAP file")->required();
    app.add_option("--codec", options.codec, "Video codec (h264|h265)")
        ->check(CLI::IsMember({"h264", "h265"}));
    app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
        ->check(CLI::IsMember({"auto", "nvidia", "software"}));
    app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available");
    app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)")
        ->check(CLI::IsMember({"none", "lz4", "zstd"}));
    app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)")
        ->check(CLI::IsMember({"neural", "quality", "performance", "ultra"}));
    app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
        ->check(CLI::NonNegativeNumber);
    // Keep the option handle so we can tell "not given" apart from 0.
    auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
        ->check(CLI::NonNegativeNumber);
    app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics");
    app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");
    app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap");
    app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration");
    app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame");
    try {
        app.parse(argc, argv);
    } catch (const CLI::ParseError &error) {
        return app.exit(error);
    }
    options.has_end_frame = end_frame_option->count() > 0;
    // --- Validate option values into typed forms ------------------------
    auto codec = parse_codec(options.codec);
    if (!codec) {
        spdlog::error("{}", codec.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto encoder_device = parse_encoder_device(options.encoder_device);
    if (!encoder_device) {
        spdlog::error("{}", encoder_device.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto compression = parse_mcap_compression(options.mcap_compression);
    if (!compression) {
        spdlog::error("{}", compression.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto depth_mode = parse_depth_mode(options.depth_mode);
    if (!depth_mode) {
        spdlog::error("{}", depth_mode.error());
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.output_path.empty()) {
        spdlog::error("output path must not be empty");
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.has_end_frame && options.end_frame < options.start_frame) {
        spdlog::error(
            "invalid frame range: start-frame={} end-frame={}",
            options.start_frame,
            options.end_frame);
        return exit_code(ToolExitCode::UsageError);
    }
    const std::filesystem::path output_path{options.output_path};
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    // --- Open the SVO via the ZED SDK -----------------------------------
    sl::Camera camera{};
    bool tracking_enabled{false};
    // Shared teardown used on every exit path below; disables tracking
    // before closing the camera.
    auto close_camera = [&]() {
        if (tracking_enabled) {
            camera.disablePositionalTracking();
            tracking_enabled = false;
        }
        if (camera.isOpened()) {
            camera.close();
        }
    };
    sl::InitParameters init{};
    init.input.setFromSVOFile(options.input_path.c_str());
    // Play the file as fast as it can be processed, not at capture rate.
    init.svo_real_time_mode = false;
    init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
    init.coordinate_units = sl::UNIT::METER;
    init.depth_mode = *depth_mode;
    init.sdk_verbose = false;
    const auto open_status = camera.open(init);
    if (open_status != sl::ERROR_CODE::SUCCESS) {
        spdlog::error(
            "failed to open SVO '{}': {}",
            options.input_path,
            zed_status_string(open_status));
        return exit_code(ToolExitCode::RuntimeError);
    }
    // --- Validate the requested frame range against the file ------------
    const auto total_frames = camera.getSVONumberOfFrames();
    if (total_frames <= 0) {
        close_camera();
        spdlog::error("input SVO has no frames");
        return exit_code(ToolExitCode::RuntimeError);
    }
    if (options.start_frame >= static_cast<std::uint32_t>(total_frames)) {
        close_camera();
        spdlog::error(
            "start-frame {} is out of range for {} frames",
            options.start_frame,
            total_frames);
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.has_end_frame && options.end_frame >= static_cast<std::uint32_t>(total_frames)) {
        close_camera();
        spdlog::error(
            "end-frame {} is out of range for {} frames",
            options.end_frame,
            total_frames);
        return exit_code(ToolExitCode::UsageError);
    }
    camera.setSVOPosition(static_cast<int>(options.start_frame));
    // Tracking failure is non-fatal: fall back to video/depth-only output.
    if (options.with_pose) {
        sl::PositionalTrackingParameters tracking_parameters{};
        const auto tracking_status = camera.enablePositionalTracking(tracking_parameters);
        if (tracking_status == sl::ERROR_CODE::SUCCESS) {
            tracking_enabled = true;
        } else {
            spdlog::warn(
                "positional tracking unavailable for '{}': {}. continuing without pose output",
                options.input_path,
                zed_status_string(tracking_status));
        }
    }
    // --- Read camera geometry reported by the SDK -----------------------
    const auto camera_info = camera.getCameraInformation();
    const auto &camera_config = camera_info.camera_configuration;
    const auto &left_camera = camera_config.calibration_parameters.left_cam;
    const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
    const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
    if (width == 0 || height == 0) {
        close_camera();
        spdlog::error("camera resolution reported by the ZED SDK is invalid");
        return exit_code(ToolExitCode::RuntimeError);
    }
    // --- Build encoder + MCAP sink configuration ------------------------
    cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
    config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
    config.encoder.device = *encoder_device;
    config.encoder.codec = *codec;
    config.encoder.gop = 30;
    config.encoder.b_frames = 0;
    config.record.mcap.enabled = true;
    config.record.mcap.path = output_path.string();
    config.record.mcap.topic = options.video_topic;
    config.record.mcap.depth_topic = options.depth_topic;
    config.record.mcap.calibration_topic = options.calibration_topic;
    config.record.mcap.pose_topic = options.pose_topic;
    config.record.mcap.frame_id = options.frame_id;
    config.record.mcap.compression = *compression;
    // The encoder is fed tightly described BGR frames of the SVO resolution.
    cvmmap_streamer::ipc::FrameInfo frame_info{
        .width = static_cast<std::uint16_t>(width),
        .height = static_cast<std::uint16_t>(height),
        .channels = 3,
        .depth = cvmmap_streamer::ipc::Depth::U8,
        .pixel_format = cvmmap_streamer::ipc::PixelFormat::BGR,
        .buffer_size = width * height * 3u,
    };
    auto backend_result = cvmmap_streamer::encode::make_encoder_backend(config);
    if (!backend_result) {
        close_camera();
        spdlog::error("failed to create encoder backend: {}", cvmmap_streamer::format_error(backend_result.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto backend = std::move(*backend_result);
    if (auto init_status = backend->init(config, frame_info); !init_status) {
        close_camera();
        spdlog::error("failed to initialize encoder backend: {}", cvmmap_streamer::format_error(init_status.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto stream_info = backend->stream_info();
    if (!stream_info) {
        backend->shutdown();
        close_camera();
        spdlog::error("encoder backend did not provide stream info: {}", cvmmap_streamer::format_error(stream_info.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto sink = cvmmap_streamer::record::McapRecordSink::create(config, *stream_info);
    if (!sink) {
        backend->shutdown();
        close_camera();
        spdlog::error("failed to create MCAP sink: {}", sink.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    // --- Calibration derived from the SDK's rectified left camera -------
    // Distortion is all-zero because the SDK emits rectified images;
    // rectification is identity for the same reason.
    const std::array<double, 5> distortion{
        0.0, 0.0, 0.0, 0.0, 0.0,
    };
    const std::array<double, 9> intrinsic_matrix{
        static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
        0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
        0.0, 0.0, 1.0,
    };
    const std::array<double, 9> rectification_matrix{
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    };
    const std::array<double, 12> projection_matrix{
        static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
        0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
        0.0, 0.0, 1.0, 0.0,
    };
    // --- Per-frame export loop ------------------------------------------
    sl::RuntimeParameters runtime_parameters{};
    sl::Mat left_frame{};
    sl::Mat depth_frame{};
    sl::Pose pose{};
    bool calibration_written{false};
    std::uint64_t emitted_frames{0};
    std::optional<std::uint64_t> last_timestamp_ns{};
    std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
    const auto last_frame = options.has_end_frame
        ? options.end_frame
        : static_cast<std::uint32_t>(total_frames - 1);
    const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps);
    while (options.start_frame + emitted_frames <= last_frame) {
        const auto grab_status = camera.grab(runtime_parameters);
        // End-of-file before the requested range is exhausted is not an error.
        if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
            break;
        }
        if (grab_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        // NOTE(review): assumes the installed SDK exposes a 3-channel BGR
        // left view (VIEW::LEFT_BGR) — confirm against the SDK version in use.
        const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
        if (image_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("{}", valid.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Depth in millimetres as U16 — presumably matches the sink's
        // RVL U16 depth encoding; verify against DepthMap semantics.
        const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU);
        if (depth_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to retrieve depth map: {}", zed_status_string(depth_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto valid = validate_u16c1_mat(depth_frame, "depth map"); !valid) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("{}", valid.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Timestamp handling: fall back to a synthetic clock if the SVO has
        // no timestamp, and force strict monotonicity either way.
        auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
        if (timestamp_ns == 0) {
            timestamp_ns = emitted_frames * nominal_frame_period_ns;
        }
        if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) {
            timestamp_ns = *last_timestamp_ns + 1;
        }
        last_timestamp_ns = timestamp_ns;
        // The span covers stride * height; the encoder is told the stride so
        // it can skip any per-row padding.
        const auto video_step_bytes = left_frame.getStepBytes(sl::MEM::CPU);
        const auto video_bytes = std::span<const std::uint8_t>(
            left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
            video_step_bytes * left_frame.getHeight());
        cvmmap_streamer::encode::RawVideoFrame raw_video{
            .info = frame_info,
            .source_timestamp_ns = timestamp_ns,
            .row_stride_bytes = video_step_bytes,
            .bytes = video_bytes,
        };
        if (auto push = backend->push_frame(raw_video); !push) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to encode frame: {}", cvmmap_streamer::format_error(push.error()));
            return exit_code(ToolExitCode::RuntimeError);
        }
        auto drained = backend->drain();
        if (!drained) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to drain encoded access units: {}", cvmmap_streamer::format_error(drained.error()));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto write = write_access_units(*sink, *drained); !write) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to write video access unit: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Calibration is static; write it once, stamped with the first
        // frame's timestamp.
        if (!calibration_written) {
            cvmmap_streamer::record::RawCameraCalibrationView calibration{
                .timestamp_ns = timestamp_ns,
                .width = width,
                .height = height,
                .distortion_model = "plumb_bob",
                .distortion = distortion,
                .intrinsic_matrix = intrinsic_matrix,
                .rectification_matrix = rectification_matrix,
                .projection_matrix = projection_matrix,
            };
            if (auto write = sink->write_camera_calibration(calibration); !write) {
                sink->close();
                backend->shutdown();
                close_camera();
                spdlog::error("failed to write calibration: {}", write.error());
                return exit_code(ToolExitCode::RuntimeError);
            }
            calibration_written = true;
        }
        // The depth sink expects a tightly packed plane: pass the SDK buffer
        // through directly when it is already packed, otherwise copy row by
        // row to strip the stride padding.
        const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU);
        const auto packed_depth_bytes = static_cast<std::size_t>(width) * sizeof(std::uint16_t);
        if (depth_step_bytes < packed_depth_bytes) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("depth stride {} is smaller than packed row size {}", depth_step_bytes, packed_depth_bytes);
            return exit_code(ToolExitCode::RuntimeError);
        }
        std::optional<std::vector<std::uint16_t>> compact_depth{};
        std::span<const std::uint16_t> depth_pixels{};
        if (depth_step_bytes == packed_depth_bytes) {
            depth_pixels = std::span<const std::uint16_t>(
                depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
                static_cast<std::size_t>(width) * static_cast<std::size_t>(height));
        } else {
            compact_depth = copy_compact_u16_plane(depth_frame);
            depth_pixels = *compact_depth;
        }
        cvmmap_streamer::record::RawDepthMapU16View depth_map{
            .timestamp_ns = timestamp_ns,
            .width = width,
            .height = height,
            .pixels = depth_pixels,
        };
        if (auto write = sink->write_depth_map_u16(depth_map); !write) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to write depth map: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Poses are emitted only while tracking reports OK; state changes
        // are logged once per transition to avoid per-frame warning spam.
        if (tracking_enabled) {
            const auto tracking_state = camera.getPosition(pose, sl::REFERENCE_FRAME::WORLD);
            if (!last_tracking_state || *last_tracking_state != tracking_state) {
                last_tracking_state = tracking_state;
                if (tracking_state != sl::POSITIONAL_TRACKING_STATE::OK) {
                    spdlog::warn(
                        "pose tracking state changed to {} at frame {}",
                        zed_tracking_state_string(tracking_state),
                        options.start_frame + emitted_frames);
                }
            }
            if (tracking_state == sl::POSITIONAL_TRACKING_STATE::OK) {
                const auto translation = pose.getTranslation();
                const auto orientation = pose.getOrientation();
                cvmmap_streamer::record::RawPoseView pose_view{
                    .timestamp_ns = timestamp_ns,
                    .reference_frame_id = "world",
                    .position = {
                        static_cast<double>(translation.x),
                        static_cast<double>(translation.y),
                        static_cast<double>(translation.z),
                    },
                    .orientation = {
                        static_cast<double>(orientation.x),
                        static_cast<double>(orientation.y),
                        static_cast<double>(orientation.z),
                        static_cast<double>(orientation.w),
                    },
                };
                if (auto write = sink->write_pose(pose_view); !write) {
                    sink->close();
                    backend->shutdown();
                    close_camera();
                    spdlog::error("failed to write pose: {}", write.error());
                    return exit_code(ToolExitCode::RuntimeError);
                }
            }
        }
        emitted_frames += 1;
    }
    // --- Drain the encoder's queued output and tear everything down -----
    if (auto flushed = flush_and_write(*sink, backend); !flushed) {
        sink->close();
        backend->shutdown();
        close_camera();
        spdlog::error("failed to finalize encoded video: {}", flushed.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    sink->close();
    backend->shutdown();
    close_camera();
    spdlog::info(
        "wrote {} frame(s) from '{}' to '{}'",
        emitted_frames,
        options.input_path,
        output_path.string());
    return exit_code(ToolExitCode::Success);
}