From 2f74a9561dcc790e31a53ed4f015c18eb609668a Mon Sep 17 00:00:00 2001
From: crosstyan <crosstyan@outlook.com>
Date: Fri, 20 Mar 2026 12:04:37 +0000
Subject: [PATCH] feat(zed): export depth at neural optimal resolution

Restrict ZED depth export to neural modes and default offline MCAP conversion to the SDK-selected optimal depth size instead of camera resolution.

Add a configurable --depth-size option, propagate the actual returned depth dimensions into DepthMap metadata, and emit a dedicated depth calibration topic when depth resolution differs from video.

Update the batch and recording helper scripts to use the new neural-only depth mode surface and pass through depth sizing.

Verification:
- cmake --build build --target zed_svo_to_mcap mcap_multi_record_tester mcap_pose_record_tester -j4
- build/bin/mcap_multi_record_tester
- build/bin/mcap_pose_record_tester
- build/bin/zed_svo_to_mcap --input /workspaces/data/kindergarten/jump/experiment/2/2026-03-18T11-27-15/2026-03-18T11-27-15_zed4.svo2 --output /tmp/zed4_neural_optimal_test.mcap --codec h264 --encoder-device software --mcap-compression zstd --depth-mode neural --depth-size optimal --start-frame 0 --end-frame 9
---
 .../cvmmap_streamer/config/runtime_config.hpp |   1 +
 .../record/mcap_record_sink.hpp               |   9 +
 scripts/zed_batch_svo_to_mcap.py              |  15 +-
 scripts/zed_recording_mcap_tool.py            |   9 +-
 src/config/runtime_config.cpp                 |  15 +
 src/record/mcap_record_sink.cpp               | 227 ++++++----
 src/testers/mcap_multi_record_tester.cpp      |  17 +
 src/tools/zed_svo_to_mcap.cpp                 | 394 +++++++++++++-----
 8 files changed, 497 insertions(+), 190 deletions(-)

diff --git a/include/cvmmap_streamer/config/runtime_config.hpp b/include/cvmmap_streamer/config/runtime_config.hpp
index eedd3ae..bc5486d 100644
--- a/include/cvmmap_streamer/config/runtime_config.hpp
+++ b/include/cvmmap_streamer/config/runtime_config.hpp
@@ -86,6 +86,7 @@ struct McapRecordConfig {
 	std::string topic{"/camera/video"};
 	std::string depth_topic{"/camera/depth"};
 	std::string calibration_topic{"/camera/calibration"};
+	std::string depth_calibration_topic{"/camera/depth_calibration"};
 	std::string pose_topic{"/camera/pose"};
 	std::string body_topic{"/camera/body"};
 	std::string frame_id{"camera"};
diff --git a/include/cvmmap_streamer/record/mcap_record_sink.hpp b/include/cvmmap_streamer/record/mcap_record_sink.hpp
index a8720f9..cdcd31c 100644
--- a/include/cvmmap_streamer/record/mcap_record_sink.hpp
+++ b/include/cvmmap_streamer/record/mcap_record_sink.hpp
@@ -61,6 +61,7 @@ struct McapRecordStreamConfig {
 	std::string topic{"/camera/video"};
 	std::string depth_topic{"/camera/depth"};
 	std::string calibration_topic{"/camera/calibration"};
+	std::string depth_calibration_topic{"/camera/depth_calibration"};
 	std::string pose_topic{"/camera/pose"};
 	std::string body_topic{"/camera/body"};
 	std::string frame_id{"camera"};
@@ -97,6 +98,9 @@ public:
 	[[nodiscard]]
 	std::expected<void, std::string> write_camera_calibration(const RawCameraCalibrationView &calibration);
 
+	[[nodiscard]]
+	std::expected<void, std::string> write_depth_camera_calibration(const RawCameraCalibrationView &calibration);
+
 	[[nodiscard]]
 	std::expected<void, std::string> write_pose(const RawPoseView &pose);
 
@@ -165,6 +169,11 @@ public:
 		StreamId stream_id,
 		const RawCameraCalibrationView &calibration);
 
+	[[nodiscard]]
+	std::expected<void, std::string> write_depth_camera_calibration(
+		StreamId stream_id,
+		const RawCameraCalibrationView &calibration);
+
 	[[nodiscard]]
 	std::expected<void, std::string> write_pose(
 		StreamId stream_id,
diff --git a/scripts/zed_batch_svo_to_mcap.py b/scripts/zed_batch_svo_to_mcap.py
index 1d3db1d..83fba99 100644
--- a/scripts/zed_batch_svo_to_mcap.py
+++ b/scripts/zed_batch_svo_to_mcap.py
@@ -34,6 +34,7 @@ class BatchConfig:
     encoder_device: str
     mcap_compression: str
     depth_mode: str
+    depth_size: str
     with_pose: bool
     pose_config: Path | None
     world_frame_id: str | None
@@ -287,6 +288,8 @@ def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]:
         config.mcap_compression,
         "--depth-mode",
         config.depth_mode,
+        "--depth-size",
+        config.depth_size,
     ]
     if config.with_pose:
         command.append("--with-pose")
@@ -564,8 +567,14 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
 )
 @click.option(
     "--depth-mode",
-    type=click.Choice(("neural", "quality", "performance", "ultra")),
-    default="quality",
+    type=click.Choice(("neural_light", "neural", "neural_plus")),
+    default="neural",
+    show_default=True,
+)
+@click.option(
+    "--depth-size",
+    type=str,
+    default="optimal",
     show_default=True,
 )
 @click.option("--with-pose", is_flag=True, help="Enable per-camera positional tracking export when available.")
@@ -615,6 +624,7 @@ def main(
     encoder_device: str,
     mcap_compression: str,
     depth_mode: str,
+    depth_size: str,
     with_pose: bool,
     pose_config: Path | None,
     world_frame_id: str | None,
@@ -638,6 +648,7 @@ def main(
         encoder_device=encoder_device,
         mcap_compression=mcap_compression,
         depth_mode=depth_mode,
+        depth_size=depth_size,
         with_pose=with_pose,
         pose_config=pose_config.expanduser().resolve() if pose_config is not None else None,
         world_frame_id=world_frame_id,
diff --git a/scripts/zed_recording_mcap_tool.py b/scripts/zed_recording_mcap_tool.py
index 930887b..1670d41 100755
--- a/scripts/zed_recording_mcap_tool.py
+++ b/scripts/zed_recording_mcap_tool.py
@@ -66,10 +66,15 @@ def parse_args() -> argparse.Namespace:
     )
     parser.add_argument(
         "--depth-mode",
-        choices=("neural", "quality", "performance", "ultra"),
+        choices=("neural_light", "neural", "neural_plus"),
         default="neural",
         help="Depth mode passed to zed_svo_to_mcap",
     )
+    parser.add_argument(
+        "--depth-size",
+        default="optimal",
+        help="Depth size passed to zed_svo_to_mcap (optimal|native|<width>x<height>)",
+    )
     parser.add_argument("--start-frame", type=int, default=0, help="First SVO frame to convert")
     parser.add_argument("--end-frame", type=int, help="Last SVO frame to convert")
     parser.add_argument(
@@ -289,6 +294,8 @@ def convert_svo(
         args.mcap_compression,
         "--depth-mode",
         args.depth_mode,
+        "--depth-size",
+        args.depth_size,
         "--start-frame",
         str(args.start_frame),
     ]
diff --git a/src/config/runtime_config.cpp b/src/config/runtime_config.cpp
index 7603752..6324c23 100644
--- a/src/config/runtime_config.cpp
+++ b/src/config/runtime_config.cpp
@@ -369,6 +369,10 @@ std::expected<void, std::string> apply_toml_file(RuntimeConfig &config, const st
 		config.record.mcap.enabled = true;
 		config.record.mcap.calibration_topic = *value;
 	}
+	if (auto value = toml_value<std::string>(table, "record.mcap.depth_calibration_topic")) {
+		config.record.mcap.enabled = true;
+		config.record.mcap.depth_calibration_topic = *value;
+	}
 	if (auto value = toml_value<std::string>(table, "record.mcap.pose_topic")) {
 		config.record.mcap.enabled = true;
 		config.record.mcap.pose_topic = *value;
@@ -581,6 +585,7 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
 	std::string mcap_topic_raw{};
 	std::string mcap_depth_topic_raw{};
 	std::string mcap_calibration_topic_raw{};
+	std::string mcap_depth_calibration_topic_raw{};
 	std::string mcap_pose_topic_raw{};
 	std::string mcap_body_topic_raw{};
 	std::string mcap_frame_id_raw{};
@@ -625,6 +630,7 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
 	app.add_option("--mcap-topic", mcap_topic_raw);
 	app.add_option("--mcap-depth-topic", mcap_depth_topic_raw);
 	app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw);
+	app.add_option("--mcap-depth-calibration-topic", mcap_depth_calibration_topic_raw);
 	app.add_option("--mcap-pose-topic", mcap_pose_topic_raw);
 	app.add_option("--mcap-body-topic", mcap_body_topic_raw);
 	app.add_option("--mcap-frame-id", mcap_frame_id_raw);
@@ -746,6 +752,10 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
 		config.record.mcap.enabled = true;
 		config.record.mcap.calibration_topic = mcap_calibration_topic_raw;
 	}
+	if (!mcap_depth_calibration_topic_raw.empty()) {
+		config.record.mcap.enabled = true;
+		config.record.mcap.depth_calibration_topic = mcap_depth_calibration_topic_raw;
+	}
 	if (!mcap_pose_topic_raw.empty()) {
 		config.record.mcap.enabled = true;
 		config.record.mcap.pose_topic = mcap_pose_topic_raw;
@@ -894,6 +904,10 @@ std::expected<void, std::string> validate_runtime_config(const RuntimeConfig &co
 	if (config.record.mcap.calibration_topic.empty()) {
 		return std::unexpected("invalid MCAP config: calibration_topic must not be empty");
 	}
+	if (!config.record.mcap.depth_calibration_topic.empty() &&
+		config.record.mcap.depth_calibration_topic == config.record.mcap.calibration_topic) {
+		return std::unexpected("invalid MCAP config: depth_calibration_topic must differ from calibration_topic");
+	}
 	if (config.record.mcap.pose_topic.empty()) {
 		return std::unexpected("invalid MCAP config: pose_topic must not be empty");
 	}
@@ -941,6 +955,7 @@ std::string summarize_runtime_config(const RuntimeConfig &config) {
 	ss << ", mcap.topic=" << config.record.mcap.topic;
 	ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic;
 	ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic;
+	ss << ", mcap.depth_calibration_topic=" << config.record.mcap.depth_calibration_topic;
 	ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic;
 	ss << ", mcap.body_topic=" << config.record.mcap.body_topic;
 	ss << ", mcap.frame_id=" << config.record.mcap.frame_id;
diff --git a/src/record/mcap_record_sink.cpp b/src/record/mcap_record_sink.cpp
index de1ba07..f5dc5d9 100644
--- a/src/record/mcap_record_sink.cpp
+++ b/src/record/mcap_record_sink.cpp
@@ -405,6 +405,65 @@ std::expected<void, std::string> write_depth_message(
 	return {};
 }
 
+[[nodiscard]]
+std::expected<void, std::string> write_calibration_message(
+	mcap::McapWriter &writer,
+	std::uint64_t timestamp_ns,
+	std::string_view frame_id,
+	mcap::SchemaId schema_id,
+	std::string_view topic,
+	mcap::ChannelId &channel_id,
+	std::uint32_t &sequence,
+	const RawCameraCalibrationView &calibration) {
+	if (topic.empty()) {
+		return std::unexpected("calibration topic is empty");
+	}
+
+	if (channel_id == 0) {
+		mcap::Channel calibration_channel(std::string(topic), "protobuf", schema_id);
+		writer.addChannel(calibration_channel);
+		channel_id = calibration_channel.id;
+	}
+
+	foxglove::CameraCalibration message{};
+	*message.mutable_timestamp() = to_proto_timestamp(timestamp_ns);
+	message.set_frame_id(std::string(frame_id));
+	message.set_width(calibration.width);
+	message.set_height(calibration.height);
+	message.set_distortion_model(std::string(calibration.distortion_model));
+	for (const double value : calibration.distortion) {
+		message.add_d(value);
+	}
+	if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
+		return append;
+	}
+	if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
+		return append;
+	}
+	if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
+		return append;
+	}
+
+	std::string serialized{};
+	if (!message.SerializeToString(&serialized)) {
+		return std::unexpected("failed to serialize foxglove.CameraCalibration");
+	}
+
+	mcap::Message record{};
+	record.channelId   = channel_id;
+	record.sequence    = sequence++;
+	record.logTime     = timestamp_ns;
+	record.publishTime = timestamp_ns;
+	record.data        = reinterpret_cast<const std::byte *>(serialized.data());
+	record.dataSize    = serialized.size();
+
+	const auto write_status = writer.write(record);
+	if (!write_status.ok()) {
+		return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
+	}
+	return {};
+}
+
 }
 
 struct McapRecordSink::State {
@@ -412,6 +471,7 @@ struct McapRecordSink::State {
 	std::string path{};
 	std::string frame_id{};
 	std::string calibration_topic{};
+	std::string depth_calibration_topic{};
 	std::string pose_topic{};
 	std::string body_topic{};
 	mcap::SchemaId calibration_schema_id{0};
@@ -419,11 +479,13 @@ struct McapRecordSink::State {
 	mcap::ChannelId video_channel_id{0};
 	mcap::ChannelId depth_channel_id{0};
 	mcap::ChannelId calibration_channel_id{0};
+	mcap::ChannelId depth_calibration_channel_id{0};
 	mcap::ChannelId pose_channel_id{0};
 	mcap::ChannelId body_channel_id{0};
 	std::uint32_t video_sequence{0};
 	std::uint32_t depth_sequence{0};
 	std::uint32_t calibration_sequence{0};
+	std::uint32_t depth_calibration_sequence{0};
 	std::uint32_t pose_sequence{0};
 	std::uint32_t body_sequence{0};
 	CodecType codec{CodecType::H264};
@@ -456,6 +518,7 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
 	state->path = config.record.mcap.path;
 	state->frame_id = config.record.mcap.frame_id;
 	state->calibration_topic = config.record.mcap.calibration_topic;
+	state->depth_calibration_topic = config.record.mcap.depth_calibration_topic;
 	state->pose_topic = config.record.mcap.pose_topic;
 	state->body_topic = config.record.mcap.body_topic;
 
@@ -622,53 +685,30 @@ std::expected<void, std::string> McapRecordSink::write_camera_calibration(const
 	if (state_ == nullptr) {
 		return std::unexpected("MCAP sink is not open");
 	}
-	if (state_->calibration_topic.empty()) {
-		return std::unexpected("calibration topic is empty");
-	}
+	return write_calibration_message(
+		state_->writer,
+		calibration.timestamp_ns,
+		state_->frame_id,
+		state_->calibration_schema_id,
+		state_->calibration_topic,
+		state_->calibration_channel_id,
+		state_->calibration_sequence,
+		calibration);
+}
 
-	if (state_->calibration_channel_id == 0) {
-		mcap::Channel calibration_channel(state_->calibration_topic, "protobuf", state_->calibration_schema_id);
-		state_->writer.addChannel(calibration_channel);
-		state_->calibration_channel_id = calibration_channel.id;
+std::expected<void, std::string> McapRecordSink::write_depth_camera_calibration(const RawCameraCalibrationView &calibration) {
+	if (state_ == nullptr) {
+		return std::unexpected("MCAP sink is not open");
 	}
-
-	foxglove::CameraCalibration message{};
-	*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns);
-	message.set_frame_id(state_->frame_id);
-	message.set_width(calibration.width);
-	message.set_height(calibration.height);
-	message.set_distortion_model(std::string(calibration.distortion_model));
-	for (const double value : calibration.distortion) {
-		message.add_d(value);
-	}
-	if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
-		return append;
-	}
-	if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
-		return append;
-	}
-	if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
-		return append;
-	}
-
-	std::string serialized{};
-	if (!message.SerializeToString(&serialized)) {
-		return std::unexpected("failed to serialize foxglove.CameraCalibration");
-	}
-
-	mcap::Message record{};
-	record.channelId   = state_->calibration_channel_id;
-	record.sequence    = state_->calibration_sequence++;
-	record.logTime     = calibration.timestamp_ns;
-	record.publishTime = calibration.timestamp_ns;
-	record.data        = reinterpret_cast<const std::byte *>(serialized.data());
-	record.dataSize    = serialized.size();
-
-	const auto write_status = state_->writer.write(record);
-	if (!write_status.ok()) {
-		return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
-	}
-	return {};
+	return write_calibration_message(
+		state_->writer,
+		calibration.timestamp_ns,
+		state_->frame_id,
+		state_->calibration_schema_id,
+		state_->depth_calibration_topic,
+		state_->depth_calibration_channel_id,
+		state_->depth_calibration_sequence,
+		calibration);
 }
 
 std::expected<void, std::string> McapRecordSink::write_pose(const RawPoseView &pose) {
@@ -778,11 +818,13 @@ struct MultiMcapRecordSink::State {
 		mcap::ChannelId video_channel_id{0};
 		mcap::ChannelId depth_channel_id{0};
 		mcap::ChannelId calibration_channel_id{0};
+		mcap::ChannelId depth_calibration_channel_id{0};
 		mcap::ChannelId pose_channel_id{0};
 		mcap::ChannelId body_channel_id{0};
 		std::uint32_t video_sequence{0};
 		std::uint32_t depth_sequence{0};
 		std::uint32_t calibration_sequence{0};
+		std::uint32_t depth_calibration_sequence{0};
 		std::uint32_t pose_sequence{0};
 		std::uint32_t body_sequence{0};
 		CodecType codec{CodecType::H264};
@@ -851,6 +893,7 @@ std::expected<void, std::string> validate_new_stream_config(
 			if (stream.config.topic == topic ||
 				stream.config.depth_topic == topic ||
 				stream.config.calibration_topic == topic ||
+				stream.config.depth_calibration_topic == topic ||
 				stream.config.pose_topic == topic ||
 				stream.config.body_topic == topic) {
 				return true;
@@ -859,14 +902,35 @@ std::expected<void, std::string> validate_new_stream_config(
 		return false;
 	};
 
+	std::vector<std::string> config_topics{};
 	for (const auto *topic : {
 			 &config.topic,
 			 &config.depth_topic,
 			 &config.calibration_topic,
+			 &config.depth_calibration_topic,
 			 &config.pose_topic,
 			 &config.body_topic,
 		 }) {
-		if (topic_in_use(*topic)) {
+		if (!topic->empty()) {
+			config_topics.push_back(*topic);
+		}
+	}
+	std::sort(config_topics.begin(), config_topics.end());
+	for (std::size_t index = 1; index < config_topics.size(); ++index) {
+		if (config_topics[index - 1] == config_topics[index]) {
+			return std::unexpected("duplicate MCAP topic: " + config_topics[index]);
+		}
+	}
+
+	for (const auto *topic : {
+			 &config.topic,
+			 &config.depth_topic,
+			 &config.calibration_topic,
+			 &config.depth_calibration_topic,
+			 &config.pose_topic,
+			 &config.body_topic,
+		 }) {
+		if (!topic->empty() && topic_in_use(*topic)) {
 			return std::unexpected("duplicate MCAP topic: " + *topic);
 		}
 	}
@@ -1091,53 +1155,34 @@ std::expected<void, std::string> MultiMcapRecordSink::write_camera_calibration(
 	if (stream == nullptr) {
 		return std::unexpected(error);
 	}
-	if (stream->config.calibration_topic.empty()) {
-		return std::unexpected("calibration topic is empty");
-	}
+	return write_calibration_message(
+		state_->writer,
+		calibration.timestamp_ns,
+		stream->config.frame_id,
+		state_->calibration_schema_id,
+		stream->config.calibration_topic,
+		stream->calibration_channel_id,
+		stream->calibration_sequence,
+		calibration);
+}
 
-	if (stream->calibration_channel_id == 0) {
-		mcap::Channel calibration_channel(stream->config.calibration_topic, "protobuf", state_->calibration_schema_id);
-		state_->writer.addChannel(calibration_channel);
-		stream->calibration_channel_id = calibration_channel.id;
+std::expected<void, std::string> MultiMcapRecordSink::write_depth_camera_calibration(
+	const StreamId stream_id,
+	const RawCameraCalibrationView &calibration) {
+	std::string error{};
+	auto *stream = stream_state(state_, stream_id, error);
+	if (stream == nullptr) {
+		return std::unexpected(error);
 	}
-
-	foxglove::CameraCalibration message{};
-	*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns);
-	message.set_frame_id(stream->config.frame_id);
-	message.set_width(calibration.width);
-	message.set_height(calibration.height);
-	message.set_distortion_model(std::string(calibration.distortion_model));
-	for (const double value : calibration.distortion) {
-		message.add_d(value);
-	}
-	if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
-		return append;
-	}
-	if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
-		return append;
-	}
-	if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
-		return append;
-	}
-
-	std::string serialized{};
-	if (!message.SerializeToString(&serialized)) {
-		return std::unexpected("failed to serialize foxglove.CameraCalibration");
-	}
-
-	mcap::Message record{};
-	record.channelId   = stream->calibration_channel_id;
-	record.sequence    = stream->calibration_sequence++;
-	record.logTime     = calibration.timestamp_ns;
-	record.publishTime = calibration.timestamp_ns;
-	record.data        = reinterpret_cast<const std::byte *>(serialized.data());
-	record.dataSize    = serialized.size();
-
-	const auto write_status = state_->writer.write(record);
-	if (!write_status.ok()) {
-		return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
-	}
-	return {};
+	return write_calibration_message(
+		state_->writer,
+		calibration.timestamp_ns,
+		stream->config.frame_id,
+		state_->calibration_schema_id,
+		stream->config.depth_calibration_topic,
+		stream->depth_calibration_channel_id,
+		stream->depth_calibration_sequence,
+		calibration);
 }
 
 std::expected<void, std::string> MultiMcapRecordSink::write_pose(
diff --git a/src/testers/mcap_multi_record_tester.cpp b/src/testers/mcap_multi_record_tester.cpp
index dd1058f..b7566ab 100644
--- a/src/testers/mcap_multi_record_tester.cpp
+++ b/src/testers/mcap_multi_record_tester.cpp
@@ -86,6 +86,7 @@ int main(int argc, char **argv) {
 		.topic = "/zed1/video",
 		.depth_topic = "/zed1/depth",
 		.calibration_topic = "/zed1/calibration",
+		.depth_calibration_topic = "/zed1/depth_calibration",
 		.pose_topic = "/zed1/pose",
 		.body_topic = "/zed1/body",
 		.frame_id = "zed1",
@@ -94,6 +95,7 @@ int main(int argc, char **argv) {
 		.topic = "/zed2/video",
 		.depth_topic = "/zed2/depth",
 		.calibration_topic = "/zed2/calibration",
+		.depth_calibration_topic = "/zed2/depth_calibration",
 		.pose_topic = "/zed2/pose",
 		.body_topic = "/zed2/body",
 		.frame_id = "zed2",
@@ -162,6 +164,19 @@ int main(int argc, char **argv) {
 			spdlog::error("failed to write calibration: {}", write.error());
 			return exit_code(TesterExitCode::WriteError);
 		}
+		if (auto write = sink->write_depth_camera_calibration(stream_id, cvmmap_streamer::record::RawCameraCalibrationView{
+				.timestamp_ns = 100,
+				.width = 320,
+				.height = 240,
+				.distortion_model = "plumb_bob",
+				.distortion = distortion,
+				.intrinsic_matrix = intrinsic_matrix,
+				.rectification_matrix = rectification_matrix,
+				.projection_matrix = projection_matrix,
+			}); !write) {
+			spdlog::error("failed to write depth calibration: {}", write.error());
+			return exit_code(TesterExitCode::WriteError);
+		}
 
 		if (auto write = sink->write_pose(stream_id, cvmmap_streamer::record::RawPoseView{
 				.timestamp_ns = 100,
@@ -254,10 +269,12 @@ int main(int argc, char **argv) {
 			 "/zed1/video",
 			 "/zed1/depth",
 			 "/zed1/calibration",
+			 "/zed1/depth_calibration",
 			 "/zed1/pose",
 			 "/zed2/video",
 			 "/zed2/depth",
 			 "/zed2/calibration",
+			 "/zed2/depth_calibration",
 			 "/zed2/pose",
 		 }) {
 		if (topic_counts[topic] != 1) {
diff --git a/src/tools/zed_svo_to_mcap.cpp b/src/tools/zed_svo_to_mcap.cpp
index c682352..6a6a702 100644
--- a/src/tools/zed_svo_to_mcap.cpp
+++ b/src/tools/zed_svo_to_mcap.cpp
@@ -53,7 +53,8 @@ struct CliOptions {
 	std::string codec{"h265"};
 	std::string encoder_device{"auto"};
 	std::string mcap_compression{"zstd"};
-	std::string depth_mode{"quality"};
+	std::string depth_mode{"neural"};
+	std::string depth_size{"optimal"};
 	bool with_pose{false};
 	std::uint32_t start_frame{0};
 	std::uint32_t end_frame{0};
@@ -62,6 +63,7 @@ struct CliOptions {
 	std::string video_topic{"/camera/video"};
 	std::string depth_topic{"/camera/depth"};
 	std::string calibration_topic{"/camera/calibration"};
+	std::string depth_calibration_topic{"/camera/depth_calibration"};
 	std::string pose_topic{"/camera/pose"};
 	std::string world_frame_id{"world"};
 	std::string pose_config_path{};
@@ -88,37 +90,9 @@ struct TrackingSample {
 	std::array<double, 4> orientation{};
 };
 
-struct CameraStream {
-	SourceSpec source{};
-	std::unique_ptr<sl::Camera> camera{};
-	sl::RuntimeParameters runtime{};
-	sl::Mat current_left_frame{};
-	sl::Mat current_depth_frame{};
-	sl::Mat next_left_frame{};
-	sl::Mat next_depth_frame{};
-	TrackingSample current_tracking{};
-	TrackingSample next_tracking{};
-	std::uint64_t current_timestamp_ns{0};
-	std::uint64_t next_timestamp_ns{0};
-	std::uint64_t first_timestamp_ns{0};
-	std::uint64_t last_timestamp_ns{0};
-	std::uint64_t total_frames{0};
-	std::uint64_t nominal_frame_period_ns{0};
-	std::uint64_t dropped_frames{0};
-	float fps{0.0f};
+struct CalibrationData {
 	std::uint32_t width{0};
 	std::uint32_t height{0};
-	int sync_position{-1};
-	bool has_next{false};
-	bool tracking_enabled{false};
-	bool calibration_written{false};
-	std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
-	int serial_number{0};
-	PoseTrackingOptions pose_tracking{};
-	cvmmap_streamer::ipc::FrameInfo frame_info{};
-	std::optional<cvmmap_streamer::encode::EncoderBackend> backend{};
-	cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
-	cvmmap_streamer::record::MultiMcapRecordSink::StreamId mcap_stream_id{0};
 	std::array<double, 5> distortion{
 		0.0, 0.0, 0.0, 0.0, 0.0,
 	};
@@ -139,6 +113,45 @@ struct CameraStream {
 	};
 };
 
+struct CameraStream {
+	SourceSpec source{};
+	std::unique_ptr<sl::Camera> camera{};
+	sl::RuntimeParameters runtime{};
+	sl::Resolution depth_image_size{};
+	sl::Mat current_left_frame{};
+	sl::Mat current_depth_frame{};
+	sl::Mat next_left_frame{};
+	sl::Mat next_depth_frame{};
+	TrackingSample current_tracking{};
+	TrackingSample next_tracking{};
+	std::uint64_t current_timestamp_ns{0};
+	std::uint64_t next_timestamp_ns{0};
+	std::uint64_t first_timestamp_ns{0};
+	std::uint64_t last_timestamp_ns{0};
+	std::uint64_t total_frames{0};
+	std::uint64_t nominal_frame_period_ns{0};
+	std::uint64_t dropped_frames{0};
+	float fps{0.0f};
+	std::uint32_t width{0};
+	std::uint32_t height{0};
+	std::uint32_t depth_width{0};
+	std::uint32_t depth_height{0};
+	int sync_position{-1};
+	bool has_next{false};
+	bool tracking_enabled{false};
+	bool calibration_written{false};
+	bool depth_calibration_written{false};
+	std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
+	int serial_number{0};
+	PoseTrackingOptions pose_tracking{};
+	cvmmap_streamer::ipc::FrameInfo frame_info{};
+	std::optional<cvmmap_streamer::encode::EncoderBackend> backend{};
+	cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
+	cvmmap_streamer::record::MultiMcapRecordSink::StreamId mcap_stream_id{0};
+	CalibrationData video_calibration{};
+	CalibrationData depth_calibration{};
+};
+
 [[nodiscard]]
 constexpr int exit_code(const ToolExitCode code) {
 	return static_cast<int>(code);
@@ -208,6 +221,9 @@ std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state)
 	return zed_string(sl::toString(state));
 }
 
+[[nodiscard]]
+std::string lowercase(std::string value);
+
 [[nodiscard]]
 std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) {
 	if (raw == "h264") {
@@ -249,19 +265,41 @@ std::expected<cvmmap_streamer::McapCompression, std::string> parse_mcap_compress
 
 [[nodiscard]]
 std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) {
-	if (raw == "neural") {
+	auto normalized = lowercase(std::string(raw));
+	std::replace(normalized.begin(), normalized.end(), '-', '_');
+	if (normalized == "neural_light") {
+		return sl::DEPTH_MODE::NEURAL_LIGHT;
+	}
+	if (normalized == "neural") {
 		return sl::DEPTH_MODE::NEURAL;
 	}
-	if (raw == "quality") {
-		return sl::DEPTH_MODE::QUALITY;
+	if (normalized == "neural_plus") {
+		return sl::DEPTH_MODE::NEURAL_PLUS;
 	}
-	if (raw == "performance") {
-		return sl::DEPTH_MODE::PERFORMANCE;
+	return std::unexpected(
+		"invalid depth mode: '" + std::string(raw) + "' (expected: neural_light|neural|neural_plus)");
+}
+
+[[nodiscard]]
+std::expected<sl::Resolution, std::string> parse_depth_size(const std::string_view raw) {
+	auto normalized = lowercase(std::string(raw));
+	std::replace(normalized.begin(), normalized.end(), '-', '_');
+	if (normalized == "optimal") {
+		return sl::Resolution(-1, -1);
 	}
-	if (raw == "ultra") {
-		return sl::DEPTH_MODE::ULTRA;
+	if (normalized == "native") {
+		return sl::Resolution(0, 0);
 	}
-	return std::unexpected("invalid depth mode: '" + std::string(raw) + "' (expected: neural|quality|performance|ultra)");
+
+	static const std::regex size_pattern{R"(^([1-9][0-9]*)[xX]([1-9][0-9]*)$)"};
+	std::smatch match{};
+	const auto size_string = std::string(raw);
+	if (!std::regex_match(size_string, match, size_pattern)) {
+		return std::unexpected(
+			"invalid depth size: '" + size_string + "' (expected: optimal|native|<width>x<height>)");
+	}
+
+	return sl::Resolution(std::stoi(match[1].str()), std::stoi(match[2].str()));
 }
 
 [[nodiscard]]
@@ -339,6 +377,60 @@ std::string normalize_config_token(std::string value) {
 	return value;
 }
 
+[[nodiscard]]
+CalibrationData make_calibration_data(const sl::CameraInformation &camera_info) {
+	CalibrationData calibration{};
+	const auto &camera_config = camera_info.camera_configuration;
+	const auto &left_camera = camera_config.calibration_parameters.left_cam;
+	const auto resolved_width = left_camera.image_size.width > 0
+		? left_camera.image_size.width
+		: camera_config.resolution.width;
+	const auto resolved_height = left_camera.image_size.height > 0
+		? left_camera.image_size.height
+		: camera_config.resolution.height;
+	calibration.width = static_cast<std::uint32_t>(resolved_width);
+	calibration.height = static_cast<std::uint32_t>(resolved_height);
+	calibration.intrinsic_matrix = {
+		static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
+		0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
+		0.0, 0.0, 1.0,
+	};
+	calibration.projection_matrix = {
+		static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
+		0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
+		0.0, 0.0, 1.0, 0.0,
+	};
+	return calibration;
+}
+
+[[nodiscard]]
+std::string describe_depth_resolution_request(const sl::Resolution &resolution) {
+	if (resolution.width == -1 && resolution.height == -1) {
+		return "optimal";
+	}
+	if (resolution.width == 0 && resolution.height == 0) {
+		return "native";
+	}
+	return std::to_string(resolution.width) + "x" + std::to_string(resolution.height);
+}
+
+[[nodiscard]]
+std::expected<void, std::string> validate_depth_resolution_request(
+	const sl::Resolution &requested_resolution,
+	const std::uint32_t native_width,
+	const std::uint32_t native_height) {
+	if (requested_resolution.width <= 0 || requested_resolution.height <= 0) {
+		return {};
+	}
+	if (requested_resolution.width > static_cast<int>(native_width) ||
+		requested_resolution.height > static_cast<int>(native_height)) {
+		return std::unexpected(
+			"requested depth size " + describe_depth_resolution_request(requested_resolution) +
+			" exceeds native camera resolution " + std::to_string(native_width) + "x" + std::to_string(native_height));
+	}
+	return {};
+}
+
 [[nodiscard]]
 std::expected<sl::COORDINATE_SYSTEM, std::string> parse_coordinate_system(const std::string_view raw) {
 	const auto normalized = normalize_config_token(std::string(raw));
@@ -592,7 +684,11 @@ std::expected<void, std::string> read_frame_data(
 		return std::unexpected(valid.error());
 	}
 
-	const auto depth_status = stream.camera->retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU);
+	const auto depth_status = stream.camera->retrieveMeasure(
+		depth_frame,
+		sl::MEASURE::DEPTH_U16_MM,
+		sl::MEM::CPU,
+		stream.depth_image_size);
 	if (depth_status != sl::ERROR_CODE::SUCCESS) {
 		return std::unexpected(
 			"failed to retrieve depth map for " + stream.source.label + ": " + zed_status_string(depth_status));
@@ -733,11 +829,13 @@ std::expected<CameraStream, std::string> open_camera_stream(
 	const cvmmap_streamer::CodecType codec,
 	const cvmmap_streamer::EncoderDeviceType encoder_device,
 	const sl::DEPTH_MODE depth_mode,
+	const sl::Resolution depth_size,
 	const PoseTrackingOptions &pose_tracking) {
 	CameraStream stream{};
 	stream.source = source;
 	stream.camera = std::make_unique<sl::Camera>();
 	stream.pose_tracking = pose_tracking;
+	stream.depth_image_size = depth_size;
 
 	sl::InitParameters init{};
 	init.input.setFromSVOFile(source.path.c_str());
@@ -774,7 +872,6 @@ std::expected<CameraStream, std::string> open_camera_stream(
 
 	const auto camera_info = stream.camera->getCameraInformation();
 	const auto &camera_config = camera_info.camera_configuration;
-	const auto &left_camera = camera_config.calibration_parameters.left_cam;
 	stream.serial_number = camera_info.serial_number;
 	stream.width = static_cast<std::uint32_t>(camera_config.resolution.width);
 	stream.height = static_cast<std::uint32_t>(camera_config.resolution.height);
@@ -783,17 +880,10 @@ std::expected<CameraStream, std::string> open_camera_stream(
 	if (stream.width == 0 || stream.height == 0) {
 		return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
 	}
-
-	stream.intrinsic_matrix = {
-		static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
-		0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
-		0.0, 0.0, 1.0,
-	};
-	stream.projection_matrix = {
-		static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
-		0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
-		0.0, 0.0, 1.0, 0.0,
-	};
+	if (auto valid = validate_depth_resolution_request(depth_size, stream.width, stream.height); !valid) {
+		return std::unexpected(valid.error());
+	}
+	stream.video_calibration = make_calibration_data(camera_info);
 
 	stream.frame_info = cvmmap_streamer::ipc::FrameInfo{
 		.width = static_cast<std::uint16_t>(stream.width),
@@ -828,6 +918,12 @@ std::expected<CameraStream, std::string> open_camera_stream(
 	if (!first_frame) {
 		return std::unexpected(first_frame.error());
 	}
+	stream.depth_width = static_cast<std::uint32_t>(stream.current_depth_frame.getWidth());
+	stream.depth_height = static_cast<std::uint32_t>(stream.current_depth_frame.getHeight());
+	stream.depth_calibration = make_calibration_data(
+		stream.camera->getCameraInformation(sl::Resolution(
+			static_cast<int>(stream.depth_width),
+			static_cast<int>(stream.depth_height))));
 	stream.first_timestamp_ns = first_timestamp_ns;
 
 	auto last_timestamp_ns = read_last_readable_timestamp(stream);
@@ -912,6 +1008,9 @@ std::expected<void, std::string> register_mcap_streams(
 		config.topic = namespace_topic(stream.source.label, options.video_topic);
 		config.depth_topic = namespace_topic(stream.source.label, options.depth_topic);
 		config.calibration_topic = namespace_topic(stream.source.label, options.calibration_topic);
+		if (stream.depth_width != stream.width || stream.depth_height != stream.height) {
+			config.depth_calibration_topic = namespace_topic(stream.source.label, options.depth_calibration_topic);
+		}
 		config.pose_topic = options.with_pose ? namespace_topic(stream.source.label, options.pose_topic) : "";
 		config.body_topic = namespace_topic(stream.source.label, "/camera/body");
 		config.frame_id = multi_frame_id(options, stream.source.label);
@@ -1082,13 +1181,13 @@ std::expected<void, std::string> encode_and_write_group(
 		if (!stream.calibration_written) {
 			cvmmap_streamer::record::RawCameraCalibrationView calibration{
 				.timestamp_ns = group_timestamp_ns,
-				.width = stream.width,
-				.height = stream.height,
+				.width = stream.video_calibration.width,
+				.height = stream.video_calibration.height,
 				.distortion_model = "plumb_bob",
-				.distortion = stream.distortion,
-				.intrinsic_matrix = stream.intrinsic_matrix,
-				.rectification_matrix = stream.rectification_matrix,
-				.projection_matrix = stream.projection_matrix,
+				.distortion = stream.video_calibration.distortion,
+				.intrinsic_matrix = stream.video_calibration.intrinsic_matrix,
+				.rectification_matrix = stream.video_calibration.rectification_matrix,
+				.projection_matrix = stream.video_calibration.projection_matrix,
 			};
 			if (auto write = sink.write_camera_calibration(stream.mcap_stream_id, calibration); !write) {
 				return std::unexpected("failed to write calibration for " + stream.source.label + ": " + write.error());
@@ -1096,8 +1195,34 @@ std::expected<void, std::string> encode_and_write_group(
 			stream.calibration_written = true;
 		}
 
+		if (!stream.depth_calibration_written &&
+			(stream.depth_width != stream.width || stream.depth_height != stream.height)) {
+			cvmmap_streamer::record::RawCameraCalibrationView depth_calibration{
+				.timestamp_ns = group_timestamp_ns,
+				.width = stream.depth_calibration.width,
+				.height = stream.depth_calibration.height,
+				.distortion_model = "plumb_bob",
+				.distortion = stream.depth_calibration.distortion,
+				.intrinsic_matrix = stream.depth_calibration.intrinsic_matrix,
+				.rectification_matrix = stream.depth_calibration.rectification_matrix,
+				.projection_matrix = stream.depth_calibration.projection_matrix,
+			};
+			if (auto write = sink.write_depth_camera_calibration(stream.mcap_stream_id, depth_calibration); !write) {
+				return std::unexpected("failed to write depth calibration for " + stream.source.label + ": " + write.error());
+			}
+			stream.depth_calibration_written = true;
+		}
+
+		const auto depth_width = static_cast<std::uint32_t>(stream.current_depth_frame.getWidth());
+		const auto depth_height = static_cast<std::uint32_t>(stream.current_depth_frame.getHeight());
+		if (depth_width != stream.depth_width || depth_height != stream.depth_height) {
+			return std::unexpected(
+				"depth resolution changed unexpectedly for " + stream.source.label + ": " +
+				std::to_string(depth_width) + "x" + std::to_string(depth_height) + " vs " +
+				std::to_string(stream.depth_width) + "x" + std::to_string(stream.depth_height));
+		}
 		const auto depth_step_bytes = stream.current_depth_frame.getStepBytes(sl::MEM::CPU);
-		const auto packed_depth_bytes = static_cast<std::size_t>(stream.width) * sizeof(std::uint16_t);
+		const auto packed_depth_bytes = static_cast<std::size_t>(depth_width) * sizeof(std::uint16_t);
 		if (depth_step_bytes < packed_depth_bytes) {
 			return std::unexpected(
 				"depth stride " + std::to_string(depth_step_bytes) + " is smaller than packed row size " +
@@ -1109,7 +1234,7 @@ std::expected<void, std::string> encode_and_write_group(
 		if (depth_step_bytes == packed_depth_bytes) {
 			depth_pixels = std::span<const std::uint16_t>(
 				stream.current_depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
-				static_cast<std::size_t>(stream.width) * static_cast<std::size_t>(stream.height));
+				static_cast<std::size_t>(depth_width) * static_cast<std::size_t>(depth_height));
 		} else {
 			compact_depth = copy_compact_u16_plane(stream.current_depth_frame);
 			depth_pixels = *compact_depth;
@@ -1117,8 +1242,8 @@ std::expected<void, std::string> encode_and_write_group(
 
 		cvmmap_streamer::record::RawDepthMapU16View depth_map{
 			.timestamp_ns = group_timestamp_ns,
-			.width = stream.width,
-			.height = stream.height,
+			.width = depth_width,
+			.height = depth_height,
 			.pixels = depth_pixels,
 		};
 		if (auto write = sink.write_depth_map_u16(stream.mcap_stream_id, depth_map); !write) {
@@ -1230,6 +1355,7 @@ int run_single_source(
 	const cvmmap_streamer::EncoderDeviceType encoder_device,
 	const cvmmap_streamer::McapCompression compression,
 	const sl::DEPTH_MODE depth_mode,
+	const sl::Resolution depth_size,
 	const PoseTrackingOptions &pose_tracking) {
 	const auto input_path = std::filesystem::path{options.input_paths.front()};
 	if (!std::filesystem::is_regular_file(input_path)) {
@@ -1319,7 +1445,6 @@ int run_single_source(
 
 	const auto camera_info = camera.getCameraInformation();
 	const auto &camera_config = camera_info.camera_configuration;
-	const auto &left_camera = camera_config.calibration_parameters.left_cam;
 	const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
 	const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
 	if (width == 0 || height == 0) {
@@ -1327,6 +1452,11 @@ int run_single_source(
 		spdlog::error("camera resolution reported by the ZED SDK is invalid");
 		return exit_code(ToolExitCode::RuntimeError);
 	}
+	if (auto valid = validate_depth_resolution_request(depth_size, width, height); !valid) {
+		close_camera();
+		spdlog::error("{}", valid.error());
+		return exit_code(ToolExitCode::UsageError);
+	}
 
 	cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
 	config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
@@ -1339,6 +1469,7 @@ int run_single_source(
 	config.record.mcap.topic = options.video_topic;
 	config.record.mcap.depth_topic = options.depth_topic;
 	config.record.mcap.calibration_topic = options.calibration_topic;
+	config.record.mcap.depth_calibration_topic = options.depth_calibration_topic;
 	config.record.mcap.pose_topic = options.pose_topic;
 	config.record.mcap.frame_id = options.frame_id;
 	config.record.mcap.compression = compression;
@@ -1382,30 +1513,17 @@ int run_single_source(
 		return exit_code(ToolExitCode::RuntimeError);
 	}
 
-	const std::array<double, 5> distortion{
-		0.0, 0.0, 0.0, 0.0, 0.0,
-	};
-	const std::array<double, 9> intrinsic_matrix{
-		static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
-		0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
-		0.0, 0.0, 1.0,
-	};
-	const std::array<double, 9> rectification_matrix{
-		1.0, 0.0, 0.0,
-		0.0, 1.0, 0.0,
-		0.0, 0.0, 1.0,
-	};
-	const std::array<double, 12> projection_matrix{
-		static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
-		0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
-		0.0, 0.0, 1.0, 0.0,
-	};
+	const auto video_calibration = make_calibration_data(camera_info);
 
 	sl::RuntimeParameters runtime_parameters{};
 	sl::Mat left_frame{};
 	sl::Mat depth_frame{};
 	sl::Pose pose{};
 	bool calibration_written{false};
+	bool depth_calibration_written{false};
+	std::uint32_t depth_width{0};
+	std::uint32_t depth_height{0};
+	std::optional<CalibrationData> depth_calibration{};
 	std::uint64_t emitted_frames{0};
 	std::optional<std::uint64_t> last_timestamp_ns{};
 	std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
@@ -1454,7 +1572,7 @@ int run_single_source(
 			return exit_code(ToolExitCode::RuntimeError);
 		}
 
-		const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU);
+		const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU, depth_size);
 		if (depth_status != sl::ERROR_CODE::SUCCESS) {
 			progress.finish(emitted_frames, false);
 			sink->close();
@@ -1522,13 +1640,13 @@ int run_single_source(
 		if (!calibration_written) {
 			cvmmap_streamer::record::RawCameraCalibrationView calibration{
 				.timestamp_ns = timestamp_ns,
-				.width = width,
-				.height = height,
+				.width = video_calibration.width,
+				.height = video_calibration.height,
 				.distortion_model = "plumb_bob",
-				.distortion = distortion,
-				.intrinsic_matrix = intrinsic_matrix,
-				.rectification_matrix = rectification_matrix,
-				.projection_matrix = projection_matrix,
+				.distortion = video_calibration.distortion,
+				.intrinsic_matrix = video_calibration.intrinsic_matrix,
+				.rectification_matrix = video_calibration.rectification_matrix,
+				.projection_matrix = video_calibration.projection_matrix,
 			};
 			if (auto write = sink->write_camera_calibration(calibration); !write) {
 				progress.finish(emitted_frames, false);
@@ -1541,8 +1659,63 @@ int run_single_source(
 			calibration_written = true;
 		}
 
+		const auto actual_depth_width = static_cast<std::uint32_t>(depth_frame.getWidth());
+		const auto actual_depth_height = static_cast<std::uint32_t>(depth_frame.getHeight());
+		if (depth_width == 0 || depth_height == 0) {
+			depth_width = actual_depth_width;
+			depth_height = actual_depth_height;
+			depth_calibration = make_calibration_data(camera.getCameraInformation(sl::Resolution(
+				static_cast<int>(depth_width),
+				static_cast<int>(depth_height))));
+			if (depth_width != width || depth_height != height) {
+				spdlog::info(
+					"exporting depth at {}x{} (requested {}) while video remains {}x{}",
+					depth_width,
+					depth_height,
+					describe_depth_resolution_request(depth_size),
+					width,
+					height);
+			}
+		} else if (actual_depth_width != depth_width || actual_depth_height != depth_height) {
+			progress.finish(emitted_frames, false);
+			sink->close();
+			backend->shutdown();
+			close_camera();
+			spdlog::error(
+				"depth resolution changed unexpectedly from {}x{} to {}x{}",
+				depth_width,
+				depth_height,
+				actual_depth_width,
+				actual_depth_height);
+			return exit_code(ToolExitCode::RuntimeError);
+		}
+
+		if (!depth_calibration_written &&
+			(depth_width != width || depth_height != height) &&
+			depth_calibration.has_value()) {
+			cvmmap_streamer::record::RawCameraCalibrationView calibration{
+				.timestamp_ns = timestamp_ns,
+				.width = depth_calibration->width,
+				.height = depth_calibration->height,
+				.distortion_model = "plumb_bob",
+				.distortion = depth_calibration->distortion,
+				.intrinsic_matrix = depth_calibration->intrinsic_matrix,
+				.rectification_matrix = depth_calibration->rectification_matrix,
+				.projection_matrix = depth_calibration->projection_matrix,
+			};
+			if (auto write = sink->write_depth_camera_calibration(calibration); !write) {
+				progress.finish(emitted_frames, false);
+				sink->close();
+				backend->shutdown();
+				close_camera();
+				spdlog::error("failed to write depth calibration: {}", write.error());
+				return exit_code(ToolExitCode::RuntimeError);
+			}
+			depth_calibration_written = true;
+		}
+
 		const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU);
-		const auto packed_depth_bytes = static_cast<std::size_t>(width) * sizeof(std::uint16_t);
+		const auto packed_depth_bytes = static_cast<std::size_t>(depth_width) * sizeof(std::uint16_t);
 		if (depth_step_bytes < packed_depth_bytes) {
 			progress.finish(emitted_frames, false);
 			sink->close();
@@ -1557,7 +1730,7 @@ int run_single_source(
 		if (depth_step_bytes == packed_depth_bytes) {
 			depth_pixels = std::span<const std::uint16_t>(
 				depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
-				static_cast<std::size_t>(width) * static_cast<std::size_t>(height));
+				static_cast<std::size_t>(depth_width) * static_cast<std::size_t>(depth_height));
 		} else {
 			compact_depth = copy_compact_u16_plane(depth_frame);
 			depth_pixels = *compact_depth;
@@ -1565,8 +1738,8 @@ int run_single_source(
 
 		cvmmap_streamer::record::RawDepthMapU16View depth_map{
 			.timestamp_ns = timestamp_ns,
-			.width = width,
-			.height = height,
+			.width = depth_width,
+			.height = depth_height,
 			.pixels = depth_pixels,
 		};
 		if (auto write = sink->write_depth_map_u16(depth_map); !write) {
@@ -1663,6 +1836,7 @@ int run_multi_source(
 	const cvmmap_streamer::EncoderDeviceType encoder_device,
 	const cvmmap_streamer::McapCompression compression,
 	const sl::DEPTH_MODE depth_mode,
+	const sl::Resolution depth_size,
 	const PoseTrackingOptions &pose_tracking) {
 	if (options.has_end_frame && options.end_frame < options.start_frame) {
 		spdlog::error(
@@ -1684,7 +1858,7 @@ int run_multi_source(
 			close_camera_streams(streams);
 			return interrupted_exit_code();
 		}
-		auto stream = open_camera_stream(source, options, codec, encoder_device, depth_mode, pose_tracking);
+		auto stream = open_camera_stream(source, options, codec, encoder_device, depth_mode, depth_size, pose_tracking);
 		if (!stream) {
 			close_camera_streams(streams);
 			spdlog::error("{}", stream.error());
@@ -1910,8 +2084,10 @@ int main(int argc, char **argv) {
 	app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available");
 	app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)")
 		->check(CLI::IsMember({"none", "lz4", "zstd"}));
-	app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)")
-		->check(CLI::IsMember({"neural", "quality", "performance", "ultra"}));
+	app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural_light|neural|neural_plus)")
+		->check(CLI::IsMember({"neural_light", "neural", "neural_plus"}));
+	app.add_option("--depth-size", options.depth_size, "Depth output size (optimal|native|<width>x<height>)")
+		->default_val("optimal");
 	app.add_option(
 		"--start-frame",
 		options.start_frame,
@@ -1926,6 +2102,10 @@ int main(int argc, char **argv) {
 	app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");
 	app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap");
 	app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration");
+	app.add_option(
+		"--depth-calibration-topic",
+		options.depth_calibration_topic,
+		"MCAP topic for foxglove.CameraCalibration aligned to the exported depth resolution");
 	app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame");
 	app.add_option(
 		"--pose-config",
@@ -1967,6 +2147,11 @@ int main(int argc, char **argv) {
 		spdlog::error("{}", depth_mode.error());
 		return exit_code(ToolExitCode::UsageError);
 	}
+	auto depth_size = parse_depth_size(options.depth_size);
+	if (!depth_size) {
+		spdlog::error("{}", depth_size.error());
+		return exit_code(ToolExitCode::UsageError);
+	}
 	auto pose_tracking = load_pose_tracking_options(options);
 	if (!pose_tracking) {
 		spdlog::error("{}", pose_tracking.error());
@@ -1982,8 +2167,25 @@ int main(int argc, char **argv) {
 	const auto output_path = derive_output_path(options, *sources);
 	if (sources->size() == 1) {
 		options.input_paths = {sources->front().path.string()};
-		return run_single_source(options, output_path, *codec, *encoder_device, *compression, *depth_mode, *pose_tracking);
+		return run_single_source(
+			options,
+			output_path,
+			*codec,
+			*encoder_device,
+			*compression,
+			*depth_mode,
+			*depth_size,
+			*pose_tracking);
 	}
 
-	return run_multi_source(options, *sources, output_path, *codec, *encoder_device, *compression, *depth_mode, *pose_tracking);
+	return run_multi_source(
+		options,
+		*sources,
+		output_path,
+		*codec,
+		*encoder_device,
+		*compression,
+		*depth_mode,
+		*depth_size,
+		*pose_tracking);
 }