feat(zed): export depth at neural optimal resolution

Restrict ZED depth export to neural modes and default offline MCAP conversion to the SDK-selected optimal depth size instead of camera resolution.

Add a configurable --depth-size option, propagate the actual returned depth dimensions into DepthMap metadata, and emit a dedicated depth calibration topic when depth resolution differs from video.

Update the batch and recording helper scripts to accept only the neural depth modes (neural_light, neural, neural_plus) and to pass the new depth-size option through to the converter.

Verification:
- cmake --build build --target zed_svo_to_mcap mcap_multi_record_tester mcap_pose_record_tester -j4
- build/bin/mcap_multi_record_tester
- build/bin/mcap_pose_record_tester
- build/bin/zed_svo_to_mcap --input /workspaces/data/kindergarten/jump/experiment/2/2026-03-18T11-27-15/2026-03-18T11-27-15_zed4.svo2 --output /tmp/zed4_neural_optimal_test.mcap --codec h264 --encoder-device software --mcap-compression zstd --depth-mode neural --depth-size optimal --start-frame 0 --end-frame 9
This commit is contained in:
2026-03-20 12:04:37 +00:00
parent 7808b89a03
commit 2f74a9561d
8 changed files with 497 additions and 190 deletions
@@ -86,6 +86,7 @@ struct McapRecordConfig {
std::string topic{"/camera/video"}; std::string topic{"/camera/video"};
std::string depth_topic{"/camera/depth"}; std::string depth_topic{"/camera/depth"};
std::string calibration_topic{"/camera/calibration"}; std::string calibration_topic{"/camera/calibration"};
std::string depth_calibration_topic{"/camera/depth_calibration"};
std::string pose_topic{"/camera/pose"}; std::string pose_topic{"/camera/pose"};
std::string body_topic{"/camera/body"}; std::string body_topic{"/camera/body"};
std::string frame_id{"camera"}; std::string frame_id{"camera"};
@@ -61,6 +61,7 @@ struct McapRecordStreamConfig {
std::string topic{"/camera/video"}; std::string topic{"/camera/video"};
std::string depth_topic{"/camera/depth"}; std::string depth_topic{"/camera/depth"};
std::string calibration_topic{"/camera/calibration"}; std::string calibration_topic{"/camera/calibration"};
std::string depth_calibration_topic{"/camera/depth_calibration"};
std::string pose_topic{"/camera/pose"}; std::string pose_topic{"/camera/pose"};
std::string body_topic{"/camera/body"}; std::string body_topic{"/camera/body"};
std::string frame_id{"camera"}; std::string frame_id{"camera"};
@@ -97,6 +98,9 @@ public:
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_camera_calibration(const RawCameraCalibrationView &calibration); std::expected<void, std::string> write_camera_calibration(const RawCameraCalibrationView &calibration);
[[nodiscard]]
std::expected<void, std::string> write_depth_camera_calibration(const RawCameraCalibrationView &calibration);
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_pose(const RawPoseView &pose); std::expected<void, std::string> write_pose(const RawPoseView &pose);
@@ -165,6 +169,11 @@ public:
StreamId stream_id, StreamId stream_id,
const RawCameraCalibrationView &calibration); const RawCameraCalibrationView &calibration);
[[nodiscard]]
std::expected<void, std::string> write_depth_camera_calibration(
StreamId stream_id,
const RawCameraCalibrationView &calibration);
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_pose( std::expected<void, std::string> write_pose(
StreamId stream_id, StreamId stream_id,
+13 -2
View File
@@ -34,6 +34,7 @@ class BatchConfig:
encoder_device: str encoder_device: str
mcap_compression: str mcap_compression: str
depth_mode: str depth_mode: str
depth_size: str
with_pose: bool with_pose: bool
pose_config: Path | None pose_config: Path | None
world_frame_id: str | None world_frame_id: str | None
@@ -287,6 +288,8 @@ def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]:
config.mcap_compression, config.mcap_compression,
"--depth-mode", "--depth-mode",
config.depth_mode, config.depth_mode,
"--depth-size",
config.depth_size,
] ]
if config.with_pose: if config.with_pose:
command.append("--with-pose") command.append("--with-pose")
@@ -564,8 +567,14 @@ def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -
) )
@click.option( @click.option(
"--depth-mode", "--depth-mode",
type=click.Choice(("neural", "quality", "performance", "ultra")), type=click.Choice(("neural_light", "neural", "neural_plus")),
default="quality", default="neural",
show_default=True,
)
@click.option(
"--depth-size",
type=str,
default="optimal",
show_default=True, show_default=True,
) )
@click.option("--with-pose", is_flag=True, help="Enable per-camera positional tracking export when available.") @click.option("--with-pose", is_flag=True, help="Enable per-camera positional tracking export when available.")
@@ -615,6 +624,7 @@ def main(
encoder_device: str, encoder_device: str,
mcap_compression: str, mcap_compression: str,
depth_mode: str, depth_mode: str,
depth_size: str,
with_pose: bool, with_pose: bool,
pose_config: Path | None, pose_config: Path | None,
world_frame_id: str | None, world_frame_id: str | None,
@@ -638,6 +648,7 @@ def main(
encoder_device=encoder_device, encoder_device=encoder_device,
mcap_compression=mcap_compression, mcap_compression=mcap_compression,
depth_mode=depth_mode, depth_mode=depth_mode,
depth_size=depth_size,
with_pose=with_pose, with_pose=with_pose,
pose_config=pose_config.expanduser().resolve() if pose_config is not None else None, pose_config=pose_config.expanduser().resolve() if pose_config is not None else None,
world_frame_id=world_frame_id, world_frame_id=world_frame_id,
+8 -1
View File
@@ -66,10 +66,15 @@ def parse_args() -> argparse.Namespace:
) )
parser.add_argument( parser.add_argument(
"--depth-mode", "--depth-mode",
choices=("neural", "quality", "performance", "ultra"), choices=("neural_light", "neural", "neural_plus"),
default="neural", default="neural",
help="Depth mode passed to zed_svo_to_mcap", help="Depth mode passed to zed_svo_to_mcap",
) )
parser.add_argument(
"--depth-size",
default="optimal",
help="Depth size passed to zed_svo_to_mcap (optimal|native|<width>x<height>)",
)
parser.add_argument("--start-frame", type=int, default=0, help="First SVO frame to convert") parser.add_argument("--start-frame", type=int, default=0, help="First SVO frame to convert")
parser.add_argument("--end-frame", type=int, help="Last SVO frame to convert") parser.add_argument("--end-frame", type=int, help="Last SVO frame to convert")
parser.add_argument( parser.add_argument(
@@ -289,6 +294,8 @@ def convert_svo(
args.mcap_compression, args.mcap_compression,
"--depth-mode", "--depth-mode",
args.depth_mode, args.depth_mode,
"--depth-size",
args.depth_size,
"--start-frame", "--start-frame",
str(args.start_frame), str(args.start_frame),
] ]
+15
View File
@@ -369,6 +369,10 @@ std::expected<void, std::string> apply_toml_file(RuntimeConfig &config, const st
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.calibration_topic = *value; config.record.mcap.calibration_topic = *value;
} }
if (auto value = toml_value<std::string>(table, "record.mcap.depth_calibration_topic")) {
config.record.mcap.enabled = true;
config.record.mcap.depth_calibration_topic = *value;
}
if (auto value = toml_value<std::string>(table, "record.mcap.pose_topic")) { if (auto value = toml_value<std::string>(table, "record.mcap.pose_topic")) {
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.pose_topic = *value; config.record.mcap.pose_topic = *value;
@@ -581,6 +585,7 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
std::string mcap_topic_raw{}; std::string mcap_topic_raw{};
std::string mcap_depth_topic_raw{}; std::string mcap_depth_topic_raw{};
std::string mcap_calibration_topic_raw{}; std::string mcap_calibration_topic_raw{};
std::string mcap_depth_calibration_topic_raw{};
std::string mcap_pose_topic_raw{}; std::string mcap_pose_topic_raw{};
std::string mcap_body_topic_raw{}; std::string mcap_body_topic_raw{};
std::string mcap_frame_id_raw{}; std::string mcap_frame_id_raw{};
@@ -625,6 +630,7 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
app.add_option("--mcap-topic", mcap_topic_raw); app.add_option("--mcap-topic", mcap_topic_raw);
app.add_option("--mcap-depth-topic", mcap_depth_topic_raw); app.add_option("--mcap-depth-topic", mcap_depth_topic_raw);
app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw); app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw);
app.add_option("--mcap-depth-calibration-topic", mcap_depth_calibration_topic_raw);
app.add_option("--mcap-pose-topic", mcap_pose_topic_raw); app.add_option("--mcap-pose-topic", mcap_pose_topic_raw);
app.add_option("--mcap-body-topic", mcap_body_topic_raw); app.add_option("--mcap-body-topic", mcap_body_topic_raw);
app.add_option("--mcap-frame-id", mcap_frame_id_raw); app.add_option("--mcap-frame-id", mcap_frame_id_raw);
@@ -746,6 +752,10 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.calibration_topic = mcap_calibration_topic_raw; config.record.mcap.calibration_topic = mcap_calibration_topic_raw;
} }
if (!mcap_depth_calibration_topic_raw.empty()) {
config.record.mcap.enabled = true;
config.record.mcap.depth_calibration_topic = mcap_depth_calibration_topic_raw;
}
if (!mcap_pose_topic_raw.empty()) { if (!mcap_pose_topic_raw.empty()) {
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.pose_topic = mcap_pose_topic_raw; config.record.mcap.pose_topic = mcap_pose_topic_raw;
@@ -894,6 +904,10 @@ std::expected<void, std::string> validate_runtime_config(const RuntimeConfig &co
if (config.record.mcap.calibration_topic.empty()) { if (config.record.mcap.calibration_topic.empty()) {
return std::unexpected("invalid MCAP config: calibration_topic must not be empty"); return std::unexpected("invalid MCAP config: calibration_topic must not be empty");
} }
if (!config.record.mcap.depth_calibration_topic.empty() &&
config.record.mcap.depth_calibration_topic == config.record.mcap.calibration_topic) {
return std::unexpected("invalid MCAP config: depth_calibration_topic must differ from calibration_topic");
}
if (config.record.mcap.pose_topic.empty()) { if (config.record.mcap.pose_topic.empty()) {
return std::unexpected("invalid MCAP config: pose_topic must not be empty"); return std::unexpected("invalid MCAP config: pose_topic must not be empty");
} }
@@ -941,6 +955,7 @@ std::string summarize_runtime_config(const RuntimeConfig &config) {
ss << ", mcap.topic=" << config.record.mcap.topic; ss << ", mcap.topic=" << config.record.mcap.topic;
ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic; ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic;
ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic; ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic;
ss << ", mcap.depth_calibration_topic=" << config.record.mcap.depth_calibration_topic;
ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic; ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic;
ss << ", mcap.body_topic=" << config.record.mcap.body_topic; ss << ", mcap.body_topic=" << config.record.mcap.body_topic;
ss << ", mcap.frame_id=" << config.record.mcap.frame_id; ss << ", mcap.frame_id=" << config.record.mcap.frame_id;
+134 -89
View File
@@ -405,6 +405,65 @@ std::expected<void, std::string> write_depth_message(
return {}; return {};
} }
[[nodiscard]]
std::expected<void, std::string> write_calibration_message(
    mcap::McapWriter &writer,
    std::uint64_t timestamp_ns,
    std::string_view frame_id,
    mcap::SchemaId schema_id,
    std::string_view topic,
    mcap::ChannelId &channel_id,
    std::uint32_t &sequence,
    const RawCameraCalibrationView &calibration) {
  // Shared writer for both the video and depth calibration channels:
  // serializes a foxglove.CameraCalibration and appends it to the MCAP file.
  // Refuse to write when no topic has been configured for this channel.
  if (topic.empty()) {
    return std::unexpected("calibration topic is empty");
  }
  // Lazily register the channel on first use; 0 doubles as the
  // "not yet created" sentinel for the caller-owned channel id.
  if (channel_id == 0) {
    mcap::Channel channel(std::string(topic), "protobuf", schema_id);
    writer.addChannel(channel);
    channel_id = channel.id;
  }

  // Populate the protobuf payload from the raw calibration view.
  foxglove::CameraCalibration payload{};
  *payload.mutable_timestamp() = to_proto_timestamp(timestamp_ns);
  payload.set_frame_id(std::string(frame_id));
  payload.set_width(calibration.width);
  payload.set_height(calibration.height);
  payload.set_distortion_model(std::string(calibration.distortion_model));
  for (const double coefficient : calibration.distortion) {
    payload.add_d(coefficient);
  }
  // K (3x3), R (3x3), and P (3x4) are copied with an element-count check;
  // propagate the first failure unchanged.
  auto copied = append_repeated_double(payload.mutable_k(), calibration.intrinsic_matrix, 9, "K");
  if (!copied) {
    return copied;
  }
  copied = append_repeated_double(payload.mutable_r(), calibration.rectification_matrix, 9, "R");
  if (!copied) {
    return copied;
  }
  copied = append_repeated_double(payload.mutable_p(), calibration.projection_matrix, 12, "P");
  if (!copied) {
    return copied;
  }

  std::string encoded{};
  if (!payload.SerializeToString(&encoded)) {
    return std::unexpected("failed to serialize foxglove.CameraCalibration");
  }

  // The record only borrows the serialized bytes, so `encoded` must stay
  // alive until writer.write() returns (it does: same scope).
  mcap::Message record{};
  record.channelId = channel_id;
  record.sequence = sequence++;
  record.logTime = timestamp_ns;
  record.publishTime = timestamp_ns;
  record.data = reinterpret_cast<const std::byte *>(encoded.data());
  record.dataSize = encoded.size();
  if (const auto status = writer.write(record); !status.ok()) {
    return std::unexpected("failed to write MCAP calibration message: " + status.message);
  }
  return {};
}
} }
struct McapRecordSink::State { struct McapRecordSink::State {
@@ -412,6 +471,7 @@ struct McapRecordSink::State {
std::string path{}; std::string path{};
std::string frame_id{}; std::string frame_id{};
std::string calibration_topic{}; std::string calibration_topic{};
std::string depth_calibration_topic{};
std::string pose_topic{}; std::string pose_topic{};
std::string body_topic{}; std::string body_topic{};
mcap::SchemaId calibration_schema_id{0}; mcap::SchemaId calibration_schema_id{0};
@@ -419,11 +479,13 @@ struct McapRecordSink::State {
mcap::ChannelId video_channel_id{0}; mcap::ChannelId video_channel_id{0};
mcap::ChannelId depth_channel_id{0}; mcap::ChannelId depth_channel_id{0};
mcap::ChannelId calibration_channel_id{0}; mcap::ChannelId calibration_channel_id{0};
mcap::ChannelId depth_calibration_channel_id{0};
mcap::ChannelId pose_channel_id{0}; mcap::ChannelId pose_channel_id{0};
mcap::ChannelId body_channel_id{0}; mcap::ChannelId body_channel_id{0};
std::uint32_t video_sequence{0}; std::uint32_t video_sequence{0};
std::uint32_t depth_sequence{0}; std::uint32_t depth_sequence{0};
std::uint32_t calibration_sequence{0}; std::uint32_t calibration_sequence{0};
std::uint32_t depth_calibration_sequence{0};
std::uint32_t pose_sequence{0}; std::uint32_t pose_sequence{0};
std::uint32_t body_sequence{0}; std::uint32_t body_sequence{0};
CodecType codec{CodecType::H264}; CodecType codec{CodecType::H264};
@@ -456,6 +518,7 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
state->path = config.record.mcap.path; state->path = config.record.mcap.path;
state->frame_id = config.record.mcap.frame_id; state->frame_id = config.record.mcap.frame_id;
state->calibration_topic = config.record.mcap.calibration_topic; state->calibration_topic = config.record.mcap.calibration_topic;
state->depth_calibration_topic = config.record.mcap.depth_calibration_topic;
state->pose_topic = config.record.mcap.pose_topic; state->pose_topic = config.record.mcap.pose_topic;
state->body_topic = config.record.mcap.body_topic; state->body_topic = config.record.mcap.body_topic;
@@ -622,53 +685,30 @@ std::expected<void, std::string> McapRecordSink::write_camera_calibration(const
if (state_ == nullptr) { if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open"); return std::unexpected("MCAP sink is not open");
} }
if (state_->calibration_topic.empty()) { return write_calibration_message(
return std::unexpected("calibration topic is empty"); state_->writer,
calibration.timestamp_ns,
state_->frame_id,
state_->calibration_schema_id,
state_->calibration_topic,
state_->calibration_channel_id,
state_->calibration_sequence,
calibration);
} }
if (state_->calibration_channel_id == 0) { std::expected<void, std::string> McapRecordSink::write_depth_camera_calibration(const RawCameraCalibrationView &calibration) {
mcap::Channel calibration_channel(state_->calibration_topic, "protobuf", state_->calibration_schema_id); if (state_ == nullptr) {
state_->writer.addChannel(calibration_channel); return std::unexpected("MCAP sink is not open");
state_->calibration_channel_id = calibration_channel.id;
} }
return write_calibration_message(
foxglove::CameraCalibration message{}; state_->writer,
*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns); calibration.timestamp_ns,
message.set_frame_id(state_->frame_id); state_->frame_id,
message.set_width(calibration.width); state_->calibration_schema_id,
message.set_height(calibration.height); state_->depth_calibration_topic,
message.set_distortion_model(std::string(calibration.distortion_model)); state_->depth_calibration_channel_id,
for (const double value : calibration.distortion) { state_->depth_calibration_sequence,
message.add_d(value); calibration);
}
if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
return append;
}
if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
return append;
}
if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
return append;
}
std::string serialized{};
if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize foxglove.CameraCalibration");
}
mcap::Message record{};
record.channelId = state_->calibration_channel_id;
record.sequence = state_->calibration_sequence++;
record.logTime = calibration.timestamp_ns;
record.publishTime = calibration.timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size();
const auto write_status = state_->writer.write(record);
if (!write_status.ok()) {
return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
}
return {};
} }
std::expected<void, std::string> McapRecordSink::write_pose(const RawPoseView &pose) { std::expected<void, std::string> McapRecordSink::write_pose(const RawPoseView &pose) {
@@ -778,11 +818,13 @@ struct MultiMcapRecordSink::State {
mcap::ChannelId video_channel_id{0}; mcap::ChannelId video_channel_id{0};
mcap::ChannelId depth_channel_id{0}; mcap::ChannelId depth_channel_id{0};
mcap::ChannelId calibration_channel_id{0}; mcap::ChannelId calibration_channel_id{0};
mcap::ChannelId depth_calibration_channel_id{0};
mcap::ChannelId pose_channel_id{0}; mcap::ChannelId pose_channel_id{0};
mcap::ChannelId body_channel_id{0}; mcap::ChannelId body_channel_id{0};
std::uint32_t video_sequence{0}; std::uint32_t video_sequence{0};
std::uint32_t depth_sequence{0}; std::uint32_t depth_sequence{0};
std::uint32_t calibration_sequence{0}; std::uint32_t calibration_sequence{0};
std::uint32_t depth_calibration_sequence{0};
std::uint32_t pose_sequence{0}; std::uint32_t pose_sequence{0};
std::uint32_t body_sequence{0}; std::uint32_t body_sequence{0};
CodecType codec{CodecType::H264}; CodecType codec{CodecType::H264};
@@ -851,6 +893,7 @@ std::expected<void, std::string> validate_new_stream_config(
if (stream.config.topic == topic || if (stream.config.topic == topic ||
stream.config.depth_topic == topic || stream.config.depth_topic == topic ||
stream.config.calibration_topic == topic || stream.config.calibration_topic == topic ||
stream.config.depth_calibration_topic == topic ||
stream.config.pose_topic == topic || stream.config.pose_topic == topic ||
stream.config.body_topic == topic) { stream.config.body_topic == topic) {
return true; return true;
@@ -859,14 +902,35 @@ std::expected<void, std::string> validate_new_stream_config(
return false; return false;
}; };
std::vector<std::string> config_topics{};
for (const auto *topic : { for (const auto *topic : {
&config.topic, &config.topic,
&config.depth_topic, &config.depth_topic,
&config.calibration_topic, &config.calibration_topic,
&config.depth_calibration_topic,
&config.pose_topic, &config.pose_topic,
&config.body_topic, &config.body_topic,
}) { }) {
if (topic_in_use(*topic)) { if (!topic->empty()) {
config_topics.push_back(*topic);
}
}
std::sort(config_topics.begin(), config_topics.end());
for (std::size_t index = 1; index < config_topics.size(); ++index) {
if (config_topics[index - 1] == config_topics[index]) {
return std::unexpected("duplicate MCAP topic: " + config_topics[index]);
}
}
for (const auto *topic : {
&config.topic,
&config.depth_topic,
&config.calibration_topic,
&config.depth_calibration_topic,
&config.pose_topic,
&config.body_topic,
}) {
if (!topic->empty() && topic_in_use(*topic)) {
return std::unexpected("duplicate MCAP topic: " + *topic); return std::unexpected("duplicate MCAP topic: " + *topic);
} }
} }
@@ -1091,53 +1155,34 @@ std::expected<void, std::string> MultiMcapRecordSink::write_camera_calibration(
if (stream == nullptr) { if (stream == nullptr) {
return std::unexpected(error); return std::unexpected(error);
} }
if (stream->config.calibration_topic.empty()) { return write_calibration_message(
return std::unexpected("calibration topic is empty"); state_->writer,
calibration.timestamp_ns,
stream->config.frame_id,
state_->calibration_schema_id,
stream->config.calibration_topic,
stream->calibration_channel_id,
stream->calibration_sequence,
calibration);
} }
if (stream->calibration_channel_id == 0) { std::expected<void, std::string> MultiMcapRecordSink::write_depth_camera_calibration(
mcap::Channel calibration_channel(stream->config.calibration_topic, "protobuf", state_->calibration_schema_id); const StreamId stream_id,
state_->writer.addChannel(calibration_channel); const RawCameraCalibrationView &calibration) {
stream->calibration_channel_id = calibration_channel.id; std::string error{};
auto *stream = stream_state(state_, stream_id, error);
if (stream == nullptr) {
return std::unexpected(error);
} }
return write_calibration_message(
foxglove::CameraCalibration message{}; state_->writer,
*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns); calibration.timestamp_ns,
message.set_frame_id(stream->config.frame_id); stream->config.frame_id,
message.set_width(calibration.width); state_->calibration_schema_id,
message.set_height(calibration.height); stream->config.depth_calibration_topic,
message.set_distortion_model(std::string(calibration.distortion_model)); stream->depth_calibration_channel_id,
for (const double value : calibration.distortion) { stream->depth_calibration_sequence,
message.add_d(value); calibration);
}
if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
return append;
}
if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
return append;
}
if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
return append;
}
std::string serialized{};
if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize foxglove.CameraCalibration");
}
mcap::Message record{};
record.channelId = stream->calibration_channel_id;
record.sequence = stream->calibration_sequence++;
record.logTime = calibration.timestamp_ns;
record.publishTime = calibration.timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size();
const auto write_status = state_->writer.write(record);
if (!write_status.ok()) {
return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
}
return {};
} }
std::expected<void, std::string> MultiMcapRecordSink::write_pose( std::expected<void, std::string> MultiMcapRecordSink::write_pose(
+17
View File
@@ -86,6 +86,7 @@ int main(int argc, char **argv) {
.topic = "/zed1/video", .topic = "/zed1/video",
.depth_topic = "/zed1/depth", .depth_topic = "/zed1/depth",
.calibration_topic = "/zed1/calibration", .calibration_topic = "/zed1/calibration",
.depth_calibration_topic = "/zed1/depth_calibration",
.pose_topic = "/zed1/pose", .pose_topic = "/zed1/pose",
.body_topic = "/zed1/body", .body_topic = "/zed1/body",
.frame_id = "zed1", .frame_id = "zed1",
@@ -94,6 +95,7 @@ int main(int argc, char **argv) {
.topic = "/zed2/video", .topic = "/zed2/video",
.depth_topic = "/zed2/depth", .depth_topic = "/zed2/depth",
.calibration_topic = "/zed2/calibration", .calibration_topic = "/zed2/calibration",
.depth_calibration_topic = "/zed2/depth_calibration",
.pose_topic = "/zed2/pose", .pose_topic = "/zed2/pose",
.body_topic = "/zed2/body", .body_topic = "/zed2/body",
.frame_id = "zed2", .frame_id = "zed2",
@@ -162,6 +164,19 @@ int main(int argc, char **argv) {
spdlog::error("failed to write calibration: {}", write.error()); spdlog::error("failed to write calibration: {}", write.error());
return exit_code(TesterExitCode::WriteError); return exit_code(TesterExitCode::WriteError);
} }
if (auto write = sink->write_depth_camera_calibration(stream_id, cvmmap_streamer::record::RawCameraCalibrationView{
.timestamp_ns = 100,
.width = 320,
.height = 240,
.distortion_model = "plumb_bob",
.distortion = distortion,
.intrinsic_matrix = intrinsic_matrix,
.rectification_matrix = rectification_matrix,
.projection_matrix = projection_matrix,
}); !write) {
spdlog::error("failed to write depth calibration: {}", write.error());
return exit_code(TesterExitCode::WriteError);
}
if (auto write = sink->write_pose(stream_id, cvmmap_streamer::record::RawPoseView{ if (auto write = sink->write_pose(stream_id, cvmmap_streamer::record::RawPoseView{
.timestamp_ns = 100, .timestamp_ns = 100,
@@ -254,10 +269,12 @@ int main(int argc, char **argv) {
"/zed1/video", "/zed1/video",
"/zed1/depth", "/zed1/depth",
"/zed1/calibration", "/zed1/calibration",
"/zed1/depth_calibration",
"/zed1/pose", "/zed1/pose",
"/zed2/video", "/zed2/video",
"/zed2/depth", "/zed2/depth",
"/zed2/calibration", "/zed2/calibration",
"/zed2/depth_calibration",
"/zed2/pose", "/zed2/pose",
}) { }) {
if (topic_counts[topic] != 1) { if (topic_counts[topic] != 1) {
+298 -96
View File
@@ -53,7 +53,8 @@ struct CliOptions {
std::string codec{"h265"}; std::string codec{"h265"};
std::string encoder_device{"auto"}; std::string encoder_device{"auto"};
std::string mcap_compression{"zstd"}; std::string mcap_compression{"zstd"};
std::string depth_mode{"quality"}; std::string depth_mode{"neural"};
std::string depth_size{"optimal"};
bool with_pose{false}; bool with_pose{false};
std::uint32_t start_frame{0}; std::uint32_t start_frame{0};
std::uint32_t end_frame{0}; std::uint32_t end_frame{0};
@@ -62,6 +63,7 @@ struct CliOptions {
std::string video_topic{"/camera/video"}; std::string video_topic{"/camera/video"};
std::string depth_topic{"/camera/depth"}; std::string depth_topic{"/camera/depth"};
std::string calibration_topic{"/camera/calibration"}; std::string calibration_topic{"/camera/calibration"};
std::string depth_calibration_topic{"/camera/depth_calibration"};
std::string pose_topic{"/camera/pose"}; std::string pose_topic{"/camera/pose"};
std::string world_frame_id{"world"}; std::string world_frame_id{"world"};
std::string pose_config_path{}; std::string pose_config_path{};
@@ -88,37 +90,9 @@ struct TrackingSample {
std::array<double, 4> orientation{}; std::array<double, 4> orientation{};
}; };
struct CameraStream { struct CalibrationData {
SourceSpec source{};
std::unique_ptr<sl::Camera> camera{};
sl::RuntimeParameters runtime{};
sl::Mat current_left_frame{};
sl::Mat current_depth_frame{};
sl::Mat next_left_frame{};
sl::Mat next_depth_frame{};
TrackingSample current_tracking{};
TrackingSample next_tracking{};
std::uint64_t current_timestamp_ns{0};
std::uint64_t next_timestamp_ns{0};
std::uint64_t first_timestamp_ns{0};
std::uint64_t last_timestamp_ns{0};
std::uint64_t total_frames{0};
std::uint64_t nominal_frame_period_ns{0};
std::uint64_t dropped_frames{0};
float fps{0.0f};
std::uint32_t width{0}; std::uint32_t width{0};
std::uint32_t height{0}; std::uint32_t height{0};
int sync_position{-1};
bool has_next{false};
bool tracking_enabled{false};
bool calibration_written{false};
std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
int serial_number{0};
PoseTrackingOptions pose_tracking{};
cvmmap_streamer::ipc::FrameInfo frame_info{};
std::optional<cvmmap_streamer::encode::EncoderBackend> backend{};
cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
cvmmap_streamer::record::MultiMcapRecordSink::StreamId mcap_stream_id{0};
std::array<double, 5> distortion{ std::array<double, 5> distortion{
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
}; };
@@ -139,6 +113,45 @@ struct CameraStream {
}; };
}; };
struct CameraStream {
SourceSpec source{};
std::unique_ptr<sl::Camera> camera{};
sl::RuntimeParameters runtime{};
sl::Resolution depth_image_size{};
sl::Mat current_left_frame{};
sl::Mat current_depth_frame{};
sl::Mat next_left_frame{};
sl::Mat next_depth_frame{};
TrackingSample current_tracking{};
TrackingSample next_tracking{};
std::uint64_t current_timestamp_ns{0};
std::uint64_t next_timestamp_ns{0};
std::uint64_t first_timestamp_ns{0};
std::uint64_t last_timestamp_ns{0};
std::uint64_t total_frames{0};
std::uint64_t nominal_frame_period_ns{0};
std::uint64_t dropped_frames{0};
float fps{0.0f};
std::uint32_t width{0};
std::uint32_t height{0};
std::uint32_t depth_width{0};
std::uint32_t depth_height{0};
int sync_position{-1};
bool has_next{false};
bool tracking_enabled{false};
bool calibration_written{false};
bool depth_calibration_written{false};
std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
int serial_number{0};
PoseTrackingOptions pose_tracking{};
cvmmap_streamer::ipc::FrameInfo frame_info{};
std::optional<cvmmap_streamer::encode::EncoderBackend> backend{};
cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
cvmmap_streamer::record::MultiMcapRecordSink::StreamId mcap_stream_id{0};
CalibrationData video_calibration{};
CalibrationData depth_calibration{};
};
[[nodiscard]] [[nodiscard]]
constexpr int exit_code(const ToolExitCode code) { constexpr int exit_code(const ToolExitCode code) {
return static_cast<int>(code); return static_cast<int>(code);
@@ -208,6 +221,9 @@ std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state)
return zed_string(sl::toString(state)); return zed_string(sl::toString(state));
} }
[[nodiscard]]
std::string lowercase(std::string value);
[[nodiscard]] [[nodiscard]]
std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) { std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) {
if (raw == "h264") { if (raw == "h264") {
@@ -249,19 +265,41 @@ std::expected<cvmmap_streamer::McapCompression, std::string> parse_mcap_compress
[[nodiscard]] [[nodiscard]]
std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) { std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) {
if (raw == "neural") { auto normalized = lowercase(std::string(raw));
std::replace(normalized.begin(), normalized.end(), '-', '_');
if (normalized == "neural_light") {
return sl::DEPTH_MODE::NEURAL_LIGHT;
}
if (normalized == "neural") {
return sl::DEPTH_MODE::NEURAL; return sl::DEPTH_MODE::NEURAL;
} }
if (raw == "quality") { if (normalized == "neural_plus") {
return sl::DEPTH_MODE::QUALITY; return sl::DEPTH_MODE::NEURAL_PLUS;
} }
if (raw == "performance") { return std::unexpected(
return sl::DEPTH_MODE::PERFORMANCE; "invalid depth mode: '" + std::string(raw) + "' (expected: neural_light|neural|neural_plus)");
} }
if (raw == "ultra") {
[[nodiscard]]
std::expected<sl::Resolution, std::string> parse_depth_size(const std::string_view raw) {
  // Parses the --depth-size option. Returns the SDK sentinel resolutions for
  // the symbolic tokens, or an explicit width/height pair.
  auto normalized = lowercase(std::string(raw));
  std::replace(normalized.begin(), normalized.end(), '-', '_');
  if (normalized == "optimal") {
    // (-1, -1) asks the SDK for its optimal depth resolution.
    return sl::Resolution(-1, -1);
  }
  if (normalized == "native") {
    // (0, 0) keeps the native camera resolution.
    return sl::Resolution(0, 0);
  }
  // Explicit sizes must be positive decimal dimensions joined by 'x' or 'X'.
  static const std::regex size_pattern{R"(^([1-9][0-9]*)[xX]([1-9][0-9]*)$)"};
  std::smatch match{};
  const auto size_string = std::string(raw);
  if (!std::regex_match(size_string, match, size_pattern)) {
    return std::unexpected(
        "invalid depth size: '" + size_string + "' (expected: optimal|native|<width>x<height>)");
  }
  // The regex accepts arbitrarily long digit runs; route std::stoi overflow
  // into the error channel instead of letting std::out_of_range escape.
  try {
    return sl::Resolution(std::stoi(match[1].str()), std::stoi(match[2].str()));
  } catch (const std::out_of_range &) {
    return std::unexpected(
        "invalid depth size: '" + size_string + "' (dimensions are out of range)");
  }
}
[[nodiscard]] [[nodiscard]]
@@ -339,6 +377,60 @@ std::string normalize_config_token(std::string value) {
return value; return value;
} }
[[nodiscard]]
CalibrationData make_calibration_data(const sl::CameraInformation &camera_info) {
  // Builds a pinhole calibration from the left sensor parameters. Falls back
  // to the overall camera resolution when the per-sensor image size is not
  // populated by the SDK.
  CalibrationData calibration{};
  const auto &camera_config = camera_info.camera_configuration;
  const auto &left_camera = camera_config.calibration_parameters.left_cam;
  const auto resolved_width = left_camera.image_size.width > 0
      ? left_camera.image_size.width
      : camera_config.resolution.width;
  const auto resolved_height = left_camera.image_size.height > 0
      ? left_camera.image_size.height
      : camera_config.resolution.height;
  calibration.width = static_cast<std::uint32_t>(resolved_width);
  calibration.height = static_cast<std::uint32_t>(resolved_height);
  // Match the calibration previously emitted inline by the recorder paths
  // ("plumb_bob" with zero distortion and identity rectification): set these
  // explicitly instead of relying on CalibrationData's default member values.
  calibration.distortion = {0.0, 0.0, 0.0, 0.0, 0.0};
  calibration.rectification_matrix = {
      1.0, 0.0, 0.0,
      0.0, 1.0, 0.0,
      0.0, 0.0, 1.0,
  };
  calibration.intrinsic_matrix = {
      static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
      0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
      0.0, 0.0, 1.0,
  };
  calibration.projection_matrix = {
      static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
      0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
      0.0, 0.0, 1.0, 0.0,
  };
  return calibration;
}
[[nodiscard]]
std::string describe_depth_resolution_request(const sl::Resolution &resolution) {
  // Renders a requested depth resolution back into the CLI vocabulary:
  // the sentinel pairs map to their symbolic tokens, anything else to WxH.
  const bool requests_optimal = resolution.width == -1 && resolution.height == -1;
  if (requests_optimal) {
    return "optimal";
  }
  const bool requests_native = resolution.width == 0 && resolution.height == 0;
  if (requests_native) {
    return "native";
  }
  return std::to_string(resolution.width) + "x" + std::to_string(resolution.height);
}
[[nodiscard]]
std::expected<void, std::string> validate_depth_resolution_request(
    const sl::Resolution &requested_resolution,
    const std::uint32_t native_width,
    const std::uint32_t native_height) {
  // Sentinel requests (optimal/native) carry non-positive dimensions and are
  // resolved by the SDK itself, so only explicit sizes are validated here.
  // NOTE(review): assumes sl::Resolution dimensions are signed so the
  // sentinels actually compare non-positive — confirm against the SDK header.
  const bool is_explicit_request =
      requested_resolution.width > 0 && requested_resolution.height > 0;
  if (!is_explicit_request) {
    return {};
  }
  const bool fits_native =
      requested_resolution.width <= static_cast<int>(native_width) &&
      requested_resolution.height <= static_cast<int>(native_height);
  if (fits_native) {
    return {};
  }
  // Upscaling beyond the sensor resolution is rejected up front.
  return std::unexpected(
      "requested depth size " + describe_depth_resolution_request(requested_resolution) +
      " exceeds native camera resolution " + std::to_string(native_width) + "x" + std::to_string(native_height));
}
[[nodiscard]] [[nodiscard]]
std::expected<sl::COORDINATE_SYSTEM, std::string> parse_coordinate_system(const std::string_view raw) { std::expected<sl::COORDINATE_SYSTEM, std::string> parse_coordinate_system(const std::string_view raw) {
const auto normalized = normalize_config_token(std::string(raw)); const auto normalized = normalize_config_token(std::string(raw));
@@ -592,7 +684,11 @@ std::expected<void, std::string> read_frame_data(
return std::unexpected(valid.error()); return std::unexpected(valid.error());
} }
const auto depth_status = stream.camera->retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU); const auto depth_status = stream.camera->retrieveMeasure(
depth_frame,
sl::MEASURE::DEPTH_U16_MM,
sl::MEM::CPU,
stream.depth_image_size);
if (depth_status != sl::ERROR_CODE::SUCCESS) { if (depth_status != sl::ERROR_CODE::SUCCESS) {
return std::unexpected( return std::unexpected(
"failed to retrieve depth map for " + stream.source.label + ": " + zed_status_string(depth_status)); "failed to retrieve depth map for " + stream.source.label + ": " + zed_status_string(depth_status));
@@ -733,11 +829,13 @@ std::expected<CameraStream, std::string> open_camera_stream(
const cvmmap_streamer::CodecType codec, const cvmmap_streamer::CodecType codec,
const cvmmap_streamer::EncoderDeviceType encoder_device, const cvmmap_streamer::EncoderDeviceType encoder_device,
const sl::DEPTH_MODE depth_mode, const sl::DEPTH_MODE depth_mode,
const sl::Resolution depth_size,
const PoseTrackingOptions &pose_tracking) { const PoseTrackingOptions &pose_tracking) {
CameraStream stream{}; CameraStream stream{};
stream.source = source; stream.source = source;
stream.camera = std::make_unique<sl::Camera>(); stream.camera = std::make_unique<sl::Camera>();
stream.pose_tracking = pose_tracking; stream.pose_tracking = pose_tracking;
stream.depth_image_size = depth_size;
sl::InitParameters init{}; sl::InitParameters init{};
init.input.setFromSVOFile(source.path.c_str()); init.input.setFromSVOFile(source.path.c_str());
@@ -774,7 +872,6 @@ std::expected<CameraStream, std::string> open_camera_stream(
const auto camera_info = stream.camera->getCameraInformation(); const auto camera_info = stream.camera->getCameraInformation();
const auto &camera_config = camera_info.camera_configuration; const auto &camera_config = camera_info.camera_configuration;
const auto &left_camera = camera_config.calibration_parameters.left_cam;
stream.serial_number = camera_info.serial_number; stream.serial_number = camera_info.serial_number;
stream.width = static_cast<std::uint32_t>(camera_config.resolution.width); stream.width = static_cast<std::uint32_t>(camera_config.resolution.width);
stream.height = static_cast<std::uint32_t>(camera_config.resolution.height); stream.height = static_cast<std::uint32_t>(camera_config.resolution.height);
@@ -783,17 +880,10 @@ std::expected<CameraStream, std::string> open_camera_stream(
if (stream.width == 0 || stream.height == 0) { if (stream.width == 0 || stream.height == 0) {
return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string()); return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
} }
if (auto valid = validate_depth_resolution_request(depth_size, stream.width, stream.height); !valid) {
stream.intrinsic_matrix = { return std::unexpected(valid.error());
static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), }
0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), stream.video_calibration = make_calibration_data(camera_info);
0.0, 0.0, 1.0,
};
stream.projection_matrix = {
static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
0.0, 0.0, 1.0, 0.0,
};
stream.frame_info = cvmmap_streamer::ipc::FrameInfo{ stream.frame_info = cvmmap_streamer::ipc::FrameInfo{
.width = static_cast<std::uint16_t>(stream.width), .width = static_cast<std::uint16_t>(stream.width),
@@ -828,6 +918,12 @@ std::expected<CameraStream, std::string> open_camera_stream(
if (!first_frame) { if (!first_frame) {
return std::unexpected(first_frame.error()); return std::unexpected(first_frame.error());
} }
stream.depth_width = static_cast<std::uint32_t>(stream.current_depth_frame.getWidth());
stream.depth_height = static_cast<std::uint32_t>(stream.current_depth_frame.getHeight());
stream.depth_calibration = make_calibration_data(
stream.camera->getCameraInformation(sl::Resolution(
static_cast<int>(stream.depth_width),
static_cast<int>(stream.depth_height))));
stream.first_timestamp_ns = first_timestamp_ns; stream.first_timestamp_ns = first_timestamp_ns;
auto last_timestamp_ns = read_last_readable_timestamp(stream); auto last_timestamp_ns = read_last_readable_timestamp(stream);
@@ -912,6 +1008,9 @@ std::expected<void, std::string> register_mcap_streams(
config.topic = namespace_topic(stream.source.label, options.video_topic); config.topic = namespace_topic(stream.source.label, options.video_topic);
config.depth_topic = namespace_topic(stream.source.label, options.depth_topic); config.depth_topic = namespace_topic(stream.source.label, options.depth_topic);
config.calibration_topic = namespace_topic(stream.source.label, options.calibration_topic); config.calibration_topic = namespace_topic(stream.source.label, options.calibration_topic);
if (stream.depth_width != stream.width || stream.depth_height != stream.height) {
config.depth_calibration_topic = namespace_topic(stream.source.label, options.depth_calibration_topic);
}
config.pose_topic = options.with_pose ? namespace_topic(stream.source.label, options.pose_topic) : ""; config.pose_topic = options.with_pose ? namespace_topic(stream.source.label, options.pose_topic) : "";
config.body_topic = namespace_topic(stream.source.label, "/camera/body"); config.body_topic = namespace_topic(stream.source.label, "/camera/body");
config.frame_id = multi_frame_id(options, stream.source.label); config.frame_id = multi_frame_id(options, stream.source.label);
@@ -1082,13 +1181,13 @@ std::expected<void, std::string> encode_and_write_group(
if (!stream.calibration_written) { if (!stream.calibration_written) {
cvmmap_streamer::record::RawCameraCalibrationView calibration{ cvmmap_streamer::record::RawCameraCalibrationView calibration{
.timestamp_ns = group_timestamp_ns, .timestamp_ns = group_timestamp_ns,
.width = stream.width, .width = stream.video_calibration.width,
.height = stream.height, .height = stream.video_calibration.height,
.distortion_model = "plumb_bob", .distortion_model = "plumb_bob",
.distortion = stream.distortion, .distortion = stream.video_calibration.distortion,
.intrinsic_matrix = stream.intrinsic_matrix, .intrinsic_matrix = stream.video_calibration.intrinsic_matrix,
.rectification_matrix = stream.rectification_matrix, .rectification_matrix = stream.video_calibration.rectification_matrix,
.projection_matrix = stream.projection_matrix, .projection_matrix = stream.video_calibration.projection_matrix,
}; };
if (auto write = sink.write_camera_calibration(stream.mcap_stream_id, calibration); !write) { if (auto write = sink.write_camera_calibration(stream.mcap_stream_id, calibration); !write) {
return std::unexpected("failed to write calibration for " + stream.source.label + ": " + write.error()); return std::unexpected("failed to write calibration for " + stream.source.label + ": " + write.error());
@@ -1096,8 +1195,34 @@ std::expected<void, std::string> encode_and_write_group(
stream.calibration_written = true; stream.calibration_written = true;
} }
if (!stream.depth_calibration_written &&
(stream.depth_width != stream.width || stream.depth_height != stream.height)) {
cvmmap_streamer::record::RawCameraCalibrationView depth_calibration{
.timestamp_ns = group_timestamp_ns,
.width = stream.depth_calibration.width,
.height = stream.depth_calibration.height,
.distortion_model = "plumb_bob",
.distortion = stream.depth_calibration.distortion,
.intrinsic_matrix = stream.depth_calibration.intrinsic_matrix,
.rectification_matrix = stream.depth_calibration.rectification_matrix,
.projection_matrix = stream.depth_calibration.projection_matrix,
};
if (auto write = sink.write_depth_camera_calibration(stream.mcap_stream_id, depth_calibration); !write) {
return std::unexpected("failed to write depth calibration for " + stream.source.label + ": " + write.error());
}
stream.depth_calibration_written = true;
}
const auto depth_width = static_cast<std::uint32_t>(stream.current_depth_frame.getWidth());
const auto depth_height = static_cast<std::uint32_t>(stream.current_depth_frame.getHeight());
if (depth_width != stream.depth_width || depth_height != stream.depth_height) {
return std::unexpected(
"depth resolution changed unexpectedly for " + stream.source.label + ": " +
std::to_string(depth_width) + "x" + std::to_string(depth_height) + " vs " +
std::to_string(stream.depth_width) + "x" + std::to_string(stream.depth_height));
}
const auto depth_step_bytes = stream.current_depth_frame.getStepBytes(sl::MEM::CPU); const auto depth_step_bytes = stream.current_depth_frame.getStepBytes(sl::MEM::CPU);
const auto packed_depth_bytes = static_cast<std::size_t>(stream.width) * sizeof(std::uint16_t); const auto packed_depth_bytes = static_cast<std::size_t>(depth_width) * sizeof(std::uint16_t);
if (depth_step_bytes < packed_depth_bytes) { if (depth_step_bytes < packed_depth_bytes) {
return std::unexpected( return std::unexpected(
"depth stride " + std::to_string(depth_step_bytes) + " is smaller than packed row size " + "depth stride " + std::to_string(depth_step_bytes) + " is smaller than packed row size " +
@@ -1109,7 +1234,7 @@ std::expected<void, std::string> encode_and_write_group(
if (depth_step_bytes == packed_depth_bytes) { if (depth_step_bytes == packed_depth_bytes) {
depth_pixels = std::span<const std::uint16_t>( depth_pixels = std::span<const std::uint16_t>(
stream.current_depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU), stream.current_depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
static_cast<std::size_t>(stream.width) * static_cast<std::size_t>(stream.height)); static_cast<std::size_t>(depth_width) * static_cast<std::size_t>(depth_height));
} else { } else {
compact_depth = copy_compact_u16_plane(stream.current_depth_frame); compact_depth = copy_compact_u16_plane(stream.current_depth_frame);
depth_pixels = *compact_depth; depth_pixels = *compact_depth;
@@ -1117,8 +1242,8 @@ std::expected<void, std::string> encode_and_write_group(
cvmmap_streamer::record::RawDepthMapU16View depth_map{ cvmmap_streamer::record::RawDepthMapU16View depth_map{
.timestamp_ns = group_timestamp_ns, .timestamp_ns = group_timestamp_ns,
.width = stream.width, .width = depth_width,
.height = stream.height, .height = depth_height,
.pixels = depth_pixels, .pixels = depth_pixels,
}; };
if (auto write = sink.write_depth_map_u16(stream.mcap_stream_id, depth_map); !write) { if (auto write = sink.write_depth_map_u16(stream.mcap_stream_id, depth_map); !write) {
@@ -1230,6 +1355,7 @@ int run_single_source(
const cvmmap_streamer::EncoderDeviceType encoder_device, const cvmmap_streamer::EncoderDeviceType encoder_device,
const cvmmap_streamer::McapCompression compression, const cvmmap_streamer::McapCompression compression,
const sl::DEPTH_MODE depth_mode, const sl::DEPTH_MODE depth_mode,
const sl::Resolution depth_size,
const PoseTrackingOptions &pose_tracking) { const PoseTrackingOptions &pose_tracking) {
const auto input_path = std::filesystem::path{options.input_paths.front()}; const auto input_path = std::filesystem::path{options.input_paths.front()};
if (!std::filesystem::is_regular_file(input_path)) { if (!std::filesystem::is_regular_file(input_path)) {
@@ -1319,7 +1445,6 @@ int run_single_source(
const auto camera_info = camera.getCameraInformation(); const auto camera_info = camera.getCameraInformation();
const auto &camera_config = camera_info.camera_configuration; const auto &camera_config = camera_info.camera_configuration;
const auto &left_camera = camera_config.calibration_parameters.left_cam;
const auto width = static_cast<std::uint32_t>(camera_config.resolution.width); const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
const auto height = static_cast<std::uint32_t>(camera_config.resolution.height); const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
if (width == 0 || height == 0) { if (width == 0 || height == 0) {
@@ -1327,6 +1452,11 @@ int run_single_source(
spdlog::error("camera resolution reported by the ZED SDK is invalid"); spdlog::error("camera resolution reported by the ZED SDK is invalid");
return exit_code(ToolExitCode::RuntimeError); return exit_code(ToolExitCode::RuntimeError);
} }
if (auto valid = validate_depth_resolution_request(depth_size, width, height); !valid) {
close_camera();
spdlog::error("{}", valid.error());
return exit_code(ToolExitCode::UsageError);
}
cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults(); cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg; config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
@@ -1339,6 +1469,7 @@ int run_single_source(
config.record.mcap.topic = options.video_topic; config.record.mcap.topic = options.video_topic;
config.record.mcap.depth_topic = options.depth_topic; config.record.mcap.depth_topic = options.depth_topic;
config.record.mcap.calibration_topic = options.calibration_topic; config.record.mcap.calibration_topic = options.calibration_topic;
config.record.mcap.depth_calibration_topic = options.depth_calibration_topic;
config.record.mcap.pose_topic = options.pose_topic; config.record.mcap.pose_topic = options.pose_topic;
config.record.mcap.frame_id = options.frame_id; config.record.mcap.frame_id = options.frame_id;
config.record.mcap.compression = compression; config.record.mcap.compression = compression;
@@ -1382,30 +1513,17 @@ int run_single_source(
return exit_code(ToolExitCode::RuntimeError); return exit_code(ToolExitCode::RuntimeError);
} }
const std::array<double, 5> distortion{ const auto video_calibration = make_calibration_data(camera_info);
0.0, 0.0, 0.0, 0.0, 0.0,
};
const std::array<double, 9> intrinsic_matrix{
static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
0.0, 0.0, 1.0,
};
const std::array<double, 9> rectification_matrix{
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 1.0,
};
const std::array<double, 12> projection_matrix{
static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
0.0, 0.0, 1.0, 0.0,
};
sl::RuntimeParameters runtime_parameters{}; sl::RuntimeParameters runtime_parameters{};
sl::Mat left_frame{}; sl::Mat left_frame{};
sl::Mat depth_frame{}; sl::Mat depth_frame{};
sl::Pose pose{}; sl::Pose pose{};
bool calibration_written{false}; bool calibration_written{false};
bool depth_calibration_written{false};
std::uint32_t depth_width{0};
std::uint32_t depth_height{0};
std::optional<CalibrationData> depth_calibration{};
std::uint64_t emitted_frames{0}; std::uint64_t emitted_frames{0};
std::optional<std::uint64_t> last_timestamp_ns{}; std::optional<std::uint64_t> last_timestamp_ns{};
std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{}; std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
@@ -1454,7 +1572,7 @@ int run_single_source(
return exit_code(ToolExitCode::RuntimeError); return exit_code(ToolExitCode::RuntimeError);
} }
const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU); const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU, depth_size);
if (depth_status != sl::ERROR_CODE::SUCCESS) { if (depth_status != sl::ERROR_CODE::SUCCESS) {
progress.finish(emitted_frames, false); progress.finish(emitted_frames, false);
sink->close(); sink->close();
@@ -1522,13 +1640,13 @@ int run_single_source(
if (!calibration_written) { if (!calibration_written) {
cvmmap_streamer::record::RawCameraCalibrationView calibration{ cvmmap_streamer::record::RawCameraCalibrationView calibration{
.timestamp_ns = timestamp_ns, .timestamp_ns = timestamp_ns,
.width = width, .width = video_calibration.width,
.height = height, .height = video_calibration.height,
.distortion_model = "plumb_bob", .distortion_model = "plumb_bob",
.distortion = distortion, .distortion = video_calibration.distortion,
.intrinsic_matrix = intrinsic_matrix, .intrinsic_matrix = video_calibration.intrinsic_matrix,
.rectification_matrix = rectification_matrix, .rectification_matrix = video_calibration.rectification_matrix,
.projection_matrix = projection_matrix, .projection_matrix = video_calibration.projection_matrix,
}; };
if (auto write = sink->write_camera_calibration(calibration); !write) { if (auto write = sink->write_camera_calibration(calibration); !write) {
progress.finish(emitted_frames, false); progress.finish(emitted_frames, false);
@@ -1541,8 +1659,63 @@ int run_single_source(
calibration_written = true; calibration_written = true;
} }
const auto actual_depth_width = static_cast<std::uint32_t>(depth_frame.getWidth());
const auto actual_depth_height = static_cast<std::uint32_t>(depth_frame.getHeight());
if (depth_width == 0 || depth_height == 0) {
depth_width = actual_depth_width;
depth_height = actual_depth_height;
depth_calibration = make_calibration_data(camera.getCameraInformation(sl::Resolution(
static_cast<int>(depth_width),
static_cast<int>(depth_height))));
if (depth_width != width || depth_height != height) {
spdlog::info(
"exporting depth at {}x{} (requested {}) while video remains {}x{}",
depth_width,
depth_height,
describe_depth_resolution_request(depth_size),
width,
height);
}
} else if (actual_depth_width != depth_width || actual_depth_height != depth_height) {
progress.finish(emitted_frames, false);
sink->close();
backend->shutdown();
close_camera();
spdlog::error(
"depth resolution changed unexpectedly from {}x{} to {}x{}",
depth_width,
depth_height,
actual_depth_width,
actual_depth_height);
return exit_code(ToolExitCode::RuntimeError);
}
if (!depth_calibration_written &&
(depth_width != width || depth_height != height) &&
depth_calibration.has_value()) {
cvmmap_streamer::record::RawCameraCalibrationView calibration{
.timestamp_ns = timestamp_ns,
.width = depth_calibration->width,
.height = depth_calibration->height,
.distortion_model = "plumb_bob",
.distortion = depth_calibration->distortion,
.intrinsic_matrix = depth_calibration->intrinsic_matrix,
.rectification_matrix = depth_calibration->rectification_matrix,
.projection_matrix = depth_calibration->projection_matrix,
};
if (auto write = sink->write_depth_camera_calibration(calibration); !write) {
progress.finish(emitted_frames, false);
sink->close();
backend->shutdown();
close_camera();
spdlog::error("failed to write depth calibration: {}", write.error());
return exit_code(ToolExitCode::RuntimeError);
}
depth_calibration_written = true;
}
const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU); const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU);
const auto packed_depth_bytes = static_cast<std::size_t>(width) * sizeof(std::uint16_t); const auto packed_depth_bytes = static_cast<std::size_t>(depth_width) * sizeof(std::uint16_t);
if (depth_step_bytes < packed_depth_bytes) { if (depth_step_bytes < packed_depth_bytes) {
progress.finish(emitted_frames, false); progress.finish(emitted_frames, false);
sink->close(); sink->close();
@@ -1557,7 +1730,7 @@ int run_single_source(
if (depth_step_bytes == packed_depth_bytes) { if (depth_step_bytes == packed_depth_bytes) {
depth_pixels = std::span<const std::uint16_t>( depth_pixels = std::span<const std::uint16_t>(
depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU), depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
static_cast<std::size_t>(width) * static_cast<std::size_t>(height)); static_cast<std::size_t>(depth_width) * static_cast<std::size_t>(depth_height));
} else { } else {
compact_depth = copy_compact_u16_plane(depth_frame); compact_depth = copy_compact_u16_plane(depth_frame);
depth_pixels = *compact_depth; depth_pixels = *compact_depth;
@@ -1565,8 +1738,8 @@ int run_single_source(
cvmmap_streamer::record::RawDepthMapU16View depth_map{ cvmmap_streamer::record::RawDepthMapU16View depth_map{
.timestamp_ns = timestamp_ns, .timestamp_ns = timestamp_ns,
.width = width, .width = depth_width,
.height = height, .height = depth_height,
.pixels = depth_pixels, .pixels = depth_pixels,
}; };
if (auto write = sink->write_depth_map_u16(depth_map); !write) { if (auto write = sink->write_depth_map_u16(depth_map); !write) {
@@ -1663,6 +1836,7 @@ int run_multi_source(
const cvmmap_streamer::EncoderDeviceType encoder_device, const cvmmap_streamer::EncoderDeviceType encoder_device,
const cvmmap_streamer::McapCompression compression, const cvmmap_streamer::McapCompression compression,
const sl::DEPTH_MODE depth_mode, const sl::DEPTH_MODE depth_mode,
const sl::Resolution depth_size,
const PoseTrackingOptions &pose_tracking) { const PoseTrackingOptions &pose_tracking) {
if (options.has_end_frame && options.end_frame < options.start_frame) { if (options.has_end_frame && options.end_frame < options.start_frame) {
spdlog::error( spdlog::error(
@@ -1684,7 +1858,7 @@ int run_multi_source(
close_camera_streams(streams); close_camera_streams(streams);
return interrupted_exit_code(); return interrupted_exit_code();
} }
auto stream = open_camera_stream(source, options, codec, encoder_device, depth_mode, pose_tracking); auto stream = open_camera_stream(source, options, codec, encoder_device, depth_mode, depth_size, pose_tracking);
if (!stream) { if (!stream) {
close_camera_streams(streams); close_camera_streams(streams);
spdlog::error("{}", stream.error()); spdlog::error("{}", stream.error());
@@ -1910,8 +2084,10 @@ int main(int argc, char **argv) {
app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available"); app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available");
app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)") app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)")
->check(CLI::IsMember({"none", "lz4", "zstd"})); ->check(CLI::IsMember({"none", "lz4", "zstd"}));
app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)") app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural_light|neural|neural_plus)")
->check(CLI::IsMember({"neural", "quality", "performance", "ultra"})); ->check(CLI::IsMember({"neural_light", "neural", "neural_plus"}));
app.add_option("--depth-size", options.depth_size, "Depth output size (optimal|native|<width>x<height>)")
->default_val("optimal");
app.add_option( app.add_option(
"--start-frame", "--start-frame",
options.start_frame, options.start_frame,
@@ -1926,6 +2102,10 @@ int main(int argc, char **argv) {
app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo"); app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");
app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap"); app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap");
app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration"); app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration");
app.add_option(
"--depth-calibration-topic",
options.depth_calibration_topic,
"MCAP topic for foxglove.CameraCalibration aligned to the exported depth resolution");
app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame"); app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame");
app.add_option( app.add_option(
"--pose-config", "--pose-config",
@@ -1967,6 +2147,11 @@ int main(int argc, char **argv) {
spdlog::error("{}", depth_mode.error()); spdlog::error("{}", depth_mode.error());
return exit_code(ToolExitCode::UsageError); return exit_code(ToolExitCode::UsageError);
} }
auto depth_size = parse_depth_size(options.depth_size);
if (!depth_size) {
spdlog::error("{}", depth_size.error());
return exit_code(ToolExitCode::UsageError);
}
auto pose_tracking = load_pose_tracking_options(options); auto pose_tracking = load_pose_tracking_options(options);
if (!pose_tracking) { if (!pose_tracking) {
spdlog::error("{}", pose_tracking.error()); spdlog::error("{}", pose_tracking.error());
@@ -1982,8 +2167,25 @@ int main(int argc, char **argv) {
const auto output_path = derive_output_path(options, *sources); const auto output_path = derive_output_path(options, *sources);
if (sources->size() == 1) { if (sources->size() == 1) {
options.input_paths = {sources->front().path.string()}; options.input_paths = {sources->front().path.string()};
return run_single_source(options, output_path, *codec, *encoder_device, *compression, *depth_mode, *pose_tracking); return run_single_source(
options,
output_path,
*codec,
*encoder_device,
*compression,
*depth_mode,
*depth_size,
*pose_tracking);
} }
return run_multi_source(options, *sources, output_path, *codec, *encoder_device, *compression, *depth_mode, *pose_tracking); return run_multi_source(
options,
*sources,
output_path,
*codec,
*encoder_device,
*compression,
*depth_mode,
*depth_size,
*pose_tracking);
} }