diff --git a/CMakeLists.txt b/CMakeLists.txt index 04823eb..fc8ee28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,12 +10,6 @@ find_package(Threads REQUIRED) find_package(cppzmq QUIET) set(CVMMAP_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../cv-mmap" CACHE PATH "Path to a local cv-mmap checkout") set(CVMMAP_LOCAL_BUILD "${CVMMAP_LOCAL_ROOT}/build/core" CACHE PATH "Path to local cv-mmap build artifacts") -if ( - NOT cvmmap-core_DIR - AND EXISTS "${CVMMAP_LOCAL_BUILD}/cvmmap-coreConfig.cmake" - AND EXISTS "${CVMMAP_LOCAL_BUILD}/cvmmap-coreTargets.cmake") - set(cvmmap-core_DIR "${CVMMAP_LOCAL_BUILD}") -endif() if (cvmmap-core_DIR) find_package(cvmmap-core CONFIG QUIET) endif() @@ -24,6 +18,9 @@ find_package(spdlog REQUIRED) find_package(Protobuf REQUIRED) find_package(PkgConfig REQUIRED) find_package(rvl CONFIG QUIET) +set(ZED_DIR "/usr/local/zed" CACHE PATH "Path to the local ZED SDK") +find_package(ZED REQUIRED) +find_package(CUDA ${ZED_CUDA_VERSION} REQUIRED) add_subdirectory(third_party) @@ -48,7 +45,7 @@ if (NOT TARGET cvmmap::client) endif() if (NOT TARGET rvl::rvl) - set(RVL_LOCAL_ROOT "/home/crosstyan/Code/rvl_impl") + set(RVL_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../rvl_impl" CACHE PATH "Path to a local rvl_impl checkout") set(RVL_LOCAL_BUILD "${RVL_LOCAL_ROOT}/build/core") if ( EXISTS "${RVL_LOCAL_ROOT}/core/include/rvl/rvl.hpp" @@ -66,7 +63,13 @@ add_library(cvmmap_streamer_foxglove_proto STATIC) protobuf_generate( TARGET cvmmap_streamer_foxglove_proto LANGUAGE cpp - PROTOS "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CompressedVideo.proto" + PROTOS + "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CompressedVideo.proto" + "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CameraCalibration.proto" + "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/PoseInFrame.proto" + "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Pose.proto" + "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Quaternion.proto" + "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Vector3.proto" IMPORT_DIRS "${CMAKE_CURRENT_LIST_DIR}/proto") add_library(cvmmap_streamer_depth_proto STATIC) protobuf_generate( @@ -78,13 +81,16 @@ add_library(cvmmap_streamer_protobuf INTERFACE) target_include_directories(cvmmap_streamer_foxglove_proto PUBLIC "${CMAKE_CURRENT_BINARY_DIR}" + "${CMAKE_CURRENT_BINARY_DIR}/proto" ${Protobuf_INCLUDE_DIRS}) target_include_directories(cvmmap_streamer_depth_proto PUBLIC "${CMAKE_CURRENT_BINARY_DIR}" + "${CMAKE_CURRENT_BINARY_DIR}/proto" ${Protobuf_INCLUDE_DIRS}) target_include_directories(cvmmap_streamer_protobuf INTERFACE + "${CMAKE_CURRENT_BINARY_DIR}/proto" ${Protobuf_INCLUDE_DIRS}) if (TARGET protobuf::libprotobuf) target_link_libraries(cvmmap_streamer_protobuf INTERFACE protobuf::libprotobuf) @@ -106,8 +112,38 @@ target_include_directories(cvmmap_streamer_mcap_runtime target_link_libraries(cvmmap_streamer_mcap_runtime PUBLIC mcap::mcap + PkgConfig::ZSTD + PkgConfig::LZ4) + +add_library(cvmmap_streamer_record_support STATIC + src/encode/encoder_backend.cpp + src/encode/ffmpeg_encoder_backend.cpp + src/record/protobuf_descriptor.cpp + src/record/mcap_record_sink.cpp) +target_include_directories(cvmmap_streamer_record_support + PUBLIC + "${CMAKE_CURRENT_LIST_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries(cvmmap_streamer_record_support + PUBLIC + cvmmap_streamer_foxglove_proto + cvmmap_streamer_depth_proto + cvmmap_streamer_mcap_runtime + PkgConfig::FFMPEG PkgConfig::ZSTD - PkgConfig::LZ4) + PkgConfig::LZ4 + rvl::rvl + mcap::mcap + msft_proxy4::proxy + cvmmap_streamer_protobuf) +if (TARGET spdlog::spdlog) + target_link_libraries(cvmmap_streamer_record_support PUBLIC spdlog::spdlog) +elseif (TARGET spdlog) + target_link_libraries(cvmmap_streamer_record_support PUBLIC spdlog) +endif() +if (TARGET PkgConfig::PROTOBUF_PKG) + target_link_libraries(cvmmap_streamer_record_support PUBLIC PkgConfig::PROTOBUF_PKG) +endif() add_library(cvmmap_streamer_common STATIC src/ipc/help.cpp @@ -119,11 +155,7 @@ add_library(cvmmap_streamer_common STATIC src/metrics/latency_tracker.cpp src/pipeline/pipeline_runtime.cpp src/protocol/rtmp_output.cpp - src/protocol/rtp_publisher.cpp - src/encode/encoder_backend.cpp - src/encode/ffmpeg_encoder_backend.cpp - src/record/protobuf_descriptor.cpp - src/record/mcap_record_sink.cpp) + src/protocol/rtp_publisher.cpp) target_include_directories(cvmmap_streamer_common PUBLIC @@ -132,18 +164,14 @@ target_include_directories(cvmmap_streamer_common set(CVMMAP_STREAMER_LINK_DEPS Threads::Threads - cvmmap_streamer_foxglove_proto - cvmmap_streamer_depth_proto - cvmmap_streamer_mcap_runtime + cvmmap_streamer_record_support PkgConfig::FFMPEG PkgConfig::ZSTD PkgConfig::LZ4 - rvl::rvl cvmmap::client CLI11::CLI11 tomlplusplus::tomlplusplus - mcap::mcap - msft_proxy4::proxy) + mcap::mcap) if (TARGET cppzmq::cppzmq) list(APPEND CVMMAP_STREAMER_LINK_DEPS cppzmq::cppzmq) @@ -193,6 +221,7 @@ add_cvmmap_binary(ipc_snapshot_tester src/testers/ipc_snapshot_tester.cpp) add_cvmmap_binary(mcap_depth_record_tester src/testers/mcap_depth_record_tester.cpp) add_cvmmap_binary(mcap_body_record_tester src/testers/mcap_body_record_tester.cpp) add_cvmmap_binary(mcap_body_inspector src/testers/mcap_body_inspector.cpp) +add_cvmmap_binary(mcap_pose_record_tester src/testers/mcap_pose_record_tester.cpp) add_executable(mcap_reader_tester src/testers/mcap_reader_tester.cpp) target_include_directories(mcap_reader_tester @@ -247,3 +276,34 @@ endif() set_target_properties(mcap_replay_tester PROPERTIES OUTPUT_NAME "mcap_replay_tester" RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") + +add_executable( + zed_svo_to_mcap + src/tools/zed_svo_to_mcap.cpp + src/config/runtime_config.cpp) +target_include_directories(zed_svo_to_mcap + PRIVATE + "${CMAKE_CURRENT_LIST_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}" + ${ZED_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS}) +target_link_directories(zed_svo_to_mcap + PRIVATE + ${ZED_LIBRARY_DIR} + ${CUDA_LIBRARY_DIRS}) +target_link_libraries(zed_svo_to_mcap + PRIVATE + cvmmap_streamer_record_support + CLI11::CLI11 + tomlplusplus::tomlplusplus + ${ZED_LIBRARIES} + ${CUDA_CUDA_LIBRARY} + ${CUDA_CUDART_LIBRARY}) +if (TARGET spdlog::spdlog) + target_link_libraries(zed_svo_to_mcap PRIVATE spdlog::spdlog) +elseif (TARGET spdlog) + target_link_libraries(zed_svo_to_mcap PRIVATE spdlog) +endif() +set_target_properties(zed_svo_to_mcap PROPERTIES + OUTPUT_NAME "zed_svo_to_mcap" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") diff --git a/include/cvmmap_streamer/config/runtime_config.hpp b/include/cvmmap_streamer/config/runtime_config.hpp index 63cd8d2..1e9320b 100644 --- a/include/cvmmap_streamer/config/runtime_config.hpp +++ b/include/cvmmap_streamer/config/runtime_config.hpp @@ -85,6 +85,8 @@ struct McapRecordConfig { std::string path{"capture.mcap"}; std::string topic{"/camera/video"}; std::string depth_topic{"/camera/depth"}; + std::string calibration_topic{"/camera/calibration"}; + std::string pose_topic{"/camera/pose"}; std::string body_topic{"/camera/body"}; std::string frame_id{"camera"}; McapCompression compression{McapCompression::Zstd}; diff --git a/include/cvmmap_streamer/encode/encoder_backend.hpp b/include/cvmmap_streamer/encode/encoder_backend.hpp index 77e2150..423d0e4 100644 --- a/include/cvmmap_streamer/encode/encoder_backend.hpp +++ b/include/cvmmap_streamer/encode/encoder_backend.hpp @@ -17,6 +17,7 @@ namespace cvmmap_streamer::encode { struct RawVideoFrame { ipc::FrameInfo info{}; std::uint64_t source_timestamp_ns{0}; + std::size_t row_stride_bytes{0}; std::span bytes{}; }; diff --git a/include/cvmmap_streamer/record/mcap_record_sink.hpp b/include/cvmmap_streamer/record/mcap_record_sink.hpp index e89e582..8ce2b4d 100644 --- a/include/cvmmap_streamer/record/mcap_record_sink.hpp +++ b/include/cvmmap_streamer/record/mcap_record_sink.hpp @@ -4,6 +4,7 @@ #include "cvmmap_streamer/encode/encoded_access_unit.hpp" #include "cvmmap_streamer/ipc/contracts.hpp" +#include #include #include #include @@ -26,6 +27,31 @@ struct RawDepthMapView { std::span pixels{}; }; +struct RawDepthMapU16View { + std::uint64_t timestamp_ns{0}; + std::uint32_t width{0}; + std::uint32_t height{0}; + std::span pixels{}; +}; + +struct RawCameraCalibrationView { + std::uint64_t timestamp_ns{0}; + std::uint32_t width{0}; + std::uint32_t height{0}; + std::string_view distortion_model{}; + std::span distortion{}; + std::span intrinsic_matrix{}; + std::span rectification_matrix{}; + std::span projection_matrix{}; +}; + +struct RawPoseView { + std::uint64_t timestamp_ns{0}; + std::string_view reference_frame_id{}; + std::array position{}; + std::array orientation{}; +}; + struct RawBodyTrackingMessageView { std::uint64_t timestamp_ns{0}; std::span bytes{}; @@ -56,6 +82,15 @@ public: [[nodiscard]] std::expected write_depth_map(const RawDepthMapView &depth_map); + [[nodiscard]] + std::expected write_depth_map_u16(const RawDepthMapU16View &depth_map); + + [[nodiscard]] + std::expected write_camera_calibration(const RawCameraCalibrationView &calibration); + + [[nodiscard]] + std::expected write_pose(const RawPoseView &pose); + [[nodiscard]] std::expected write_body_tracking_message(const RawBodyTrackingMessageView &body_message); diff --git a/proto/foxglove/CameraCalibration.proto b/proto/foxglove/CameraCalibration.proto new file mode 100644 index 0000000..9184bd1 --- /dev/null +++ b/proto/foxglove/CameraCalibration.proto @@ -0,0 +1,37 @@ +// Generated by https://github.com/foxglove/foxglove-sdk + +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +package foxglove; + +// Camera calibration parameters +message CameraCalibration { + // Timestamp of calibration data + google.protobuf.Timestamp timestamp = 1; + + // Frame of reference for the camera. The origin of the frame is the optical center of the camera. +x points to the right in the image, +y points down, and +z points into the plane of the image. + string frame_id = 9; + + // Image width + fixed32 width = 2; + + // Image height + fixed32 height = 3; + + // Name of distortion model + string distortion_model = 4; + + // Distortion parameters + repeated double D = 5; + + // Intrinsic camera matrix (3x3 row-major matrix) + repeated double K = 6; + + // Rectification matrix (3x3 row-major matrix) + repeated double R = 7; + + // Projection/camera matrix (3x4 row-major matrix) + repeated double P = 8; +} diff --git a/proto/foxglove/Pose.proto b/proto/foxglove/Pose.proto new file mode 100644 index 0000000..20343a2 --- /dev/null +++ b/proto/foxglove/Pose.proto @@ -0,0 +1,17 @@ +// Generated by https://github.com/foxglove/foxglove-sdk + +syntax = "proto3"; + +import "proto/foxglove/Quaternion.proto"; +import "proto/foxglove/Vector3.proto"; + +package foxglove; + +// A position and orientation for an object or reference frame in 3D space +message Pose { + // Point denoting position in 3D space + foxglove.Vector3 position = 1; + + // Quaternion denoting orientation in 3D space + foxglove.Quaternion orientation = 2; +} diff --git a/proto/foxglove/PoseInFrame.proto b/proto/foxglove/PoseInFrame.proto new file mode 100644 index 0000000..2171c6f --- /dev/null +++ b/proto/foxglove/PoseInFrame.proto @@ -0,0 +1,20 @@ +// Generated by https://github.com/foxglove/foxglove-sdk + +syntax = "proto3"; + +import "proto/foxglove/Pose.proto"; +import "google/protobuf/timestamp.proto"; + +package foxglove; + +// A timestamped pose for an object or reference frame in 3D space +message PoseInFrame { + // Timestamp of pose + google.protobuf.Timestamp timestamp = 1; + + // Frame of reference for pose position and orientation + string frame_id = 2; + + // Pose in 3D space + foxglove.Pose pose = 3; +} diff --git a/proto/foxglove/Quaternion.proto b/proto/foxglove/Quaternion.proto new file mode 100644 index 0000000..2b753d8 --- /dev/null +++ b/proto/foxglove/Quaternion.proto @@ -0,0 +1,20 @@ +// Generated by https://github.com/foxglove/foxglove-sdk + +syntax = "proto3"; + +package foxglove; + +// A quaternion representing a rotation in 3D space +message Quaternion { + // x value + double x = 1; + + // y value + double y = 2; + + // z value + double z = 3; + + // w value + double w = 4; +} diff --git a/proto/foxglove/Vector3.proto b/proto/foxglove/Vector3.proto new file mode 100644 index 0000000..c4fce6e --- /dev/null +++ b/proto/foxglove/Vector3.proto @@ -0,0 +1,17 @@ +// Generated by https://github.com/foxglove/foxglove-sdk + +syntax = "proto3"; + +package foxglove; + +// A vector in 3D space that represents a direction only +message Vector3 { + // x coordinate length + double x = 1; + + // y coordinate length + double y = 2; + + // z coordinate length + double z = 3; +} diff --git a/src/config/runtime_config.cpp b/src/config/runtime_config.cpp index 1608b60..7937567 100644 --- a/src/config/runtime_config.cpp +++ b/src/config/runtime_config.cpp @@ -365,6 +365,14 @@ std::expected apply_toml_file(RuntimeConfig &config, const st config.record.mcap.enabled = true; config.record.mcap.depth_topic = *value; } + if (auto value = toml_value(table, "record.mcap.calibration_topic")) { + config.record.mcap.enabled = true; + config.record.mcap.calibration_topic = *value; + } + if (auto value = toml_value(table, "record.mcap.pose_topic")) { + config.record.mcap.enabled = true; + config.record.mcap.pose_topic = *value; + } if (auto value = toml_value(table, "record.mcap.body_topic")) { config.record.mcap.enabled = true; config.record.mcap.body_topic = *value; @@ -568,6 +576,8 @@ std::expected parse_runtime_config(int argc, char ** std::string mcap_path_raw{}; std::string mcap_topic_raw{}; std::string mcap_depth_topic_raw{}; + std::string mcap_calibration_topic_raw{}; + std::string mcap_pose_topic_raw{}; std::string mcap_body_topic_raw{}; std::string mcap_frame_id_raw{}; std::string mcap_compression_raw{}; @@ -610,6 +620,8 @@ std::expected parse_runtime_config(int argc, char ** app.add_option("--mcap-path", mcap_path_raw); app.add_option("--mcap-topic", mcap_topic_raw); app.add_option("--mcap-depth-topic", mcap_depth_topic_raw); + app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw); + app.add_option("--mcap-pose-topic", mcap_pose_topic_raw); app.add_option("--mcap-body-topic", mcap_body_topic_raw); app.add_option("--mcap-frame-id", mcap_frame_id_raw); app.add_option("--mcap-compression", mcap_compression_raw); @@ -726,6 +738,14 @@ std::expected parse_runtime_config(int argc, char ** config.record.mcap.enabled = true; config.record.mcap.depth_topic = mcap_depth_topic_raw; } + if (!mcap_calibration_topic_raw.empty()) { + config.record.mcap.enabled = true; + config.record.mcap.calibration_topic = mcap_calibration_topic_raw; + } + if (!mcap_pose_topic_raw.empty()) { + config.record.mcap.enabled = true; + config.record.mcap.pose_topic = mcap_pose_topic_raw; + } if (!mcap_body_topic_raw.empty()) { config.record.mcap.enabled = true; config.record.mcap.body_topic = mcap_body_topic_raw; @@ -867,6 +887,12 @@ std::expected validate_runtime_config(const RuntimeConfig &co if (config.record.mcap.depth_topic.empty()) { return std::unexpected("invalid MCAP config: depth_topic must not be empty"); } + if (config.record.mcap.calibration_topic.empty()) { + return std::unexpected("invalid MCAP config: calibration_topic must not be empty"); + } + if (config.record.mcap.pose_topic.empty()) { + return std::unexpected("invalid MCAP config: pose_topic must not be empty"); + } if (config.record.mcap.body_topic.empty()) { return std::unexpected("invalid MCAP config: body_topic must not be empty"); } @@ -910,6 +936,8 @@ std::string summarize_runtime_config(const RuntimeConfig &config) { ss << ", mcap.path=" << config.record.mcap.path; ss << ", mcap.topic=" << config.record.mcap.topic; ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic; + ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic; + ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic; ss << ", mcap.body_topic=" << config.record.mcap.body_topic; ss << ", mcap.frame_id=" << config.record.mcap.frame_id; ss << ", mcap.compression=" << to_string(config.record.mcap.compression); diff --git a/src/encode/ffmpeg_encoder_backend.cpp b/src/encode/ffmpeg_encoder_backend.cpp index 9c2fd55..aa86dff 100644 --- a/src/encode/ffmpeg_encoder_backend.cpp +++ b/src/encode/ffmpeg_encoder_backend.cpp @@ -200,6 +200,9 @@ public: 1) < 0) { return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into FFmpeg image arrays"); } + if (frame.row_stride_bytes != 0) { + input_frame.linesize[0] = static_cast(frame.row_stride_bytes); + } sws_scale( scaler_, diff --git a/src/record/mcap_record_sink.cpp b/src/record/mcap_record_sink.cpp index 0dfea5f..c53241b 100644 --- a/src/record/mcap_record_sink.cpp +++ b/src/record/mcap_record_sink.cpp @@ -3,8 +3,10 @@ #include "cvmmap_streamer/record/mcap_record_sink.hpp" #include "protobuf_descriptor.hpp" -#include "cvmmap_streamer/DepthMap.pb.h" -#include "foxglove/CompressedVideo.pb.h" +#include "proto/cvmmap_streamer/DepthMap.pb.h" +#include "proto/foxglove/CameraCalibration.pb.h" +#include "proto/foxglove/CompressedVideo.pb.h" +#include "proto/foxglove/PoseInFrame.pb.h" #include #include @@ -293,14 +295,14 @@ std::expected encode_depth_payload(const RawDe }; } - std::vector depth_m(pixel_count, std::numeric_limits::quiet_NaN()); - float finite_max_m = 0.0f; - for (std::size_t index = 0; index < pixel_count; ++index) { - float sample = depth_map.pixels[index]; - if (source_unit == ipc::DepthUnit::Millimeter && std::isfinite(sample)) { - sample *= 0.001f; - } - if (!std::isfinite(sample) || sample <= 0.0f) { + std::vector depth_m(pixel_count, std::numeric_limits::quiet_NaN()); + float finite_max_m = 0.0f; + for (std::size_t index = 0; index < pixel_count; ++index) { + float sample = depth_map.pixels[index]; + if (source_unit == ipc::DepthUnit::Millimeter && std::isfinite(sample)) { + sample *= 0.001f; + } + if (!std::isfinite(sample) || sample <= 0.0f) { continue; } @@ -321,18 +323,108 @@ std::expected encode_depth_payload(const RawDe } } +[[nodiscard]] +std::expected encode_depth_payload(const RawDepthMapU16View &depth_map) { + const auto pixel_count = static_cast(depth_map.width) * static_cast(depth_map.height); + if (depth_map.width == 0 || depth_map.height == 0) { + return std::unexpected("depth map dimensions must be non-zero"); + } + if (pixel_count != depth_map.pixels.size()) { + return std::unexpected("depth map dimensions do not match the pixel buffer"); + } + + try { + std::vector pixels(depth_map.pixels.begin(), depth_map.pixels.end()); + return EncodedDepthPayload{ + .encoding = DepthEncoding::RvlU16Lossless, + .storage_unit = ipc::DepthUnit::Millimeter, + .bytes = rvl::compress_image(pixels, depth_map.height, depth_map.width), + }; + } catch (const std::exception &error) { + return std::unexpected(std::string("failed to RVL-encode depth map: ") + error.what()); + } +} + +[[nodiscard]] +std::expected append_repeated_double( + google::protobuf::RepeatedField *field, + std::span values, + std::size_t expected_size, + std::string_view field_name) { + if (values.size() != expected_size) { + return std::unexpected( + "expected " + std::to_string(expected_size) + " values for " + std::string(field_name)); + } + field->Reserve(static_cast(values.size())); + for (const double value : values) { + field->Add(value); + } + return {}; +} + +[[nodiscard]] +std::expected write_depth_message( + mcap::McapWriter &writer, + std::uint64_t timestamp_ns, + std::string_view frame_id, + std::uint32_t width, + std::uint32_t height, + ipc::DepthUnit source_unit, + mcap::ChannelId channel_id, + std::uint32_t &sequence, + const EncodedDepthPayload &encoded) { + cvmmap_streamer::DepthMap message{}; + *message.mutable_timestamp() = to_proto_timestamp(timestamp_ns); + message.set_frame_id(std::string(frame_id)); + message.set_width(width); + message.set_height(height); + message.set_source_unit(to_proto_depth_unit(source_unit)); + message.set_storage_unit(to_proto_storage_unit(encoded.storage_unit)); + message.set_encoding(to_proto_depth_encoding(encoded.encoding)); + message.set_data( + reinterpret_cast(encoded.bytes.data()), + static_cast(encoded.bytes.size())); + + std::string serialized{}; + if (!message.SerializeToString(&serialized)) { + return std::unexpected("failed to serialize cvmmap_streamer.DepthMap"); + } + + mcap::Message record{}; + record.channelId = channel_id; + record.sequence = sequence++; + record.logTime = timestamp_ns; + record.publishTime = timestamp_ns; + record.data = reinterpret_cast(serialized.data()); + record.dataSize = serialized.size(); + + const auto write_status = writer.write(record); + if (!write_status.ok()) { + return std::unexpected("failed to write MCAP depth message: " + write_status.message); + } + return {}; +} + } struct McapRecordSink::State { mcap::McapWriter writer{}; std::string path{}; std::string frame_id{}; + std::string calibration_topic{}; + std::string pose_topic{}; std::string body_topic{}; + mcap::SchemaId calibration_schema_id{0}; + mcap::SchemaId pose_schema_id{0}; mcap::ChannelId video_channel_id{0}; mcap::ChannelId depth_channel_id{0}; + mcap::ChannelId calibration_channel_id{0}; + mcap::ChannelId pose_channel_id{0}; mcap::ChannelId body_channel_id{0}; std::uint32_t video_sequence{0}; std::uint32_t depth_sequence{0}; + std::uint32_t calibration_sequence{0}; + std::uint32_t pose_sequence{0}; std::uint32_t body_sequence{0}; CodecType codec{CodecType::H264}; std::vector keyframe_preamble{}; @@ -363,6 +455,8 @@ std::expected McapRecordSink::create( auto state = std::make_unique(); state->path = config.record.mcap.path; state->frame_id = config.record.mcap.frame_id; + state->calibration_topic = config.record.mcap.calibration_topic; + state->pose_topic = config.record.mcap.pose_topic; state->body_topic = config.record.mcap.body_topic; mcap::McapWriterOptions options(""); @@ -398,6 +492,26 @@ std::expected McapRecordSink::create( state->writer.addChannel(depth_channel); state->depth_channel_id = depth_channel.id; + const auto calibration_descriptor_set = build_file_descriptor_set(foxglove::CameraCalibration::descriptor()); + std::string calibration_schema_bytes{}; + if (!calibration_descriptor_set.SerializeToString(&calibration_schema_bytes)) { + return std::unexpected("failed to serialize foxglove.CameraCalibration descriptor set"); + } + + mcap::Schema calibration_schema("foxglove.CameraCalibration", "protobuf", calibration_schema_bytes); + state->writer.addSchema(calibration_schema); + state->calibration_schema_id = calibration_schema.id; + + const auto pose_descriptor_set = build_file_descriptor_set(foxglove::PoseInFrame::descriptor()); + std::string pose_schema_bytes{}; + if (!pose_descriptor_set.SerializeToString(&pose_schema_bytes)) { + return std::unexpected("failed to serialize foxglove.PoseInFrame descriptor set"); + } + + mcap::Schema pose_schema("foxglove.PoseInFrame", "protobuf", pose_schema_bytes); + state->writer.addSchema(pose_schema); + state->pose_schema_id = pose_schema.id; + sink.state_ = state.release(); auto update = sink.update_stream_info(stream_info); if (!update) { @@ -470,34 +584,137 @@ std::expected McapRecordSink::write_depth_map(const RawDepthM return std::unexpected(encoded.error()); } - cvmmap_streamer::DepthMap message{}; - *message.mutable_timestamp() = to_proto_timestamp(depth_map.timestamp_ns); + return write_depth_message( + state_->writer, + depth_map.timestamp_ns, + state_->frame_id, + depth_map.width, + depth_map.height, + source_unit, + state_->depth_channel_id, + state_->depth_sequence, + *encoded); +} + +std::expected McapRecordSink::write_depth_map_u16(const RawDepthMapU16View &depth_map) { + if (state_ == nullptr) { + return std::unexpected("MCAP sink is not open"); + } + + auto encoded = encode_depth_payload(depth_map); + if (!encoded) { + return std::unexpected(encoded.error()); + } + + return write_depth_message( + state_->writer, + depth_map.timestamp_ns, + state_->frame_id, + depth_map.width, + depth_map.height, + ipc::DepthUnit::Millimeter, + state_->depth_channel_id, + state_->depth_sequence, + *encoded); +} + +std::expected McapRecordSink::write_camera_calibration(const RawCameraCalibrationView &calibration) { + if (state_ == nullptr) { + return std::unexpected("MCAP sink is not open"); + } + if (state_->calibration_topic.empty()) { + return std::unexpected("calibration topic is empty"); + } + + if (state_->calibration_channel_id == 0) { + mcap::Channel calibration_channel(state_->calibration_topic, "protobuf", state_->calibration_schema_id); + state_->writer.addChannel(calibration_channel); + state_->calibration_channel_id = calibration_channel.id; + } + + foxglove::CameraCalibration message{}; + *message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns); message.set_frame_id(state_->frame_id); - message.set_width(depth_map.width); - message.set_height(depth_map.height); - message.set_source_unit(to_proto_depth_unit(source_unit)); - message.set_storage_unit(to_proto_storage_unit(encoded->storage_unit)); - message.set_encoding(to_proto_depth_encoding(encoded->encoding)); - message.set_data( - reinterpret_cast(encoded->bytes.data()), - static_cast(encoded->bytes.size())); + message.set_width(calibration.width); + message.set_height(calibration.height); + message.set_distortion_model(std::string(calibration.distortion_model)); + for (const double value : calibration.distortion) { + message.add_d(value); + } + if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) { + return append; + } + if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) { + return append; + } + if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) { + return append; + } std::string serialized{}; if (!message.SerializeToString(&serialized)) { - return std::unexpected("failed to serialize cvmmap_streamer.DepthMap"); + return std::unexpected("failed to serialize foxglove.CameraCalibration"); } mcap::Message record{}; - record.channelId = state_->depth_channel_id; - record.sequence = state_->depth_sequence++; - record.logTime = depth_map.timestamp_ns; - record.publishTime = depth_map.timestamp_ns; + record.channelId = state_->calibration_channel_id; + record.sequence = state_->calibration_sequence++; + record.logTime = calibration.timestamp_ns; + record.publishTime = calibration.timestamp_ns; record.data = reinterpret_cast(serialized.data()); record.dataSize = serialized.size(); const auto write_status = state_->writer.write(record); if (!write_status.ok()) { - return std::unexpected("failed to write MCAP depth message: " + write_status.message); + return std::unexpected("failed to write MCAP calibration message: " + write_status.message); + } + return {}; +} + +std::expected McapRecordSink::write_pose(const RawPoseView &pose) { + if (state_ == nullptr) { + return std::unexpected("MCAP sink is not open"); + } + if (state_->pose_topic.empty()) { + return std::unexpected("pose topic is empty"); + } + + if (state_->pose_channel_id == 0) { + mcap::Channel pose_channel(state_->pose_topic, "protobuf", state_->pose_schema_id); + state_->writer.addChannel(pose_channel); + state_->pose_channel_id = pose_channel.id; + } + + foxglove::PoseInFrame message{}; + *message.mutable_timestamp() = to_proto_timestamp(pose.timestamp_ns); + message.set_frame_id(std::string(pose.reference_frame_id)); + auto *pose_message = message.mutable_pose(); + auto *position = pose_message->mutable_position(); + position->set_x(pose.position[0]); + position->set_y(pose.position[1]); + position->set_z(pose.position[2]); + auto *orientation = pose_message->mutable_orientation(); + orientation->set_x(pose.orientation[0]); + orientation->set_y(pose.orientation[1]); + orientation->set_z(pose.orientation[2]); + orientation->set_w(pose.orientation[3]); + + std::string serialized{}; + if (!message.SerializeToString(&serialized)) { + return std::unexpected("failed to serialize foxglove.PoseInFrame"); + } + + mcap::Message record{}; + record.channelId = state_->pose_channel_id; + record.sequence = state_->pose_sequence++; + record.logTime = pose.timestamp_ns; + record.publishTime = pose.timestamp_ns; + record.data = reinterpret_cast(serialized.data()); + record.dataSize = serialized.size(); + + const auto write_status = state_->writer.write(record); + if (!write_status.ok()) { + return std::unexpected("failed to write MCAP pose message: " + write_status.message); } return {}; } diff --git a/src/testers/mcap_body_record_tester.cpp b/src/testers/mcap_body_record_tester.cpp index 16f363c..b843f49 100644 --- a/src/testers/mcap_body_record_tester.cpp +++ b/src/testers/mcap_body_record_tester.cpp @@ -5,7 +5,7 @@ #include "cvmmap_streamer/common.h" #include "cvmmap_streamer/record/mcap_record_sink.hpp" -#include "foxglove/CompressedVideo.pb.h" +#include "proto/foxglove/CompressedVideo.pb.h" #include diff --git a/src/testers/mcap_depth_record_tester.cpp b/src/testers/mcap_depth_record_tester.cpp index 6059ba5..0a41f67 100644 --- a/src/testers/mcap_depth_record_tester.cpp +++ b/src/testers/mcap_depth_record_tester.cpp @@ -1,9 +1,9 @@ #include -#include "cvmmap_streamer/DepthMap.pb.h" +#include "proto/cvmmap_streamer/DepthMap.pb.h" #include "cvmmap_streamer/common.h" #include "cvmmap_streamer/record/mcap_record_sink.hpp" -#include "foxglove/CompressedVideo.pb.h" +#include "proto/foxglove/CompressedVideo.pb.h" #include #include diff --git a/src/testers/mcap_pose_record_tester.cpp b/src/testers/mcap_pose_record_tester.cpp new file mode 100644 index 0000000..5998184 --- /dev/null +++ b/src/testers/mcap_pose_record_tester.cpp @@ -0,0 +1,279 @@ +#include + +#include "proto/cvmmap_streamer/DepthMap.pb.h" +#include "cvmmap_streamer/common.h" +#include "cvmmap_streamer/record/mcap_record_sink.hpp" +#include "proto/foxglove/CameraCalibration.pb.h" +#include "proto/foxglove/CompressedVideo.pb.h" +#include "proto/foxglove/PoseInFrame.pb.h" + +#include +#include + +#include +#include +#include +#include +#include + +namespace { + +enum class TesterExitCode : int { + Success = 0, + CreateError = 2, + WriteError = 3, + OpenError = 4, + VerificationError = 5, +}; + +[[nodiscard]] +constexpr int exit_code(const TesterExitCode code) { + return static_cast(code); +} + +[[nodiscard]] +bool approx_equal(const double lhs, const double rhs, const double tolerance = 1e-9) { + return std::fabs(lhs - rhs) <= tolerance; +} + +} + +int main(int argc, char **argv) { + if (cvmmap_streamer::has_help_flag(argc, argv)) { + cvmmap_streamer::print_help("mcap_pose_record_tester"); + return exit_code(TesterExitCode::Success); + } + + const std::filesystem::path output_path = + argc > 1 + ? std::filesystem::path(argv[1]) + : std::filesystem::temp_directory_path() / "cvmmap_streamer_pose_record_test.mcap"; + if (output_path.has_parent_path()) { + std::filesystem::create_directories(output_path.parent_path()); + } + + cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults(); + config.record.mcap.enabled = true; + config.record.mcap.path = output_path.string(); + config.record.mcap.topic = "/camera/video"; + config.record.mcap.depth_topic = "/camera/depth"; + config.record.mcap.calibration_topic = "/camera/calibration"; + config.record.mcap.pose_topic = "/camera/pose"; + config.record.mcap.frame_id = "camera"; + config.record.mcap.compression = cvmmap_streamer::McapCompression::None; + + cvmmap_streamer::encode::EncodedStreamInfo stream_info{}; + stream_info.codec = cvmmap_streamer::CodecType::H264; + + auto sink = cvmmap_streamer::record::McapRecordSink::create(config, stream_info); + if (!sink) { + spdlog::error("failed to create MCAP sink: {}", sink.error()); + return exit_code(TesterExitCode::CreateError); + } + + cvmmap_streamer::encode::EncodedAccessUnit access_unit{}; + access_unit.codec = cvmmap_streamer::CodecType::H264; + access_unit.source_timestamp_ns = 100; + access_unit.stream_pts_ns = 100; + access_unit.keyframe = false; + access_unit.annexb_bytes = {0x00, 0x00, 0x00, 0x01, 0x09, 0x10}; + if (auto write = sink->write_access_unit(access_unit); !write) { + spdlog::error("failed to write video access unit: {}", write.error()); + return exit_code(TesterExitCode::WriteError); + } + + const std::vector depth_pixels{ + 1000, 2000, + 0, 1500, + }; + if (auto write = sink->write_depth_map_u16(cvmmap_streamer::record::RawDepthMapU16View{ + .timestamp_ns = 101, + .width = 2, + .height = 2, + .pixels = depth_pixels, + }); !write) { + spdlog::error("failed to write depth map: {}", write.error()); + return exit_code(TesterExitCode::WriteError); + } + + const std::vector distortion{0.0, 0.0, 0.0, 0.0, 0.0}; + const std::vector intrinsic_matrix{ + 500.0, 0.0, 320.0, + 0.0, 500.0, 240.0, + 0.0, 0.0, 1.0, + }; + const std::vector rectification_matrix{ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + }; + const std::vector projection_matrix{ + 500.0, 0.0, 320.0, 0.0, + 0.0, 500.0, 240.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + }; + if (auto write = sink->write_camera_calibration(cvmmap_streamer::record::RawCameraCalibrationView{ + .timestamp_ns = 102, + .width = 640, + .height = 480, + .distortion_model = "plumb_bob", + .distortion = distortion, + .intrinsic_matrix = intrinsic_matrix, + .rectification_matrix = rectification_matrix, + .projection_matrix = projection_matrix, + }); !write) { + spdlog::error("failed to write calibration: {}", write.error()); + return exit_code(TesterExitCode::WriteError); + } + + if (auto write = sink->write_pose(cvmmap_streamer::record::RawPoseView{ + .timestamp_ns = 103, + .reference_frame_id = "world", + .position = {1.0, 2.0, 3.0}, + .orientation = {0.1, 0.2, 0.3, 0.9}, + }); !write) { + spdlog::error("failed to write pose: {}", write.error()); + return exit_code(TesterExitCode::WriteError); + } + + sink->close(); + + mcap::McapReader reader{}; + const auto open_status = reader.open(output_path.string()); + if (!open_status.ok()) { + spdlog::error("failed to open MCAP file '{}': {}", output_path.string(), open_status.message); + return exit_code(TesterExitCode::OpenError); + } + + std::uint64_t video_messages{0}; + std::uint64_t depth_messages{0}; + std::uint64_t calibration_messages{0}; + std::uint64_t pose_messages{0}; + + auto messages = reader.readMessages(); + for (auto it = messages.begin(); it != messages.end(); ++it) { + if (it->schema == nullptr || it->channel == nullptr) { + spdlog::error("MCAP message missing schema or channel"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + if (it->schema->encoding != "protobuf" || it->channel->messageEncoding != "protobuf") { + spdlog::error("unexpected message encoding in MCAP"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + + if (it->schema->name == "foxglove.CompressedVideo") { + foxglove::CompressedVideo video{}; + if (!video.ParseFromArray(it->message.data, static_cast(it->message.dataSize))) { + spdlog::error("failed to parse foxglove.CompressedVideo"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + if (it->channel->topic != "/camera/video" || video.frame_id() != "camera" || video.data().empty()) { + spdlog::error("video message verification failed"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + video_messages += 1; + continue; + } + + if (it->schema->name == "cvmmap_streamer.DepthMap") { + cvmmap_streamer::DepthMap depth{}; + if (!depth.ParseFromArray(it->message.data, static_cast(it->message.dataSize))) { + spdlog::error("failed to parse cvmmap_streamer.DepthMap"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + if (it->channel->topic != "/camera/depth" || + depth.frame_id() != "camera" || + depth.encoding() != cvmmap_streamer::DepthMap::RVL_U16_LOSSLESS) { + spdlog::error("depth message metadata verification failed"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + const auto decoded = rvl::decompress_image(std::span( + reinterpret_cast(depth.data().data()), + depth.data().size())); + if (decoded.pixels.size() != depth_pixels.size() || + decoded.pixels[0] != 1000 || + decoded.pixels[1] != 2000 || + decoded.pixels[2] != 0 || + decoded.pixels[3] != 1500) { + spdlog::error("depth RVL round-trip verification failed"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + depth_messages += 1; + continue; + } + + if (it->schema->name == "foxglove.CameraCalibration") { + foxglove::CameraCalibration calibration{}; + if (!calibration.ParseFromArray(it->message.data, static_cast(it->message.dataSize))) { + spdlog::error("failed to parse foxglove.CameraCalibration"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + if (it->channel->topic != "/camera/calibration" || + calibration.frame_id() != "camera" || + calibration.width() != 640 || + calibration.height() != 480 || + calibration.distortion_model() != "plumb_bob" || + calibration.d_size() != 5 || + calibration.k_size() != 9 || + calibration.r_size() != 9 || + calibration.p_size() != 12 || + !approx_equal(calibration.k(0), 500.0) || + !approx_equal(calibration.k(2), 320.0) || + !approx_equal(calibration.p(5), 500.0)) { + spdlog::error("calibration message verification failed"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + calibration_messages += 1; + continue; + } + + if (it->schema->name == "foxglove.PoseInFrame") { + foxglove::PoseInFrame pose{}; + if (!pose.ParseFromArray(it->message.data, static_cast(it->message.dataSize))) { + spdlog::error("failed to parse foxglove.PoseInFrame"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + if (it->channel->topic != "/camera/pose" || + pose.frame_id() != "world" || + !approx_equal(pose.pose().position().x(), 1.0) || + !approx_equal(pose.pose().position().y(), 2.0) || + !approx_equal(pose.pose().position().z(), 3.0) || + !approx_equal(pose.pose().orientation().x(), 0.1) || + !approx_equal(pose.pose().orientation().y(), 0.2) || + !approx_equal(pose.pose().orientation().z(), 0.3) || + !approx_equal(pose.pose().orientation().w(), 0.9)) { + spdlog::error("pose message verification failed"); + reader.close(); + return exit_code(TesterExitCode::VerificationError); + } + pose_messages += 1; + } + } + + reader.close(); + + if (video_messages != 1 || depth_messages != 1 || calibration_messages != 1 || pose_messages != 1) { + spdlog::error( + "unexpected message counts: video={} depth={} calibration={} pose={}", + video_messages, + depth_messages, + calibration_messages, + pose_messages); + return exit_code(TesterExitCode::VerificationError); + } + + spdlog::info( + "validated same-file MCAP video+depth+calibration+pose recording at '{}'", + output_path.string()); + return exit_code(TesterExitCode::Success); +} diff --git a/src/tools/zed_svo_to_mcap.cpp b/src/tools/zed_svo_to_mcap.cpp new file mode 100644 index 0000000..3ea2d4f --- /dev/null +++ b/src/tools/zed_svo_to_mcap.cpp @@ -0,0 +1,645 @@ +#include +#include + +#include + +#include "cvmmap_streamer/config/runtime_config.hpp" +#include "cvmmap_streamer/core/status.hpp" +#include "cvmmap_streamer/encode/encoder_backend.hpp" +#include "cvmmap_streamer/ipc/contracts.hpp" +#include "cvmmap_streamer/record/mcap_record_sink.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +enum class ToolExitCode : int { + Success = 0, + UsageError = 2, + RuntimeError = 3, +}; + +struct CliOptions { + std::string input_path{}; + std::string output_path{}; + std::string codec{"h264"}; + std::string encoder_device{"auto"}; + std::string mcap_compression{"zstd"}; + std::string depth_mode{"neural"}; + bool with_pose{false}; + std::uint32_t start_frame{0}; + std::uint32_t end_frame{0}; + bool has_end_frame{false}; + std::string frame_id{"camera"}; + std::string video_topic{"/camera/video"}; + std::string depth_topic{"/camera/depth"}; + std::string calibration_topic{"/camera/calibration"}; + std::string pose_topic{"/camera/pose"}; +}; + +[[nodiscard]] +constexpr int exit_code(const ToolExitCode code) { + return static_cast(code); +} + +[[nodiscard]] +std::string zed_string(const sl::String &value) { + return std::string(value.c_str() == nullptr ? "" : value.c_str()); +} + +[[nodiscard]] +std::string zed_status_string(const sl::ERROR_CODE code) { + return zed_string(sl::toString(code)); +} + +[[nodiscard]] +std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state) { + return zed_string(sl::toString(state)); +} + +[[nodiscard]] +std::expected parse_codec(const std::string_view raw) { + if (raw == "h264") { + return cvmmap_streamer::CodecType::H264; + } + if (raw == "h265") { + return cvmmap_streamer::CodecType::H265; + } + return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)"); +} + +[[nodiscard]] +std::expected parse_encoder_device(const std::string_view raw) { + if (raw == "auto") { + return cvmmap_streamer::EncoderDeviceType::Auto; + } + if (raw == "nvidia") { + return cvmmap_streamer::EncoderDeviceType::Nvidia; + } + if (raw == "software") { + return cvmmap_streamer::EncoderDeviceType::Software; + } + return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)"); +} + +[[nodiscard]] +std::expected parse_mcap_compression(const std::string_view raw) { + if (raw == "none") { + return cvmmap_streamer::McapCompression::None; + } + if (raw == "lz4") { + return cvmmap_streamer::McapCompression::Lz4; + } + if (raw == "zstd") { + return cvmmap_streamer::McapCompression::Zstd; + } + return std::unexpected("invalid mcap compression: '" + std::string(raw) + "' (expected: none|lz4|zstd)"); +} + +[[nodiscard]] +std::expected parse_depth_mode(const std::string_view raw) { + if (raw == "neural") { + return sl::DEPTH_MODE::NEURAL; + } + if (raw == "quality") { + return sl::DEPTH_MODE::QUALITY; + } + if (raw == "performance") { + return sl::DEPTH_MODE::PERFORMANCE; + } + if (raw == "ultra") { + return sl::DEPTH_MODE::ULTRA; + } + return std::unexpected("invalid depth mode: '" + std::string(raw) + "' (expected: neural|quality|performance|ultra)"); +} + +[[nodiscard]] +std::uint64_t frame_period_ns(const float fps) { + if (!(fps > 0.0f)) { + return 33'333'333ull; + } + return static_cast(std::llround(1'000'000'000.0 / static_cast(fps))); +} + +[[nodiscard]] +std::expected validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) { + if (mat.getDataType() != sl::MAT_TYPE::U8_C3) { + return std::unexpected(std::string(label) + " must be U8_C3"); + } + if (mat.getWidth() == 0 || mat.getHeight() == 0) { + return std::unexpected(std::string(label) + " dimensions must be non-zero"); + } + if (mat.getPtr(sl::MEM::CPU) == nullptr) { + return std::unexpected(std::string(label) + " CPU buffer is null"); + } + return {}; +} + +[[nodiscard]] +std::expected validate_u16c1_mat(const sl::Mat &mat, const std::string_view label) { + if (mat.getDataType() != sl::MAT_TYPE::U16_C1) { + return std::unexpected(std::string(label) + " must be U16_C1"); + } + if (mat.getWidth() == 0 || mat.getHeight() == 0) { + return std::unexpected(std::string(label) + " dimensions must be non-zero"); + } + if (mat.getPtr(sl::MEM::CPU) == nullptr) { + return std::unexpected(std::string(label) + " CPU buffer is null"); + } + return {}; +} + +[[nodiscard]] +std::vector copy_compact_u16_plane(const sl::Mat &mat) { + const auto width = static_cast(mat.getWidth()); + const auto height = static_cast(mat.getHeight()); + const auto row_bytes = width * sizeof(std::uint16_t); + const auto step_bytes = mat.getStepBytes(sl::MEM::CPU); + auto *src = mat.getPtr(sl::MEM::CPU); + + std::vector compact(width * height, 0); + auto *dst = reinterpret_cast(compact.data()); + for (std::size_t row = 0; row < height; ++row) { + std::memcpy( + dst + row * row_bytes, + src + row * step_bytes, + row_bytes); + } + return compact; +} + +[[nodiscard]] +std::expected write_access_units( + cvmmap_streamer::record::McapRecordSink &sink, + const std::vector &access_units) { + for (const auto &access_unit : access_units) { + if (auto write = sink.write_access_unit(access_unit); !write) { + return std::unexpected(write.error()); + } + } + return {}; +} + +[[nodiscard]] +std::expected flush_and_write( + cvmmap_streamer::record::McapRecordSink &sink, + cvmmap_streamer::encode::EncoderBackend &backend) { + auto flushed = backend->flush(); + if (!flushed) { + return std::unexpected(cvmmap_streamer::format_error(flushed.error())); + } + return write_access_units(sink, *flushed); +} + +} + +int main(int argc, char **argv) { + CliOptions options{}; + + CLI::App app{"zed_svo_to_mcap - convert ZED SVO/SVO2 playback to MCAP"}; + app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required(); + app.add_option("--output", options.output_path, "Output MCAP file")->required(); + app.add_option("--codec", options.codec, "Video codec (h264|h265)") + ->check(CLI::IsMember({"h264", "h265"})); + app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)") + ->check(CLI::IsMember({"auto", "nvidia", "software"})); + app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available"); + app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)") + ->check(CLI::IsMember({"none", "lz4", "zstd"})); + app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)") + ->check(CLI::IsMember({"neural", "quality", "performance", "ultra"})); + app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)") + ->check(CLI::NonNegativeNumber); + auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)") + ->check(CLI::NonNegativeNumber); + app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics"); + app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo"); + app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap"); + app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration"); + app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame"); + + try { + app.parse(argc, argv); + } catch (const CLI::ParseError &error) { + return app.exit(error); + } + options.has_end_frame = end_frame_option->count() > 0; + + auto codec = parse_codec(options.codec); + if (!codec) { + spdlog::error("{}", codec.error()); + return exit_code(ToolExitCode::UsageError); + } + auto encoder_device = parse_encoder_device(options.encoder_device); + if (!encoder_device) { + spdlog::error("{}", encoder_device.error()); + return exit_code(ToolExitCode::UsageError); + } + auto compression = parse_mcap_compression(options.mcap_compression); + if (!compression) { + spdlog::error("{}", compression.error()); + return exit_code(ToolExitCode::UsageError); + } + auto depth_mode = parse_depth_mode(options.depth_mode); + if (!depth_mode) { + spdlog::error("{}", depth_mode.error()); + return exit_code(ToolExitCode::UsageError); + } + + if (options.output_path.empty()) { + spdlog::error("output path must not be empty"); + return exit_code(ToolExitCode::UsageError); + } + if (options.has_end_frame && options.end_frame < options.start_frame) { + spdlog::error( + "invalid frame range: start-frame={} end-frame={}", + options.start_frame, + options.end_frame); + return exit_code(ToolExitCode::UsageError); + } + + const std::filesystem::path output_path{options.output_path}; + if (output_path.has_parent_path()) { + std::filesystem::create_directories(output_path.parent_path()); + } + + sl::Camera camera{}; + bool tracking_enabled{false}; + + auto close_camera = [&]() { + if (tracking_enabled) { + camera.disablePositionalTracking(); + tracking_enabled = false; + } + if (camera.isOpened()) { + camera.close(); + } + }; + + sl::InitParameters init{}; + init.input.setFromSVOFile(options.input_path.c_str()); + init.svo_real_time_mode = false; + init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE; + init.coordinate_units = sl::UNIT::METER; + init.depth_mode = *depth_mode; + init.sdk_verbose = false; + + const auto open_status = camera.open(init); + if (open_status != sl::ERROR_CODE::SUCCESS) { + spdlog::error( + "failed to open SVO '{}': {}", + options.input_path, + zed_status_string(open_status)); + return exit_code(ToolExitCode::RuntimeError); + } + + const auto total_frames = camera.getSVONumberOfFrames(); + if (total_frames <= 0) { + close_camera(); + spdlog::error("input SVO has no frames"); + return exit_code(ToolExitCode::RuntimeError); + } + if (options.start_frame >= static_cast(total_frames)) { + close_camera(); + spdlog::error( + "start-frame {} is out of range for {} frames", + options.start_frame, + total_frames); + return exit_code(ToolExitCode::UsageError); + } + if (options.has_end_frame && options.end_frame >= static_cast(total_frames)) { + close_camera(); + spdlog::error( + "end-frame {} is out of range for {} frames", + options.end_frame, + total_frames); + return exit_code(ToolExitCode::UsageError); + } + + camera.setSVOPosition(static_cast(options.start_frame)); + + if (options.with_pose) { + sl::PositionalTrackingParameters tracking_parameters{}; + const auto tracking_status = camera.enablePositionalTracking(tracking_parameters); + if (tracking_status == sl::ERROR_CODE::SUCCESS) { + tracking_enabled = true; + } else { + spdlog::warn( + "positional tracking unavailable for '{}': {}. continuing without pose output", + options.input_path, + zed_status_string(tracking_status)); + } + } + + const auto camera_info = camera.getCameraInformation(); + const auto &camera_config = camera_info.camera_configuration; + const auto &left_camera = camera_config.calibration_parameters.left_cam; + const auto width = static_cast(camera_config.resolution.width); + const auto height = static_cast(camera_config.resolution.height); + if (width == 0 || height == 0) { + close_camera(); + spdlog::error("camera resolution reported by the ZED SDK is invalid"); + return exit_code(ToolExitCode::RuntimeError); + } + + cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults(); + config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg; + config.encoder.device = *encoder_device; + config.encoder.codec = *codec; + config.encoder.gop = 30; + config.encoder.b_frames = 0; + config.record.mcap.enabled = true; + config.record.mcap.path = output_path.string(); + config.record.mcap.topic = options.video_topic; + config.record.mcap.depth_topic = options.depth_topic; + config.record.mcap.calibration_topic = options.calibration_topic; + config.record.mcap.pose_topic = options.pose_topic; + config.record.mcap.frame_id = options.frame_id; + config.record.mcap.compression = *compression; + + cvmmap_streamer::ipc::FrameInfo frame_info{ + .width = static_cast(width), + .height = static_cast(height), + .channels = 3, + .depth = cvmmap_streamer::ipc::Depth::U8, + .pixel_format = cvmmap_streamer::ipc::PixelFormat::BGR, + .buffer_size = width * height * 3u, + }; + + auto backend_result = cvmmap_streamer::encode::make_encoder_backend(config); + if (!backend_result) { + close_camera(); + spdlog::error("failed to create encoder backend: {}", cvmmap_streamer::format_error(backend_result.error())); + return exit_code(ToolExitCode::RuntimeError); + } + auto backend = std::move(*backend_result); + + if (auto init_status = backend->init(config, frame_info); !init_status) { + close_camera(); + spdlog::error("failed to initialize encoder backend: {}", cvmmap_streamer::format_error(init_status.error())); + return exit_code(ToolExitCode::RuntimeError); + } + + auto stream_info = backend->stream_info(); + if (!stream_info) { + backend->shutdown(); + close_camera(); + spdlog::error("encoder backend did not provide stream info: {}", cvmmap_streamer::format_error(stream_info.error())); + return exit_code(ToolExitCode::RuntimeError); + } + + auto sink = cvmmap_streamer::record::McapRecordSink::create(config, *stream_info); + if (!sink) { + backend->shutdown(); + close_camera(); + spdlog::error("failed to create MCAP sink: {}", sink.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + const std::array distortion{ + 0.0, 0.0, 0.0, 0.0, 0.0, + }; + const std::array intrinsic_matrix{ + static_cast(left_camera.fx), 0.0, static_cast(left_camera.cx), + 0.0, static_cast(left_camera.fy), static_cast(left_camera.cy), + 0.0, 0.0, 1.0, + }; + const std::array rectification_matrix{ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + }; + const std::array projection_matrix{ + static_cast(left_camera.fx), 0.0, static_cast(left_camera.cx), 0.0, + 0.0, static_cast(left_camera.fy), static_cast(left_camera.cy), 0.0, + 0.0, 0.0, 1.0, 0.0, + }; + + sl::RuntimeParameters runtime_parameters{}; + sl::Mat left_frame{}; + sl::Mat depth_frame{}; + sl::Pose pose{}; + bool calibration_written{false}; + std::uint64_t emitted_frames{0}; + std::optional last_timestamp_ns{}; + std::optional last_tracking_state{}; + const auto last_frame = options.has_end_frame + ? options.end_frame + : static_cast(total_frames - 1); + const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps); + + while (options.start_frame + emitted_frames <= last_frame) { + const auto grab_status = camera.grab(runtime_parameters); + if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) { + break; + } + if (grab_status != sl::ERROR_CODE::SUCCESS) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status)); + return exit_code(ToolExitCode::RuntimeError); + } + + const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU); + if (image_status != sl::ERROR_CODE::SUCCESS) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status)); + return exit_code(ToolExitCode::RuntimeError); + } + if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("{}", valid.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU); + if (depth_status != sl::ERROR_CODE::SUCCESS) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to retrieve depth map: {}", zed_status_string(depth_status)); + return exit_code(ToolExitCode::RuntimeError); + } + if (auto valid = validate_u16c1_mat(depth_frame, "depth map"); !valid) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("{}", valid.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds(); + if (timestamp_ns == 0) { + timestamp_ns = emitted_frames * nominal_frame_period_ns; + } + if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) { + timestamp_ns = *last_timestamp_ns + 1; + } + last_timestamp_ns = timestamp_ns; + + const auto video_step_bytes = left_frame.getStepBytes(sl::MEM::CPU); + const auto video_bytes = std::span( + left_frame.getPtr(sl::MEM::CPU), + video_step_bytes * left_frame.getHeight()); + cvmmap_streamer::encode::RawVideoFrame raw_video{ + .info = frame_info, + .source_timestamp_ns = timestamp_ns, + .row_stride_bytes = video_step_bytes, + .bytes = video_bytes, + }; + + if (auto push = backend->push_frame(raw_video); !push) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to encode frame: {}", cvmmap_streamer::format_error(push.error())); + return exit_code(ToolExitCode::RuntimeError); + } + + auto drained = backend->drain(); + if (!drained) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to drain encoded access units: {}", cvmmap_streamer::format_error(drained.error())); + return exit_code(ToolExitCode::RuntimeError); + } + if (auto write = write_access_units(*sink, *drained); !write) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to write video access unit: {}", write.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + if (!calibration_written) { + cvmmap_streamer::record::RawCameraCalibrationView calibration{ + .timestamp_ns = timestamp_ns, + .width = width, + .height = height, + .distortion_model = "plumb_bob", + .distortion = distortion, + .intrinsic_matrix = intrinsic_matrix, + .rectification_matrix = rectification_matrix, + .projection_matrix = projection_matrix, + }; + if (auto write = sink->write_camera_calibration(calibration); !write) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to write calibration: {}", write.error()); + return exit_code(ToolExitCode::RuntimeError); + } + calibration_written = true; + } + + const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU); + const auto packed_depth_bytes = static_cast(width) * sizeof(std::uint16_t); + if (depth_step_bytes < packed_depth_bytes) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("depth stride {} is smaller than packed row size {}", depth_step_bytes, packed_depth_bytes); + return exit_code(ToolExitCode::RuntimeError); + } + + std::optional> compact_depth{}; + std::span depth_pixels{}; + if (depth_step_bytes == packed_depth_bytes) { + depth_pixels = std::span( + depth_frame.getPtr(sl::MEM::CPU), + static_cast(width) * static_cast(height)); + } else { + compact_depth = copy_compact_u16_plane(depth_frame); + depth_pixels = *compact_depth; + } + + cvmmap_streamer::record::RawDepthMapU16View depth_map{ + .timestamp_ns = timestamp_ns, + .width = width, + .height = height, + .pixels = depth_pixels, + }; + if (auto write = sink->write_depth_map_u16(depth_map); !write) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to write depth map: {}", write.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + if (tracking_enabled) { + const auto tracking_state = camera.getPosition(pose, sl::REFERENCE_FRAME::WORLD); + if (!last_tracking_state || *last_tracking_state != tracking_state) { + last_tracking_state = tracking_state; + if (tracking_state != sl::POSITIONAL_TRACKING_STATE::OK) { + spdlog::warn( + "pose tracking state changed to {} at frame {}", + zed_tracking_state_string(tracking_state), + options.start_frame + emitted_frames); + } + } + if (tracking_state == sl::POSITIONAL_TRACKING_STATE::OK) { + const auto translation = pose.getTranslation(); + const auto orientation = pose.getOrientation(); + cvmmap_streamer::record::RawPoseView pose_view{ + .timestamp_ns = timestamp_ns, + .reference_frame_id = "world", + .position = { + static_cast(translation.x), + static_cast(translation.y), + static_cast(translation.z), + }, + .orientation = { + static_cast(orientation.x), + static_cast(orientation.y), + static_cast(orientation.z), + static_cast(orientation.w), + }, + }; + if (auto write = sink->write_pose(pose_view); !write) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to write pose: {}", write.error()); + return exit_code(ToolExitCode::RuntimeError); + } + } + } + + emitted_frames += 1; + } + + if (auto flushed = flush_and_write(*sink, backend); !flushed) { + sink->close(); + backend->shutdown(); + close_camera(); + spdlog::error("failed to finalize encoded video: {}", flushed.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + sink->close(); + backend->shutdown(); + close_camera(); + + spdlog::info( + "wrote {} frame(s) from '{}' to '{}'", + emitted_frames, + options.input_path, + output_path.string()); + return exit_code(ToolExitCode::Success); +}