Add ZED SVO to MCAP conversion tool

This commit is contained in:
2026-03-17 02:03:59 +00:00
parent ee53d1958e
commit 0fef0595fb
16 changed files with 1430 additions and 49 deletions
+79 -19
View File
@@ -10,12 +10,6 @@ find_package(Threads REQUIRED)
find_package(cppzmq QUIET) find_package(cppzmq QUIET)
set(CVMMAP_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../cv-mmap" CACHE PATH "Path to a local cv-mmap checkout") set(CVMMAP_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../cv-mmap" CACHE PATH "Path to a local cv-mmap checkout")
set(CVMMAP_LOCAL_BUILD "${CVMMAP_LOCAL_ROOT}/build/core" CACHE PATH "Path to local cv-mmap build artifacts") set(CVMMAP_LOCAL_BUILD "${CVMMAP_LOCAL_ROOT}/build/core" CACHE PATH "Path to local cv-mmap build artifacts")
if (
NOT cvmmap-core_DIR
AND EXISTS "${CVMMAP_LOCAL_BUILD}/cvmmap-coreConfig.cmake"
AND EXISTS "${CVMMAP_LOCAL_BUILD}/cvmmap-coreTargets.cmake")
set(cvmmap-core_DIR "${CVMMAP_LOCAL_BUILD}")
endif()
if (cvmmap-core_DIR) if (cvmmap-core_DIR)
find_package(cvmmap-core CONFIG QUIET) find_package(cvmmap-core CONFIG QUIET)
endif() endif()
@@ -24,6 +18,9 @@ find_package(spdlog REQUIRED)
find_package(Protobuf REQUIRED) find_package(Protobuf REQUIRED)
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
find_package(rvl CONFIG QUIET) find_package(rvl CONFIG QUIET)
set(ZED_DIR "/usr/local/zed" CACHE PATH "Path to the local ZED SDK")
find_package(ZED REQUIRED)
find_package(CUDA ${ZED_CUDA_VERSION} REQUIRED)
add_subdirectory(third_party) add_subdirectory(third_party)
@@ -48,7 +45,7 @@ if (NOT TARGET cvmmap::client)
endif() endif()
if (NOT TARGET rvl::rvl) if (NOT TARGET rvl::rvl)
set(RVL_LOCAL_ROOT "/home/crosstyan/Code/rvl_impl") set(RVL_LOCAL_ROOT "${CMAKE_CURRENT_LIST_DIR}/../rvl_impl" CACHE PATH "Path to a local rvl_impl checkout")
set(RVL_LOCAL_BUILD "${RVL_LOCAL_ROOT}/build/core") set(RVL_LOCAL_BUILD "${RVL_LOCAL_ROOT}/build/core")
if ( if (
EXISTS "${RVL_LOCAL_ROOT}/core/include/rvl/rvl.hpp" EXISTS "${RVL_LOCAL_ROOT}/core/include/rvl/rvl.hpp"
@@ -66,7 +63,13 @@ add_library(cvmmap_streamer_foxglove_proto STATIC)
protobuf_generate( protobuf_generate(
TARGET cvmmap_streamer_foxglove_proto TARGET cvmmap_streamer_foxglove_proto
LANGUAGE cpp LANGUAGE cpp
PROTOS "${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CompressedVideo.proto" PROTOS
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CompressedVideo.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/CameraCalibration.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/PoseInFrame.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Pose.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Quaternion.proto"
"${CMAKE_CURRENT_LIST_DIR}/proto/foxglove/Vector3.proto"
IMPORT_DIRS "${CMAKE_CURRENT_LIST_DIR}/proto") IMPORT_DIRS "${CMAKE_CURRENT_LIST_DIR}/proto")
add_library(cvmmap_streamer_depth_proto STATIC) add_library(cvmmap_streamer_depth_proto STATIC)
protobuf_generate( protobuf_generate(
@@ -78,13 +81,16 @@ add_library(cvmmap_streamer_protobuf INTERFACE)
target_include_directories(cvmmap_streamer_foxglove_proto target_include_directories(cvmmap_streamer_foxglove_proto
PUBLIC PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_CURRENT_BINARY_DIR}/proto"
${Protobuf_INCLUDE_DIRS}) ${Protobuf_INCLUDE_DIRS})
target_include_directories(cvmmap_streamer_depth_proto target_include_directories(cvmmap_streamer_depth_proto
PUBLIC PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_CURRENT_BINARY_DIR}/proto"
${Protobuf_INCLUDE_DIRS}) ${Protobuf_INCLUDE_DIRS})
target_include_directories(cvmmap_streamer_protobuf target_include_directories(cvmmap_streamer_protobuf
INTERFACE INTERFACE
"${CMAKE_CURRENT_BINARY_DIR}/proto"
${Protobuf_INCLUDE_DIRS}) ${Protobuf_INCLUDE_DIRS})
if (TARGET protobuf::libprotobuf) if (TARGET protobuf::libprotobuf)
target_link_libraries(cvmmap_streamer_protobuf INTERFACE protobuf::libprotobuf) target_link_libraries(cvmmap_streamer_protobuf INTERFACE protobuf::libprotobuf)
@@ -109,6 +115,36 @@ target_link_libraries(cvmmap_streamer_mcap_runtime
PkgConfig::ZSTD PkgConfig::ZSTD
PkgConfig::LZ4) PkgConfig::LZ4)
add_library(cvmmap_streamer_record_support STATIC
src/encode/encoder_backend.cpp
src/encode/ffmpeg_encoder_backend.cpp
src/record/protobuf_descriptor.cpp
src/record/mcap_record_sink.cpp)
target_include_directories(cvmmap_streamer_record_support
PUBLIC
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(cvmmap_streamer_record_support
PUBLIC
cvmmap_streamer_foxglove_proto
cvmmap_streamer_depth_proto
cvmmap_streamer_mcap_runtime
PkgConfig::FFMPEG
PkgConfig::ZSTD
PkgConfig::LZ4
rvl::rvl
mcap::mcap
msft_proxy4::proxy
cvmmap_streamer_protobuf)
if (TARGET spdlog::spdlog)
target_link_libraries(cvmmap_streamer_record_support PUBLIC spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(cvmmap_streamer_record_support PUBLIC spdlog)
endif()
if (TARGET PkgConfig::PROTOBUF_PKG)
target_link_libraries(cvmmap_streamer_record_support PUBLIC PkgConfig::PROTOBUF_PKG)
endif()
add_library(cvmmap_streamer_common STATIC add_library(cvmmap_streamer_common STATIC
src/ipc/help.cpp src/ipc/help.cpp
src/config/runtime_config.cpp src/config/runtime_config.cpp
@@ -119,11 +155,7 @@ add_library(cvmmap_streamer_common STATIC
src/metrics/latency_tracker.cpp src/metrics/latency_tracker.cpp
src/pipeline/pipeline_runtime.cpp src/pipeline/pipeline_runtime.cpp
src/protocol/rtmp_output.cpp src/protocol/rtmp_output.cpp
src/protocol/rtp_publisher.cpp src/protocol/rtp_publisher.cpp)
src/encode/encoder_backend.cpp
src/encode/ffmpeg_encoder_backend.cpp
src/record/protobuf_descriptor.cpp
src/record/mcap_record_sink.cpp)
target_include_directories(cvmmap_streamer_common target_include_directories(cvmmap_streamer_common
PUBLIC PUBLIC
@@ -132,18 +164,14 @@ target_include_directories(cvmmap_streamer_common
set(CVMMAP_STREAMER_LINK_DEPS set(CVMMAP_STREAMER_LINK_DEPS
Threads::Threads Threads::Threads
cvmmap_streamer_foxglove_proto cvmmap_streamer_record_support
cvmmap_streamer_depth_proto
cvmmap_streamer_mcap_runtime
PkgConfig::FFMPEG PkgConfig::FFMPEG
PkgConfig::ZSTD PkgConfig::ZSTD
PkgConfig::LZ4 PkgConfig::LZ4
rvl::rvl
cvmmap::client cvmmap::client
CLI11::CLI11 CLI11::CLI11
tomlplusplus::tomlplusplus tomlplusplus::tomlplusplus
mcap::mcap mcap::mcap)
msft_proxy4::proxy)
if (TARGET cppzmq::cppzmq) if (TARGET cppzmq::cppzmq)
list(APPEND CVMMAP_STREAMER_LINK_DEPS cppzmq::cppzmq) list(APPEND CVMMAP_STREAMER_LINK_DEPS cppzmq::cppzmq)
@@ -193,6 +221,7 @@ add_cvmmap_binary(ipc_snapshot_tester src/testers/ipc_snapshot_tester.cpp)
add_cvmmap_binary(mcap_depth_record_tester src/testers/mcap_depth_record_tester.cpp) add_cvmmap_binary(mcap_depth_record_tester src/testers/mcap_depth_record_tester.cpp)
add_cvmmap_binary(mcap_body_record_tester src/testers/mcap_body_record_tester.cpp) add_cvmmap_binary(mcap_body_record_tester src/testers/mcap_body_record_tester.cpp)
add_cvmmap_binary(mcap_body_inspector src/testers/mcap_body_inspector.cpp) add_cvmmap_binary(mcap_body_inspector src/testers/mcap_body_inspector.cpp)
add_cvmmap_binary(mcap_pose_record_tester src/testers/mcap_pose_record_tester.cpp)
add_executable(mcap_reader_tester src/testers/mcap_reader_tester.cpp) add_executable(mcap_reader_tester src/testers/mcap_reader_tester.cpp)
target_include_directories(mcap_reader_tester target_include_directories(mcap_reader_tester
@@ -247,3 +276,34 @@ endif()
set_target_properties(mcap_replay_tester PROPERTIES set_target_properties(mcap_replay_tester PROPERTIES
OUTPUT_NAME "mcap_replay_tester" OUTPUT_NAME "mcap_replay_tester"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
add_executable(
zed_svo_to_mcap
src/tools/zed_svo_to_mcap.cpp
src/config/runtime_config.cpp)
target_include_directories(zed_svo_to_mcap
PRIVATE
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}"
${ZED_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS})
target_link_directories(zed_svo_to_mcap
PRIVATE
${ZED_LIBRARY_DIR}
${CUDA_LIBRARY_DIRS})
target_link_libraries(zed_svo_to_mcap
PRIVATE
cvmmap_streamer_record_support
CLI11::CLI11
tomlplusplus::tomlplusplus
${ZED_LIBRARIES}
${CUDA_CUDA_LIBRARY}
${CUDA_CUDART_LIBRARY})
if (TARGET spdlog::spdlog)
target_link_libraries(zed_svo_to_mcap PRIVATE spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(zed_svo_to_mcap PRIVATE spdlog)
endif()
set_target_properties(zed_svo_to_mcap PROPERTIES
OUTPUT_NAME "zed_svo_to_mcap"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
@@ -85,6 +85,8 @@ struct McapRecordConfig {
std::string path{"capture.mcap"}; std::string path{"capture.mcap"};
std::string topic{"/camera/video"}; std::string topic{"/camera/video"};
std::string depth_topic{"/camera/depth"}; std::string depth_topic{"/camera/depth"};
std::string calibration_topic{"/camera/calibration"};
std::string pose_topic{"/camera/pose"};
std::string body_topic{"/camera/body"}; std::string body_topic{"/camera/body"};
std::string frame_id{"camera"}; std::string frame_id{"camera"};
McapCompression compression{McapCompression::Zstd}; McapCompression compression{McapCompression::Zstd};
@@ -17,6 +17,7 @@ namespace cvmmap_streamer::encode {
struct RawVideoFrame { struct RawVideoFrame {
ipc::FrameInfo info{}; ipc::FrameInfo info{};
std::uint64_t source_timestamp_ns{0}; std::uint64_t source_timestamp_ns{0};
std::size_t row_stride_bytes{0};
std::span<const std::uint8_t> bytes{}; std::span<const std::uint8_t> bytes{};
}; };
@@ -4,6 +4,7 @@
#include "cvmmap_streamer/encode/encoded_access_unit.hpp" #include "cvmmap_streamer/encode/encoded_access_unit.hpp"
#include "cvmmap_streamer/ipc/contracts.hpp" #include "cvmmap_streamer/ipc/contracts.hpp"
#include <array>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <expected> #include <expected>
@@ -26,6 +27,31 @@ struct RawDepthMapView {
std::span<const float> pixels{}; std::span<const float> pixels{};
}; };
struct RawDepthMapU16View {
std::uint64_t timestamp_ns{0};
std::uint32_t width{0};
std::uint32_t height{0};
std::span<const std::uint16_t> pixels{};
};
struct RawCameraCalibrationView {
std::uint64_t timestamp_ns{0};
std::uint32_t width{0};
std::uint32_t height{0};
std::string_view distortion_model{};
std::span<const double> distortion{};
std::span<const double> intrinsic_matrix{};
std::span<const double> rectification_matrix{};
std::span<const double> projection_matrix{};
};
struct RawPoseView {
std::uint64_t timestamp_ns{0};
std::string_view reference_frame_id{};
std::array<double, 3> position{};
std::array<double, 4> orientation{};
};
struct RawBodyTrackingMessageView { struct RawBodyTrackingMessageView {
std::uint64_t timestamp_ns{0}; std::uint64_t timestamp_ns{0};
std::span<const std::uint8_t> bytes{}; std::span<const std::uint8_t> bytes{};
@@ -56,6 +82,15 @@ public:
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_depth_map(const RawDepthMapView &depth_map); std::expected<void, std::string> write_depth_map(const RawDepthMapView &depth_map);
[[nodiscard]]
std::expected<void, std::string> write_depth_map_u16(const RawDepthMapU16View &depth_map);
[[nodiscard]]
std::expected<void, std::string> write_camera_calibration(const RawCameraCalibrationView &calibration);
[[nodiscard]]
std::expected<void, std::string> write_pose(const RawPoseView &pose);
[[nodiscard]] [[nodiscard]]
std::expected<void, std::string> write_body_tracking_message(const RawBodyTrackingMessageView &body_message); std::expected<void, std::string> write_body_tracking_message(const RawBodyTrackingMessageView &body_message);
+37
View File
@@ -0,0 +1,37 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
import "google/protobuf/timestamp.proto";
package foxglove;
// Camera calibration parameters
message CameraCalibration {
// Timestamp of calibration data
google.protobuf.Timestamp timestamp = 1;
// Frame of reference for the camera. The origin of the frame is the optical center of the camera. +x points to the right in the image, +y points down, and +z points into the plane of the image.
string frame_id = 9;
// Image width
fixed32 width = 2;
// Image height
fixed32 height = 3;
// Name of distortion model
string distortion_model = 4;
// Distortion parameters
repeated double D = 5;
// Intrinsic camera matrix (3x3 row-major matrix)
repeated double K = 6;
// Rectification matrix (3x3 row-major matrix)
repeated double R = 7;
// Projection/camera matrix (3x4 row-major matrix)
repeated double P = 8;
}
+17
View File
@@ -0,0 +1,17 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
import "proto/foxglove/Quaternion.proto";
import "proto/foxglove/Vector3.proto";
package foxglove;
// A position and orientation for an object or reference frame in 3D space
message Pose {
// Point denoting position in 3D space
foxglove.Vector3 position = 1;
// Quaternion denoting orientation in 3D space
foxglove.Quaternion orientation = 2;
}
+20
View File
@@ -0,0 +1,20 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
import "proto/foxglove/Pose.proto";
import "google/protobuf/timestamp.proto";
package foxglove;
// A timestamped pose for an object or reference frame in 3D space
message PoseInFrame {
// Timestamp of pose
google.protobuf.Timestamp timestamp = 1;
// Frame of reference for pose position and orientation
string frame_id = 2;
// Pose in 3D space
foxglove.Pose pose = 3;
}
+20
View File
@@ -0,0 +1,20 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
package foxglove;
// A quaternion representing a rotation in 3D space
message Quaternion {
// x value
double x = 1;
// y value
double y = 2;
// z value
double z = 3;
// w value
double w = 4;
}
+17
View File
@@ -0,0 +1,17 @@
// Generated by https://github.com/foxglove/foxglove-sdk
syntax = "proto3";
package foxglove;
// A vector in 3D space that represents a direction only
message Vector3 {
// x coordinate length
double x = 1;
// y coordinate length
double y = 2;
// z coordinate length
double z = 3;
}
+28
View File
@@ -365,6 +365,14 @@ std::expected<void, std::string> apply_toml_file(RuntimeConfig &config, const st
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.depth_topic = *value; config.record.mcap.depth_topic = *value;
} }
if (auto value = toml_value<std::string>(table, "record.mcap.calibration_topic")) {
config.record.mcap.enabled = true;
config.record.mcap.calibration_topic = *value;
}
if (auto value = toml_value<std::string>(table, "record.mcap.pose_topic")) {
config.record.mcap.enabled = true;
config.record.mcap.pose_topic = *value;
}
if (auto value = toml_value<std::string>(table, "record.mcap.body_topic")) { if (auto value = toml_value<std::string>(table, "record.mcap.body_topic")) {
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.body_topic = *value; config.record.mcap.body_topic = *value;
@@ -568,6 +576,8 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
std::string mcap_path_raw{}; std::string mcap_path_raw{};
std::string mcap_topic_raw{}; std::string mcap_topic_raw{};
std::string mcap_depth_topic_raw{}; std::string mcap_depth_topic_raw{};
std::string mcap_calibration_topic_raw{};
std::string mcap_pose_topic_raw{};
std::string mcap_body_topic_raw{}; std::string mcap_body_topic_raw{};
std::string mcap_frame_id_raw{}; std::string mcap_frame_id_raw{};
std::string mcap_compression_raw{}; std::string mcap_compression_raw{};
@@ -610,6 +620,8 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
app.add_option("--mcap-path", mcap_path_raw); app.add_option("--mcap-path", mcap_path_raw);
app.add_option("--mcap-topic", mcap_topic_raw); app.add_option("--mcap-topic", mcap_topic_raw);
app.add_option("--mcap-depth-topic", mcap_depth_topic_raw); app.add_option("--mcap-depth-topic", mcap_depth_topic_raw);
app.add_option("--mcap-calibration-topic", mcap_calibration_topic_raw);
app.add_option("--mcap-pose-topic", mcap_pose_topic_raw);
app.add_option("--mcap-body-topic", mcap_body_topic_raw); app.add_option("--mcap-body-topic", mcap_body_topic_raw);
app.add_option("--mcap-frame-id", mcap_frame_id_raw); app.add_option("--mcap-frame-id", mcap_frame_id_raw);
app.add_option("--mcap-compression", mcap_compression_raw); app.add_option("--mcap-compression", mcap_compression_raw);
@@ -726,6 +738,14 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.depth_topic = mcap_depth_topic_raw; config.record.mcap.depth_topic = mcap_depth_topic_raw;
} }
if (!mcap_calibration_topic_raw.empty()) {
config.record.mcap.enabled = true;
config.record.mcap.calibration_topic = mcap_calibration_topic_raw;
}
if (!mcap_pose_topic_raw.empty()) {
config.record.mcap.enabled = true;
config.record.mcap.pose_topic = mcap_pose_topic_raw;
}
if (!mcap_body_topic_raw.empty()) { if (!mcap_body_topic_raw.empty()) {
config.record.mcap.enabled = true; config.record.mcap.enabled = true;
config.record.mcap.body_topic = mcap_body_topic_raw; config.record.mcap.body_topic = mcap_body_topic_raw;
@@ -867,6 +887,12 @@ std::expected<void, std::string> validate_runtime_config(const RuntimeConfig &co
if (config.record.mcap.depth_topic.empty()) { if (config.record.mcap.depth_topic.empty()) {
return std::unexpected("invalid MCAP config: depth_topic must not be empty"); return std::unexpected("invalid MCAP config: depth_topic must not be empty");
} }
if (config.record.mcap.calibration_topic.empty()) {
return std::unexpected("invalid MCAP config: calibration_topic must not be empty");
}
if (config.record.mcap.pose_topic.empty()) {
return std::unexpected("invalid MCAP config: pose_topic must not be empty");
}
if (config.record.mcap.body_topic.empty()) { if (config.record.mcap.body_topic.empty()) {
return std::unexpected("invalid MCAP config: body_topic must not be empty"); return std::unexpected("invalid MCAP config: body_topic must not be empty");
} }
@@ -910,6 +936,8 @@ std::string summarize_runtime_config(const RuntimeConfig &config) {
ss << ", mcap.path=" << config.record.mcap.path; ss << ", mcap.path=" << config.record.mcap.path;
ss << ", mcap.topic=" << config.record.mcap.topic; ss << ", mcap.topic=" << config.record.mcap.topic;
ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic; ss << ", mcap.depth_topic=" << config.record.mcap.depth_topic;
ss << ", mcap.calibration_topic=" << config.record.mcap.calibration_topic;
ss << ", mcap.pose_topic=" << config.record.mcap.pose_topic;
ss << ", mcap.body_topic=" << config.record.mcap.body_topic; ss << ", mcap.body_topic=" << config.record.mcap.body_topic;
ss << ", mcap.frame_id=" << config.record.mcap.frame_id; ss << ", mcap.frame_id=" << config.record.mcap.frame_id;
ss << ", mcap.compression=" << to_string(config.record.mcap.compression); ss << ", mcap.compression=" << to_string(config.record.mcap.compression);
+3
View File
@@ -200,6 +200,9 @@ public:
1) < 0) { 1) < 0) {
return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into FFmpeg image arrays"); return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into FFmpeg image arrays");
} }
if (frame.row_stride_bytes != 0) {
input_frame.linesize[0] = static_cast<int>(frame.row_stride_bytes);
}
sws_scale( sws_scale(
scaler_, scaler_,
+235 -18
View File
@@ -3,8 +3,10 @@
#include "cvmmap_streamer/record/mcap_record_sink.hpp" #include "cvmmap_streamer/record/mcap_record_sink.hpp"
#include "protobuf_descriptor.hpp" #include "protobuf_descriptor.hpp"
#include "cvmmap_streamer/DepthMap.pb.h" #include "proto/cvmmap_streamer/DepthMap.pb.h"
#include "foxglove/CompressedVideo.pb.h" #include "proto/foxglove/CameraCalibration.pb.h"
#include "proto/foxglove/CompressedVideo.pb.h"
#include "proto/foxglove/PoseInFrame.pb.h"
#include <rvl/rvl.hpp> #include <rvl/rvl.hpp>
#include <google/protobuf/timestamp.pb.h> #include <google/protobuf/timestamp.pb.h>
@@ -321,18 +323,108 @@ std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDe
} }
} }
[[nodiscard]]
std::expected<EncodedDepthPayload, std::string> encode_depth_payload(const RawDepthMapU16View &depth_map) {
const auto pixel_count = static_cast<std::size_t>(depth_map.width) * static_cast<std::size_t>(depth_map.height);
if (depth_map.width == 0 || depth_map.height == 0) {
return std::unexpected("depth map dimensions must be non-zero");
}
if (pixel_count != depth_map.pixels.size()) {
return std::unexpected("depth map dimensions do not match the pixel buffer");
}
try {
std::vector<std::uint16_t> pixels(depth_map.pixels.begin(), depth_map.pixels.end());
return EncodedDepthPayload{
.encoding = DepthEncoding::RvlU16Lossless,
.storage_unit = ipc::DepthUnit::Millimeter,
.bytes = rvl::compress_image(pixels, depth_map.height, depth_map.width),
};
} catch (const std::exception &error) {
return std::unexpected(std::string("failed to RVL-encode depth map: ") + error.what());
}
}
[[nodiscard]]
std::expected<void, std::string> append_repeated_double(
google::protobuf::RepeatedField<double> *field,
std::span<const double> values,
std::size_t expected_size,
std::string_view field_name) {
if (values.size() != expected_size) {
return std::unexpected(
"expected " + std::to_string(expected_size) + " values for " + std::string(field_name));
}
field->Reserve(static_cast<int>(values.size()));
for (const double value : values) {
field->Add(value);
}
return {};
}
[[nodiscard]]
std::expected<void, std::string> write_depth_message(
mcap::McapWriter &writer,
std::uint64_t timestamp_ns,
std::string_view frame_id,
std::uint32_t width,
std::uint32_t height,
ipc::DepthUnit source_unit,
mcap::ChannelId channel_id,
std::uint32_t &sequence,
const EncodedDepthPayload &encoded) {
cvmmap_streamer::DepthMap message{};
*message.mutable_timestamp() = to_proto_timestamp(timestamp_ns);
message.set_frame_id(std::string(frame_id));
message.set_width(width);
message.set_height(height);
message.set_source_unit(to_proto_depth_unit(source_unit));
message.set_storage_unit(to_proto_storage_unit(encoded.storage_unit));
message.set_encoding(to_proto_depth_encoding(encoded.encoding));
message.set_data(
reinterpret_cast<const char *>(encoded.bytes.data()),
static_cast<int>(encoded.bytes.size()));
std::string serialized{};
if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize cvmmap_streamer.DepthMap");
}
mcap::Message record{};
record.channelId = channel_id;
record.sequence = sequence++;
record.logTime = timestamp_ns;
record.publishTime = timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size();
const auto write_status = writer.write(record);
if (!write_status.ok()) {
return std::unexpected("failed to write MCAP depth message: " + write_status.message);
}
return {};
}
} }
struct McapRecordSink::State { struct McapRecordSink::State {
mcap::McapWriter writer{}; mcap::McapWriter writer{};
std::string path{}; std::string path{};
std::string frame_id{}; std::string frame_id{};
std::string calibration_topic{};
std::string pose_topic{};
std::string body_topic{}; std::string body_topic{};
mcap::SchemaId calibration_schema_id{0};
mcap::SchemaId pose_schema_id{0};
mcap::ChannelId video_channel_id{0}; mcap::ChannelId video_channel_id{0};
mcap::ChannelId depth_channel_id{0}; mcap::ChannelId depth_channel_id{0};
mcap::ChannelId calibration_channel_id{0};
mcap::ChannelId pose_channel_id{0};
mcap::ChannelId body_channel_id{0}; mcap::ChannelId body_channel_id{0};
std::uint32_t video_sequence{0}; std::uint32_t video_sequence{0};
std::uint32_t depth_sequence{0}; std::uint32_t depth_sequence{0};
std::uint32_t calibration_sequence{0};
std::uint32_t pose_sequence{0};
std::uint32_t body_sequence{0}; std::uint32_t body_sequence{0};
CodecType codec{CodecType::H264}; CodecType codec{CodecType::H264};
std::vector<std::uint8_t> keyframe_preamble{}; std::vector<std::uint8_t> keyframe_preamble{};
@@ -363,6 +455,8 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
auto state = std::make_unique<State>(); auto state = std::make_unique<State>();
state->path = config.record.mcap.path; state->path = config.record.mcap.path;
state->frame_id = config.record.mcap.frame_id; state->frame_id = config.record.mcap.frame_id;
state->calibration_topic = config.record.mcap.calibration_topic;
state->pose_topic = config.record.mcap.pose_topic;
state->body_topic = config.record.mcap.body_topic; state->body_topic = config.record.mcap.body_topic;
mcap::McapWriterOptions options(""); mcap::McapWriterOptions options("");
@@ -398,6 +492,26 @@ std::expected<McapRecordSink, std::string> McapRecordSink::create(
state->writer.addChannel(depth_channel); state->writer.addChannel(depth_channel);
state->depth_channel_id = depth_channel.id; state->depth_channel_id = depth_channel.id;
const auto calibration_descriptor_set = build_file_descriptor_set(foxglove::CameraCalibration::descriptor());
std::string calibration_schema_bytes{};
if (!calibration_descriptor_set.SerializeToString(&calibration_schema_bytes)) {
return std::unexpected("failed to serialize foxglove.CameraCalibration descriptor set");
}
mcap::Schema calibration_schema("foxglove.CameraCalibration", "protobuf", calibration_schema_bytes);
state->writer.addSchema(calibration_schema);
state->calibration_schema_id = calibration_schema.id;
const auto pose_descriptor_set = build_file_descriptor_set(foxglove::PoseInFrame::descriptor());
std::string pose_schema_bytes{};
if (!pose_descriptor_set.SerializeToString(&pose_schema_bytes)) {
return std::unexpected("failed to serialize foxglove.PoseInFrame descriptor set");
}
mcap::Schema pose_schema("foxglove.PoseInFrame", "protobuf", pose_schema_bytes);
state->writer.addSchema(pose_schema);
state->pose_schema_id = pose_schema.id;
sink.state_ = state.release(); sink.state_ = state.release();
auto update = sink.update_stream_info(stream_info); auto update = sink.update_stream_info(stream_info);
if (!update) { if (!update) {
@@ -470,34 +584,137 @@ std::expected<void, std::string> McapRecordSink::write_depth_map(const RawDepthM
return std::unexpected(encoded.error()); return std::unexpected(encoded.error());
} }
cvmmap_streamer::DepthMap message{}; return write_depth_message(
*message.mutable_timestamp() = to_proto_timestamp(depth_map.timestamp_ns); state_->writer,
depth_map.timestamp_ns,
state_->frame_id,
depth_map.width,
depth_map.height,
source_unit,
state_->depth_channel_id,
state_->depth_sequence,
*encoded);
}
std::expected<void, std::string> McapRecordSink::write_depth_map_u16(const RawDepthMapU16View &depth_map) {
if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open");
}
auto encoded = encode_depth_payload(depth_map);
if (!encoded) {
return std::unexpected(encoded.error());
}
return write_depth_message(
state_->writer,
depth_map.timestamp_ns,
state_->frame_id,
depth_map.width,
depth_map.height,
ipc::DepthUnit::Millimeter,
state_->depth_channel_id,
state_->depth_sequence,
*encoded);
}
std::expected<void, std::string> McapRecordSink::write_camera_calibration(const RawCameraCalibrationView &calibration) {
if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open");
}
if (state_->calibration_topic.empty()) {
return std::unexpected("calibration topic is empty");
}
if (state_->calibration_channel_id == 0) {
mcap::Channel calibration_channel(state_->calibration_topic, "protobuf", state_->calibration_schema_id);
state_->writer.addChannel(calibration_channel);
state_->calibration_channel_id = calibration_channel.id;
}
foxglove::CameraCalibration message{};
*message.mutable_timestamp() = to_proto_timestamp(calibration.timestamp_ns);
message.set_frame_id(state_->frame_id); message.set_frame_id(state_->frame_id);
message.set_width(depth_map.width); message.set_width(calibration.width);
message.set_height(depth_map.height); message.set_height(calibration.height);
message.set_source_unit(to_proto_depth_unit(source_unit)); message.set_distortion_model(std::string(calibration.distortion_model));
message.set_storage_unit(to_proto_storage_unit(encoded->storage_unit)); for (const double value : calibration.distortion) {
message.set_encoding(to_proto_depth_encoding(encoded->encoding)); message.add_d(value);
message.set_data( }
reinterpret_cast<const char *>(encoded->bytes.data()), if (auto append = append_repeated_double(message.mutable_k(), calibration.intrinsic_matrix, 9, "K"); !append) {
static_cast<int>(encoded->bytes.size())); return append;
}
if (auto append = append_repeated_double(message.mutable_r(), calibration.rectification_matrix, 9, "R"); !append) {
return append;
}
if (auto append = append_repeated_double(message.mutable_p(), calibration.projection_matrix, 12, "P"); !append) {
return append;
}
std::string serialized{}; std::string serialized{};
if (!message.SerializeToString(&serialized)) { if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize cvmmap_streamer.DepthMap"); return std::unexpected("failed to serialize foxglove.CameraCalibration");
} }
mcap::Message record{}; mcap::Message record{};
record.channelId = state_->depth_channel_id; record.channelId = state_->calibration_channel_id;
record.sequence = state_->depth_sequence++; record.sequence = state_->calibration_sequence++;
record.logTime = depth_map.timestamp_ns; record.logTime = calibration.timestamp_ns;
record.publishTime = depth_map.timestamp_ns; record.publishTime = calibration.timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data()); record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size(); record.dataSize = serialized.size();
const auto write_status = state_->writer.write(record); const auto write_status = state_->writer.write(record);
if (!write_status.ok()) { if (!write_status.ok()) {
return std::unexpected("failed to write MCAP depth message: " + write_status.message); return std::unexpected("failed to write MCAP calibration message: " + write_status.message);
}
return {};
}
std::expected<void, std::string> McapRecordSink::write_pose(const RawPoseView &pose) {
if (state_ == nullptr) {
return std::unexpected("MCAP sink is not open");
}
if (state_->pose_topic.empty()) {
return std::unexpected("pose topic is empty");
}
if (state_->pose_channel_id == 0) {
mcap::Channel pose_channel(state_->pose_topic, "protobuf", state_->pose_schema_id);
state_->writer.addChannel(pose_channel);
state_->pose_channel_id = pose_channel.id;
}
foxglove::PoseInFrame message{};
*message.mutable_timestamp() = to_proto_timestamp(pose.timestamp_ns);
message.set_frame_id(std::string(pose.reference_frame_id));
auto *pose_message = message.mutable_pose();
auto *position = pose_message->mutable_position();
position->set_x(pose.position[0]);
position->set_y(pose.position[1]);
position->set_z(pose.position[2]);
auto *orientation = pose_message->mutable_orientation();
orientation->set_x(pose.orientation[0]);
orientation->set_y(pose.orientation[1]);
orientation->set_z(pose.orientation[2]);
orientation->set_w(pose.orientation[3]);
std::string serialized{};
if (!message.SerializeToString(&serialized)) {
return std::unexpected("failed to serialize foxglove.PoseInFrame");
}
mcap::Message record{};
record.channelId = state_->pose_channel_id;
record.sequence = state_->pose_sequence++;
record.logTime = pose.timestamp_ns;
record.publishTime = pose.timestamp_ns;
record.data = reinterpret_cast<const std::byte *>(serialized.data());
record.dataSize = serialized.size();
const auto write_status = state_->writer.write(record);
if (!write_status.ok()) {
return std::unexpected("failed to write MCAP pose message: " + write_status.message);
} }
return {}; return {};
} }
+1 -1
View File
@@ -5,7 +5,7 @@
 #include "cvmmap_streamer/common.h"
 #include "cvmmap_streamer/record/mcap_record_sink.hpp"
-#include "foxglove/CompressedVideo.pb.h"
+#include "proto/foxglove/CompressedVideo.pb.h"
 #include <spdlog/spdlog.h>
+2 -2
View File
@@ -1,9 +1,9 @@
 #include <mcap/reader.hpp>
-#include "cvmmap_streamer/DepthMap.pb.h"
+#include "proto/cvmmap_streamer/DepthMap.pb.h"
 #include "cvmmap_streamer/common.h"
 #include "cvmmap_streamer/record/mcap_record_sink.hpp"
-#include "foxglove/CompressedVideo.pb.h"
+#include "proto/foxglove/CompressedVideo.pb.h"
 #include <rvl/rvl.hpp>
 #include <spdlog/spdlog.h>
+279
View File
@@ -0,0 +1,279 @@
#include <mcap/reader.hpp>
#include <rvl/rvl.hpp>
#include <spdlog/spdlog.h>
#include "cvmmap_streamer/common.h"
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
#include "proto/cvmmap_streamer/DepthMap.pb.h"
#include "proto/foxglove/CameraCalibration.pb.h"
#include "proto/foxglove/CompressedVideo.pb.h"
#include "proto/foxglove/PoseInFrame.pb.h"
#include <cmath>
#include <cstdint>
#include <filesystem>
#include <span>
#include <string>
#include <vector>
namespace {
    // Process exit codes reported by this tester binary.
    enum class TesterExitCode : int {
        Success = 0,
        CreateError = 2,
        WriteError = 3,
        OpenError = 4,
        VerificationError = 5,
    };

    // Narrows the strongly typed exit code to the int that main() returns.
    [[nodiscard]]
    constexpr int exit_code(const TesterExitCode code) {
        return static_cast<int>(code);
    }

    // True when lhs and rhs differ by at most `tolerance` (absolute error).
    [[nodiscard]]
    bool approx_equal(const double lhs, const double rhs, const double tolerance = 1e-9) {
        const double delta = std::fabs(lhs - rhs);
        return delta <= tolerance;
    }
} // namespace
// Self-test binary: records one message on each of the four topics (video,
// depth, calibration, pose) through McapRecordSink into a single MCAP file,
// then reads the file back with mcap::McapReader and verifies every message
// round-trips intact. Returns a TesterExitCode value.
int main(int argc, char **argv) {
    if (cvmmap_streamer::has_help_flag(argc, argv)) {
        cvmmap_streamer::print_help("mcap_pose_record_tester");
        return exit_code(TesterExitCode::Success);
    }
    // Optional argv[1] overrides the default output path in the temp directory.
    const std::filesystem::path output_path =
        argc > 1
            ? std::filesystem::path(argv[1])
            : std::filesystem::temp_directory_path() / "cvmmap_streamer_pose_record_test.mcap";
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    // Enable MCAP recording on all four topics; compression is disabled so the
    // test exercises the raw write path.
    cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
    config.record.mcap.enabled = true;
    config.record.mcap.path = output_path.string();
    config.record.mcap.topic = "/camera/video";
    config.record.mcap.depth_topic = "/camera/depth";
    config.record.mcap.calibration_topic = "/camera/calibration";
    config.record.mcap.pose_topic = "/camera/pose";
    config.record.mcap.frame_id = "camera";
    config.record.mcap.compression = cvmmap_streamer::McapCompression::None;
    cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
    stream_info.codec = cvmmap_streamer::CodecType::H264;
    auto sink = cvmmap_streamer::record::McapRecordSink::create(config, stream_info);
    if (!sink) {
        spdlog::error("failed to create MCAP sink: {}", sink.error());
        return exit_code(TesterExitCode::CreateError);
    }
    // Minimal Annex-B payload (4-byte start code + one NAL) stamped at t=100ns.
    cvmmap_streamer::encode::EncodedAccessUnit access_unit{};
    access_unit.codec = cvmmap_streamer::CodecType::H264;
    access_unit.source_timestamp_ns = 100;
    access_unit.stream_pts_ns = 100;
    access_unit.keyframe = false;
    access_unit.annexb_bytes = {0x00, 0x00, 0x00, 0x01, 0x09, 0x10};
    if (auto write = sink->write_access_unit(access_unit); !write) {
        spdlog::error("failed to write video access unit: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // 2x2 depth plane in row-major order; the 0 entry exercises the
    // "no-measurement" pixel case through the RVL round trip below.
    const std::vector<std::uint16_t> depth_pixels{
        1000, 2000,
        0, 1500,
    };
    if (auto write = sink->write_depth_map_u16(cvmmap_streamer::record::RawDepthMapU16View{
            .timestamp_ns = 101,
            .width = 2,
            .height = 2,
            .pixels = depth_pixels,
        }); !write) {
        spdlog::error("failed to write depth map: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // Synthetic pinhole calibration (no distortion, identity rectification).
    const std::vector<double> distortion{0.0, 0.0, 0.0, 0.0, 0.0};
    const std::vector<double> intrinsic_matrix{
        500.0, 0.0, 320.0,
        0.0, 500.0, 240.0,
        0.0, 0.0, 1.0,
    };
    const std::vector<double> rectification_matrix{
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    };
    const std::vector<double> projection_matrix{
        500.0, 0.0, 320.0, 0.0,
        0.0, 500.0, 240.0, 0.0,
        0.0, 0.0, 1.0, 0.0,
    };
    if (auto write = sink->write_camera_calibration(cvmmap_streamer::record::RawCameraCalibrationView{
            .timestamp_ns = 102,
            .width = 640,
            .height = 480,
            .distortion_model = "plumb_bob",
            .distortion = distortion,
            .intrinsic_matrix = intrinsic_matrix,
            .rectification_matrix = rectification_matrix,
            .projection_matrix = projection_matrix,
        }); !write) {
        spdlog::error("failed to write calibration: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // One pose with distinct component values so axis mix-ups are detectable.
    if (auto write = sink->write_pose(cvmmap_streamer::record::RawPoseView{
            .timestamp_ns = 103,
            .reference_frame_id = "world",
            .position = {1.0, 2.0, 3.0},
            .orientation = {0.1, 0.2, 0.3, 0.9},
        }); !write) {
        spdlog::error("failed to write pose: {}", write.error());
        return exit_code(TesterExitCode::WriteError);
    }
    // Finalize the file before re-opening it for verification.
    sink->close();
    mcap::McapReader reader{};
    const auto open_status = reader.open(output_path.string());
    if (!open_status.ok()) {
        spdlog::error("failed to open MCAP file '{}': {}", output_path.string(), open_status.message);
        return exit_code(TesterExitCode::OpenError);
    }
    // Per-schema message counters; each must end at exactly 1.
    std::uint64_t video_messages{0};
    std::uint64_t depth_messages{0};
    std::uint64_t calibration_messages{0};
    std::uint64_t pose_messages{0};
    auto messages = reader.readMessages();
    for (auto it = messages.begin(); it != messages.end(); ++it) {
        if (it->schema == nullptr || it->channel == nullptr) {
            spdlog::error("MCAP message missing schema or channel");
            reader.close();
            return exit_code(TesterExitCode::VerificationError);
        }
        // Every topic in this file is expected to be protobuf-encoded.
        if (it->schema->encoding != "protobuf" || it->channel->messageEncoding != "protobuf") {
            spdlog::error("unexpected message encoding in MCAP");
            reader.close();
            return exit_code(TesterExitCode::VerificationError);
        }
        // Dispatch on the fully qualified protobuf schema name.
        if (it->schema->name == "foxglove.CompressedVideo") {
            foxglove::CompressedVideo video{};
            if (!video.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse foxglove.CompressedVideo");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            if (it->channel->topic != "/camera/video" || video.frame_id() != "camera" || video.data().empty()) {
                spdlog::error("video message verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            video_messages += 1;
            continue;
        }
        if (it->schema->name == "cvmmap_streamer.DepthMap") {
            cvmmap_streamer::DepthMap depth{};
            if (!depth.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse cvmmap_streamer.DepthMap");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            if (it->channel->topic != "/camera/depth" ||
                depth.frame_id() != "camera" ||
                depth.encoding() != cvmmap_streamer::DepthMap::RVL_U16_LOSSLESS) {
                spdlog::error("depth message metadata verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            // Decompress the RVL payload and compare pixel-for-pixel with the
            // plane written above — proves the encode/decode pair is lossless.
            const auto decoded = rvl::decompress_image(std::span<const std::uint8_t>(
                reinterpret_cast<const std::uint8_t *>(depth.data().data()),
                depth.data().size()));
            if (decoded.pixels.size() != depth_pixels.size() ||
                decoded.pixels[0] != 1000 ||
                decoded.pixels[1] != 2000 ||
                decoded.pixels[2] != 0 ||
                decoded.pixels[3] != 1500) {
                spdlog::error("depth RVL round-trip verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            depth_messages += 1;
            continue;
        }
        if (it->schema->name == "foxglove.CameraCalibration") {
            foxglove::CameraCalibration calibration{};
            if (!calibration.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse foxglove.CameraCalibration");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            // Spot-check matrix sizes plus representative K and P entries.
            if (it->channel->topic != "/camera/calibration" ||
                calibration.frame_id() != "camera" ||
                calibration.width() != 640 ||
                calibration.height() != 480 ||
                calibration.distortion_model() != "plumb_bob" ||
                calibration.d_size() != 5 ||
                calibration.k_size() != 9 ||
                calibration.r_size() != 9 ||
                calibration.p_size() != 12 ||
                !approx_equal(calibration.k(0), 500.0) ||
                !approx_equal(calibration.k(2), 320.0) ||
                !approx_equal(calibration.p(5), 500.0)) {
                spdlog::error("calibration message verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            calibration_messages += 1;
            continue;
        }
        if (it->schema->name == "foxglove.PoseInFrame") {
            foxglove::PoseInFrame pose{};
            if (!pose.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
                spdlog::error("failed to parse foxglove.PoseInFrame");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            // All seven pose components must match what was written.
            if (it->channel->topic != "/camera/pose" ||
                pose.frame_id() != "world" ||
                !approx_equal(pose.pose().position().x(), 1.0) ||
                !approx_equal(pose.pose().position().y(), 2.0) ||
                !approx_equal(pose.pose().position().z(), 3.0) ||
                !approx_equal(pose.pose().orientation().x(), 0.1) ||
                !approx_equal(pose.pose().orientation().y(), 0.2) ||
                !approx_equal(pose.pose().orientation().z(), 0.3) ||
                !approx_equal(pose.pose().orientation().w(), 0.9)) {
                spdlog::error("pose message verification failed");
                reader.close();
                return exit_code(TesterExitCode::VerificationError);
            }
            pose_messages += 1;
        }
    }
    reader.close();
    // Exactly one message of each kind, no more and no fewer.
    if (video_messages != 1 || depth_messages != 1 || calibration_messages != 1 || pose_messages != 1) {
        spdlog::error(
            "unexpected message counts: video={} depth={} calibration={} pose={}",
            video_messages,
            depth_messages,
            calibration_messages,
            pose_messages);
        return exit_code(TesterExitCode::VerificationError);
    }
    spdlog::info(
        "validated same-file MCAP video+depth+calibration+pose recording at '{}'",
        output_path.string());
    return exit_code(TesterExitCode::Success);
}
+645
View File
@@ -0,0 +1,645 @@
#include <CLI/CLI.hpp>
#include <sl/Camera.hpp>
#include <spdlog/spdlog.h>
#include "cvmmap_streamer/config/runtime_config.hpp"
#include "cvmmap_streamer/core/status.hpp"
#include "cvmmap_streamer/encode/encoder_backend.hpp"
#include "cvmmap_streamer/ipc/contracts.hpp"
#include "cvmmap_streamer/record/mcap_record_sink.hpp"
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <expected>
#include <filesystem>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace {
    // Process exit codes for the conversion tool.
    enum class ToolExitCode : int {
        Success = 0,
        UsageError = 2,
        RuntimeError = 3,
    };

    // Every command-line option, pre-populated with its default value.
    struct CliOptions {
        std::string input_path{};
        std::string output_path{};
        std::string codec{"h264"};
        std::string encoder_device{"auto"};
        std::string mcap_compression{"zstd"};
        std::string depth_mode{"neural"};
        bool with_pose{false};
        std::uint32_t start_frame{0};
        std::uint32_t end_frame{0};
        bool has_end_frame{false};
        std::string frame_id{"camera"};
        std::string video_topic{"/camera/video"};
        std::string depth_topic{"/camera/depth"};
        std::string calibration_topic{"/camera/calibration"};
        std::string pose_topic{"/camera/pose"};
    };

    // Narrows the strongly typed exit code to the int main() must return.
    [[nodiscard]]
    constexpr int exit_code(const ToolExitCode code) {
        return static_cast<int>(code);
    }

    // Converts an sl::String to std::string; a null c_str() becomes "".
    [[nodiscard]]
    std::string zed_string(const sl::String &value) {
        const char *text = value.c_str();
        if (text == nullptr) {
            return std::string("");
        }
        return std::string(text);
    }

    // Human-readable name of a ZED error code.
    [[nodiscard]]
    std::string zed_status_string(const sl::ERROR_CODE code) {
        return zed_string(sl::toString(code));
    }

    // Human-readable name of a positional-tracking state.
    [[nodiscard]]
    std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state) {
        return zed_string(sl::toString(state));
    }

    // Maps a --codec string onto the streamer codec enum.
    [[nodiscard]]
    std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) {
        using cvmmap_streamer::CodecType;
        if (raw == "h265") {
            return CodecType::H265;
        }
        if (raw == "h264") {
            return CodecType::H264;
        }
        return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
    }

    // Maps an --encoder-device string onto the encoder device enum.
    [[nodiscard]]
    std::expected<cvmmap_streamer::EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
        using cvmmap_streamer::EncoderDeviceType;
        if (raw == "software") {
            return EncoderDeviceType::Software;
        }
        if (raw == "nvidia") {
            return EncoderDeviceType::Nvidia;
        }
        if (raw == "auto") {
            return EncoderDeviceType::Auto;
        }
        return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
    }

    // Maps a --mcap-compression string onto the chunk-compression enum.
    [[nodiscard]]
    std::expected<cvmmap_streamer::McapCompression, std::string> parse_mcap_compression(const std::string_view raw) {
        using cvmmap_streamer::McapCompression;
        if (raw == "zstd") {
            return McapCompression::Zstd;
        }
        if (raw == "lz4") {
            return McapCompression::Lz4;
        }
        if (raw == "none") {
            return McapCompression::None;
        }
        return std::unexpected("invalid mcap compression: '" + std::string(raw) + "' (expected: none|lz4|zstd)");
    }

    // Maps a --depth-mode string onto the ZED SDK depth-mode enum.
    [[nodiscard]]
    std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) {
        if (raw == "ultra") {
            return sl::DEPTH_MODE::ULTRA;
        }
        if (raw == "performance") {
            return sl::DEPTH_MODE::PERFORMANCE;
        }
        if (raw == "quality") {
            return sl::DEPTH_MODE::QUALITY;
        }
        if (raw == "neural") {
            return sl::DEPTH_MODE::NEURAL;
        }
        return std::unexpected("invalid depth mode: '" + std::string(raw) + "' (expected: neural|quality|performance|ultra)");
    }

    // Nanoseconds between frames at the given fps. A non-positive (or NaN)
    // fps falls back to the ~30 fps default period.
    [[nodiscard]]
    std::uint64_t frame_period_ns(const float fps) {
        constexpr std::uint64_t default_period_ns = 33'333'333ull;
        if (!(fps > 0.0f)) {
            return default_period_ns;
        }
        const double period = 1'000'000'000.0 / static_cast<double>(fps);
        return static_cast<std::uint64_t>(std::llround(period));
    }

    // Checks that `mat` is a non-empty 3-channel U8 image with a valid CPU
    // buffer; `label` names the image in any error message.
    [[nodiscard]]
    std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
        if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
            return std::unexpected(std::string(label) + " must be U8_C3");
        }
        const bool has_area = mat.getWidth() != 0 && mat.getHeight() != 0;
        if (!has_area) {
            return std::unexpected(std::string(label) + " dimensions must be non-zero");
        }
        if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
            return std::unexpected(std::string(label) + " CPU buffer is null");
        }
        return {};
    }

    // Checks that `mat` is a non-empty single-channel U16 plane with a valid
    // CPU buffer; `label` names the plane in any error message.
    [[nodiscard]]
    std::expected<void, std::string> validate_u16c1_mat(const sl::Mat &mat, const std::string_view label) {
        if (mat.getDataType() != sl::MAT_TYPE::U16_C1) {
            return std::unexpected(std::string(label) + " must be U16_C1");
        }
        const bool has_area = mat.getWidth() != 0 && mat.getHeight() != 0;
        if (!has_area) {
            return std::unexpected(std::string(label) + " dimensions must be non-zero");
        }
        if (mat.getPtr<sl::ushort1>(sl::MEM::CPU) == nullptr) {
            return std::unexpected(std::string(label) + " CPU buffer is null");
        }
        return {};
    }

    // Copies a possibly row-padded U16 plane into a tightly packed buffer,
    // dropping any per-row stride padding.
    [[nodiscard]]
    std::vector<std::uint16_t> copy_compact_u16_plane(const sl::Mat &mat) {
        const auto cols = static_cast<std::size_t>(mat.getWidth());
        const auto rows = static_cast<std::size_t>(mat.getHeight());
        const auto packed_row_bytes = cols * sizeof(std::uint16_t);
        const auto src_stride = mat.getStepBytes(sl::MEM::CPU);
        const auto *src_row = mat.getPtr<sl::uchar1>(sl::MEM::CPU);
        std::vector<std::uint16_t> packed(cols * rows, 0);
        auto *dst_row = reinterpret_cast<std::uint8_t *>(packed.data());
        for (std::size_t row = 0; row < rows; ++row) {
            std::memcpy(dst_row, src_row, packed_row_bytes);
            dst_row += packed_row_bytes;
            src_row += src_stride;
        }
        return packed;
    }

    // Writes every encoded access unit to the sink, stopping at the first failure.
    [[nodiscard]]
    std::expected<void, std::string> write_access_units(
        cvmmap_streamer::record::McapRecordSink &sink,
        const std::vector<cvmmap_streamer::encode::EncodedAccessUnit> &access_units) {
        for (const auto &unit : access_units) {
            const auto write = sink.write_access_unit(unit);
            if (!write) {
                return std::unexpected(write.error());
            }
        }
        return {};
    }

    // Flushes the encoder and forwards any remaining access units to the sink.
    [[nodiscard]]
    std::expected<void, std::string> flush_and_write(
        cvmmap_streamer::record::McapRecordSink &sink,
        cvmmap_streamer::encode::EncoderBackend &backend) {
        const auto flushed = backend->flush();
        if (!flushed) {
            return std::unexpected(cvmmap_streamer::format_error(flushed.error()));
        }
        return write_access_units(sink, *flushed);
    }
} // namespace
// Converts a recorded ZED SVO/SVO2 file into an MCAP file containing
// compressed video, RVL depth maps, one camera-calibration message, and
// (optionally) positional-tracking poses. Exit codes follow ToolExitCode.
int main(int argc, char **argv) {
    // --- CLI definition and parsing -------------------------------------
    CliOptions options{};
    CLI::App app{"zed_svo_to_mcap - convert ZED SVO/SVO2 playback to MCAP"};
    app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required();
    app.add_option("--output", options.output_path, "Output MCAP file")->required();
    app.add_option("--codec", options.codec, "Video codec (h264|h265)")
        ->check(CLI::IsMember({"h264", "h265"}));
    app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
        ->check(CLI::IsMember({"auto", "nvidia", "software"}));
    app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available");
    app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)")
        ->check(CLI::IsMember({"none", "lz4", "zstd"}));
    app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural|quality|performance|ultra)")
        ->check(CLI::IsMember({"neural", "quality", "performance", "ultra"}));
    app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
        ->check(CLI::NonNegativeNumber);
    // Keep the option handle so we can tell "not given" apart from 0.
    auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
        ->check(CLI::NonNegativeNumber);
    app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics");
    app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo");
    app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap");
    app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration");
    app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame");
    try {
        app.parse(argc, argv);
    } catch (const CLI::ParseError &error) {
        return app.exit(error);
    }
    options.has_end_frame = end_frame_option->count() > 0;
    // --- Validate option values into typed forms ------------------------
    auto codec = parse_codec(options.codec);
    if (!codec) {
        spdlog::error("{}", codec.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto encoder_device = parse_encoder_device(options.encoder_device);
    if (!encoder_device) {
        spdlog::error("{}", encoder_device.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto compression = parse_mcap_compression(options.mcap_compression);
    if (!compression) {
        spdlog::error("{}", compression.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto depth_mode = parse_depth_mode(options.depth_mode);
    if (!depth_mode) {
        spdlog::error("{}", depth_mode.error());
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.output_path.empty()) {
        spdlog::error("output path must not be empty");
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.has_end_frame && options.end_frame < options.start_frame) {
        spdlog::error(
            "invalid frame range: start-frame={} end-frame={}",
            options.start_frame,
            options.end_frame);
        return exit_code(ToolExitCode::UsageError);
    }
    const std::filesystem::path output_path{options.output_path};
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    // --- Open the SVO via the ZED SDK -----------------------------------
    sl::Camera camera{};
    bool tracking_enabled{false};
    // Shared teardown used on every exit path below; disables tracking
    // before closing the camera.
    auto close_camera = [&]() {
        if (tracking_enabled) {
            camera.disablePositionalTracking();
            tracking_enabled = false;
        }
        if (camera.isOpened()) {
            camera.close();
        }
    };
    sl::InitParameters init{};
    init.input.setFromSVOFile(options.input_path.c_str());
    // Play the file as fast as it can be processed, not at capture rate.
    init.svo_real_time_mode = false;
    init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
    init.coordinate_units = sl::UNIT::METER;
    init.depth_mode = *depth_mode;
    init.sdk_verbose = false;
    const auto open_status = camera.open(init);
    if (open_status != sl::ERROR_CODE::SUCCESS) {
        spdlog::error(
            "failed to open SVO '{}': {}",
            options.input_path,
            zed_status_string(open_status));
        return exit_code(ToolExitCode::RuntimeError);
    }
    // --- Validate the requested frame range against the file ------------
    const auto total_frames = camera.getSVONumberOfFrames();
    if (total_frames <= 0) {
        close_camera();
        spdlog::error("input SVO has no frames");
        return exit_code(ToolExitCode::RuntimeError);
    }
    if (options.start_frame >= static_cast<std::uint32_t>(total_frames)) {
        close_camera();
        spdlog::error(
            "start-frame {} is out of range for {} frames",
            options.start_frame,
            total_frames);
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.has_end_frame && options.end_frame >= static_cast<std::uint32_t>(total_frames)) {
        close_camera();
        spdlog::error(
            "end-frame {} is out of range for {} frames",
            options.end_frame,
            total_frames);
        return exit_code(ToolExitCode::UsageError);
    }
    camera.setSVOPosition(static_cast<int>(options.start_frame));
    // Tracking failure is non-fatal: fall back to video/depth-only output.
    if (options.with_pose) {
        sl::PositionalTrackingParameters tracking_parameters{};
        const auto tracking_status = camera.enablePositionalTracking(tracking_parameters);
        if (tracking_status == sl::ERROR_CODE::SUCCESS) {
            tracking_enabled = true;
        } else {
            spdlog::warn(
                "positional tracking unavailable for '{}': {}. continuing without pose output",
                options.input_path,
                zed_status_string(tracking_status));
        }
    }
    // --- Read camera geometry reported by the SDK -----------------------
    const auto camera_info = camera.getCameraInformation();
    const auto &camera_config = camera_info.camera_configuration;
    const auto &left_camera = camera_config.calibration_parameters.left_cam;
    const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
    const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
    if (width == 0 || height == 0) {
        close_camera();
        spdlog::error("camera resolution reported by the ZED SDK is invalid");
        return exit_code(ToolExitCode::RuntimeError);
    }
    // --- Build encoder + MCAP sink configuration ------------------------
    cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
    config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
    config.encoder.device = *encoder_device;
    config.encoder.codec = *codec;
    config.encoder.gop = 30;
    config.encoder.b_frames = 0;
    config.record.mcap.enabled = true;
    config.record.mcap.path = output_path.string();
    config.record.mcap.topic = options.video_topic;
    config.record.mcap.depth_topic = options.depth_topic;
    config.record.mcap.calibration_topic = options.calibration_topic;
    config.record.mcap.pose_topic = options.pose_topic;
    config.record.mcap.frame_id = options.frame_id;
    config.record.mcap.compression = *compression;
    // The encoder is fed tightly described BGR frames of the SVO resolution.
    cvmmap_streamer::ipc::FrameInfo frame_info{
        .width = static_cast<std::uint16_t>(width),
        .height = static_cast<std::uint16_t>(height),
        .channels = 3,
        .depth = cvmmap_streamer::ipc::Depth::U8,
        .pixel_format = cvmmap_streamer::ipc::PixelFormat::BGR,
        .buffer_size = width * height * 3u,
    };
    auto backend_result = cvmmap_streamer::encode::make_encoder_backend(config);
    if (!backend_result) {
        close_camera();
        spdlog::error("failed to create encoder backend: {}", cvmmap_streamer::format_error(backend_result.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto backend = std::move(*backend_result);
    if (auto init_status = backend->init(config, frame_info); !init_status) {
        close_camera();
        spdlog::error("failed to initialize encoder backend: {}", cvmmap_streamer::format_error(init_status.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto stream_info = backend->stream_info();
    if (!stream_info) {
        backend->shutdown();
        close_camera();
        spdlog::error("encoder backend did not provide stream info: {}", cvmmap_streamer::format_error(stream_info.error()));
        return exit_code(ToolExitCode::RuntimeError);
    }
    auto sink = cvmmap_streamer::record::McapRecordSink::create(config, *stream_info);
    if (!sink) {
        backend->shutdown();
        close_camera();
        spdlog::error("failed to create MCAP sink: {}", sink.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    // --- Calibration derived from the SDK's rectified left camera -------
    // Distortion is all-zero because the SDK emits rectified images;
    // rectification is identity for the same reason.
    const std::array<double, 5> distortion{
        0.0, 0.0, 0.0, 0.0, 0.0,
    };
    const std::array<double, 9> intrinsic_matrix{
        static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
        0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
        0.0, 0.0, 1.0,
    };
    const std::array<double, 9> rectification_matrix{
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    };
    const std::array<double, 12> projection_matrix{
        static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
        0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
        0.0, 0.0, 1.0, 0.0,
    };
    // --- Per-frame export loop ------------------------------------------
    sl::RuntimeParameters runtime_parameters{};
    sl::Mat left_frame{};
    sl::Mat depth_frame{};
    sl::Pose pose{};
    bool calibration_written{false};
    std::uint64_t emitted_frames{0};
    std::optional<std::uint64_t> last_timestamp_ns{};
    std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
    const auto last_frame = options.has_end_frame
        ? options.end_frame
        : static_cast<std::uint32_t>(total_frames - 1);
    const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps);
    while (options.start_frame + emitted_frames <= last_frame) {
        const auto grab_status = camera.grab(runtime_parameters);
        // End-of-file before the requested range is exhausted is not an error.
        if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
            break;
        }
        if (grab_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        // NOTE(review): assumes the installed SDK exposes a 3-channel BGR
        // left view (VIEW::LEFT_BGR) — confirm against the SDK version in use.
        const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
        if (image_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("{}", valid.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Depth in millimetres as U16 — presumably matches the sink's
        // RVL U16 depth encoding; verify against DepthMap semantics.
        const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU);
        if (depth_status != sl::ERROR_CODE::SUCCESS) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to retrieve depth map: {}", zed_status_string(depth_status));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto valid = validate_u16c1_mat(depth_frame, "depth map"); !valid) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("{}", valid.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Timestamp handling: fall back to a synthetic clock if the SVO has
        // no timestamp, and force strict monotonicity either way.
        auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
        if (timestamp_ns == 0) {
            timestamp_ns = emitted_frames * nominal_frame_period_ns;
        }
        if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) {
            timestamp_ns = *last_timestamp_ns + 1;
        }
        last_timestamp_ns = timestamp_ns;
        // The span covers stride * height; the encoder is told the stride so
        // it can skip any per-row padding.
        const auto video_step_bytes = left_frame.getStepBytes(sl::MEM::CPU);
        const auto video_bytes = std::span<const std::uint8_t>(
            left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
            video_step_bytes * left_frame.getHeight());
        cvmmap_streamer::encode::RawVideoFrame raw_video{
            .info = frame_info,
            .source_timestamp_ns = timestamp_ns,
            .row_stride_bytes = video_step_bytes,
            .bytes = video_bytes,
        };
        if (auto push = backend->push_frame(raw_video); !push) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to encode frame: {}", cvmmap_streamer::format_error(push.error()));
            return exit_code(ToolExitCode::RuntimeError);
        }
        auto drained = backend->drain();
        if (!drained) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to drain encoded access units: {}", cvmmap_streamer::format_error(drained.error()));
            return exit_code(ToolExitCode::RuntimeError);
        }
        if (auto write = write_access_units(*sink, *drained); !write) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to write video access unit: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Calibration is static; write it once, stamped with the first
        // frame's timestamp.
        if (!calibration_written) {
            cvmmap_streamer::record::RawCameraCalibrationView calibration{
                .timestamp_ns = timestamp_ns,
                .width = width,
                .height = height,
                .distortion_model = "plumb_bob",
                .distortion = distortion,
                .intrinsic_matrix = intrinsic_matrix,
                .rectification_matrix = rectification_matrix,
                .projection_matrix = projection_matrix,
            };
            if (auto write = sink->write_camera_calibration(calibration); !write) {
                sink->close();
                backend->shutdown();
                close_camera();
                spdlog::error("failed to write calibration: {}", write.error());
                return exit_code(ToolExitCode::RuntimeError);
            }
            calibration_written = true;
        }
        // The depth sink expects a tightly packed plane: pass the SDK buffer
        // through directly when it is already packed, otherwise copy row by
        // row to strip the stride padding.
        const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU);
        const auto packed_depth_bytes = static_cast<std::size_t>(width) * sizeof(std::uint16_t);
        if (depth_step_bytes < packed_depth_bytes) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("depth stride {} is smaller than packed row size {}", depth_step_bytes, packed_depth_bytes);
            return exit_code(ToolExitCode::RuntimeError);
        }
        std::optional<std::vector<std::uint16_t>> compact_depth{};
        std::span<const std::uint16_t> depth_pixels{};
        if (depth_step_bytes == packed_depth_bytes) {
            depth_pixels = std::span<const std::uint16_t>(
                depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
                static_cast<std::size_t>(width) * static_cast<std::size_t>(height));
        } else {
            compact_depth = copy_compact_u16_plane(depth_frame);
            depth_pixels = *compact_depth;
        }
        cvmmap_streamer::record::RawDepthMapU16View depth_map{
            .timestamp_ns = timestamp_ns,
            .width = width,
            .height = height,
            .pixels = depth_pixels,
        };
        if (auto write = sink->write_depth_map_u16(depth_map); !write) {
            sink->close();
            backend->shutdown();
            close_camera();
            spdlog::error("failed to write depth map: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Poses are emitted only while tracking reports OK; state changes
        // are logged once per transition to avoid per-frame warning spam.
        if (tracking_enabled) {
            const auto tracking_state = camera.getPosition(pose, sl::REFERENCE_FRAME::WORLD);
            if (!last_tracking_state || *last_tracking_state != tracking_state) {
                last_tracking_state = tracking_state;
                if (tracking_state != sl::POSITIONAL_TRACKING_STATE::OK) {
                    spdlog::warn(
                        "pose tracking state changed to {} at frame {}",
                        zed_tracking_state_string(tracking_state),
                        options.start_frame + emitted_frames);
                }
            }
            if (tracking_state == sl::POSITIONAL_TRACKING_STATE::OK) {
                const auto translation = pose.getTranslation();
                const auto orientation = pose.getOrientation();
                cvmmap_streamer::record::RawPoseView pose_view{
                    .timestamp_ns = timestamp_ns,
                    .reference_frame_id = "world",
                    .position = {
                        static_cast<double>(translation.x),
                        static_cast<double>(translation.y),
                        static_cast<double>(translation.z),
                    },
                    .orientation = {
                        static_cast<double>(orientation.x),
                        static_cast<double>(orientation.y),
                        static_cast<double>(orientation.z),
                        static_cast<double>(orientation.w),
                    },
                };
                if (auto write = sink->write_pose(pose_view); !write) {
                    sink->close();
                    backend->shutdown();
                    close_camera();
                    spdlog::error("failed to write pose: {}", write.error());
                    return exit_code(ToolExitCode::RuntimeError);
                }
            }
        }
        emitted_frames += 1;
    }
    // --- Drain the encoder's queued output and tear everything down -----
    if (auto flushed = flush_and_write(*sink, backend); !flushed) {
        sink->close();
        backend->shutdown();
        close_camera();
        spdlog::error("failed to finalize encoded video: {}", flushed.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    sink->close();
    backend->shutdown();
    close_camera();
    spdlog::info(
        "wrote {} frame(s) from '{}' to '{}'",
        emitted_frames,
        options.input_path,
        output_path.string());
    return exit_code(ToolExitCode::Success);
}