Add synced ZED SVO grid exporter

This commit is contained in:
2026-03-19 08:26:38 +00:00
parent 83171b415f
commit 2671ac7ba9
6 changed files with 2066 additions and 0 deletions
+702
View File
@@ -0,0 +1,702 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <sl/Camera.hpp>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <memory>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace {
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;
// The output grid is always 2x2, so exactly four inputs are required.
constexpr std::size_t kExpectedInputCount = 4;
// Process exit codes returned by main(). CLI parse errors are reported via
// CLI11's own app.exit() value instead of these.
enum class ToolExitCode : int {
    Success = 0,
    UsageError = 2,
    RuntimeError = 3,
};
// Parsed command-line options. Defaults here mirror the option definitions
// registered on the CLI::App in main().
struct CliOptions {
    std::vector<std::string> input_paths{};  // --input, repeated; tiles in row-major order
    std::string segment_dir{};               // --segment-dir; mutually exclusive with --input
    std::string output_path{};               // --output; empty means "derive from inputs"
    std::string codec{"h265"};               // --codec (h264|h265)
    std::string encoder_device{"auto"};      // --encoder-device (auto|nvidia|software)
    std::string preset{"fast"};              // --preset (fast|balanced|quality)
    std::string tune{"low-latency"};         // --tune (low-latency|balanced)
    int quality{cvmmap_streamer::zed_tools::kDefaultQuality};            // --quality (0-51, lower is better)
    std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};      // --gop, GOP length in frames
    std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames}; // --b-frames count
    double start_offset_seconds{0.0};  // --start-offset-seconds, applied after the synced start
    double duration_seconds{0.0};      // --duration-seconds; meaningful only when has_duration
    bool has_duration{false};          // whether --duration-seconds was supplied
    double output_fps{0.0};            // --output-fps; meaningful only when has_output_fps
    bool has_output_fps{false};        // whether --output-fps was supplied
    double tile_scale{0.5};            // --tile-scale, per-tile scale factor (0.1-1.0)
};
// One input recording: the SVO path plus a short label used in logs and
// error messages ("zed1".."zed4" in segment mode, "view1".."view4" otherwise).
struct SourceSpec {
    std::filesystem::path path{};
    std::string label{};
};
// Per-input playback state: the opened ZED handle plus a two-slot frame
// buffer (current + lookahead) used to step each stream along the shared
// composite timeline.
struct CameraStream {
    SourceSpec source{};                   // originating file + log label
    std::unique_ptr<sl::Camera> camera{};  // opened SVO playback handle
    sl::RuntimeParameters runtime{};
    sl::Mat current_frame{};               // frame displayed at the current output timestamp
    sl::Mat next_frame{};                  // buffered lookahead frame (valid when has_next)
    std::uint64_t current_timestamp_ns{0}; // image timestamp of current_frame
    std::uint64_t next_timestamp_ns{0};    // image timestamp of next_frame
    std::uint64_t first_timestamp_ns{0};   // timestamp of the recording's first frame
    std::uint64_t last_timestamp_ns{0};    // timestamp of the recording's last frame
    std::uint64_t total_frames{0};         // frame count reported by the SDK
    std::uint64_t nominal_frame_period_ns{0}; // derived from fps; substitute when the SDK reports a zero timestamp
    float fps{0.0f};                       // nominal capture rate
    std::uint32_t width{0};
    std::uint32_t height{0};
    int sync_position{-1};                 // SVO frame index at the synced start (-1 = not yet computed)
    bool has_next{false};                  // next_frame / next_timestamp_ns are valid
};
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
return static_cast<int>(code);
}
// Convert an sl::String to std::string, mapping a null payload to "".
[[nodiscard]]
std::string zed_string(const sl::String &value) {
    const char *raw = value.c_str();
    if (raw == nullptr) {
        return {};
    }
    return std::string{raw};
}
// Human-readable rendering of a ZED SDK error code.
[[nodiscard]]
std::string zed_status_string(const sl::ERROR_CODE code) {
    const auto text = sl::toString(code);
    return zed_string(text);
}
// Verify that `mat` is a non-empty U8_C3 image with a valid CPU buffer —
// the exact shape the export loop relies on when wrapping the buffer as a
// cv::Mat without copying.
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
    if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
        return std::unexpected(std::string(label) + " must be U8_C3");
    }
    if (mat.getWidth() == 0 || mat.getHeight() == 0) {
        return std::unexpected(std::string(label) + " dimensions must be non-zero");
    }
    if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
        return std::unexpected(std::string(label) + " CPU buffer is null");
    }
    return {};
}
// Scan `segment_dir` for the four per-camera recordings named
// "*_zed[1-4].svo" / "*_zed[1-4].svo2" and return them ordered by camera
// index (zed1..zed4).
//
// Fails when the directory is missing, when the number of matching files is
// not exactly kExpectedInputCount, or when two files claim the same camera
// index — previously four files such as {zed1, zed1, zed2, zed3} would pass
// the count check and silently export a duplicated view.
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) {
    if (!std::filesystem::is_directory(segment_dir)) {
        return std::unexpected("segment directory does not exist: " + segment_dir.string());
    }
    // Case-insensitive suffix match; accepts both .svo and .svo2.
    const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase};
    std::vector<std::pair<int, std::filesystem::path>> ordered_paths{};
    for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) {
        if (!entry.is_regular_file()) {
            continue;
        }
        std::smatch match{};
        const auto filename = entry.path().filename().string();
        if (!std::regex_match(filename, match, pattern)) {
            continue;
        }
        // Capture group is constrained to [1-4], so stoi cannot throw here.
        ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path());
    }
    std::sort(
        ordered_paths.begin(),
        ordered_paths.end(),
        [](const auto &left, const auto &right) {
            return left.first < right.first;
        });
    if (ordered_paths.size() != kExpectedInputCount) {
        return std::unexpected(
            "expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size()));
    }
    // With exactly four entries, a duplicate index implies a missing camera.
    for (std::size_t index = 1; index < ordered_paths.size(); ++index) {
        if (ordered_paths[index].first == ordered_paths[index - 1].first) {
            return std::unexpected(
                "duplicate camera index zed" + std::to_string(ordered_paths[index].first) +
                " under '" + segment_dir.string() + "'");
        }
    }
    std::vector<SourceSpec> sources{};
    sources.reserve(ordered_paths.size());
    for (const auto &[camera_index, path] : ordered_paths) {
        sources.push_back(SourceSpec{
            .path = path,
            .label = "zed" + std::to_string(camera_index),
        });
    }
    return sources;
}
// Turn CLI options into the ordered list of four input recordings, either
// by scanning --segment-dir or by validating the explicit --input paths.
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> resolve_sources(const CliOptions &options) {
    // Segment mode takes precedence when supplied (the CLI makes the two
    // modes mutually exclusive).
    if (!options.segment_dir.empty()) {
        return discover_segment_inputs(std::filesystem::path{options.segment_dir});
    }
    if (options.input_paths.size() != kExpectedInputCount) {
        return std::unexpected("repeat --input exactly 4 times");
    }
    std::vector<SourceSpec> sources{};
    sources.reserve(options.input_paths.size());
    std::size_t view_number = 0;
    for (const auto &raw_path : options.input_paths) {
        ++view_number;
        std::filesystem::path candidate{raw_path};
        if (!std::filesystem::is_regular_file(candidate)) {
            return std::unexpected("input file does not exist: " + candidate.string());
        }
        sources.push_back(SourceSpec{
            .path = candidate,
            .label = "view" + std::to_string(view_number),
        });
    }
    return sources;
}
// Choose where the composite MP4 is written: an explicit --output wins,
// segment mode writes "<dirname>_grid.mp4" inside the segment directory,
// and manual mode derives the name from the first input file.
[[nodiscard]]
std::filesystem::path derive_grid_output_path(const CliOptions &options, const std::vector<SourceSpec> &sources) {
    if (!options.output_path.empty()) {
        return std::filesystem::path{options.output_path};
    }
    if (!options.segment_dir.empty()) {
        const std::filesystem::path segment_dir{options.segment_dir};
        auto name = segment_dir.filename().string();
        name += "_grid.mp4";
        return segment_dir / name;
    }
    auto derived = sources.front().path;
    derived.replace_extension("");
    derived += "_grid.mp4";
    return derived;
}
// Render a nanosecond Unix timestamp as "seconds.mmm" with the millisecond
// part always zero-padded to three digits.
[[nodiscard]]
std::string format_unix_timestamp(const std::uint64_t timestamp_ns) {
    const auto whole_seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond;
    const auto millis = (timestamp_ns % cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull;
    auto fraction = std::to_string(millis);
    fraction.insert(0, 3 - fraction.size(), '0');
    return std::to_string(whole_seconds) + "." + fraction;
}
// Paint the frame's wall-clock timestamp in the top-left corner: white text
// over a filled black box so it stays legible on any background.
void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns) {
    const auto label = format_unix_timestamp(timestamp_ns);
    constexpr auto kFont = cv::FONT_HERSHEY_SIMPLEX;
    constexpr double kScale = 0.8;
    constexpr int kThickness = 2;
    int baseline = 0;
    const auto extent = cv::getTextSize(label, kFont, kScale, kThickness, &baseline);
    // Backdrop is padded 8 px around the text on every side.
    const cv::Rect backdrop{8, 8, extent.width + 16, extent.height + baseline + 16};
    cv::rectangle(canvas, backdrop, cv::Scalar(0, 0, 0), cv::FILLED);
    const cv::Point anchor{16, 16 + extent.height};
    cv::putText(canvas, label, anchor, kFont, kScale, cv::Scalar(255, 255, 255), kThickness, cv::LINE_AA);
}
// Fetch the grabbed frame's image timestamp. A zero value from the SDK is
// treated as "missing": the previous frame's timestamp plus the nominal
// frame period is substituted, or an error is reported when there is no
// previous frame to extrapolate from.
[[nodiscard]]
std::expected<std::uint64_t, std::string> read_image_timestamp_ns(
    sl::Camera &camera,
    const std::optional<std::uint64_t> fallback_timestamp_ns,
    const std::uint64_t nominal_frame_period_ns) {
    const auto reported_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
    if (reported_ns != 0) {
        return reported_ns;
    }
    if (fallback_timestamp_ns.has_value()) {
        return *fallback_timestamp_ns + nominal_frame_period_ns;
    }
    return std::unexpected("ZED SDK returned a zero image timestamp for the first frame");
}
// Grab the next frame from `camera` and copy its left BGR view into
// `target`, writing the frame's image timestamp to `timestamp_ns_out`.
//
// A zero SDK timestamp is replaced by `fallback_timestamp_ns` plus
// `nominal_frame_period_ns` (see read_image_timestamp_ns). Reaching the end
// of the SVO is reported via the sentinel error string "end-of-svo", which
// fill_next_frame treats as normal termination rather than a failure.
[[nodiscard]]
std::expected<void, std::string> read_into_mat(
    sl::Camera &camera,
    sl::RuntimeParameters &runtime,
    sl::Mat &target,
    std::optional<std::uint64_t> fallback_timestamp_ns,
    std::uint64_t nominal_frame_period_ns,
    std::uint64_t &timestamp_ns_out,
    const std::string_view label) {
    const auto grab_status = camera.grab(runtime);
    // NOTE: "end-of-svo" is a sentinel consumed by fill_next_frame — keep the
    // exact spelling in sync with that caller.
    if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
        return std::unexpected("end-of-svo");
    }
    if (grab_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to grab frame for " + std::string(label) + ": " + zed_status_string(grab_status));
    }
    // Retrieve the left eye as 8-bit BGR into CPU memory so the export loop
    // can wrap it as a cv::Mat without copying.
    const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
    if (image_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " + zed_status_string(image_status));
    }
    if (auto valid = validate_u8c3_mat(target, label); !valid) {
        return std::unexpected(valid.error());
    }
    auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns);
    if (!timestamp_ns) {
        return std::unexpected(timestamp_ns.error());
    }
    timestamp_ns_out = *timestamp_ns;
    return {};
}
// Read one more frame from the stream into the lookahead slot
// (next_frame / next_timestamp_ns) and mark it valid. Reaching the end of
// the SVO is not an error: has_next is simply left false.
[[nodiscard]]
std::expected<void, std::string> fill_next_frame(CameraStream &stream) {
    std::uint64_t timestamp_ns = 0;
    auto next = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.next_frame,
        stream.current_timestamp_ns,  // fallback when the SDK reports a zero timestamp
        stream.nominal_frame_period_ns,
        timestamp_ns,
        stream.source.label);
    if (!next) {
        // "end-of-svo" is the sentinel emitted by read_into_mat when grab()
        // returns END_OF_SVOFILE_REACHED.
        if (next.error() == "end-of-svo") {
            stream.has_next = false;
            return {};
        }
        return std::unexpected(next.error());
    }
    stream.next_timestamp_ns = timestamp_ns;
    stream.has_next = true;
    return {};
}
// Advance the stream by one frame: the buffered lookahead becomes the
// current frame, then a fresh lookahead is read (end-of-file simply leaves
// has_next false).
[[nodiscard]]
std::expected<void, std::string> promote_next_frame(CameraStream &stream) {
    if (!stream.has_next) {
        return std::unexpected("no buffered next frame is available for " + stream.source.label);
    }
    // Swap instead of copy: sl::Mat buffers are reused across reads.
    using std::swap;
    swap(stream.current_timestamp_ns, stream.next_timestamp_ns);
    swap(stream.current_frame, stream.next_frame);
    stream.has_next = false;
    return fill_next_frame(stream);
}
// Open one SVO recording and probe its metadata: resolution, fps, frame
// count, and the timestamps of the first and last frames (used later to
// compute the common synced window across all four inputs).
//
// On success the camera's SVO position is left at the end of the file —
// main() re-seeks each stream to its synced start before exporting.
[[nodiscard]]
std::expected<CameraStream, std::string> open_camera_stream(const SourceSpec &source) {
    CameraStream stream{};
    stream.source = source;
    stream.camera = std::make_unique<sl::Camera>();
    sl::InitParameters init{};
    init.input.setFromSVOFile(source.path.c_str());
    init.svo_real_time_mode = false;  // decode as fast as possible, not at capture rate
    init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
    init.coordinate_units = sl::UNIT::METER;
    init.depth_mode = sl::DEPTH_MODE::NONE;  // image-only export; skip depth computation
    init.sdk_verbose = false;
    const auto open_status = stream.camera->open(init);
    if (open_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to open SVO '" + source.path.string() + "': " + zed_status_string(open_status));
    }
    const auto total_frames = stream.camera->getSVONumberOfFrames();
    if (total_frames <= 0) {
        return std::unexpected("input SVO has no frames: " + source.path.string());
    }
    stream.total_frames = static_cast<std::uint64_t>(total_frames);
    const auto camera_info = stream.camera->getCameraInformation().camera_configuration;
    stream.width = static_cast<std::uint32_t>(camera_info.resolution.width);
    stream.height = static_cast<std::uint32_t>(camera_info.resolution.height);
    stream.fps = camera_info.fps;
    stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps);
    if (stream.width == 0 || stream.height == 0) {
        return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
    }
    // Probe the first frame's timestamp (no fallback: a zero timestamp on the
    // very first frame is a hard error).
    std::uint64_t first_timestamp_ns = 0;
    auto first_frame = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.current_frame,
        std::nullopt,
        stream.nominal_frame_period_ns,
        first_timestamp_ns,
        source.label);
    if (!first_frame) {
        return std::unexpected(first_frame.error());
    }
    stream.first_timestamp_ns = first_timestamp_ns;
    // Seek to and probe the last frame's timestamp. This clobbers
    // current_frame, which is fine: main() seeks back before exporting.
    stream.camera->setSVOPosition(static_cast<int>(stream.total_frames - 1));
    std::uint64_t last_timestamp_ns = 0;
    auto last_frame = read_into_mat(
        *stream.camera,
        stream.runtime,
        stream.current_frame,
        std::nullopt,
        stream.nominal_frame_period_ns,
        last_timestamp_ns,
        source.label);
    if (!last_frame) {
        return std::unexpected(last_frame.error());
    }
    stream.last_timestamp_ns = last_timestamp_ns;
    return stream;
}
// Close every still-open ZED handle. Safe on partially-initialized lists
// (null or never-opened cameras are skipped).
void close_camera_streams(std::vector<CameraStream> &streams) {
    for (auto &stream : streams) {
        auto *camera = stream.camera.get();
        if (camera == nullptr) {
            continue;
        }
        if (camera->isOpened()) {
            camera->close();
        }
    }
}
} // namespace
// Entry point: parse CLI options, open the four inputs, compute the common
// synced time window across them, then resample every stream onto a shared
// timeline and emit a 2x2 grid MP4.
int main(int argc, char **argv) {
    // ---- CLI definition -------------------------------------------------
    CliOptions options{};
    CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"};
    auto *input_option = app.add_option("--input", options.input_paths, "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)");
    auto *segment_dir_option = app.add_option("--segment-dir", options.segment_dir, "Segment directory containing *_zed[1-4].svo or *_zed[1-4].svo2 files");
    // The two input modes are mutually exclusive.
    input_option->excludes(segment_dir_option);
    segment_dir_option->excludes(input_option);
    app.add_option("--output", options.output_path, "Output MP4 file");
    app.add_option("--codec", options.codec, "Video codec (h264|h265)")
        ->check(CLI::IsMember({"h264", "h265"}));
    app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
        ->check(CLI::IsMember({"auto", "nvidia", "software"}));
    app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
        ->check(CLI::IsMember({"fast", "balanced", "quality"}));
    app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
        ->check(CLI::IsMember({"low-latency", "balanced"}));
    app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
        ->check(CLI::Range(0, 51));
    app.add_option("--gop", options.gop, "Encoder GOP length in frames")
        ->check(CLI::PositiveNumber);
    app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
        ->check(CLI::NonNegativeNumber);
    app.add_option("--start-offset-seconds", options.start_offset_seconds, "Offset to apply after the synced common start time in seconds")
        ->check(CLI::NonNegativeNumber);
    auto *duration_option = app.add_option("--duration-seconds", options.duration_seconds, "Limit export duration in seconds after sync")
        ->check(CLI::PositiveNumber);
    auto *output_fps_option = app.add_option("--output-fps", options.output_fps, "Composite output frame rate (default: max input fps)")
        ->check(CLI::PositiveNumber);
    app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution")
        ->check(CLI::Range(0.1, 1.0));
    try {
        app.parse(argc, argv);
    } catch (const CLI::ParseError &error) {
        // CLI11 prints the usage message and supplies its own exit status.
        return app.exit(error);
    }
    // Distinguish "defaulted" from "explicitly passed" for optional limits.
    options.has_duration = duration_option->count() > 0;
    options.has_output_fps = output_fps_option->count() > 0;

    // ---- Option validation ----------------------------------------------
    if (options.input_paths.empty() && options.segment_dir.empty()) {
        spdlog::error("provide either --segment-dir or repeat --input exactly 4 times");
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.b_frames > options.gop) {
        spdlog::error(
            "invalid encoder config: b-frames {} must be <= gop {}",
            options.b_frames,
            options.gop);
        return exit_code(ToolExitCode::UsageError);
    }
    // Translate CLI strings into typed enums; any failure is a usage error.
    auto codec = parse_codec(options.codec);
    if (!codec) {
        spdlog::error("{}", codec.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto encoder_device = parse_encoder_device(options.encoder_device);
    if (!encoder_device) {
        spdlog::error("{}", encoder_device.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto preset = parse_preset(options.preset);
    if (!preset) {
        spdlog::error("{}", preset.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto tune = parse_tune(options.tune);
    if (!tune) {
        spdlog::error("{}", tune.error());
        return exit_code(ToolExitCode::UsageError);
    }

    // ---- Input resolution and output path -------------------------------
    auto sources = resolve_sources(options);
    if (!sources) {
        spdlog::error("{}", sources.error());
        return exit_code(ToolExitCode::UsageError);
    }
    const auto output_path = derive_grid_output_path(options, *sources);
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    const EncodeTuning tuning{
        .preset = *preset,
        .tune = *tune,
        .quality = options.quality,
        .gop = options.gop,
        .b_frames = options.b_frames,
    };

    // ---- Open all four streams ------------------------------------------
    std::vector<CameraStream> streams{};
    streams.reserve(sources->size());
    for (const auto &source : *sources) {
        auto stream = open_camera_stream(source);
        if (!stream) {
            // Close any cameras opened so far before bailing out.
            close_camera_streams(streams);
            spdlog::error("{}", stream.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        streams.push_back(std::move(*stream));
    }

    // ---- Synced time window ---------------------------------------------
    // Start: the latest first-frame timestamp across streams (plus any user
    // offset). End: the earliest last-frame timestamp. Only this window is
    // covered by all four recordings.
    const auto sync_start_ts = std::max_element(
        streams.begin(),
        streams.end(),
        [](const auto &left, const auto &right) {
            return left.first_timestamp_ns < right.first_timestamp_ns;
        })->first_timestamp_ns;
    const auto start_offset_ns = static_cast<std::uint64_t>(std::llround(options.start_offset_seconds * 1'000'000'000.0));
    const auto effective_start_ts = sync_start_ts + start_offset_ns;
    const auto common_end_ts = std::min_element(
        streams.begin(),
        streams.end(),
        [](const auto &left, const auto &right) {
            return left.last_timestamp_ns < right.last_timestamp_ns;
        })->last_timestamp_ns;
    // Exclusive end: +1 so the frame exactly at common_end_ts is included.
    const auto requested_end_exclusive_ts = options.has_duration
        ? effective_start_ts + static_cast<std::uint64_t>(std::llround(options.duration_seconds * 1'000'000'000.0))
        : common_end_ts + 1;
    const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1);
    if (effective_start_ts >= output_end_exclusive_ts) {
        close_camera_streams(streams);
        spdlog::error(
            "synced time window is empty: start_ts={} end_ts={}",
            effective_start_ts,
            output_end_exclusive_ts);
        return exit_code(ToolExitCode::UsageError);
    }

    // ---- Geometry / fps checks ------------------------------------------
    // All tiles must share one resolution; the composite runs at the fastest
    // input rate unless --output-fps overrides it.
    std::uint32_t source_width = streams.front().width;
    std::uint32_t source_height = streams.front().height;
    float max_input_fps = streams.front().fps;
    for (const auto &stream : streams) {
        if (stream.width != source_width || stream.height != source_height) {
            close_camera_streams(streams);
            spdlog::error(
                "all inputs must share the same resolution: expected {}x{}, got {}x{} for {}",
                source_width,
                source_height,
                stream.width,
                stream.height,
                stream.source.path.string());
            return exit_code(ToolExitCode::UsageError);
        }
        max_input_fps = std::max(max_input_fps, stream.fps);
    }
    const auto output_fps = options.has_output_fps ? static_cast<float>(options.output_fps) : max_input_fps;
    const auto output_period_ns = frame_period_ns(output_fps);
    // Ceiling division so a partial trailing period still emits a frame.
    const auto total_frames_to_emit =
        static_cast<std::uint64_t>((output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns);

    // ---- Seek every stream to the synced start ---------------------------
    for (auto &stream : streams) {
        stream.sync_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts});
        if (stream.sync_position < 0) {
            close_camera_streams(streams);
            spdlog::error(
                "failed to compute synced start frame for {} at timestamp {}",
                stream.source.path.string(),
                effective_start_ts);
            return exit_code(ToolExitCode::RuntimeError);
        }
        stream.camera->setSVOPosition(stream.sync_position);
        // Prime the two-slot buffer: read the current frame...
        std::uint64_t current_timestamp_ns = 0;
        auto current = read_into_mat(
            *stream.camera,
            stream.runtime,
            stream.current_frame,
            std::nullopt,
            stream.nominal_frame_period_ns,
            current_timestamp_ns,
            stream.source.label);
        if (!current) {
            close_camera_streams(streams);
            spdlog::error("{}", current.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        stream.current_timestamp_ns = current_timestamp_ns;
        // ...then the lookahead frame.
        auto next = fill_next_frame(stream);
        if (!next) {
            close_camera_streams(streams);
            spdlog::error("{}", next.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Step forward until current_frame is at or past the synced start (the
        // SDK seek may land slightly before it).
        while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) {
            auto promote = promote_next_frame(stream);
            if (!promote) {
                close_camera_streams(streams);
                spdlog::error("{}", promote.error());
                return exit_code(ToolExitCode::RuntimeError);
            }
        }
        spdlog::info(
            "ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}",
            stream.source.path.string(),
            stream.source.label,
            stream.sync_position,
            stream.first_timestamp_ns,
            stream.current_timestamp_ns,
            stream.has_next ? stream.next_timestamp_ns : 0);
    }

    // ---- Composite geometry and encoder setup ----------------------------
    const auto tile_width = static_cast<int>(std::llround(static_cast<double>(source_width) * options.tile_scale));
    const auto tile_height = static_cast<int>(std::llround(static_cast<double>(source_height) * options.tile_scale));
    if (tile_width <= 0 || tile_height <= 0) {
        close_camera_streams(streams);
        spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale);
        return exit_code(ToolExitCode::UsageError);
    }
    // 2x2 grid.
    const auto composite_width = tile_width * 2;
    const auto composite_height = tile_height * 2;
    Mp4Writer writer{};
    if (auto open_writer = writer.open(
            output_path,
            *codec,
            *encoder_device,
            static_cast<std::uint32_t>(composite_width),
            static_cast<std::uint32_t>(composite_height),
            output_fps,
            tuning);
        !open_writer) {
        close_camera_streams(streams);
        spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
        return exit_code(ToolExitCode::RuntimeError);
    }

    // ---- Export loop ------------------------------------------------------
    cv::Mat composite(composite_height, composite_width, CV_8UC3);
    std::vector<cv::Mat> resized_tiles(streams.size());
    ProgressBar progress{total_frames_to_emit};
    for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) {
        const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns;
        if (target_timestamp_ns >= output_end_exclusive_ts) {
            break;
        }
        // Advance each stream until its lookahead is past the target, leaving
        // current_frame as the latest frame at-or-before the target timestamp.
        for (auto &stream : streams) {
            while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) {
                auto promote = promote_next_frame(stream);
                if (!promote) {
                    progress.finish(emitted_frames, false);
                    close_camera_streams(streams);
                    spdlog::error("{}", promote.error());
                    return exit_code(ToolExitCode::RuntimeError);
                }
            }
        }
        composite.setTo(cv::Scalar(0, 0, 0));
        for (std::size_t index = 0; index < streams.size(); ++index) {
            auto &stream = streams[index];
            // Wrap the ZED CPU buffer as an OpenCV view (no copy), then
            // downscale into the stream's grid cell (row-major order).
            cv::Mat source_view(
                static_cast<int>(stream.current_frame.getHeight()),
                static_cast<int>(stream.current_frame.getWidth()),
                CV_8UC3,
                stream.current_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
                stream.current_frame.getStepBytes(sl::MEM::CPU));
            cv::resize(source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0, cv::INTER_AREA);
            const int row = static_cast<int>(index / 2);
            const int col = static_cast<int>(index % 2);
            const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height};
            resized_tiles[index].copyTo(composite(roi));
        }
        draw_timestamp_overlay(composite, target_timestamp_ns);
        // PTS is relative to the start of the export window.
        if (auto write = writer.write_bgr_frame(
                composite.data,
                static_cast<std::size_t>(composite.step),
                target_timestamp_ns - effective_start_ts);
            !write) {
            progress.finish(emitted_frames, false);
            close_camera_streams(streams);
            spdlog::error("failed to encode or mux frame: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        progress.update(emitted_frames + 1);
    }

    // ---- Finalize ----------------------------------------------------------
    if (auto flush = writer.flush(); !flush) {
        progress.finish(total_frames_to_emit, false);
        close_camera_streams(streams);
        spdlog::error("failed to finalize MP4 output: {}", flush.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    progress.finish(total_frames_to_emit, true);
    close_camera_streams(streams);
    spdlog::info(
        "converted {} synced frames to '{}' using codec={} hardware={}",
        total_frames_to_emit,
        output_path.string(),
        cvmmap_streamer::zed_tools::codec_name(*codec),
        writer.using_hardware());
    return exit_code(ToolExitCode::Success);
}
+785
View File
@@ -0,0 +1,785 @@
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <spdlog/spdlog.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>
#include <libswscale/swscale.h>
}
#include <chrono>
#include <cmath>
#include <cstdio>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include <unistd.h>
namespace cvmmap_streamer::zed_tools {
namespace {
// One FFmpeg encoder to try: its avcodec name, whether it is a hardware
// (NVENC) encoder, and the pixel format frames must be converted to.
struct EncoderCandidate {
    std::string name{};
    bool using_hardware{false};
    AVPixelFormat pixel_format{AV_PIX_FMT_NONE};
};
// The encoder configuration that was actually applied, kept alongside the
// originally requested preset/tune names for diagnostics.
struct ResolvedEncoderSettings {
    std::string requested_preset{};                  // preset name as requested
    std::string requested_tune{};                    // tune name as requested
    std::string mapped_preset{};                     // encoder-specific preset token (e.g. "p4", "medium")
    std::optional<std::string> mapped_tune{};        // encoder-specific tune token, when one applies
    std::optional<std::string> rate_control_mode{};  // set to "vbr" for NVENC; unset for software encoders
    std::string quality_key{};                       // "cq" (NVENC) or "crf" (x264/x265)
    int quality_value{kDefaultQuality};
    std::uint32_t gop{kDefaultGopSize};
    std::uint32_t b_frames{kDefaultBFrames};
};
[[nodiscard]]
std::string av_error_string(const int error_code) {
char buffer[AV_ERROR_MAX_STRING_SIZE]{};
av_strerror(error_code, buffer, sizeof(buffer));
return std::string(buffer);
}
// Map the tool's codec enum onto the corresponding FFmpeg codec id.
[[nodiscard]]
AVCodecID codec_id(const CodecType codec) {
    if (codec == CodecType::H265) {
        return AV_CODEC_ID_HEVC;
    }
    return AV_CODEC_ID_H264;
}
// Express fps as an AVRational with millihertz precision, falling back to
// 30/1 for non-positive (or NaN) inputs and degenerate rounding results.
[[nodiscard]]
AVRational frame_rate_rational(const float fps) {
    constexpr AVRational kFallback{30, 1};
    // Negated comparison also rejects NaN.
    if (!(fps > 0.0f)) {
        return kFallback;
    }
    const auto millihertz = static_cast<int>(std::llround(static_cast<double>(fps) * 1000.0));
    if (millihertz <= 0) {
        return kFallback;
    }
    return AVRational{millihertz, 1000};
}
// Format a duration in seconds as "MM:SS", or "HH:MM:SS" once it reaches an
// hour. Negative inputs clamp to zero; fractions round to the nearest second.
[[nodiscard]]
std::string format_duration(const double seconds_raw) {
    long long total = 0;
    if (seconds_raw > 0.0) {
        total = static_cast<long long>(std::llround(seconds_raw));
    }
    const long long hours = total / 3600;
    const long long minutes = (total / 60) % 60;
    const long long secs = total % 60;
    char text[32] = {};
    if (hours > 0) {
        std::snprintf(text, sizeof(text), "%02lld:%02lld:%02lld", hours, minutes, secs);
    } else {
        std::snprintf(text, sizeof(text), "%02lld:%02lld", minutes, secs);
    }
    return std::string{text};
}
[[nodiscard]]
std::vector<EncoderCandidate> encoder_candidates(const CodecType codec, const EncoderDeviceType device) {
const std::string hardware_name = codec == CodecType::H265 ? "hevc_nvenc" : "h264_nvenc";
const std::string software_name = codec == CodecType::H265 ? "libx265" : "libx264";
switch (device) {
case EncoderDeviceType::Auto:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
case EncoderDeviceType::Nvidia:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
};
case EncoderDeviceType::Software:
return {
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
}
return {};
}
// Map the abstract preset onto the token the selected encoder understands:
// NVENC uses p1/p4/p7, the software encoders use x264/x265 preset names.
[[nodiscard]]
std::string mapped_preset_value(const EncoderCandidate &candidate, const PresetKind preset) {
    if (candidate.using_hardware) {
        switch (preset) {
        case PresetKind::Fast: return "p1";
        case PresetKind::Balanced: return "p4";
        case PresetKind::Quality: return "p7";
        }
    }
    switch (preset) {
    case PresetKind::Fast: return "veryfast";
    case PresetKind::Balanced: return "medium";
    case PresetKind::Quality: return "slow";
    }
    // Unreachable for valid enum values; keeps control flow well-defined.
    return "veryfast";
}
// Map the abstract tune onto the encoder's tune token. NVENC always has a
// mapping (ull/hq); among the software encoders only libx264 takes a tune,
// and only for low-latency ("zerolatency").
[[nodiscard]]
std::optional<std::string> mapped_tune_value(const EncoderCandidate &candidate, const TuneKind tune) {
    const bool low_latency = tune == TuneKind::LowLatency;
    if (candidate.using_hardware) {
        return std::optional<std::string>{low_latency ? "ull" : "hq"};
    }
    if (low_latency && candidate.name == "libx264") {
        return std::optional<std::string>{"zerolatency"};
    }
    return std::nullopt;
}
// Extra libx265-only parameter string; other encoders get no value.
// Headers are always repeated; low-latency tuning also disables scene-cut.
[[nodiscard]]
std::optional<std::string> x265_params_value(const EncoderCandidate &candidate, const TuneKind tune) {
    if (candidate.name != "libx265") {
        return std::nullopt;
    }
    std::string params{"repeat-headers=1"};
    if (tune == TuneKind::LowLatency) {
        params += ":scenecut=0";
    }
    return params;
}
// Set a string-valued private option on the encoder context, translating an
// AVOptions failure into a descriptive error message.
[[nodiscard]]
std::expected<void, std::string> set_string_option(AVCodecContext *context, const char *key, const std::string &value) {
    const auto status = av_opt_set(context->priv_data, key, value.c_str(), 0);
    if (status >= 0) {
        return {};
    }
    return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + value + "': " + av_error_string(status));
}
// Set an integer-valued private option on the encoder context, translating
// an AVOptions failure into a descriptive error message.
[[nodiscard]]
std::expected<void, std::string> set_int_option(AVCodecContext *context, const char *key, const std::int64_t value) {
    const auto status = av_opt_set_int(context->priv_data, key, value, 0);
    if (status >= 0) {
        return {};
    }
    return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + std::to_string(value) + "': " + av_error_string(status));
}
// Fill `context` with stream geometry and encoder-private options for the
// chosen candidate, returning the settings that were actually applied.
//
// Timestamps are fed to the encoder in nanoseconds (time_base =
// 1/kNanosPerSecond). Hardware (NVENC) candidates use rc=vbr with a cq
// quality target; software candidates use crf. Low-latency tuning disables
// lookahead (NVENC) / scene-cut (x265) on top of the mapped tune token.
[[nodiscard]]
std::expected<ResolvedEncoderSettings, std::string> configure_codec_context(
    AVCodecContext *context,
    const EncoderCandidate &candidate,
    const CodecType codec,
    const std::uint32_t width,
    const std::uint32_t height,
    const AVRational framerate,
    const EncodeTuning &tuning) {
    context->codec_type = AVMEDIA_TYPE_VIDEO;
    context->codec_id = codec_id(codec);
    context->width = static_cast<int>(width);
    context->height = static_cast<int>(height);
    context->pix_fmt = candidate.pixel_format;
    // Emit codec parameters as global extradata (needed by MP4-style muxers).
    context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    context->time_base = AVRational{1, static_cast<int>(kNanosPerSecond)};
    context->framerate = framerate;
    context->gop_size = static_cast<int>(tuning.gop);
    context->max_b_frames = static_cast<int>(tuning.b_frames);
    context->thread_count = 1;
    ResolvedEncoderSettings resolved{
        .requested_preset = std::string(preset_name(tuning.preset)),
        .requested_tune = std::string(tune_name(tuning.tune)),
        .mapped_preset = mapped_preset_value(candidate, tuning.preset),
        .mapped_tune = mapped_tune_value(candidate, tuning.tune),
        .quality_value = tuning.quality,
        .gop = tuning.gop,
        .b_frames = tuning.b_frames,
    };
    if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) {
        return std::unexpected(set.error());
    }
    if (resolved.mapped_tune) {
        if (auto set = set_string_option(context, "tune", *resolved.mapped_tune); !set) {
            return std::unexpected(set.error());
        }
    }
    if (candidate.using_hardware) {
        // NVENC: VBR rate control with a constant-quality (cq) target.
        resolved.rate_control_mode = "vbr";
        resolved.quality_key = "cq";
        if (auto set = set_string_option(context, "rc", *resolved.rate_control_mode); !set) {
            return std::unexpected(set.error());
        }
        if (auto set = set_int_option(context, "cq", resolved.quality_value); !set) {
            return std::unexpected(set.error());
        }
        if (tuning.tune == TuneKind::LowLatency) {
            if (auto set = set_string_option(context, "zerolatency", "1"); !set) {
                return std::unexpected(set.error());
            }
            if (auto set = set_string_option(context, "rc-lookahead", "0"); !set) {
                return std::unexpected(set.error());
            }
        }
    } else {
        // x264/x265: CRF quality; x265 additionally takes an explicit
        // parameter string (see x265_params_value).
        resolved.quality_key = "crf";
        if (auto set = set_int_option(context, "crf", resolved.quality_value); !set) {
            return std::unexpected(set.error());
        }
        if (const auto x265_params = x265_params_value(candidate, tuning.tune); x265_params) {
            if (auto set = set_string_option(context, "x265-params", *x265_params); !set) {
                return std::unexpected(set.error());
            }
        }
    }
    if (auto set = set_int_option(context, "forced-idr", 1); !set) {
        return std::unexpected(set.error());
    }
    return resolved;
}
// Result of open_encoder: an opened codec context (ownership passes to the
// caller, who must eventually avcodec_free_context it) plus the candidate
// that won and the settings that were applied.
struct OpenedEncoder {
    AVCodecContext *context{nullptr};
    EncoderCandidate candidate{};
    ResolvedEncoderSettings resolved{};
};
// Try each encoder candidate in priority order until one opens.
//
// In Auto mode an unavailable or failing candidate logs a warning and the
// loop continues to the next one; for an explicit device selection the
// first failure is returned immediately. The last failure message is kept
// so the final error is specific rather than generic.
[[nodiscard]]
std::expected<OpenedEncoder, std::string> open_encoder(
    const CodecType codec,
    const EncoderDeviceType device,
    const std::uint32_t width,
    const std::uint32_t height,
    const AVRational framerate,
    const EncodeTuning &tuning) {
    std::string last_error{};
    for (const auto &candidate : encoder_candidates(codec, device)) {
        const auto *encoder = avcodec_find_encoder_by_name(candidate.name.c_str());
        if (encoder == nullptr) {
            last_error = "FFmpeg encoder '" + candidate.name + "' is unavailable";
            if (device == EncoderDeviceType::Auto) {
                spdlog::warn(
                    "encoder '{}' unavailable for codec={} in auto mode, trying next candidate",
                    candidate.name,
                    codec_name(codec));
                continue;
            }
            return std::unexpected(last_error);
        }
        auto *context = avcodec_alloc_context3(encoder);
        if (context == nullptr) {
            return std::unexpected("failed to allocate FFmpeg encoder context");
        }
        auto resolved = configure_codec_context(context, candidate, codec, width, height, framerate, tuning);
        if (!resolved) {
            // Configuration errors are not retried — they indicate bad inputs,
            // not an unavailable device.
            avcodec_free_context(&context);
            return std::unexpected(resolved.error());
        }
        const auto open_result = avcodec_open2(context, encoder, nullptr);
        if (open_result < 0) {
            last_error = "failed to open FFmpeg encoder '" + candidate.name + "': " + av_error_string(open_result);
            avcodec_free_context(&context);
            if (device == EncoderDeviceType::Auto) {
                spdlog::warn(
                    "encoder '{}' failed to open in auto mode: {}. trying software fallback",
                    candidate.name,
                    av_error_string(open_result));
                continue;
            }
            return std::unexpected(last_error);
        }
        // Success: hand the opened context to the caller.
        return OpenedEncoder{
            .context = context,
            .candidate = candidate,
            .resolved = std::move(*resolved),
        };
    }
    if (last_error.empty()) {
        last_error = "no usable FFmpeg encoder candidates were configured";
    }
    return std::unexpected(last_error);
}
} // namespace
// Terminal progress renderer behind ProgressBar. Draws a single carriage-
// return-refreshed status line on stderr; inert when stderr is not a TTY or
// the expected frame count is zero.
struct ProgressBar::Impl {
  using Clock = std::chrono::steady_clock;

  explicit Impl(const std::uint64_t total_frames_arg)
      : total_frames(total_frames_arg),
        enabled(::isatty(STDERR_FILENO) == 1),
        started_at(Clock::now()),
        last_render_at(started_at) {}

  // Redraw the status line. Unless `force` is set, redraws after the first
  // one are throttled to at most one every 125 ms.
  void render(const std::uint64_t completed_frames, const bool force) {
    if (!enabled || total_frames == 0) {
      return;
    }
    const auto now = Clock::now();
    const bool throttled = rendered && now - last_render_at < std::chrono::milliseconds(125);
    if (throttled && !force) {
      return;
    }
    last_render_at = now;
    rendered = true;
    // Clamp so a caller reporting more frames than expected cannot push the
    // bar past 100%.
    const auto done = std::min(completed_frames, total_frames);
    const double ratio = static_cast<double>(done) / static_cast<double>(total_frames);
    const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
    std::string bar(filled, '#');
    bar.append(24 - filled, '-');
    const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
    const double fps = elapsed_seconds > 0.0 ? static_cast<double>(done) / elapsed_seconds : 0.0;
    const double eta_seconds = fps > 0.0 ? static_cast<double>(total_frames - done) / fps : 0.0;
    char line[256]{};
    std::snprintf(
        line,
        sizeof(line),
        "\r[%s] %6.2f%% %llu/%llu | %5.1f fps | %s elapsed | %s ETA\x1b[K",
        bar.c_str(),
        ratio * 100.0,
        static_cast<unsigned long long>(done),
        static_cast<unsigned long long>(total_frames),
        fps,
        format_duration(elapsed_seconds).c_str(),
        format_duration(eta_seconds).c_str());
    std::fprintf(stderr, "%s", line);
    std::fflush(stderr);
  }

  std::uint64_t total_frames{0};
  bool enabled{false};        // true only when stderr is an interactive terminal
  bool rendered{false};       // at least one line has been drawn
  Clock::time_point started_at{};
  Clock::time_point last_render_at{};
};
// Pimpl backend for Mp4Writer. Owns every FFmpeg object involved in encoding
// and muxing (encoder context, format/muxer context, swscale converter,
// scratch frame and packet) and releases them all in close(); the destructor
// guarantees cleanup even on early-error paths.
struct Mp4Writer::Impl {
  // Initialize the encoder, the BGR24->encoder-format scaler, and the MP4
  // muxer for a stream of width x height at `fps`. Any previously-open output
  // is torn down first. On an error return, partially-built state remains in
  // the members and is reclaimed by the next close() / the destructor.
  [[nodiscard]]
  std::expected<void, std::string> open(
      const std::filesystem::path &output_path,
      const CodecType codec_arg,
      const EncoderDeviceType encoder_device,
      const std::uint32_t width,
      const std::uint32_t height,
      const float fps,
      const EncodeTuning &tuning) {
    close();
    codec = codec_arg;
    frame_rate = frame_rate_rational(fps);
    auto encoder = open_encoder(codec, encoder_device, width, height, frame_rate, tuning);
    if (!encoder) {
      return std::unexpected(encoder.error());
    }
    encoder_context = encoder->context;
    encoder_name = encoder->candidate.name;
    using_hardware = encoder->candidate.using_hardware;
    encoder_pixel_format = encoder->candidate.pixel_format;
    resolved_settings = std::move(encoder->resolved);
    // Same resolution in and out — the scaler only converts pixel format.
    scaler = sws_getCachedContext(
        nullptr,
        static_cast<int>(width),
        static_cast<int>(height),
        AV_PIX_FMT_BGR24,
        static_cast<int>(width),
        static_cast<int>(height),
        encoder_pixel_format,
        SWS_BILINEAR,
        nullptr,
        nullptr,
        nullptr);
    if (scaler == nullptr) {
      return std::unexpected("failed to create swscale conversion context");
    }
    frame = av_frame_alloc();
    if (frame == nullptr) {
      return std::unexpected("failed to allocate FFmpeg frame");
    }
    frame->format = encoder_pixel_format;
    frame->width = encoder_context->width;
    frame->height = encoder_context->height;
    // 32-byte alignment for SIMD-friendly plane strides.
    const auto frame_buffer_result = av_frame_get_buffer(frame, 32);
    if (frame_buffer_result < 0) {
      return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer_result));
    }
    packet = av_packet_alloc();
    if (packet == nullptr) {
      return std::unexpected("failed to allocate FFmpeg packet");
    }
    const auto alloc_result = avformat_alloc_output_context2(
        &format_context,
        nullptr,
        "mp4",
        output_path.string().c_str());
    if (alloc_result < 0 || format_context == nullptr) {
      return std::unexpected("failed to allocate MP4 output context: " + av_error_string(alloc_result));
    }
    video_stream = avformat_new_stream(format_context, nullptr);
    if (video_stream == nullptr) {
      return std::unexpected("failed to allocate MP4 video stream");
    }
    video_stream->time_base = encoder_context->time_base;
    video_stream->avg_frame_rate = frame_rate;
    const auto params_result = avcodec_parameters_from_context(video_stream->codecpar, encoder_context);
    if (params_result < 0) {
      return std::unexpected("failed to copy encoder parameters into MP4 stream: " + av_error_string(params_result));
    }
    // mp4 is a file-backed muxer, so AVFMT_NOFILE is normally unset and we
    // open the output I/O context ourselves.
    if ((format_context->oformat->flags & AVFMT_NOFILE) == 0) {
      const auto open_result = avio_open2(
          &format_context->pb,
          output_path.string().c_str(),
          AVIO_FLAG_WRITE,
          nullptr,
          nullptr);
      if (open_result < 0) {
        return std::unexpected("failed to open output MP4 '" + output_path.string() + "': " + av_error_string(open_result));
      }
    }
    // +faststart relocates the moov atom to the front for streamable playback.
    AVDictionary *muxer_options = nullptr;
    av_dict_set(&muxer_options, "movflags", "+faststart", 0);
    const auto header_result = avformat_write_header(format_context, &muxer_options);
    av_dict_free(&muxer_options);
    if (header_result < 0) {
      return std::unexpected("failed to write MP4 header: " + av_error_string(header_result));
    }
    // One-line machine-greppable summary of the negotiated encoder setup.
    spdlog::info(
        "ZED_SVO_MP4_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} requested_preset={} requested_tune={} mapped_preset={} mapped_tune={} rc={} {}={} gop={} b_frames={} output={}",
        codec_name(codec),
        encoder_name,
        using_hardware,
        width,
        height,
        frame_rate.num,
        frame_rate.den,
        resolved_settings.requested_preset,
        resolved_settings.requested_tune,
        resolved_settings.mapped_preset,
        resolved_settings.mapped_tune.value_or("none"),
        resolved_settings.rate_control_mode.value_or("auto"),
        resolved_settings.quality_key,
        resolved_settings.quality_value,
        resolved_settings.gop,
        resolved_settings.b_frames,
        output_path.string());
    return {};
  }
  // Convert one BGR24 frame to the encoder pixel format, submit it, and mux
  // any packets the encoder emits. `relative_timestamp_ns` is used directly
  // as the frame pts — assumes the encoder time_base is nanoseconds, as set
  // up in configure_codec_context — TODO confirm.
  [[nodiscard]]
  std::expected<void, std::string> write_bgr_frame(
      const std::uint8_t *data,
      const std::size_t row_stride_bytes,
      const std::uint64_t relative_timestamp_ns) {
    if (encoder_context == nullptr || frame == nullptr || scaler == nullptr || packet == nullptr || video_stream == nullptr) {
      return std::unexpected("MP4 writer is not initialized");
    }
    // The frame buffer may still be referenced by the encoder; ensure we own
    // a writable copy before scaling into it.
    const auto writable_result = av_frame_make_writable(frame);
    if (writable_result < 0) {
      return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(writable_result));
    }
    // BGR24 is a single packed plane; remaining plane slots stay null.
    const std::uint8_t *source_planes[4]{data, nullptr, nullptr, nullptr};
    const int source_strides[4]{static_cast<int>(row_stride_bytes), 0, 0, 0};
    sws_scale(
        scaler,
        source_planes,
        source_strides,
        0,
        encoder_context->height,
        frame->data,
        frame->linesize);
    frame->pts = static_cast<std::int64_t>(relative_timestamp_ns);
    const auto send_result = avcodec_send_frame(encoder_context, frame);
    if (send_result < 0) {
      return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result));
    }
    return drain_packets();
  }
  // Flush the encoder's delayed frames, mux them, and finalize the MP4
  // trailer. Safe to call on a writer that was never opened.
  [[nodiscard]]
  std::expected<void, std::string> flush() {
    if (encoder_context == nullptr) {
      return {};
    }
    // Sending a null frame switches the encoder into drain mode.
    const auto flush_result = avcodec_send_frame(encoder_context, nullptr);
    if (flush_result < 0 && flush_result != AVERROR_EOF) {
      return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result));
    }
    auto drained = drain_packets();
    if (!drained) {
      return drained;
    }
    return close_output();
  }
  // Pull every pending packet from the encoder and interleave it into the
  // muxer, rescaling timestamps from encoder to stream time base.
  [[nodiscard]]
  std::expected<void, std::string> drain_packets() {
    while (true) {
      const auto receive_result = avcodec_receive_packet(encoder_context, packet);
      if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) {
        break;  // encoder needs more input, or drain is complete
      }
      if (receive_result < 0) {
        return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result));
      }
      packet->stream_index = video_stream->index;
      av_packet_rescale_ts(packet, encoder_context->time_base, video_stream->time_base);
      const auto write_result = av_interleaved_write_frame(format_context, packet);
      av_packet_unref(packet);
      if (write_result < 0) {
        return std::unexpected("failed to write MP4 packet: " + av_error_string(write_result));
      }
    }
    return {};
  }
  // Write the MP4 trailer exactly once; idempotent and a no-op when no
  // output was ever opened.
  [[nodiscard]]
  std::expected<void, std::string> close_output() {
    if (format_context == nullptr || trailer_written) {
      return {};
    }
    const auto trailer_result = av_write_trailer(format_context);
    if (trailer_result < 0) {
      return std::unexpected("failed to write MP4 trailer: " + av_error_string(trailer_result));
    }
    trailer_written = true;
    return {};
  }
  // Release every FFmpeg resource and reset the writer to its pristine
  // state. A best-effort trailer write happens first; its error is ignored
  // because close() must not fail (it runs from the destructor).
  void close() {
    (void)close_output();
    if (packet != nullptr) {
      av_packet_free(&packet);
    }
    if (frame != nullptr) {
      av_frame_free(&frame);
    }
    if (encoder_context != nullptr) {
      avcodec_free_context(&encoder_context);
    }
    if (scaler != nullptr) {
      sws_freeContext(scaler);
      scaler = nullptr;
    }
    if (format_context != nullptr) {
      if ((format_context->oformat->flags & AVFMT_NOFILE) == 0 && format_context->pb != nullptr) {
        avio_closep(&format_context->pb);
      }
      avformat_free_context(format_context);
      format_context = nullptr;
    }
    // video_stream is owned by format_context; just drop the pointer.
    video_stream = nullptr;
    encoder_name.clear();
    using_hardware = false;
    trailer_written = false;
    resolved_settings = ResolvedEncoderSettings{};
  }
  ~Impl() {
    close();
  }
  CodecType codec{CodecType::H265};
  AVCodecContext *encoder_context{nullptr};  // owned
  AVFormatContext *format_context{nullptr};  // owned; also owns video_stream
  AVStream *video_stream{nullptr};           // non-owning view into format_context
  AVFrame *frame{nullptr};                   // owned scratch frame for conversion
  AVPacket *packet{nullptr};                 // owned scratch packet for draining
  SwsContext *scaler{nullptr};               // owned BGR24->encoder-format converter
  AVPixelFormat encoder_pixel_format{AV_PIX_FMT_NONE};
  AVRational frame_rate{30, 1};
  std::string encoder_name{};
  ResolvedEncoderSettings resolved_settings{};
  bool using_hardware{false};
  bool trailer_written{false};  // guards against double av_write_trailer
};
// Map a CLI codec token onto CodecType; unknown tokens yield a usage error.
std::expected<CodecType, std::string> parse_codec(const std::string_view raw) {
  constexpr std::pair<std::string_view, CodecType> kCodecs[]{
      {"h264", CodecType::H264},
      {"h265", CodecType::H265},
  };
  for (const auto &[token, value] : kCodecs) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
}
// Map a CLI encoder-device token onto EncoderDeviceType.
std::expected<EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
  constexpr std::pair<std::string_view, EncoderDeviceType> kDevices[]{
      {"auto", EncoderDeviceType::Auto},
      {"nvidia", EncoderDeviceType::Nvidia},
      {"software", EncoderDeviceType::Software},
  };
  for (const auto &[token, value] : kDevices) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
}
// Map a CLI preset token onto PresetKind.
std::expected<PresetKind, std::string> parse_preset(const std::string_view raw) {
  constexpr std::pair<std::string_view, PresetKind> kPresets[]{
      {"fast", PresetKind::Fast},
      {"balanced", PresetKind::Balanced},
      {"quality", PresetKind::Quality},
  };
  for (const auto &[token, value] : kPresets) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid preset: '" + std::string(raw) + "' (expected: fast|balanced|quality)");
}
// Map a CLI tune token onto TuneKind.
std::expected<TuneKind, std::string> parse_tune(const std::string_view raw) {
  constexpr std::pair<std::string_view, TuneKind> kTunes[]{
      {"low-latency", TuneKind::LowLatency},
      {"balanced", TuneKind::Balanced},
  };
  for (const auto &[token, value] : kTunes) {
    if (raw == token) {
      return value;
    }
  }
  return std::unexpected("invalid tune: '" + std::string(raw) + "' (expected: low-latency|balanced)");
}
// Human-readable codec token as used in logs and CLI help.
std::string_view codec_name(const CodecType codec) {
  if (codec == CodecType::H265) {
    return "h265";
  }
  return "h264";
}
// Human-readable preset token; out-of-range enum values fall back to "fast".
std::string_view preset_name(const PresetKind preset) {
  if (preset == PresetKind::Balanced) {
    return "balanced";
  }
  if (preset == PresetKind::Quality) {
    return "quality";
  }
  // PresetKind::Fast, plus the defensive default.
  return "fast";
}
// Human-readable tune token; out-of-range enum values fall back to
// "low-latency".
std::string_view tune_name(const TuneKind tune) {
  if (tune == TuneKind::Balanced) {
    return "balanced";
  }
  // TuneKind::LowLatency, plus the defensive default.
  return "low-latency";
}
// Nanoseconds between frames at the given rate. Non-positive or NaN fps
// falls back to the ~30 fps default period (33,333,333 ns).
std::uint64_t frame_period_ns(const float fps) {
  if (fps > 0.0f) {
    const double period = 1'000'000'000.0 / static_cast<double>(fps);
    return static_cast<std::uint64_t>(std::llround(period));
  }
  return 33'333'333ull;
}
// Default MP4 destination: same directory and stem as the input, with the
// extension swapped for ".mp4".
std::filesystem::path derive_output_path(const std::filesystem::path &input_path) {
  std::filesystem::path result{input_path};
  result.replace_extension(".mp4");
  return result;
}
// Construct a progress bar that expects `total_frames` updates; whether it
// actually renders is decided inside Impl (stderr must be a TTY).
ProgressBar::ProgressBar(const std::uint64_t total_frames)
    : impl_(std::make_unique<Impl>(total_frames)) {}
// Out-of-line so ~Impl is instantiated where Impl is a complete type.
ProgressBar::~ProgressBar() = default;
// Throttled redraw; cheap enough to call once per processed frame.
void ProgressBar::update(const std::uint64_t completed_frames) {
  impl_->render(completed_frames, false);
}
// Force a final render and terminate the status line — newline on success,
// a " [failed]" marker otherwise. Silent when the bar never rendered, when
// stderr is not a TTY, or on a moved-from object (null impl_).
void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) {
  if (impl_ == nullptr) {
    return;
  }
  if (!impl_->enabled) {
    return;
  }
  impl_->render(completed_frames, true);
  if (impl_->rendered) {
    const char *tail = success ? "\n" : " [failed]\n";
    std::fprintf(stderr, "%s", tail);
    std::fflush(stderr);
  }
}
Mp4Writer::Mp4Writer()
    : impl_(std::make_unique<Impl>()) {}
// Move-only: a moved-from writer holds a null impl_ (see using_hardware()).
Mp4Writer::Mp4Writer(Mp4Writer &&) noexcept = default;
Mp4Writer &Mp4Writer::operator=(Mp4Writer &&) noexcept = default;
Mp4Writer::~Mp4Writer() = default;
// Thin forwarding wrappers — see Mp4Writer::Impl for the actual semantics.
std::expected<void, std::string> Mp4Writer::open(
    const std::filesystem::path &output_path,
    const CodecType codec,
    const EncoderDeviceType encoder_device,
    const std::uint32_t width,
    const std::uint32_t height,
    const float fps,
    const EncodeTuning &tuning) {
  return impl_->open(output_path, codec, encoder_device, width, height, fps, tuning);
}
std::expected<void, std::string> Mp4Writer::write_bgr_frame(
    const std::uint8_t *data,
    const std::size_t row_stride_bytes,
    const std::uint64_t relative_timestamp_ns) {
  return impl_->write_bgr_frame(data, row_stride_bytes, relative_timestamp_ns);
}
std::expected<void, std::string> Mp4Writer::flush() {
  return impl_->flush();
}
// False for a moved-from writer as well as for software encoders.
bool Mp4Writer::using_hardware() const {
  return impl_ != nullptr && impl_->using_hardware;
}
} // namespace cvmmap_streamer::zed_tools
+319
View File
@@ -0,0 +1,319 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <sl/Camera.hpp>
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <cstdint>
#include <expected>
#include <filesystem>
#include <optional>
#include <string>
#include <utility>
namespace {
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::derive_output_path;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;
// Process exit statuses reported by this tool: 0 on success, 2 for bad CLI
// usage, 3 for runtime failures (SDK, encoder, or I/O errors).
enum class ToolExitCode : int {
  Success = 0,
  UsageError = 2,
  RuntimeError = 3,
};
// Raw values collected from the command line before validation/parsing into
// typed settings.
struct CliOptions {
  std::string input_path{};     // required SVO/SVO2 source file
  std::string output_path{};    // empty -> derived from input_path with .mp4
  std::string codec{"h265"};
  std::string encoder_device{"auto"};
  std::string preset{"fast"};
  std::string tune{"low-latency"};
  int quality{cvmmap_streamer::zed_tools::kDefaultQuality};
  std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};
  std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames};
  std::uint32_t start_frame{0};  // first SVO frame to export (inclusive)
  std::uint32_t end_frame{0};    // last SVO frame (inclusive); only valid if has_end_frame
  bool has_end_frame{false};     // true when --end-frame was passed explicitly
};
// Convert a ToolExitCode to the int returned from main().
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
  return static_cast<int>(code);
}
// Copy an sl::String into std::string; a null c_str() maps to "".
[[nodiscard]]
std::string zed_string(const sl::String &value) {
  const char *raw = value.c_str();
  if (raw == nullptr) {
    return {};
  }
  return std::string{raw};
}
// Render a ZED SDK error code as a printable string for log messages.
[[nodiscard]]
std::string zed_status_string(const sl::ERROR_CODE code) {
  return zed_string(sl::toString(code));
}
// Sanity-check that a retrieved image is something the BGR encoder path can
// consume: packed 3-channel 8-bit, non-empty, with a valid CPU buffer.
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
  const std::string name{label};
  if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
    return std::unexpected(name + " must be U8_C3");
  }
  const bool empty_dims = mat.getWidth() == 0 || mat.getHeight() == 0;
  if (empty_dims) {
    return std::unexpected(name + " dimensions must be non-zero");
  }
  if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
    return std::unexpected(name + " CPU buffer is null");
  }
  return {};
}
} // namespace
// Entry point: parse CLI options, open the SVO via the ZED SDK, and encode
// the left view of each frame in the requested range into an MP4 file.
// Returns ToolExitCode values (0 success, 2 usage error, 3 runtime error).
int main(int argc, char **argv) {
  CliOptions options{};
  // --- CLI definition -------------------------------------------------------
  CLI::App app{"zed_svo_to_mp4 - convert ZED SVO/SVO2 playback to MP4"};
  app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required();
  app.add_option("--output", options.output_path, "Output MP4 file (default: input path with .mp4 suffix)");
  app.add_option("--codec", options.codec, "Video codec (h264|h265)")
      ->check(CLI::IsMember({"h264", "h265"}));
  app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
      ->check(CLI::IsMember({"auto", "nvidia", "software"}));
  app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
      ->check(CLI::IsMember({"fast", "balanced", "quality"}));
  app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
      ->check(CLI::IsMember({"low-latency", "balanced"}));
  app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
      ->check(CLI::Range(0, 51));
  app.add_option("--gop", options.gop, "Encoder GOP length in frames")
      ->check(CLI::PositiveNumber);
  app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
      ->check(CLI::NonNegativeNumber);
  app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
      ->check(CLI::NonNegativeNumber);
  // Kept as a pointer so we can later distinguish "not passed" from "0".
  auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
      ->check(CLI::NonNegativeNumber);
  try {
    app.parse(argc, argv);
  } catch (const CLI::ParseError &error) {
    return app.exit(error);
  }
  options.has_end_frame = end_frame_option->count() > 0;
  // --- Option parsing and validation ---------------------------------------
  auto codec = parse_codec(options.codec);
  if (!codec) {
    spdlog::error("{}", codec.error());
    return exit_code(ToolExitCode::UsageError);
  }
  auto encoder_device = parse_encoder_device(options.encoder_device);
  if (!encoder_device) {
    spdlog::error("{}", encoder_device.error());
    return exit_code(ToolExitCode::UsageError);
  }
  auto preset = parse_preset(options.preset);
  if (!preset) {
    spdlog::error("{}", preset.error());
    return exit_code(ToolExitCode::UsageError);
  }
  auto tune = parse_tune(options.tune);
  if (!tune) {
    spdlog::error("{}", tune.error());
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.has_end_frame && options.end_frame < options.start_frame) {
    spdlog::error(
        "invalid frame range: start-frame={} end-frame={}",
        options.start_frame,
        options.end_frame);
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.b_frames > options.gop) {
    spdlog::error(
        "invalid encoder config: b-frames {} must be <= gop {}",
        options.b_frames,
        options.gop);
    return exit_code(ToolExitCode::UsageError);
  }
  // --- Output path ----------------------------------------------------------
  const auto output_path = options.output_path.empty()
      ? derive_output_path(std::filesystem::path{options.input_path})
      : std::filesystem::path{options.output_path};
  if (output_path.empty()) {
    spdlog::error("output path must not be empty");
    return exit_code(ToolExitCode::UsageError);
  }
  if (output_path.has_parent_path()) {
    std::filesystem::create_directories(output_path.parent_path());
  }
  const EncodeTuning tuning{
      .preset = *preset,
      .tune = *tune,
      .quality = options.quality,
      .gop = options.gop,
      .b_frames = options.b_frames,
  };
  // --- SVO playback setup ---------------------------------------------------
  sl::Camera camera{};
  // Shared cleanup for every exit path below.
  auto close_camera = [&]() {
    if (camera.isOpened()) {
      camera.close();
    }
  };
  sl::InitParameters init{};
  init.input.setFromSVOFile(options.input_path.c_str());
  // Decode as fast as possible; depth is disabled since only the left image
  // is exported.
  init.svo_real_time_mode = false;
  init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
  init.coordinate_units = sl::UNIT::METER;
  init.depth_mode = sl::DEPTH_MODE::NONE;
  init.sdk_verbose = false;
  const auto open_status = camera.open(init);
  if (open_status != sl::ERROR_CODE::SUCCESS) {
    spdlog::error(
        "failed to open SVO '{}': {}",
        options.input_path,
        zed_status_string(open_status));
    return exit_code(ToolExitCode::RuntimeError);
  }
  // --- Frame-range validation against the actual recording ------------------
  const auto total_frames = camera.getSVONumberOfFrames();
  if (total_frames <= 0) {
    close_camera();
    spdlog::error("input SVO has no frames");
    return exit_code(ToolExitCode::RuntimeError);
  }
  if (options.start_frame >= static_cast<std::uint32_t>(total_frames)) {
    close_camera();
    spdlog::error(
        "start-frame {} is out of range for {} frames",
        options.start_frame,
        total_frames);
    return exit_code(ToolExitCode::UsageError);
  }
  if (options.has_end_frame && options.end_frame >= static_cast<std::uint32_t>(total_frames)) {
    close_camera();
    spdlog::error(
        "end-frame {} is out of range for {} frames",
        options.end_frame,
        total_frames);
    return exit_code(ToolExitCode::UsageError);
  }
  camera.setSVOPosition(static_cast<int>(options.start_frame));
  const auto camera_info = camera.getCameraInformation();
  const auto &camera_config = camera_info.camera_configuration;
  const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
  const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
  if (width == 0 || height == 0) {
    close_camera();
    spdlog::error("camera resolution reported by the ZED SDK is invalid");
    return exit_code(ToolExitCode::RuntimeError);
  }
  // --- Encoder / muxer setup ------------------------------------------------
  Mp4Writer writer{};
  if (auto open_writer = writer.open(output_path, *codec, *encoder_device, width, height, camera_config.fps, tuning); !open_writer) {
    close_camera();
    spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
    return exit_code(ToolExitCode::RuntimeError);
  }
  // --- Export loop ----------------------------------------------------------
  sl::RuntimeParameters runtime_parameters{};
  sl::Mat left_frame{};
  std::optional<std::uint64_t> first_timestamp_ns{};
  std::optional<std::uint64_t> last_timestamp_ns{};
  std::uint64_t emitted_frames{0};
  const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps);
  const auto last_frame = options.has_end_frame
      ? options.end_frame
      : static_cast<std::uint32_t>(total_frames - 1);
  const auto total_frames_to_emit = static_cast<std::uint64_t>(last_frame - options.start_frame + 1);
  ProgressBar progress{total_frames_to_emit};
  while (options.start_frame + emitted_frames <= last_frame) {
    const auto grab_status = camera.grab(runtime_parameters);
    if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
      // The recording may be shorter than advertised; stop gracefully.
      break;
    }
    if (grab_status != sl::ERROR_CODE::SUCCESS) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status));
      return exit_code(ToolExitCode::RuntimeError);
    }
    const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
    if (image_status != sl::ERROR_CODE::SUCCESS) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status));
      return exit_code(ToolExitCode::RuntimeError);
    }
    if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("{}", valid.error());
      return exit_code(ToolExitCode::RuntimeError);
    }
    // Build a strictly-increasing timestamp sequence: synthesize from the
    // nominal frame period when the SDK reports 0, and bump duplicates or
    // regressions by 1 ns so encoder pts stay monotonic.
    auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
    if (timestamp_ns == 0) {
      timestamp_ns = emitted_frames * nominal_frame_period_ns;
    }
    if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) {
      timestamp_ns = *last_timestamp_ns + 1;
    }
    last_timestamp_ns = timestamp_ns;
    if (!first_timestamp_ns) {
      first_timestamp_ns = timestamp_ns;
    }
    // pts are written relative to the first exported frame.
    const auto relative_timestamp_ns = timestamp_ns - *first_timestamp_ns;
    if (auto write = writer.write_bgr_frame(
            left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
            left_frame.getStepBytes(sl::MEM::CPU),
            relative_timestamp_ns);
        !write) {
      progress.finish(emitted_frames, false);
      close_camera();
      spdlog::error("failed to encode or mux frame: {}", write.error());
      return exit_code(ToolExitCode::RuntimeError);
    }
    emitted_frames += 1;
    progress.update(emitted_frames);
  }
  // --- Finalize -------------------------------------------------------------
  if (auto flush = writer.flush(); !flush) {
    progress.finish(emitted_frames, false);
    close_camera();
    spdlog::error("failed to finalize MP4 output: {}", flush.error());
    return exit_code(ToolExitCode::RuntimeError);
  }
  progress.finish(emitted_frames, true);
  close_camera();
  spdlog::info(
      "converted {} frames from '{}' to '{}' using codec={} hardware={}",
      emitted_frames,
      options.input_path,
      output_path.string(),
      cvmmap_streamer::zed_tools::codec_name(*codec),
      writer.using_hardware());
  return exit_code(ToolExitCode::Success);
}