// NOTE(review): the original file arrived with every `<...>` token stripped
// (all include targets and template-argument lists were missing). They have
// been reconstructed from usage — verify the exact include set against the
// project's build files before merging.
#include <CLI/CLI.hpp>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <sl/Camera.hpp>
#include <spdlog/spdlog.h>

#include "cvmmap_streamer/tools/zed_progress_bar.hpp"
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <memory>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

namespace {

using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;

// The composite is always a 2x2 grid, so exactly four inputs are required.
constexpr std::size_t kExpectedInputCount = 4;

// Process exit codes reported by main(): 2 for CLI misuse, 3 for runtime
// failures (SDK errors, encoder errors, unreadable inputs).
enum class ToolExitCode : int {
    Success = 0,
    UsageError = 2,
    RuntimeError = 3,
};

// Parsed command-line options. Defaults mirror the option help text in main().
struct CliOptions {
    std::vector<std::string> input_paths{};
    std::string segment_dir{};
    std::string output_path{};
    std::string codec{"h265"};
    std::string encoder_device{"auto"};
    std::string preset{"fast"};
    std::string tune{"low-latency"};
    int quality{cvmmap_streamer::zed_tools::kDefaultQuality};
    std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};
    std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames};
    double start_offset_seconds{0.0};
    double duration_seconds{0.0};
    bool has_duration{false};   // set from --duration-seconds count after parse
    double output_fps{0.0};
    bool has_output_fps{false}; // set from --output-fps count after parse
    double tile_scale{0.5};
};

// One input recording: its path on disk plus a short label used in logs and
// error messages ("zed1".."zed4" or "view1".."view4").
struct SourceSpec {
    std::filesystem::path path{};
    std::string label{};
};

// Per-camera decode state. Holds the opened SDK camera plus a two-frame
// buffer (current + look-ahead) used to align each stream to the composite
// output clock.
struct CameraStream {
    SourceSpec source{};
    std::unique_ptr<sl::Camera> camera{};
    sl::RuntimeParameters runtime{};
    sl::Mat current_frame{};
    sl::Mat next_frame{};
    std::uint64_t current_timestamp_ns{0};
    std::uint64_t next_timestamp_ns{0};
    std::uint64_t first_timestamp_ns{0};
    std::uint64_t last_timestamp_ns{0};
    std::uint64_t total_frames{0};
    std::uint64_t nominal_frame_period_ns{0};
    float fps{0.0f};
    std::uint32_t width{0};
    std::uint32_t height{0};
    int sync_position{-1};  // SVO frame index at the synced start timestamp
    bool has_next{false};   // whether next_frame/next_timestamp_ns are valid
};

/// Converts a ToolExitCode to the int returned from main().
[[nodiscard]] constexpr int exit_code(const ToolExitCode code)
{
    return static_cast<int>(code);
}

/// Converts an sl::String to std::string, mapping a null payload to "".
[[nodiscard]] std::string zed_string(const sl::String &value)
{
    return std::string(value.c_str() == nullptr ? "" : value.c_str());
}

/// Human-readable text for a ZED SDK error code.
[[nodiscard]] std::string zed_status_string(const sl::ERROR_CODE code)
{
    return zed_string(sl::toString(code));
}

/// Validates that a retrieved frame is a non-empty 3-channel 8-bit image with
/// an accessible CPU buffer. Returns an error message naming `label` otherwise.
[[nodiscard]] std::expected<void, std::string> validate_u8c3_mat(
    const sl::Mat &mat, const std::string_view label)
{
    if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
        return std::unexpected(std::string(label) + " must be U8_C3");
    }
    if (mat.getWidth() == 0 || mat.getHeight() == 0) {
        return std::unexpected(std::string(label) + " dimensions must be non-zero");
    }
    if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
        return std::unexpected(std::string(label) + " CPU buffer is null");
    }
    return {};
}

/// Scans `segment_dir` for files matching *_zed[1-4].svo / *.svo2 and returns
/// them ordered by camera index. Fails unless exactly four inputs are found.
[[nodiscard]] std::expected<std::vector<SourceSpec>, std::string>
discover_segment_inputs(const std::filesystem::path &segment_dir)
{
    if (!std::filesystem::is_directory(segment_dir)) {
        return std::unexpected("segment directory does not exist: " + segment_dir.string());
    }
    const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase};
    std::vector<std::pair<int, std::filesystem::path>> ordered_paths{};
    for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) {
        if (!entry.is_regular_file()) {
            continue;
        }
        std::smatch match{};
        const auto filename = entry.path().filename().string();
        if (!std::regex_match(filename, match, pattern)) {
            continue;
        }
        ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path());
    }
    // Sort by camera index so the grid layout is deterministic (row-major).
    std::sort(
        ordered_paths.begin(), ordered_paths.end(),
        [](const auto &left, const auto &right) { return left.first < right.first; });
    if (ordered_paths.size() != kExpectedInputCount) {
        return std::unexpected(
            "expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " +
            std::to_string(ordered_paths.size()));
    }
    std::vector<SourceSpec> sources{};
    sources.reserve(ordered_paths.size());
    for (const auto &[camera_index, path] : ordered_paths) {
        sources.push_back(SourceSpec{
            .path = path,
            .label = "zed" + std::to_string(camera_index),
        });
    }
    return sources;
}
/// Resolves the four input recordings either from --segment-dir (discovered
/// by filename pattern) or from four explicit --input paths in row-major order.
[[nodiscard]] std::expected<std::vector<SourceSpec>, std::string>
resolve_sources(const CliOptions &options)
{
    if (!options.segment_dir.empty()) {
        return discover_segment_inputs(std::filesystem::path{options.segment_dir});
    }
    if (options.input_paths.size() != kExpectedInputCount) {
        return std::unexpected("repeat --input exactly 4 times");
    }
    std::vector<SourceSpec> sources{};
    sources.reserve(options.input_paths.size());
    for (std::size_t index = 0; index < options.input_paths.size(); ++index) {
        const auto path = std::filesystem::path{options.input_paths[index]};
        if (!std::filesystem::is_regular_file(path)) {
            return std::unexpected("input file does not exist: " + path.string());
        }
        sources.push_back(SourceSpec{
            .path = path,
            .label = "view" + std::to_string(index + 1),
        });
    }
    return sources;
}

/// Chooses the output MP4 path: explicit --output wins, then
/// "<segment_dir>/<segment_name>_grid.mp4", then "<first input>_grid.mp4".
[[nodiscard]] std::filesystem::path derive_grid_output_path(
    const CliOptions &options, const std::vector<SourceSpec> &sources)
{
    if (!options.output_path.empty()) {
        return std::filesystem::path{options.output_path};
    }
    if (!options.segment_dir.empty()) {
        const auto segment_dir = std::filesystem::path{options.segment_dir};
        return segment_dir / (segment_dir.filename().string() + "_grid.mp4");
    }
    auto output_path = sources.front().path;
    output_path.replace_extension("");
    output_path += "_grid.mp4";
    return output_path;
}

/// Formats a nanosecond Unix timestamp as "seconds.mmm" with the millisecond
/// part zero-padded to three digits.
[[nodiscard]] std::string format_unix_timestamp(const std::uint64_t timestamp_ns)
{
    const auto seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond;
    const auto milliseconds =
        (timestamp_ns % cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull;
    return std::to_string(seconds) + "." +
           (milliseconds < 100 ? (milliseconds < 10 ? "00" : "0") : "") +
           std::to_string(milliseconds);
}

/// Draws the formatted timestamp in the top-left corner of the composite:
/// white text on a filled black rectangle for legibility over any content.
void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns)
{
    const auto text = format_unix_timestamp(timestamp_ns);
    int baseline = 0;
    const auto font_face = cv::FONT_HERSHEY_SIMPLEX;
    const double font_scale = 0.8;
    const int thickness = 2;
    const auto text_size = cv::getTextSize(text, font_face, font_scale, thickness, &baseline);
    const cv::Point origin{16, 16 + text_size.height};
    const cv::Rect background{
        8,
        8,
        text_size.width + 16,
        text_size.height + baseline + 16,
    };
    cv::rectangle(canvas, background, cv::Scalar(0, 0, 0), cv::FILLED);
    cv::putText(
        canvas, text, origin, font_face, font_scale, cv::Scalar(255, 255, 255), thickness,
        cv::LINE_AA);
}

/// Reads the image timestamp of the last grabbed frame. A zero timestamp from
/// the SDK is tolerated after the first frame by extrapolating one nominal
/// frame period past the previous timestamp; on the first frame it is an error.
[[nodiscard]] std::expected<std::uint64_t, std::string> read_image_timestamp_ns(
    sl::Camera &camera,
    const std::optional<std::uint64_t> fallback_timestamp_ns,
    const std::uint64_t nominal_frame_period_ns)
{
    auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
    if (timestamp_ns == 0) {
        if (!fallback_timestamp_ns) {
            return std::unexpected("ZED SDK returned a zero image timestamp for the first frame");
        }
        timestamp_ns = *fallback_timestamp_ns + nominal_frame_period_ns;
    }
    return timestamp_ns;
}

/// Grabs the next frame, retrieves its left BGR image into `target`, validates
/// it, and reports its timestamp via `timestamp_ns_out`. Returns the sentinel
/// error string "end-of-svo" when the recording is exhausted (callers treat
/// that case as a normal stop, not a failure).
[[nodiscard]] std::expected<void, std::string> read_into_mat(
    sl::Camera &camera,
    sl::RuntimeParameters &runtime,
    sl::Mat &target,
    std::optional<std::uint64_t> fallback_timestamp_ns,
    std::uint64_t nominal_frame_period_ns,
    std::uint64_t &timestamp_ns_out,
    const std::string_view label)
{
    const auto grab_status = camera.grab(runtime);
    if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
        return std::unexpected("end-of-svo");
    }
    if (grab_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to grab frame for " + std::string(label) + ": " +
                               zed_status_string(grab_status));
    }
    const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
    if (image_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " +
                               zed_status_string(image_status));
    }
    if (auto valid = validate_u8c3_mat(target, label); !valid) {
        return std::unexpected(valid.error());
    }
    auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns);
    if (!timestamp_ns) {
        return std::unexpected(timestamp_ns.error());
    }
    timestamp_ns_out = *timestamp_ns;
    return {};
}

/// Fills the stream's look-ahead slot with the next decodable frame. Hitting
/// end-of-svo simply clears has_next; other decode errors propagate.
[[nodiscard]] std::expected<void, std::string> fill_next_frame(CameraStream &stream)
{
    std::uint64_t timestamp_ns = 0;
    auto next = read_into_mat(
        *stream.camera, stream.runtime, stream.next_frame, stream.current_timestamp_ns,
        stream.nominal_frame_period_ns, timestamp_ns, stream.source.label);
    if (!next) {
        if (next.error() == "end-of-svo") {
            stream.has_next = false;
            return {};
        }
        return std::unexpected(next.error());
    }
    stream.next_timestamp_ns = timestamp_ns;
    stream.has_next = true;
    return {};
}

/// Makes the buffered look-ahead frame the current frame and refills the
/// look-ahead slot. Requires has_next to be true.
[[nodiscard]] std::expected<void, std::string> promote_next_frame(CameraStream &stream)
{
    if (!stream.has_next) {
        return std::unexpected("no buffered next frame is available for " + stream.source.label);
    }
    std::swap(stream.current_frame, stream.next_frame);
    std::swap(stream.current_timestamp_ns, stream.next_timestamp_ns);
    stream.has_next = false;
    return fill_next_frame(stream);
}

/// Walks backwards from the final SVO position until a frame decodes, and
/// returns that frame's timestamp. Corrupt tail frames are skipped with a
/// warning; failing to read any frame at all is an error.
[[nodiscard]] std::expected<std::uint64_t, std::string>
read_last_readable_timestamp(CameraStream &stream)
{
    const auto last_candidate = static_cast<int>(stream.total_frames - 1);
    std::string last_error{};
    for (int position = last_candidate; position >= 0; --position) {
        stream.camera->setSVOPosition(position);
        std::uint64_t timestamp_ns = 0;
        auto frame = read_into_mat(
            *stream.camera, stream.runtime, stream.current_frame, std::nullopt,
            stream.nominal_frame_period_ns, timestamp_ns, stream.source.label);
        if (frame) {
            const auto skipped_tail_frames = static_cast<std::uint64_t>(last_candidate - position);
            if (skipped_tail_frames > 0) {
                spdlog::warn(
                    "skipping {} unreadable tail frame(s) for {} last_error={}",
                    skipped_tail_frames, stream.source.path.string(), last_error);
            }
            return timestamp_ns;
        }
        last_error = frame.error();
    }
    return std::unexpected(
        "failed to read any trailing frame for " + stream.source.path.string() + ": " + last_error);
}

/// Opens one SVO recording (depth disabled — only the left image is needed),
/// validates its metadata, and primes first/last timestamps used for syncing.
[[nodiscard]] std::expected<CameraStream, std::string> open_camera_stream(const SourceSpec &source)
{
    CameraStream stream{};
    stream.source = source;
    stream.camera = std::make_unique<sl::Camera>();
    sl::InitParameters init{};
    init.input.setFromSVOFile(source.path.c_str());
    init.svo_real_time_mode = false;  // decode as fast as possible
    init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
    init.coordinate_units = sl::UNIT::METER;
    init.depth_mode = sl::DEPTH_MODE::NONE;
    init.sdk_verbose = false;
    const auto open_status = stream.camera->open(init);
    if (open_status != sl::ERROR_CODE::SUCCESS) {
        return std::unexpected("failed to open SVO '" + source.path.string() + "': " +
                               zed_status_string(open_status));
    }
    const auto total_frames = stream.camera->getSVONumberOfFrames();
    if (total_frames <= 0) {
        return std::unexpected("input SVO has no frames: " + source.path.string());
    }
    stream.total_frames = static_cast<std::uint64_t>(total_frames);
    const auto camera_info = stream.camera->getCameraInformation().camera_configuration;
    stream.width = static_cast<std::uint32_t>(camera_info.resolution.width);
    stream.height = static_cast<std::uint32_t>(camera_info.resolution.height);
    stream.fps = camera_info.fps;
    stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps);
    if (stream.width == 0 || stream.height == 0) {
        return std::unexpected("camera resolution reported by the ZED SDK is invalid for " +
                               source.path.string());
    }
    std::uint64_t first_timestamp_ns = 0;
    auto first_frame = read_into_mat(
        *stream.camera, stream.runtime, stream.current_frame, std::nullopt,
        stream.nominal_frame_period_ns, first_timestamp_ns, source.label);
    if (!first_frame) {
        return std::unexpected(first_frame.error());
    }
    stream.first_timestamp_ns = first_timestamp_ns;
    auto last_timestamp_ns = read_last_readable_timestamp(stream);
    if (!last_timestamp_ns) {
        return std::unexpected(last_timestamp_ns.error());
    }
    stream.last_timestamp_ns = *last_timestamp_ns;
    return stream;
}

/// Closes every still-open camera; safe to call on partially-opened sets.
void close_camera_streams(std::vector<CameraStream> &streams)
{
    for (auto &stream : streams) {
        if (stream.camera != nullptr && stream.camera->isOpened()) {
            stream.camera->close();
        }
    }
}

}  // namespace

int main(int argc, char **argv)
{
    CliOptions options{};
    CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"};
    auto *input_option = app.add_option(
        "--input", options.input_paths,
        "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)");
    auto *segment_dir_option = app.add_option(
        "--segment-dir", options.segment_dir,
        "Segment directory containing *_zed[1-4].svo or *_zed[1-4].svo2 files");
    // --input and --segment-dir are mutually exclusive input modes.
    input_option->excludes(segment_dir_option);
    segment_dir_option->excludes(input_option);
    app.add_option("--output", options.output_path, "Output MP4 file");
    app.add_option("--codec", options.codec, "Video codec (h264|h265)")
        ->check(CLI::IsMember({"h264", "h265"}));
    app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
        ->check(CLI::IsMember({"auto", "nvidia", "software"}));
    app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
        ->check(CLI::IsMember({"fast", "balanced", "quality"}));
    app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
        ->check(CLI::IsMember({"low-latency", "balanced"}));
    app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
        ->check(CLI::Range(0, 51));
    app.add_option("--gop", options.gop, "Encoder GOP length in frames")
        ->check(CLI::PositiveNumber);
    app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
        ->check(CLI::NonNegativeNumber);
    app.add_option(
           "--start-offset-seconds", options.start_offset_seconds,
           "Offset to apply after the synced common start time in seconds")
        ->check(CLI::NonNegativeNumber);
    auto *duration_option = app.add_option(
                                   "--duration-seconds", options.duration_seconds,
                                   "Limit export duration in seconds after sync")
                                ->check(CLI::PositiveNumber);
    auto *output_fps_option = app.add_option(
                                     "--output-fps", options.output_fps,
                                     "Composite output frame rate (default: max input fps)")
                                  ->check(CLI::PositiveNumber);
    app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution")
        ->check(CLI::Range(0.1, 1.0));
    try {
        app.parse(argc, argv);
    } catch (const CLI::ParseError &error) {
        return app.exit(error);
    }
    options.has_duration = duration_option->count() > 0;
    options.has_output_fps = output_fps_option->count() > 0;
    if (options.input_paths.empty() && options.segment_dir.empty()) {
        spdlog::error("provide either --segment-dir or repeat --input exactly 4 times");
        return exit_code(ToolExitCode::UsageError);
    }
    if (options.b_frames > options.gop) {
        spdlog::error(
            "invalid encoder config: b-frames {} must be <= gop {}", options.b_frames, options.gop);
        return exit_code(ToolExitCode::UsageError);
    }
    auto codec = parse_codec(options.codec);
    if (!codec) {
        spdlog::error("{}", codec.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto encoder_device = parse_encoder_device(options.encoder_device);
    if (!encoder_device) {
        spdlog::error("{}", encoder_device.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto preset = parse_preset(options.preset);
    if (!preset) {
        spdlog::error("{}", preset.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto tune = parse_tune(options.tune);
    if (!tune) {
        spdlog::error("{}", tune.error());
        return exit_code(ToolExitCode::UsageError);
    }
    auto sources = resolve_sources(options);
    if (!sources) {
        spdlog::error("{}", sources.error());
        return exit_code(ToolExitCode::UsageError);
    }
    const auto output_path = derive_grid_output_path(options, *sources);
    if (output_path.has_parent_path()) {
        std::filesystem::create_directories(output_path.parent_path());
    }
    const EncodeTuning tuning{
        .preset = *preset,
        .tune = *tune,
        .quality = options.quality,
        .gop = options.gop,
        .b_frames = options.b_frames,
    };
    std::vector<CameraStream> streams{};
    streams.reserve(sources->size());
    for (const auto &source : *sources) {
        auto stream = open_camera_stream(source);
        if (!stream) {
            close_camera_streams(streams);
            spdlog::error("{}", stream.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        streams.push_back(std::move(*stream));
    }
    // Synced window: start at the latest first-frame timestamp, end at the
    // earliest last-frame timestamp, so every camera has a frame to show.
    const auto sync_start_ts =
        std::max_element(
            streams.begin(), streams.end(),
            [](const auto &left, const auto &right) {
                return left.first_timestamp_ns < right.first_timestamp_ns;
            })
            ->first_timestamp_ns;
    const auto start_offset_ns =
        static_cast<std::uint64_t>(std::llround(options.start_offset_seconds * 1'000'000'000.0));
    const auto effective_start_ts = sync_start_ts + start_offset_ns;
    const auto common_end_ts =
        std::min_element(
            streams.begin(), streams.end(),
            [](const auto &left, const auto &right) {
                return left.last_timestamp_ns < right.last_timestamp_ns;
            })
            ->last_timestamp_ns;
    const auto requested_end_exclusive_ts =
        options.has_duration
            ? effective_start_ts +
                  static_cast<std::uint64_t>(std::llround(options.duration_seconds * 1'000'000'000.0))
            : common_end_ts + 1;
    const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1);
    if (effective_start_ts >= output_end_exclusive_ts) {
        close_camera_streams(streams);
        spdlog::error(
            "synced time window is empty: start_ts={} end_ts={}", effective_start_ts,
            output_end_exclusive_ts);
        return exit_code(ToolExitCode::UsageError);
    }
    // All tiles must share one resolution; the output clock uses the fastest
    // input unless --output-fps overrides it.
    std::uint32_t source_width = streams.front().width;
    std::uint32_t source_height = streams.front().height;
    float max_input_fps = streams.front().fps;
    for (const auto &stream : streams) {
        if (stream.width != source_width || stream.height != source_height) {
            close_camera_streams(streams);
            spdlog::error(
                "all inputs must share the same resolution: expected {}x{}, got {}x{} for {}",
                source_width, source_height, stream.width, stream.height,
                stream.source.path.string());
            return exit_code(ToolExitCode::UsageError);
        }
        max_input_fps = std::max(max_input_fps, stream.fps);
    }
    const auto output_fps =
        options.has_output_fps ? static_cast<float>(options.output_fps) : max_input_fps;
    const auto output_period_ns = frame_period_ns(output_fps);
    const auto total_frames_to_emit = static_cast<std::uint64_t>(
        (output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns);
    // Seek each stream to the synced start and prime its two-frame buffer.
    for (auto &stream : streams) {
        stream.sync_position =
            stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts});
        if (stream.sync_position < 0) {
            close_camera_streams(streams);
            spdlog::error(
                "failed to compute synced start frame for {} at timestamp {}",
                stream.source.path.string(), effective_start_ts);
            return exit_code(ToolExitCode::RuntimeError);
        }
        stream.camera->setSVOPosition(stream.sync_position);
        std::uint64_t current_timestamp_ns = 0;
        auto current = read_into_mat(
            *stream.camera, stream.runtime, stream.current_frame, std::nullopt,
            stream.nominal_frame_period_ns, current_timestamp_ns, stream.source.label);
        if (!current) {
            close_camera_streams(streams);
            spdlog::error("{}", current.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        stream.current_timestamp_ns = current_timestamp_ns;
        auto next = fill_next_frame(stream);
        if (!next) {
            close_camera_streams(streams);
            spdlog::error("{}", next.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        // Advance past any frames that precede the effective start timestamp.
        while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) {
            auto promote = promote_next_frame(stream);
            if (!promote) {
                close_camera_streams(streams);
                spdlog::error("{}", promote.error());
                return exit_code(ToolExitCode::RuntimeError);
            }
        }
        spdlog::info(
            "ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}",
            stream.source.path.string(), stream.source.label, stream.sync_position,
            stream.first_timestamp_ns, stream.current_timestamp_ns,
            stream.has_next ? stream.next_timestamp_ns : 0);
    }
    const auto tile_width =
        static_cast<int>(std::llround(static_cast<double>(source_width) * options.tile_scale));
    const auto tile_height =
        static_cast<int>(std::llround(static_cast<double>(source_height) * options.tile_scale));
    if (tile_width <= 0 || tile_height <= 0) {
        close_camera_streams(streams);
        spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale);
        return exit_code(ToolExitCode::UsageError);
    }
    const auto composite_width = tile_width * 2;
    const auto composite_height = tile_height * 2;
    Mp4Writer writer{};
    if (auto open_writer = writer.open(
            output_path, *codec, *encoder_device, static_cast<std::uint32_t>(composite_width),
            static_cast<std::uint32_t>(composite_height), output_fps, tuning);
        !open_writer) {
        close_camera_streams(streams);
        spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    cv::Mat composite(composite_height, composite_width, CV_8UC3);
    std::vector<cv::Mat> resized_tiles(streams.size());
    ProgressBar progress{total_frames_to_emit};
    for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) {
        const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns;
        if (target_timestamp_ns >= output_end_exclusive_ts) {
            break;
        }
        // For each stream, show the newest frame at or before the target time.
        for (auto &stream : streams) {
            while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) {
                auto promote = promote_next_frame(stream);
                if (!promote) {
                    progress.finish(emitted_frames, false);
                    close_camera_streams(streams);
                    spdlog::error("{}", promote.error());
                    return exit_code(ToolExitCode::RuntimeError);
                }
            }
        }
        composite.setTo(cv::Scalar(0, 0, 0));
        for (std::size_t index = 0; index < streams.size(); ++index) {
            auto &stream = streams[index];
            // Wrap the ZED CPU buffer as a cv::Mat without copying pixels.
            cv::Mat source_view(
                static_cast<int>(stream.current_frame.getHeight()),
                static_cast<int>(stream.current_frame.getWidth()), CV_8UC3,
                stream.current_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
                stream.current_frame.getStepBytes(sl::MEM::CPU));
            cv::resize(
                source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0,
                cv::INTER_AREA);
            const int row = static_cast<int>(index / 2);
            const int col = static_cast<int>(index % 2);
            const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height};
            resized_tiles[index].copyTo(composite(roi));
        }
        draw_timestamp_overlay(composite, target_timestamp_ns);
        if (auto write = writer.write_bgr_frame(
                composite.data, static_cast<std::size_t>(composite.step),
                target_timestamp_ns - effective_start_ts);
            !write) {
            progress.finish(emitted_frames, false);
            close_camera_streams(streams);
            spdlog::error("failed to encode or mux frame: {}", write.error());
            return exit_code(ToolExitCode::RuntimeError);
        }
        progress.update(emitted_frames + 1);
    }
    if (auto flush = writer.flush(); !flush) {
        progress.finish(total_frames_to_emit, false);
        close_camera_streams(streams);
        spdlog::error("failed to finalize MP4 output: {}", flush.error());
        return exit_code(ToolExitCode::RuntimeError);
    }
    progress.finish(total_frames_to_emit, true);
    close_camera_streams(streams);
    spdlog::info(
        "converted {} synced frames to '{}' using codec={} hardware={}", total_frames_to_emit,
        output_path.string(), cvmmap_streamer::zed_tools::codec_name(*codec),
        writer.using_hardware());
    return exit_code(ToolExitCode::Success);
}