diff --git a/CMakeLists.txt b/CMakeLists.txt index 3553fb5..5ade442 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,8 @@ set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +include(GNUInstallDirs) + find_package(Threads REQUIRED) find_package(cppzmq QUIET) if (DEFINED CVMMAP_STREAMER_USE_SYSTEM_CNATS) @@ -49,6 +51,7 @@ find_package(ZeroMQ QUIET) find_package(spdlog REQUIRED) find_package(Protobuf REQUIRED) find_package(PkgConfig REQUIRED) +find_package(OpenCV REQUIRED COMPONENTS core imgproc) find_package(rvl CONFIG QUIET) set(ZED_DIR "/usr/local/zed" CACHE PATH "Path to the local ZED SDK") find_package(ZED REQUIRED) @@ -362,3 +365,88 @@ endif() set_target_properties(zed_svo_to_mcap PROPERTIES OUTPUT_NAME "zed_svo_to_mcap" RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") + +add_library( + cvmmap_streamer_zed_svo_mp4_support + STATIC + src/tools/zed_svo_mp4_support.cpp) +target_include_directories(cvmmap_streamer_zed_svo_mp4_support + PUBLIC + "${CMAKE_CURRENT_LIST_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries(cvmmap_streamer_zed_svo_mp4_support + PUBLIC + PkgConfig::FFMPEG) +if (TARGET spdlog::spdlog) + target_link_libraries(cvmmap_streamer_zed_svo_mp4_support PUBLIC spdlog::spdlog) +elseif (TARGET spdlog) + target_link_libraries(cvmmap_streamer_zed_svo_mp4_support PUBLIC spdlog) +endif() + +add_executable( + zed_svo_to_mp4 + src/tools/zed_svo_to_mp4.cpp) +target_include_directories(zed_svo_to_mp4 + PRIVATE + "${CMAKE_CURRENT_LIST_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}" + ${ZED_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS}) +target_link_directories(zed_svo_to_mp4 + PRIVATE + ${ZED_LIBRARY_DIR} + ${CUDA_LIBRARY_DIRS}) +target_link_libraries(zed_svo_to_mp4 + PRIVATE + CLI11::CLI11 + cvmmap_streamer_zed_svo_mp4_support + ${ZED_LIBRARIES} + ${CUDA_CUDA_LIBRARY} + ${CUDA_CUDART_LIBRARY}) +if (TARGET spdlog::spdlog) + target_link_libraries(zed_svo_to_mp4 PRIVATE spdlog::spdlog) 
+elseif (TARGET spdlog) + target_link_libraries(zed_svo_to_mp4 PRIVATE spdlog) +endif() +set_target_properties(zed_svo_to_mp4 PROPERTIES + OUTPUT_NAME "zed_svo_to_mp4" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") + +add_executable( + zed_svo_grid_to_mp4 + src/tools/zed_svo_grid_to_mp4.cpp) +target_include_directories(zed_svo_grid_to_mp4 + PRIVATE + "${CMAKE_CURRENT_LIST_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}" + ${ZED_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS}) +target_link_directories(zed_svo_grid_to_mp4 + PRIVATE + ${ZED_LIBRARY_DIR} + ${CUDA_LIBRARY_DIRS}) +target_link_libraries(zed_svo_grid_to_mp4 + PRIVATE + CLI11::CLI11 + cvmmap_streamer_zed_svo_mp4_support + ${ZED_LIBRARIES} + ${CUDA_CUDA_LIBRARY} + ${CUDA_CUDART_LIBRARY} + ${OpenCV_LIBS}) +if (TARGET spdlog::spdlog) + target_link_libraries(zed_svo_grid_to_mp4 PRIVATE spdlog::spdlog) +elseif (TARGET spdlog) + target_link_libraries(zed_svo_grid_to_mp4 PRIVATE spdlog) +endif() +set_target_properties(zed_svo_grid_to_mp4 PROPERTIES + OUTPUT_NAME "zed_svo_grid_to_mp4" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") + +install( + TARGETS + cvmmap_streamer + zed_svo_to_mcap + zed_svo_to_mp4 + zed_svo_grid_to_mp4 + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") diff --git a/README.md b/README.md index 41a1917..e6d10b3 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,61 @@ cmake --build build ls -la build/{cvmmap_streamer,rtp_receiver_tester,rtmp_stub_tester} ``` +### ZED SVO/SVO2 To MP4 + +The repo also includes an offline conversion tool for the left ZED color stream: + +```bash +CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \ +./build/bin/zed_svo_to_mp4 \ + --input /workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41_zed1.svo2 \ + --encoder-device auto \ + --preset balanced \ + --quality 20 \ + --start-frame 0 \ + --end-frame 89 +``` + +By default the tool writes `foo.mp4` next to `foo.svo` or `foo.svo2`, defaults to 
`h265`, and shows a tqdm-like progress bar when stderr is attached to a TTY. `--encoder-device auto` tries NVENC first and falls back to software (`libx264` or `libx265`) if the hardware encoder is unavailable or cannot be opened. + +### Batch ZED SVO2 To MP4 + +Python dependencies for the batch wrapper are managed with `uv`: + +```bash +uv sync +``` + +Use the wrapper to recurse through a folder, run `zed_svo_to_mp4` on every matched `.svo2`, and show one aggregate tqdm progress bar: + +```bash +uv run python scripts/zed_batch_svo_to_mp4.py \ + /workspaces/data/kindergarten/bar \ + --pattern '*.svo2' \ + --recursive \ + --jobs 2 \ + --encoder-device auto \ + --start-frame 0 \ + --end-frame 29 \ + --cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 +``` + +The batch tool mirrors the common encoder options from `zed_svo_to_mp4`, skips existing sibling `.mp4` outputs by default, and continues after failures while returning a nonzero exit code if any conversion fails. + +### ZED SVO Grid To MP4 + +Use the grid converter to merge four synced ZED recordings into a 2x2 CCTV-style MP4 with a Unix timestamp overlay in the top-left corner: + +```bash +./build/bin/zed_svo_grid_to_mp4 \ + --segment-dir /workspaces/data/kindergarten/bar/2026-03-18T11-59-41 \ + --encoder-device auto \ + --codec h265 \ + --duration-seconds 2 +``` + +The tool syncs the four inputs using the same common-start timestamp rule as the ZED multi-camera playback sample, defaults to a 2x2 layout ordered as `zed1 zed2 / zed3 zed4`, and writes `<segment-dir>/<segment-dir-name>_grid.mp4` (or `<first-input-stem>_grid.mp4` next to the first `--input` file when explicit inputs are used) unless `--output` is provided. By default each tile is scaled to `0.5x`, so a four-camera 1920x1200 segment produces a 1920x1200 composite. Use repeated `--input` flags instead of `--segment-dir` when you want explicit row-major ordering. + ### Mandatory Acceptance (Standalone) Run the full mandatory acceptance suite. This executes the complete protocol/codec matrix without requiring external servers. 
diff --git a/include/cvmmap_streamer/tools/zed_svo_mp4_support.hpp b/include/cvmmap_streamer/tools/zed_svo_mp4_support.hpp new file mode 100644 index 0000000..1ef4e2e --- /dev/null +++ b/include/cvmmap_streamer/tools/zed_svo_mp4_support.hpp @@ -0,0 +1,117 @@ +#pragma once + +#include "cvmmap_streamer/config/runtime_config.hpp" + +#include +#include +#include +#include +#include +#include + +namespace cvmmap_streamer::zed_tools { + +using cvmmap_streamer::CodecType; +using cvmmap_streamer::EncoderDeviceType; + +inline constexpr std::uint32_t kDefaultGopSize = 30; +inline constexpr std::uint32_t kDefaultBFrames = 0; +inline constexpr int kDefaultQuality = 23; +inline constexpr std::uint64_t kNanosPerSecond = 1'000'000'000ull; + +enum class PresetKind : std::uint8_t { + Fast, + Balanced, + Quality, +}; + +enum class TuneKind : std::uint8_t { + LowLatency, + Balanced, +}; + +struct EncodeTuning { + PresetKind preset{PresetKind::Fast}; + TuneKind tune{TuneKind::LowLatency}; + int quality{kDefaultQuality}; + std::uint32_t gop{kDefaultGopSize}; + std::uint32_t b_frames{kDefaultBFrames}; +}; + +[[nodiscard]] +std::expected parse_codec(std::string_view raw); + +[[nodiscard]] +std::expected parse_encoder_device(std::string_view raw); + +[[nodiscard]] +std::expected parse_preset(std::string_view raw); + +[[nodiscard]] +std::expected parse_tune(std::string_view raw); + +[[nodiscard]] +std::string_view codec_name(CodecType codec); + +[[nodiscard]] +std::string_view preset_name(PresetKind preset); + +[[nodiscard]] +std::string_view tune_name(TuneKind tune); + +[[nodiscard]] +std::uint64_t frame_period_ns(float fps); + +[[nodiscard]] +std::filesystem::path derive_output_path(const std::filesystem::path &input_path); + +class ProgressBar { +public: + explicit ProgressBar(std::uint64_t total_frames); + ~ProgressBar(); + + void update(std::uint64_t completed_frames); + void finish(std::uint64_t completed_frames, bool success); + +private: + struct Impl; + std::unique_ptr impl_{}; 
+}; + +class Mp4Writer { +public: + Mp4Writer(); + Mp4Writer(const Mp4Writer &) = delete; + Mp4Writer &operator=(const Mp4Writer &) = delete; + Mp4Writer(Mp4Writer &&) noexcept; + Mp4Writer &operator=(Mp4Writer &&) noexcept; + ~Mp4Writer(); + + [[nodiscard]] + std::expected open( + const std::filesystem::path &output_path, + CodecType codec, + EncoderDeviceType encoder_device, + std::uint32_t width, + std::uint32_t height, + float fps, + const EncodeTuning &tuning); + + [[nodiscard]] + std::expected write_bgr_frame( + const std::uint8_t *data, + std::size_t row_stride_bytes, + std::uint64_t relative_timestamp_ns); + + [[nodiscard]] + std::expected flush(); + + [[nodiscard]] + bool using_hardware() const; + +private: + struct Impl; + std::unique_ptr impl_{}; +}; + +} // namespace cvmmap_streamer::zed_tools diff --git a/src/tools/zed_svo_grid_to_mp4.cpp b/src/tools/zed_svo_grid_to_mp4.cpp new file mode 100644 index 0000000..60bcff0 --- /dev/null +++ b/src/tools/zed_svo_grid_to_mp4.cpp @@ -0,0 +1,702 @@ +#include +#include + +#include + +#include +#include + +#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +using cvmmap_streamer::zed_tools::EncodeTuning; +using cvmmap_streamer::zed_tools::Mp4Writer; +using cvmmap_streamer::zed_tools::ProgressBar; +using cvmmap_streamer::zed_tools::frame_period_ns; +using cvmmap_streamer::zed_tools::parse_codec; +using cvmmap_streamer::zed_tools::parse_encoder_device; +using cvmmap_streamer::zed_tools::parse_preset; +using cvmmap_streamer::zed_tools::parse_tune; + +constexpr std::size_t kExpectedInputCount = 4; + +enum class ToolExitCode : int { + Success = 0, + UsageError = 2, + RuntimeError = 3, +}; + +struct CliOptions { + std::vector input_paths{}; + std::string segment_dir{}; + std::string output_path{}; + std::string codec{"h265"}; + std::string encoder_device{"auto"}; + std::string 
preset{"fast"}; + std::string tune{"low-latency"}; + int quality{cvmmap_streamer::zed_tools::kDefaultQuality}; + std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize}; + std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames}; + double start_offset_seconds{0.0}; + double duration_seconds{0.0}; + bool has_duration{false}; + double output_fps{0.0}; + bool has_output_fps{false}; + double tile_scale{0.5}; +}; + +struct SourceSpec { + std::filesystem::path path{}; + std::string label{}; +}; + +struct CameraStream { + SourceSpec source{}; + std::unique_ptr camera{}; + sl::RuntimeParameters runtime{}; + sl::Mat current_frame{}; + sl::Mat next_frame{}; + std::uint64_t current_timestamp_ns{0}; + std::uint64_t next_timestamp_ns{0}; + std::uint64_t first_timestamp_ns{0}; + std::uint64_t last_timestamp_ns{0}; + std::uint64_t total_frames{0}; + std::uint64_t nominal_frame_period_ns{0}; + float fps{0.0f}; + std::uint32_t width{0}; + std::uint32_t height{0}; + int sync_position{-1}; + bool has_next{false}; +}; + +[[nodiscard]] +constexpr int exit_code(const ToolExitCode code) { + return static_cast(code); +} + +[[nodiscard]] +std::string zed_string(const sl::String &value) { + return std::string(value.c_str() == nullptr ? 
"" : value.c_str()); +} + +[[nodiscard]] +std::string zed_status_string(const sl::ERROR_CODE code) { + return zed_string(sl::toString(code)); +} + +[[nodiscard]] +std::expected validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) { + if (mat.getDataType() != sl::MAT_TYPE::U8_C3) { + return std::unexpected(std::string(label) + " must be U8_C3"); + } + if (mat.getWidth() == 0 || mat.getHeight() == 0) { + return std::unexpected(std::string(label) + " dimensions must be non-zero"); + } + if (mat.getPtr(sl::MEM::CPU) == nullptr) { + return std::unexpected(std::string(label) + " CPU buffer is null"); + } + return {}; +} + +[[nodiscard]] +std::expected, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) { + if (!std::filesystem::is_directory(segment_dir)) { + return std::unexpected("segment directory does not exist: " + segment_dir.string()); + } + + const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase}; + std::vector> ordered_paths{}; + for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) { + if (!entry.is_regular_file()) { + continue; + } + + std::smatch match{}; + const auto filename = entry.path().filename().string(); + if (!std::regex_match(filename, match, pattern)) { + continue; + } + ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path()); + } + + std::sort( + ordered_paths.begin(), + ordered_paths.end(), + [](const auto &left, const auto &right) { + return left.first < right.first; + }); + + if (ordered_paths.size() != kExpectedInputCount) { + return std::unexpected( + "expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size())); + } + + std::vector sources{}; + sources.reserve(ordered_paths.size()); + for (const auto &[camera_index, path] : ordered_paths) { + sources.push_back(SourceSpec{ + .path = path, + .label = "zed" + std::to_string(camera_index), + }); + } + return sources; +} + +[[nodiscard]] 
+std::expected, std::string> resolve_sources(const CliOptions &options) { + if (!options.segment_dir.empty()) { + return discover_segment_inputs(std::filesystem::path{options.segment_dir}); + } + + if (options.input_paths.size() != kExpectedInputCount) { + return std::unexpected("repeat --input exactly 4 times"); + } + + std::vector sources{}; + sources.reserve(options.input_paths.size()); + for (std::size_t index = 0; index < options.input_paths.size(); ++index) { + const auto path = std::filesystem::path{options.input_paths[index]}; + if (!std::filesystem::is_regular_file(path)) { + return std::unexpected("input file does not exist: " + path.string()); + } + sources.push_back(SourceSpec{ + .path = path, + .label = "view" + std::to_string(index + 1), + }); + } + return sources; +} + +[[nodiscard]] +std::filesystem::path derive_grid_output_path(const CliOptions &options, const std::vector &sources) { + if (!options.output_path.empty()) { + return std::filesystem::path{options.output_path}; + } + + if (!options.segment_dir.empty()) { + const auto segment_dir = std::filesystem::path{options.segment_dir}; + return segment_dir / (segment_dir.filename().string() + "_grid.mp4"); + } + + auto output_path = sources.front().path; + output_path.replace_extension(""); + output_path += "_grid.mp4"; + return output_path; +} + +[[nodiscard]] +std::string format_unix_timestamp(const std::uint64_t timestamp_ns) { + const auto seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond; + const auto milliseconds = (timestamp_ns % cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull; + return std::to_string(seconds) + "." + (milliseconds < 100 ? (milliseconds < 10 ? 
"00" : "0") : "") + std::to_string(milliseconds); +} + +void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns) { + const auto text = format_unix_timestamp(timestamp_ns); + int baseline = 0; + const auto font_face = cv::FONT_HERSHEY_SIMPLEX; + const double font_scale = 0.8; + const int thickness = 2; + const auto text_size = cv::getTextSize(text, font_face, font_scale, thickness, &baseline); + const cv::Point origin{16, 16 + text_size.height}; + const cv::Rect background{ + 8, + 8, + text_size.width + 16, + text_size.height + baseline + 16, + }; + cv::rectangle(canvas, background, cv::Scalar(0, 0, 0), cv::FILLED); + cv::putText( + canvas, + text, + origin, + font_face, + font_scale, + cv::Scalar(255, 255, 255), + thickness, + cv::LINE_AA); +} + +[[nodiscard]] +std::expected read_image_timestamp_ns( + sl::Camera &camera, + const std::optional fallback_timestamp_ns, + const std::uint64_t nominal_frame_period_ns) { + auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds(); + if (timestamp_ns == 0) { + if (!fallback_timestamp_ns) { + return std::unexpected("ZED SDK returned a zero image timestamp for the first frame"); + } + timestamp_ns = *fallback_timestamp_ns + nominal_frame_period_ns; + } + return timestamp_ns; +} + +[[nodiscard]] +std::expected read_into_mat( + sl::Camera &camera, + sl::RuntimeParameters &runtime, + sl::Mat &target, + std::optional fallback_timestamp_ns, + std::uint64_t nominal_frame_period_ns, + std::uint64_t ×tamp_ns_out, + const std::string_view label) { + const auto grab_status = camera.grab(runtime); + if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) { + return std::unexpected("end-of-svo"); + } + if (grab_status != sl::ERROR_CODE::SUCCESS) { + return std::unexpected("failed to grab frame for " + std::string(label) + ": " + zed_status_string(grab_status)); + } + + const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU); + if (image_status != 
sl::ERROR_CODE::SUCCESS) { + return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " + zed_status_string(image_status)); + } + if (auto valid = validate_u8c3_mat(target, label); !valid) { + return std::unexpected(valid.error()); + } + + auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns); + if (!timestamp_ns) { + return std::unexpected(timestamp_ns.error()); + } + timestamp_ns_out = *timestamp_ns; + return {}; +} + +[[nodiscard]] +std::expected fill_next_frame(CameraStream &stream) { + std::uint64_t timestamp_ns = 0; + auto next = read_into_mat( + *stream.camera, + stream.runtime, + stream.next_frame, + stream.current_timestamp_ns, + stream.nominal_frame_period_ns, + timestamp_ns, + stream.source.label); + if (!next) { + if (next.error() == "end-of-svo") { + stream.has_next = false; + return {}; + } + return std::unexpected(next.error()); + } + + stream.next_timestamp_ns = timestamp_ns; + stream.has_next = true; + return {}; +} + +[[nodiscard]] +std::expected promote_next_frame(CameraStream &stream) { + if (!stream.has_next) { + return std::unexpected("no buffered next frame is available for " + stream.source.label); + } + + std::swap(stream.current_frame, stream.next_frame); + std::swap(stream.current_timestamp_ns, stream.next_timestamp_ns); + stream.has_next = false; + return fill_next_frame(stream); +} + +[[nodiscard]] +std::expected open_camera_stream(const SourceSpec &source) { + CameraStream stream{}; + stream.source = source; + stream.camera = std::make_unique(); + + sl::InitParameters init{}; + init.input.setFromSVOFile(source.path.c_str()); + init.svo_real_time_mode = false; + init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE; + init.coordinate_units = sl::UNIT::METER; + init.depth_mode = sl::DEPTH_MODE::NONE; + init.sdk_verbose = false; + + const auto open_status = stream.camera->open(init); + if (open_status != sl::ERROR_CODE::SUCCESS) { + return std::unexpected("failed 
to open SVO '" + source.path.string() + "': " + zed_status_string(open_status)); + } + + const auto total_frames = stream.camera->getSVONumberOfFrames(); + if (total_frames <= 0) { + return std::unexpected("input SVO has no frames: " + source.path.string()); + } + stream.total_frames = static_cast(total_frames); + + const auto camera_info = stream.camera->getCameraInformation().camera_configuration; + stream.width = static_cast(camera_info.resolution.width); + stream.height = static_cast(camera_info.resolution.height); + stream.fps = camera_info.fps; + stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps); + if (stream.width == 0 || stream.height == 0) { + return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string()); + } + + std::uint64_t first_timestamp_ns = 0; + auto first_frame = read_into_mat( + *stream.camera, + stream.runtime, + stream.current_frame, + std::nullopt, + stream.nominal_frame_period_ns, + first_timestamp_ns, + source.label); + if (!first_frame) { + return std::unexpected(first_frame.error()); + } + stream.first_timestamp_ns = first_timestamp_ns; + + stream.camera->setSVOPosition(static_cast(stream.total_frames - 1)); + std::uint64_t last_timestamp_ns = 0; + auto last_frame = read_into_mat( + *stream.camera, + stream.runtime, + stream.current_frame, + std::nullopt, + stream.nominal_frame_period_ns, + last_timestamp_ns, + source.label); + if (!last_frame) { + return std::unexpected(last_frame.error()); + } + stream.last_timestamp_ns = last_timestamp_ns; + + return stream; +} + +void close_camera_streams(std::vector &streams) { + for (auto &stream : streams) { + if (stream.camera != nullptr && stream.camera->isOpened()) { + stream.camera->close(); + } + } +} + +} // namespace + +int main(int argc, char **argv) { + CliOptions options{}; + + CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"}; + auto *input_option = app.add_option("--input", 
options.input_paths, "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)"); + auto *segment_dir_option = app.add_option("--segment-dir", options.segment_dir, "Segment directory containing *_zed[1-4].svo or *_zed[1-4].svo2 files"); + input_option->excludes(segment_dir_option); + segment_dir_option->excludes(input_option); + app.add_option("--output", options.output_path, "Output MP4 file"); + app.add_option("--codec", options.codec, "Video codec (h264|h265)") + ->check(CLI::IsMember({"h264", "h265"})); + app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)") + ->check(CLI::IsMember({"auto", "nvidia", "software"})); + app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)") + ->check(CLI::IsMember({"fast", "balanced", "quality"})); + app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)") + ->check(CLI::IsMember({"low-latency", "balanced"})); + app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)") + ->check(CLI::Range(0, 51)); + app.add_option("--gop", options.gop, "Encoder GOP length in frames") + ->check(CLI::PositiveNumber); + app.add_option("--b-frames", options.b_frames, "Encoder B-frame count") + ->check(CLI::NonNegativeNumber); + app.add_option("--start-offset-seconds", options.start_offset_seconds, "Offset to apply after the synced common start time in seconds") + ->check(CLI::NonNegativeNumber); + auto *duration_option = app.add_option("--duration-seconds", options.duration_seconds, "Limit export duration in seconds after sync") + ->check(CLI::PositiveNumber); + auto *output_fps_option = app.add_option("--output-fps", options.output_fps, "Composite output frame rate (default: max input fps)") + ->check(CLI::PositiveNumber); + app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution") + ->check(CLI::Range(0.1, 1.0)); + + try { + app.parse(argc, argv); + } catch 
(const CLI::ParseError &error) { + return app.exit(error); + } + options.has_duration = duration_option->count() > 0; + options.has_output_fps = output_fps_option->count() > 0; + + if (options.input_paths.empty() && options.segment_dir.empty()) { + spdlog::error("provide either --segment-dir or repeat --input exactly 4 times"); + return exit_code(ToolExitCode::UsageError); + } + if (options.b_frames > options.gop) { + spdlog::error( + "invalid encoder config: b-frames {} must be <= gop {}", + options.b_frames, + options.gop); + return exit_code(ToolExitCode::UsageError); + } + + auto codec = parse_codec(options.codec); + if (!codec) { + spdlog::error("{}", codec.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto encoder_device = parse_encoder_device(options.encoder_device); + if (!encoder_device) { + spdlog::error("{}", encoder_device.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto preset = parse_preset(options.preset); + if (!preset) { + spdlog::error("{}", preset.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto tune = parse_tune(options.tune); + if (!tune) { + spdlog::error("{}", tune.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto sources = resolve_sources(options); + if (!sources) { + spdlog::error("{}", sources.error()); + return exit_code(ToolExitCode::UsageError); + } + + const auto output_path = derive_grid_output_path(options, *sources); + if (output_path.has_parent_path()) { + std::filesystem::create_directories(output_path.parent_path()); + } + + const EncodeTuning tuning{ + .preset = *preset, + .tune = *tune, + .quality = options.quality, + .gop = options.gop, + .b_frames = options.b_frames, + }; + + std::vector streams{}; + streams.reserve(sources->size()); + for (const auto &source : *sources) { + auto stream = open_camera_stream(source); + if (!stream) { + close_camera_streams(streams); + spdlog::error("{}", stream.error()); + return exit_code(ToolExitCode::RuntimeError); 
+ } + streams.push_back(std::move(*stream)); + } + + const auto sync_start_ts = std::max_element( + streams.begin(), + streams.end(), + [](const auto &left, const auto &right) { + return left.first_timestamp_ns < right.first_timestamp_ns; + })->first_timestamp_ns; + const auto start_offset_ns = static_cast(std::llround(options.start_offset_seconds * 1'000'000'000.0)); + const auto effective_start_ts = sync_start_ts + start_offset_ns; + + const auto common_end_ts = std::min_element( + streams.begin(), + streams.end(), + [](const auto &left, const auto &right) { + return left.last_timestamp_ns < right.last_timestamp_ns; + })->last_timestamp_ns; + const auto requested_end_exclusive_ts = options.has_duration + ? effective_start_ts + static_cast(std::llround(options.duration_seconds * 1'000'000'000.0)) + : common_end_ts + 1; + const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1); + if (effective_start_ts >= output_end_exclusive_ts) { + close_camera_streams(streams); + spdlog::error( + "synced time window is empty: start_ts={} end_ts={}", + effective_start_ts, + output_end_exclusive_ts); + return exit_code(ToolExitCode::UsageError); + } + + std::uint32_t source_width = streams.front().width; + std::uint32_t source_height = streams.front().height; + float max_input_fps = streams.front().fps; + for (const auto &stream : streams) { + if (stream.width != source_width || stream.height != source_height) { + close_camera_streams(streams); + spdlog::error( + "all inputs must share the same resolution: expected {}x{}, got {}x{} for {}", + source_width, + source_height, + stream.width, + stream.height, + stream.source.path.string()); + return exit_code(ToolExitCode::UsageError); + } + max_input_fps = std::max(max_input_fps, stream.fps); + } + + const auto output_fps = options.has_output_fps ? 
static_cast(options.output_fps) : max_input_fps; + const auto output_period_ns = frame_period_ns(output_fps); + const auto total_frames_to_emit = + static_cast((output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns); + + for (auto &stream : streams) { + stream.sync_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts}); + if (stream.sync_position < 0) { + close_camera_streams(streams); + spdlog::error( + "failed to compute synced start frame for {} at timestamp {}", + stream.source.path.string(), + effective_start_ts); + return exit_code(ToolExitCode::RuntimeError); + } + + stream.camera->setSVOPosition(stream.sync_position); + std::uint64_t current_timestamp_ns = 0; + auto current = read_into_mat( + *stream.camera, + stream.runtime, + stream.current_frame, + std::nullopt, + stream.nominal_frame_period_ns, + current_timestamp_ns, + stream.source.label); + if (!current) { + close_camera_streams(streams); + spdlog::error("{}", current.error()); + return exit_code(ToolExitCode::RuntimeError); + } + stream.current_timestamp_ns = current_timestamp_ns; + + auto next = fill_next_frame(stream); + if (!next) { + close_camera_streams(streams); + spdlog::error("{}", next.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) { + auto promote = promote_next_frame(stream); + if (!promote) { + close_camera_streams(streams); + spdlog::error("{}", promote.error()); + return exit_code(ToolExitCode::RuntimeError); + } + } + + spdlog::info( + "ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}", + stream.source.path.string(), + stream.source.label, + stream.sync_position, + stream.first_timestamp_ns, + stream.current_timestamp_ns, + stream.has_next ? 
stream.next_timestamp_ns : 0); + } + + const auto tile_width = static_cast(std::llround(static_cast(source_width) * options.tile_scale)); + const auto tile_height = static_cast(std::llround(static_cast(source_height) * options.tile_scale)); + if (tile_width <= 0 || tile_height <= 0) { + close_camera_streams(streams); + spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale); + return exit_code(ToolExitCode::UsageError); + } + + const auto composite_width = tile_width * 2; + const auto composite_height = tile_height * 2; + + Mp4Writer writer{}; + if (auto open_writer = writer.open( + output_path, + *codec, + *encoder_device, + static_cast(composite_width), + static_cast(composite_height), + output_fps, + tuning); + !open_writer) { + close_camera_streams(streams); + spdlog::error("failed to initialize MP4 writer: {}", open_writer.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + cv::Mat composite(composite_height, composite_width, CV_8UC3); + std::vector resized_tiles(streams.size()); + ProgressBar progress{total_frames_to_emit}; + + for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) { + const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns; + if (target_timestamp_ns >= output_end_exclusive_ts) { + break; + } + + for (auto &stream : streams) { + while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) { + auto promote = promote_next_frame(stream); + if (!promote) { + progress.finish(emitted_frames, false); + close_camera_streams(streams); + spdlog::error("{}", promote.error()); + return exit_code(ToolExitCode::RuntimeError); + } + } + } + + composite.setTo(cv::Scalar(0, 0, 0)); + for (std::size_t index = 0; index < streams.size(); ++index) { + auto &stream = streams[index]; + cv::Mat source_view( + static_cast(stream.current_frame.getHeight()), + static_cast(stream.current_frame.getWidth()), + CV_8UC3, + 
stream.current_frame.getPtr(sl::MEM::CPU), + stream.current_frame.getStepBytes(sl::MEM::CPU)); + cv::resize(source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0, cv::INTER_AREA); + + const int row = static_cast(index / 2); + const int col = static_cast(index % 2); + const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height}; + resized_tiles[index].copyTo(composite(roi)); + } + + draw_timestamp_overlay(composite, target_timestamp_ns); + if (auto write = writer.write_bgr_frame( + composite.data, + static_cast(composite.step), + target_timestamp_ns - effective_start_ts); + !write) { + progress.finish(emitted_frames, false); + close_camera_streams(streams); + spdlog::error("failed to encode or mux frame: {}", write.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + progress.update(emitted_frames + 1); + } + + if (auto flush = writer.flush(); !flush) { + progress.finish(total_frames_to_emit, false); + close_camera_streams(streams); + spdlog::error("failed to finalize MP4 output: {}", flush.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + progress.finish(total_frames_to_emit, true); + close_camera_streams(streams); + spdlog::info( + "converted {} synced frames to '{}' using codec={} hardware={}", + total_frames_to_emit, + output_path.string(), + cvmmap_streamer::zed_tools::codec_name(*codec), + writer.using_hardware()); + return exit_code(ToolExitCode::Success); +} diff --git a/src/tools/zed_svo_mp4_support.cpp b/src/tools/zed_svo_mp4_support.cpp new file mode 100644 index 0000000..3b7e523 --- /dev/null +++ b/src/tools/zed_svo_mp4_support.cpp @@ -0,0 +1,785 @@ +#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp" + +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +} + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cvmmap_streamer::zed_tools { +namespace { + +struct EncoderCandidate { + 
std::string name{}; + bool using_hardware{false}; + AVPixelFormat pixel_format{AV_PIX_FMT_NONE}; +}; + +struct ResolvedEncoderSettings { + std::string requested_preset{}; + std::string requested_tune{}; + std::string mapped_preset{}; + std::optional mapped_tune{}; + std::optional rate_control_mode{}; + std::string quality_key{}; + int quality_value{kDefaultQuality}; + std::uint32_t gop{kDefaultGopSize}; + std::uint32_t b_frames{kDefaultBFrames}; +}; + +[[nodiscard]] +std::string av_error_string(const int error_code) { + char buffer[AV_ERROR_MAX_STRING_SIZE]{}; + av_strerror(error_code, buffer, sizeof(buffer)); + return std::string(buffer); +} + +[[nodiscard]] +AVCodecID codec_id(const CodecType codec) { + return codec == CodecType::H265 ? AV_CODEC_ID_HEVC : AV_CODEC_ID_H264; +} + +[[nodiscard]] +AVRational frame_rate_rational(const float fps) { + if (!(fps > 0.0f)) { + return AVRational{30, 1}; + } + + const auto scaled = static_cast(std::llround(static_cast(fps) * 1000.0)); + if (scaled <= 0) { + return AVRational{30, 1}; + } + return AVRational{scaled, 1000}; +} + +[[nodiscard]] +std::string format_duration(const double seconds_raw) { + const auto seconds = seconds_raw > 0.0 ? static_cast(std::llround(seconds_raw)) : 0ll; + const auto hours = seconds / 3600; + const auto minutes = (seconds % 3600) / 60; + const auto secs = seconds % 60; + + char buffer[32]{}; + if (hours > 0) { + std::snprintf(buffer, sizeof(buffer), "%02lld:%02lld:%02lld", hours, minutes, secs); + } else { + std::snprintf(buffer, sizeof(buffer), "%02lld:%02lld", minutes, secs); + } + return std::string(buffer); +} + +[[nodiscard]] +std::vector encoder_candidates(const CodecType codec, const EncoderDeviceType device) { + const std::string hardware_name = codec == CodecType::H265 ? "hevc_nvenc" : "h264_nvenc"; + const std::string software_name = codec == CodecType::H265 ? 
"libx265" : "libx264"; + + switch (device) { + case EncoderDeviceType::Auto: + return { + EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12}, + EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P}, + }; + case EncoderDeviceType::Nvidia: + return { + EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12}, + }; + case EncoderDeviceType::Software: + return { + EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P}, + }; + } + + return {}; +} + +[[nodiscard]] +std::string mapped_preset_value(const EncoderCandidate &candidate, const PresetKind preset) { + if (candidate.using_hardware) { + switch (preset) { + case PresetKind::Fast: + return "p1"; + case PresetKind::Balanced: + return "p4"; + case PresetKind::Quality: + return "p7"; + } + } + + switch (preset) { + case PresetKind::Fast: + return "veryfast"; + case PresetKind::Balanced: + return "medium"; + case PresetKind::Quality: + return "slow"; + } + + return "veryfast"; +} + +[[nodiscard]] +std::optional mapped_tune_value(const EncoderCandidate &candidate, const TuneKind tune) { + if (candidate.using_hardware) { + return tune == TuneKind::LowLatency ? 
std::optional{"ull"} : std::optional{"hq"}; + } + + if (candidate.name == "libx264" && tune == TuneKind::LowLatency) { + return std::optional{"zerolatency"}; + } + return std::nullopt; +} + +[[nodiscard]] +std::optional x265_params_value(const EncoderCandidate &candidate, const TuneKind tune) { + if (candidate.name != "libx265") { + return std::nullopt; + } + if (tune == TuneKind::LowLatency) { + return std::optional{"repeat-headers=1:scenecut=0"}; + } + return std::optional{"repeat-headers=1"}; +} + +[[nodiscard]] +std::expected set_string_option(AVCodecContext *context, const char *key, const std::string &value) { + const auto result = av_opt_set(context->priv_data, key, value.c_str(), 0); + if (result < 0) { + return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + value + "': " + av_error_string(result)); + } + return {}; +} + +[[nodiscard]] +std::expected set_int_option(AVCodecContext *context, const char *key, const std::int64_t value) { + const auto result = av_opt_set_int(context->priv_data, key, value, 0); + if (result < 0) { + return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + std::to_string(value) + "': " + av_error_string(result)); + } + return {}; +} + +[[nodiscard]] +std::expected configure_codec_context( + AVCodecContext *context, + const EncoderCandidate &candidate, + const CodecType codec, + const std::uint32_t width, + const std::uint32_t height, + const AVRational framerate, + const EncodeTuning &tuning) { + context->codec_type = AVMEDIA_TYPE_VIDEO; + context->codec_id = codec_id(codec); + context->width = static_cast(width); + context->height = static_cast(height); + context->pix_fmt = candidate.pixel_format; + context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + context->time_base = AVRational{1, static_cast(kNanosPerSecond)}; + context->framerate = framerate; + context->gop_size = static_cast(tuning.gop); + context->max_b_frames = static_cast(tuning.b_frames); + context->thread_count = 1; 
+ + ResolvedEncoderSettings resolved{ + .requested_preset = std::string(preset_name(tuning.preset)), + .requested_tune = std::string(tune_name(tuning.tune)), + .mapped_preset = mapped_preset_value(candidate, tuning.preset), + .mapped_tune = mapped_tune_value(candidate, tuning.tune), + .quality_value = tuning.quality, + .gop = tuning.gop, + .b_frames = tuning.b_frames, + }; + + if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) { + return std::unexpected(set.error()); + } + if (resolved.mapped_tune) { + if (auto set = set_string_option(context, "tune", *resolved.mapped_tune); !set) { + return std::unexpected(set.error()); + } + } + + if (candidate.using_hardware) { + resolved.rate_control_mode = "vbr"; + resolved.quality_key = "cq"; + if (auto set = set_string_option(context, "rc", *resolved.rate_control_mode); !set) { + return std::unexpected(set.error()); + } + if (auto set = set_int_option(context, "cq", resolved.quality_value); !set) { + return std::unexpected(set.error()); + } + if (tuning.tune == TuneKind::LowLatency) { + if (auto set = set_string_option(context, "zerolatency", "1"); !set) { + return std::unexpected(set.error()); + } + if (auto set = set_string_option(context, "rc-lookahead", "0"); !set) { + return std::unexpected(set.error()); + } + } + } else { + resolved.quality_key = "crf"; + if (auto set = set_int_option(context, "crf", resolved.quality_value); !set) { + return std::unexpected(set.error()); + } + if (const auto x265_params = x265_params_value(candidate, tuning.tune); x265_params) { + if (auto set = set_string_option(context, "x265-params", *x265_params); !set) { + return std::unexpected(set.error()); + } + } + } + + if (auto set = set_int_option(context, "forced-idr", 1); !set) { + return std::unexpected(set.error()); + } + + return resolved; +} + +struct OpenedEncoder { + AVCodecContext *context{nullptr}; + EncoderCandidate candidate{}; + ResolvedEncoderSettings resolved{}; +}; + +[[nodiscard]] 
+std::expected open_encoder( + const CodecType codec, + const EncoderDeviceType device, + const std::uint32_t width, + const std::uint32_t height, + const AVRational framerate, + const EncodeTuning &tuning) { + std::string last_error{}; + + for (const auto &candidate : encoder_candidates(codec, device)) { + const auto *encoder = avcodec_find_encoder_by_name(candidate.name.c_str()); + if (encoder == nullptr) { + last_error = "FFmpeg encoder '" + candidate.name + "' is unavailable"; + if (device == EncoderDeviceType::Auto) { + spdlog::warn( + "encoder '{}' unavailable for codec={} in auto mode, trying next candidate", + candidate.name, + codec_name(codec)); + continue; + } + return std::unexpected(last_error); + } + + auto *context = avcodec_alloc_context3(encoder); + if (context == nullptr) { + return std::unexpected("failed to allocate FFmpeg encoder context"); + } + + auto resolved = configure_codec_context(context, candidate, codec, width, height, framerate, tuning); + if (!resolved) { + avcodec_free_context(&context); + return std::unexpected(resolved.error()); + } + + const auto open_result = avcodec_open2(context, encoder, nullptr); + if (open_result < 0) { + last_error = "failed to open FFmpeg encoder '" + candidate.name + "': " + av_error_string(open_result); + avcodec_free_context(&context); + if (device == EncoderDeviceType::Auto) { + spdlog::warn( + "encoder '{}' failed to open in auto mode: {}. 
trying software fallback", + candidate.name, + av_error_string(open_result)); + continue; + } + return std::unexpected(last_error); + } + + return OpenedEncoder{ + .context = context, + .candidate = candidate, + .resolved = std::move(*resolved), + }; + } + + if (last_error.empty()) { + last_error = "no usable FFmpeg encoder candidates were configured"; + } + return std::unexpected(last_error); +} + +} // namespace + +struct ProgressBar::Impl { + using Clock = std::chrono::steady_clock; + + explicit Impl(const std::uint64_t total_frames_arg) + : total_frames(total_frames_arg), + enabled(::isatty(STDERR_FILENO) == 1), + started_at(Clock::now()), + last_render_at(started_at) {} + + void render(const std::uint64_t completed_frames, const bool force) { + if (!enabled || total_frames == 0) { + return; + } + + const auto now = Clock::now(); + if (!force && rendered && now - last_render_at < std::chrono::milliseconds(125)) { + return; + } + last_render_at = now; + rendered = true; + + const auto bounded_completed = completed_frames > total_frames ? total_frames : completed_frames; + const double ratio = static_cast(bounded_completed) / static_cast(total_frames); + const auto filled = static_cast(std::llround(ratio * 24.0)); + std::string bar{}; + bar.reserve(24); + for (std::size_t i = 0; i < 24; ++i) { + bar.push_back(i < filled ? '#' : '-'); + } + + const auto elapsed_seconds = std::chrono::duration(now - started_at).count(); + const auto fps = elapsed_seconds > 0.0 ? static_cast(bounded_completed) / elapsed_seconds : 0.0; + const auto eta_seconds = fps > 0.0 ? 
static_cast(total_frames - bounded_completed) / fps : 0.0; + + char line[256]{}; + std::snprintf( + line, + sizeof(line), + "\r[%s] %6.2f%% %llu/%llu | %5.1f fps | %s elapsed | %s ETA\x1b[K", + bar.c_str(), + ratio * 100.0, + static_cast(bounded_completed), + static_cast(total_frames), + fps, + format_duration(elapsed_seconds).c_str(), + format_duration(eta_seconds).c_str()); + std::fprintf(stderr, "%s", line); + std::fflush(stderr); + } + + std::uint64_t total_frames{0}; + bool enabled{false}; + bool rendered{false}; + Clock::time_point started_at{}; + Clock::time_point last_render_at{}; +}; + +struct Mp4Writer::Impl { + [[nodiscard]] + std::expected open( + const std::filesystem::path &output_path, + const CodecType codec_arg, + const EncoderDeviceType encoder_device, + const std::uint32_t width, + const std::uint32_t height, + const float fps, + const EncodeTuning &tuning) { + close(); + + codec = codec_arg; + frame_rate = frame_rate_rational(fps); + auto encoder = open_encoder(codec, encoder_device, width, height, frame_rate, tuning); + if (!encoder) { + return std::unexpected(encoder.error()); + } + + encoder_context = encoder->context; + encoder_name = encoder->candidate.name; + using_hardware = encoder->candidate.using_hardware; + encoder_pixel_format = encoder->candidate.pixel_format; + resolved_settings = std::move(encoder->resolved); + + scaler = sws_getCachedContext( + nullptr, + static_cast(width), + static_cast(height), + AV_PIX_FMT_BGR24, + static_cast(width), + static_cast(height), + encoder_pixel_format, + SWS_BILINEAR, + nullptr, + nullptr, + nullptr); + if (scaler == nullptr) { + return std::unexpected("failed to create swscale conversion context"); + } + + frame = av_frame_alloc(); + if (frame == nullptr) { + return std::unexpected("failed to allocate FFmpeg frame"); + } + frame->format = encoder_pixel_format; + frame->width = encoder_context->width; + frame->height = encoder_context->height; + const auto frame_buffer_result = 
av_frame_get_buffer(frame, 32); + if (frame_buffer_result < 0) { + return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer_result)); + } + + packet = av_packet_alloc(); + if (packet == nullptr) { + return std::unexpected("failed to allocate FFmpeg packet"); + } + + const auto alloc_result = avformat_alloc_output_context2( + &format_context, + nullptr, + "mp4", + output_path.string().c_str()); + if (alloc_result < 0 || format_context == nullptr) { + return std::unexpected("failed to allocate MP4 output context: " + av_error_string(alloc_result)); + } + + video_stream = avformat_new_stream(format_context, nullptr); + if (video_stream == nullptr) { + return std::unexpected("failed to allocate MP4 video stream"); + } + + video_stream->time_base = encoder_context->time_base; + video_stream->avg_frame_rate = frame_rate; + + const auto params_result = avcodec_parameters_from_context(video_stream->codecpar, encoder_context); + if (params_result < 0) { + return std::unexpected("failed to copy encoder parameters into MP4 stream: " + av_error_string(params_result)); + } + + if ((format_context->oformat->flags & AVFMT_NOFILE) == 0) { + const auto open_result = avio_open2( + &format_context->pb, + output_path.string().c_str(), + AVIO_FLAG_WRITE, + nullptr, + nullptr); + if (open_result < 0) { + return std::unexpected("failed to open output MP4 '" + output_path.string() + "': " + av_error_string(open_result)); + } + } + + AVDictionary *muxer_options = nullptr; + av_dict_set(&muxer_options, "movflags", "+faststart", 0); + const auto header_result = avformat_write_header(format_context, &muxer_options); + av_dict_free(&muxer_options); + if (header_result < 0) { + return std::unexpected("failed to write MP4 header: " + av_error_string(header_result)); + } + + spdlog::info( + "ZED_SVO_MP4_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} requested_preset={} requested_tune={} mapped_preset={} mapped_tune={} rc={} {}={} gop={} 
b_frames={} output={}", + codec_name(codec), + encoder_name, + using_hardware, + width, + height, + frame_rate.num, + frame_rate.den, + resolved_settings.requested_preset, + resolved_settings.requested_tune, + resolved_settings.mapped_preset, + resolved_settings.mapped_tune.value_or("none"), + resolved_settings.rate_control_mode.value_or("auto"), + resolved_settings.quality_key, + resolved_settings.quality_value, + resolved_settings.gop, + resolved_settings.b_frames, + output_path.string()); + return {}; + } + + [[nodiscard]] + std::expected write_bgr_frame( + const std::uint8_t *data, + const std::size_t row_stride_bytes, + const std::uint64_t relative_timestamp_ns) { + if (encoder_context == nullptr || frame == nullptr || scaler == nullptr || packet == nullptr || video_stream == nullptr) { + return std::unexpected("MP4 writer is not initialized"); + } + + const auto writable_result = av_frame_make_writable(frame); + if (writable_result < 0) { + return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(writable_result)); + } + + const std::uint8_t *source_planes[4]{data, nullptr, nullptr, nullptr}; + const int source_strides[4]{static_cast(row_stride_bytes), 0, 0, 0}; + sws_scale( + scaler, + source_planes, + source_strides, + 0, + encoder_context->height, + frame->data, + frame->linesize); + + frame->pts = static_cast(relative_timestamp_ns); + + const auto send_result = avcodec_send_frame(encoder_context, frame); + if (send_result < 0) { + return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result)); + } + + return drain_packets(); + } + + [[nodiscard]] + std::expected flush() { + if (encoder_context == nullptr) { + return {}; + } + + const auto flush_result = avcodec_send_frame(encoder_context, nullptr); + if (flush_result < 0 && flush_result != AVERROR_EOF) { + return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result)); + } + + auto drained = drain_packets(); + if 
(!drained) { + return drained; + } + + return close_output(); + } + + [[nodiscard]] + std::expected drain_packets() { + while (true) { + const auto receive_result = avcodec_receive_packet(encoder_context, packet); + if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) { + break; + } + if (receive_result < 0) { + return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result)); + } + + packet->stream_index = video_stream->index; + av_packet_rescale_ts(packet, encoder_context->time_base, video_stream->time_base); + + const auto write_result = av_interleaved_write_frame(format_context, packet); + av_packet_unref(packet); + if (write_result < 0) { + return std::unexpected("failed to write MP4 packet: " + av_error_string(write_result)); + } + } + + return {}; + } + + [[nodiscard]] + std::expected close_output() { + if (format_context == nullptr || trailer_written) { + return {}; + } + + const auto trailer_result = av_write_trailer(format_context); + if (trailer_result < 0) { + return std::unexpected("failed to write MP4 trailer: " + av_error_string(trailer_result)); + } + trailer_written = true; + return {}; + } + + void close() { + (void)close_output(); + + if (packet != nullptr) { + av_packet_free(&packet); + } + if (frame != nullptr) { + av_frame_free(&frame); + } + if (encoder_context != nullptr) { + avcodec_free_context(&encoder_context); + } + if (scaler != nullptr) { + sws_freeContext(scaler); + scaler = nullptr; + } + if (format_context != nullptr) { + if ((format_context->oformat->flags & AVFMT_NOFILE) == 0 && format_context->pb != nullptr) { + avio_closep(&format_context->pb); + } + avformat_free_context(format_context); + format_context = nullptr; + } + + video_stream = nullptr; + encoder_name.clear(); + using_hardware = false; + trailer_written = false; + resolved_settings = ResolvedEncoderSettings{}; + } + + ~Impl() { + close(); + } + + CodecType codec{CodecType::H265}; + AVCodecContext 
*encoder_context{nullptr}; + AVFormatContext *format_context{nullptr}; + AVStream *video_stream{nullptr}; + AVFrame *frame{nullptr}; + AVPacket *packet{nullptr}; + SwsContext *scaler{nullptr}; + AVPixelFormat encoder_pixel_format{AV_PIX_FMT_NONE}; + AVRational frame_rate{30, 1}; + std::string encoder_name{}; + ResolvedEncoderSettings resolved_settings{}; + bool using_hardware{false}; + bool trailer_written{false}; +}; + +std::expected parse_codec(const std::string_view raw) { + if (raw == "h264") { + return CodecType::H264; + } + if (raw == "h265") { + return CodecType::H265; + } + return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)"); +} + +std::expected parse_encoder_device(const std::string_view raw) { + if (raw == "auto") { + return EncoderDeviceType::Auto; + } + if (raw == "nvidia") { + return EncoderDeviceType::Nvidia; + } + if (raw == "software") { + return EncoderDeviceType::Software; + } + return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)"); +} + +std::expected parse_preset(const std::string_view raw) { + if (raw == "fast") { + return PresetKind::Fast; + } + if (raw == "balanced") { + return PresetKind::Balanced; + } + if (raw == "quality") { + return PresetKind::Quality; + } + return std::unexpected("invalid preset: '" + std::string(raw) + "' (expected: fast|balanced|quality)"); +} + +std::expected parse_tune(const std::string_view raw) { + if (raw == "low-latency") { + return TuneKind::LowLatency; + } + if (raw == "balanced") { + return TuneKind::Balanced; + } + return std::unexpected("invalid tune: '" + std::string(raw) + "' (expected: low-latency|balanced)"); +} + +std::string_view codec_name(const CodecType codec) { + return codec == CodecType::H265 ? 
"h265" : "h264"; +} + +std::string_view preset_name(const PresetKind preset) { + switch (preset) { + case PresetKind::Fast: + return "fast"; + case PresetKind::Balanced: + return "balanced"; + case PresetKind::Quality: + return "quality"; + } + return "fast"; +} + +std::string_view tune_name(const TuneKind tune) { + switch (tune) { + case TuneKind::LowLatency: + return "low-latency"; + case TuneKind::Balanced: + return "balanced"; + } + return "low-latency"; +} + +std::uint64_t frame_period_ns(const float fps) { + if (!(fps > 0.0f)) { + return 33'333'333ull; + } + return static_cast(std::llround(1'000'000'000.0 / static_cast(fps))); +} + +std::filesystem::path derive_output_path(const std::filesystem::path &input_path) { + auto output_path = input_path; + output_path.replace_extension(".mp4"); + return output_path; +} + +ProgressBar::ProgressBar(const std::uint64_t total_frames) + : impl_(std::make_unique(total_frames)) {} + +ProgressBar::~ProgressBar() = default; + +void ProgressBar::update(const std::uint64_t completed_frames) { + impl_->render(completed_frames, false); +} + +void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) { + if (impl_ == nullptr || !impl_->enabled) { + return; + } + + impl_->render(completed_frames, true); + if (!impl_->rendered) { + return; + } + + std::fprintf(stderr, "%s", success ? 
"\n" : " [failed]\n"); + std::fflush(stderr); +} + +Mp4Writer::Mp4Writer() + : impl_(std::make_unique()) {} + +Mp4Writer::Mp4Writer(Mp4Writer &&) noexcept = default; +Mp4Writer &Mp4Writer::operator=(Mp4Writer &&) noexcept = default; +Mp4Writer::~Mp4Writer() = default; + +std::expected Mp4Writer::open( + const std::filesystem::path &output_path, + const CodecType codec, + const EncoderDeviceType encoder_device, + const std::uint32_t width, + const std::uint32_t height, + const float fps, + const EncodeTuning &tuning) { + return impl_->open(output_path, codec, encoder_device, width, height, fps, tuning); +} + +std::expected Mp4Writer::write_bgr_frame( + const std::uint8_t *data, + const std::size_t row_stride_bytes, + const std::uint64_t relative_timestamp_ns) { + return impl_->write_bgr_frame(data, row_stride_bytes, relative_timestamp_ns); +} + +std::expected Mp4Writer::flush() { + return impl_->flush(); +} + +bool Mp4Writer::using_hardware() const { + return impl_ != nullptr && impl_->using_hardware; +} + +} // namespace cvmmap_streamer::zed_tools diff --git a/src/tools/zed_svo_to_mp4.cpp b/src/tools/zed_svo_to_mp4.cpp new file mode 100644 index 0000000..28e743c --- /dev/null +++ b/src/tools/zed_svo_to_mp4.cpp @@ -0,0 +1,319 @@ +#include +#include + +#include + +#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp" + +#include +#include +#include +#include +#include +#include + +namespace { + +using cvmmap_streamer::zed_tools::EncodeTuning; +using cvmmap_streamer::zed_tools::Mp4Writer; +using cvmmap_streamer::zed_tools::ProgressBar; +using cvmmap_streamer::zed_tools::derive_output_path; +using cvmmap_streamer::zed_tools::frame_period_ns; +using cvmmap_streamer::zed_tools::parse_codec; +using cvmmap_streamer::zed_tools::parse_encoder_device; +using cvmmap_streamer::zed_tools::parse_preset; +using cvmmap_streamer::zed_tools::parse_tune; + +enum class ToolExitCode : int { + Success = 0, + UsageError = 2, + RuntimeError = 3, +}; + +struct CliOptions { + std::string 
input_path{}; + std::string output_path{}; + std::string codec{"h265"}; + std::string encoder_device{"auto"}; + std::string preset{"fast"}; + std::string tune{"low-latency"}; + int quality{cvmmap_streamer::zed_tools::kDefaultQuality}; + std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize}; + std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames}; + std::uint32_t start_frame{0}; + std::uint32_t end_frame{0}; + bool has_end_frame{false}; +}; + +[[nodiscard]] +constexpr int exit_code(const ToolExitCode code) { + return static_cast(code); +} + +[[nodiscard]] +std::string zed_string(const sl::String &value) { + return std::string(value.c_str() == nullptr ? "" : value.c_str()); +} + +[[nodiscard]] +std::string zed_status_string(const sl::ERROR_CODE code) { + return zed_string(sl::toString(code)); +} + +[[nodiscard]] +std::expected validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) { + if (mat.getDataType() != sl::MAT_TYPE::U8_C3) { + return std::unexpected(std::string(label) + " must be U8_C3"); + } + if (mat.getWidth() == 0 || mat.getHeight() == 0) { + return std::unexpected(std::string(label) + " dimensions must be non-zero"); + } + if (mat.getPtr(sl::MEM::CPU) == nullptr) { + return std::unexpected(std::string(label) + " CPU buffer is null"); + } + return {}; +} + +} // namespace + +int main(int argc, char **argv) { + CliOptions options{}; + + CLI::App app{"zed_svo_to_mp4 - convert ZED SVO/SVO2 playback to MP4"}; + app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required(); + app.add_option("--output", options.output_path, "Output MP4 file (default: input path with .mp4 suffix)"); + app.add_option("--codec", options.codec, "Video codec (h264|h265)") + ->check(CLI::IsMember({"h264", "h265"})); + app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)") + ->check(CLI::IsMember({"auto", "nvidia", "software"})); + app.add_option("--preset", options.preset, "Encoding 
preset (fast|balanced|quality)") + ->check(CLI::IsMember({"fast", "balanced", "quality"})); + app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)") + ->check(CLI::IsMember({"low-latency", "balanced"})); + app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)") + ->check(CLI::Range(0, 51)); + app.add_option("--gop", options.gop, "Encoder GOP length in frames") + ->check(CLI::PositiveNumber); + app.add_option("--b-frames", options.b_frames, "Encoder B-frame count") + ->check(CLI::NonNegativeNumber); + app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)") + ->check(CLI::NonNegativeNumber); + auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)") + ->check(CLI::NonNegativeNumber); + + try { + app.parse(argc, argv); + } catch (const CLI::ParseError &error) { + return app.exit(error); + } + options.has_end_frame = end_frame_option->count() > 0; + + auto codec = parse_codec(options.codec); + if (!codec) { + spdlog::error("{}", codec.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto encoder_device = parse_encoder_device(options.encoder_device); + if (!encoder_device) { + spdlog::error("{}", encoder_device.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto preset = parse_preset(options.preset); + if (!preset) { + spdlog::error("{}", preset.error()); + return exit_code(ToolExitCode::UsageError); + } + + auto tune = parse_tune(options.tune); + if (!tune) { + spdlog::error("{}", tune.error()); + return exit_code(ToolExitCode::UsageError); + } + + if (options.has_end_frame && options.end_frame < options.start_frame) { + spdlog::error( + "invalid frame range: start-frame={} end-frame={}", + options.start_frame, + options.end_frame); + return exit_code(ToolExitCode::UsageError); + } + if (options.b_frames > options.gop) { + spdlog::error( + "invalid encoder config: b-frames {} must 
be <= gop {}", + options.b_frames, + options.gop); + return exit_code(ToolExitCode::UsageError); + } + + const auto output_path = options.output_path.empty() + ? derive_output_path(std::filesystem::path{options.input_path}) + : std::filesystem::path{options.output_path}; + if (output_path.empty()) { + spdlog::error("output path must not be empty"); + return exit_code(ToolExitCode::UsageError); + } + if (output_path.has_parent_path()) { + std::filesystem::create_directories(output_path.parent_path()); + } + + const EncodeTuning tuning{ + .preset = *preset, + .tune = *tune, + .quality = options.quality, + .gop = options.gop, + .b_frames = options.b_frames, + }; + + sl::Camera camera{}; + auto close_camera = [&]() { + if (camera.isOpened()) { + camera.close(); + } + }; + + sl::InitParameters init{}; + init.input.setFromSVOFile(options.input_path.c_str()); + init.svo_real_time_mode = false; + init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE; + init.coordinate_units = sl::UNIT::METER; + init.depth_mode = sl::DEPTH_MODE::NONE; + init.sdk_verbose = false; + + const auto open_status = camera.open(init); + if (open_status != sl::ERROR_CODE::SUCCESS) { + spdlog::error( + "failed to open SVO '{}': {}", + options.input_path, + zed_status_string(open_status)); + return exit_code(ToolExitCode::RuntimeError); + } + + const auto total_frames = camera.getSVONumberOfFrames(); + if (total_frames <= 0) { + close_camera(); + spdlog::error("input SVO has no frames"); + return exit_code(ToolExitCode::RuntimeError); + } + if (options.start_frame >= static_cast(total_frames)) { + close_camera(); + spdlog::error( + "start-frame {} is out of range for {} frames", + options.start_frame, + total_frames); + return exit_code(ToolExitCode::UsageError); + } + if (options.has_end_frame && options.end_frame >= static_cast(total_frames)) { + close_camera(); + spdlog::error( + "end-frame {} is out of range for {} frames", + options.end_frame, + total_frames); + return 
exit_code(ToolExitCode::UsageError); + } + + camera.setSVOPosition(static_cast(options.start_frame)); + + const auto camera_info = camera.getCameraInformation(); + const auto &camera_config = camera_info.camera_configuration; + const auto width = static_cast(camera_config.resolution.width); + const auto height = static_cast(camera_config.resolution.height); + if (width == 0 || height == 0) { + close_camera(); + spdlog::error("camera resolution reported by the ZED SDK is invalid"); + return exit_code(ToolExitCode::RuntimeError); + } + + Mp4Writer writer{}; + if (auto open_writer = writer.open(output_path, *codec, *encoder_device, width, height, camera_config.fps, tuning); !open_writer) { + close_camera(); + spdlog::error("failed to initialize MP4 writer: {}", open_writer.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + sl::RuntimeParameters runtime_parameters{}; + sl::Mat left_frame{}; + std::optional first_timestamp_ns{}; + std::optional last_timestamp_ns{}; + std::uint64_t emitted_frames{0}; + const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps); + const auto last_frame = options.has_end_frame + ? 
options.end_frame + : static_cast(total_frames - 1); + const auto total_frames_to_emit = static_cast(last_frame - options.start_frame + 1); + ProgressBar progress{total_frames_to_emit}; + + while (options.start_frame + emitted_frames <= last_frame) { + const auto grab_status = camera.grab(runtime_parameters); + if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) { + break; + } + if (grab_status != sl::ERROR_CODE::SUCCESS) { + progress.finish(emitted_frames, false); + close_camera(); + spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status)); + return exit_code(ToolExitCode::RuntimeError); + } + + const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU); + if (image_status != sl::ERROR_CODE::SUCCESS) { + progress.finish(emitted_frames, false); + close_camera(); + spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status)); + return exit_code(ToolExitCode::RuntimeError); + } + if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) { + progress.finish(emitted_frames, false); + close_camera(); + spdlog::error("{}", valid.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds(); + if (timestamp_ns == 0) { + timestamp_ns = emitted_frames * nominal_frame_period_ns; + } + if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) { + timestamp_ns = *last_timestamp_ns + 1; + } + last_timestamp_ns = timestamp_ns; + + if (!first_timestamp_ns) { + first_timestamp_ns = timestamp_ns; + } + const auto relative_timestamp_ns = timestamp_ns - *first_timestamp_ns; + + if (auto write = writer.write_bgr_frame( + left_frame.getPtr(sl::MEM::CPU), + left_frame.getStepBytes(sl::MEM::CPU), + relative_timestamp_ns); + !write) { + progress.finish(emitted_frames, false); + close_camera(); + spdlog::error("failed to encode or mux frame: {}", write.error()); + return 
exit_code(ToolExitCode::RuntimeError); + } + + emitted_frames += 1; + progress.update(emitted_frames); + } + + if (auto flush = writer.flush(); !flush) { + progress.finish(emitted_frames, false); + close_camera(); + spdlog::error("failed to finalize MP4 output: {}", flush.error()); + return exit_code(ToolExitCode::RuntimeError); + } + + progress.finish(emitted_frames, true); + close_camera(); + spdlog::info( + "converted {} frames from '{}' to '{}' using codec={} hardware={}", + emitted_frames, + options.input_path, + output_path.string(), + cvmmap_streamer::zed_tools::codec_name(*codec), + writer.using_hardware()); + return exit_code(ToolExitCode::Success); +}