#include "cvmmap_streamer/encode/encoder_backend.hpp"

// NOTE(review): the include targets and template arguments in this file were
// reconstructed from how the names are used below - confirm against the build.
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavcodec/bsf.h>
#include <libavutil/error.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libswscale/swscale.h>
}

#include <cstddef>
#include <cstdint>
#include <expected>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include <spdlog/spdlog.h>

namespace cvmmap_streamer::encode {

namespace {

/// EncoderBackend implementation built on libavcodec / libswscale.
///
/// Raw frames are converted to the encoder pixel format with swscale, encoded
/// with an NVENC or libx264/libx265 encoder, and passed through the
/// `*_mp4toannexb` bitstream filter before being returned as access units.
///
/// The object owns raw FFmpeg handles, so it is non-copyable; all handles are
/// released by shutdown(), which the destructor calls.
class FfmpegEncoderBackend final : public EncoderBackend {
public:
    FfmpegEncoderBackend() = default;
    FfmpegEncoderBackend(const FfmpegEncoderBackend &) = delete;
    FfmpegEncoderBackend &operator=(const FfmpegEncoderBackend &) = delete;

    ~FfmpegEncoderBackend() override {
        shutdown();
    }

    [[nodiscard]] std::string_view backend_name() const override {
        return "ffmpeg";
    }

    /// True when the encoder selected by the last successful init() is an NVENC one.
    [[nodiscard]] bool using_hardware() const override {
        return using_hardware_;
    }

    /// (Re)initializes the backend for the given configuration and frame geometry.
    ///
    /// Any previous state is released first. On failure every partially created
    /// FFmpeg object is released again, so the backend is left uninitialized
    /// rather than half-configured.
    ///
    /// @return empty on success, otherwise a human-readable error message.
    [[nodiscard]] std::expected<void, std::string> init(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) override {
        shutdown();
        auto result = init_impl(config, frame_info);
        if (!result) {
            // Without this, push_frame()/drain() would operate on whatever
            // subset of handles happened to be created before the failure.
            shutdown();
        }
        return result;
    }

    /// No periodic work is required by this backend.
    [[nodiscard]] std::expected<void, std::string> poll() override {
        return {};
    }

    /// Converts one raw frame to the encoder pixel format and submits it.
    ///
    /// An empty frame is ignored. The frame must contain at least as many
    /// bytes as a tightly packed image of the configured format and size.
    ///
    /// @return empty on success, otherwise a human-readable error message.
    [[nodiscard]] std::expected<void, std::string> push_frame(const RawVideoFrame &frame) override {
        if (context_ == nullptr || frame_ == nullptr || scaler_ == nullptr) {
            return std::unexpected("FFmpeg backend not initialized");
        }
        if (frame.bytes.empty()) {
            return {};
        }

        const int width = static_cast<int>(frame_info_.width);
        const int height = static_cast<int>(frame_info_.height);

        // Reject short buffers up front; sws_scale would read past the end.
        const int required_bytes = av_image_get_buffer_size(input_pix_fmt_, width, height, 1);
        if (required_bytes < 0) {
            return std::unexpected("failed to compute expected input frame size: " + av_error_string(required_bytes));
        }
        if (frame.bytes.size() < static_cast<std::size_t>(required_bytes)) {
            return std::unexpected(
                "input frame is too small for the configured format (" + std::to_string(frame.bytes.size()) +
                " < " + std::to_string(required_bytes) + " bytes)");
        }

        const auto make_writable = av_frame_make_writable(frame_);
        if (make_writable < 0) {
            return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(make_writable));
        }

        // Plain plane/stride arrays instead of a stack AVFrame: AVFrame's size
        // is not part of FFmpeg's public ABI.
        std::uint8_t *source_planes[4]{};
        int source_strides[4]{};
        if (av_image_fill_arrays(
                source_planes,
                source_strides,
                const_cast<std::uint8_t *>(frame.bytes.data()),
                input_pix_fmt_,
                width,
                height,
                1) < 0) {
            return std::unexpected("failed to map input frame into FFmpeg image arrays");
        }

        sws_scale(
            scaler_,
            source_planes,
            source_strides,
            0,
            height,
            frame_->data,
            frame_->linesize);

        // The encoder time base is 1/1e9, so pts is nanoseconds since the
        // first frame pushed after init().
        if (!first_source_timestamp_ns_) {
            first_source_timestamp_ns_ = frame.source_timestamp_ns;
        }
        frame_->pts = static_cast<std::int64_t>(frame.source_timestamp_ns - *first_source_timestamp_ns_);

        const auto send_result = avcodec_send_frame(context_, frame_);
        if (send_result < 0) {
            return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result));
        }
        return {};
    }

    /// Returns every access unit the encoder currently has ready.
    [[nodiscard]] std::expected<std::vector<EncodedAccessUnit>, std::string> drain() override {
        return drain_packets();
    }

    /// Signals end-of-stream to the encoder and returns the remaining access units.
    [[nodiscard]] std::expected<std::vector<EncodedAccessUnit>, std::string> flush() override {
        if (context_ == nullptr) {
            return std::vector<EncodedAccessUnit>{};
        }
        const auto flush_result = avcodec_send_frame(context_, nullptr);
        if (flush_result < 0 && flush_result != AVERROR_EOF) {
            return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result));
        }
        return drain_packets();
    }

    /// Releases every FFmpeg object and resets the timestamp origin. Safe to call repeatedly.
    void shutdown() override {
        if (bsf_context_ != nullptr) {
            av_bsf_free(&bsf_context_);
        }
        if (filtered_packet_ != nullptr) {
            av_packet_free(&filtered_packet_);
        }
        if (packet_ != nullptr) {
            av_packet_free(&packet_);
        }
        if (frame_ != nullptr) {
            av_frame_free(&frame_);
        }
        if (context_ != nullptr) {
            avcodec_free_context(&context_);
        }
        if (scaler_ != nullptr) {
            sws_freeContext(scaler_);
            scaler_ = nullptr;
        }
        first_source_timestamp_ns_.reset();
        using_hardware_ = false;
    }

private:
    /// Performs the actual initialization; the caller cleans up on failure.
    [[nodiscard]] std::expected<void, std::string> init_impl(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) {
        frame_info_ = frame_info;
        codec_ = config.encoder.codec;

        auto input_pixel_format = to_av_pixel_format(frame_info.pixel_format);
        if (!input_pixel_format) {
            return std::unexpected(input_pixel_format.error());
        }
        input_pix_fmt_ = *input_pixel_format;

        auto encoder_name = pick_encoder_name(config);
        if (!encoder_name) {
            return std::unexpected(encoder_name.error());
        }
        using_hardware_ = encoder_name->find("nvenc") != std::string::npos;

        // Choose the pixel format from the encoder that was actually selected,
        // not from the requested device: with device=auto the software
        // fallback may be picked, and libx265 does not accept NV12.
        encoder_pix_fmt_ = pick_encoder_pixel_format(using_hardware_);

        const auto *encoder = avcodec_find_encoder_by_name(encoder_name->c_str());
        if (encoder == nullptr) {
            return std::unexpected("FFmpeg encoder '" + *encoder_name + "' is unavailable");
        }

        context_ = avcodec_alloc_context3(encoder);
        if (context_ == nullptr) {
            return std::unexpected("failed to allocate FFmpeg encoder context");
        }

        context_->codec_type = AVMEDIA_TYPE_VIDEO;
        context_->codec_id = encoder->id;
        context_->width = static_cast<int>(frame_info.width);
        context_->height = static_cast<int>(frame_info.height);
        context_->pix_fmt = encoder_pix_fmt_;
        context_->time_base = AVRational{1, 1000000000};
        context_->framerate = AVRational{30, 1};
        context_->gop_size = static_cast<int>(config.encoder.gop);
        context_->max_b_frames = static_cast<int>(config.encoder.b_frames);
        context_->thread_count = 1;

        auto codec_setup = configure_codec(*encoder_name, config);
        if (!codec_setup) {
            return std::unexpected(codec_setup.error());
        }

        const auto open_result = avcodec_open2(context_, encoder, nullptr);
        if (open_result < 0) {
            return std::unexpected("failed to open FFmpeg encoder '" + *encoder_name + "': " + av_error_string(open_result));
        }

        // Same geometry in and out: swscale is used for pixel format conversion only.
        scaler_ = sws_getCachedContext(
            nullptr,
            static_cast<int>(frame_info.width),
            static_cast<int>(frame_info.height),
            input_pix_fmt_,
            static_cast<int>(frame_info.width),
            static_cast<int>(frame_info.height),
            encoder_pix_fmt_,
            SWS_BILINEAR,
            nullptr,
            nullptr,
            nullptr);
        if (scaler_ == nullptr) {
            return std::unexpected("failed to create swscale conversion context");
        }

        frame_ = av_frame_alloc();
        if (frame_ == nullptr) {
            return std::unexpected("failed to allocate FFmpeg frame");
        }
        frame_->format = encoder_pix_fmt_;
        frame_->width = context_->width;
        frame_->height = context_->height;
        const auto frame_buffer = av_frame_get_buffer(frame_, 32);
        if (frame_buffer < 0) {
            return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer));
        }

        packet_ = av_packet_alloc();
        if (packet_ == nullptr) {
            return std::unexpected("failed to allocate FFmpeg packet");
        }
        filtered_packet_ = av_packet_alloc();
        if (filtered_packet_ == nullptr) {
            return std::unexpected("failed to allocate FFmpeg filtered packet");
        }

        auto bitstream_filter = create_bitstream_filter();
        if (!bitstream_filter) {
            return std::unexpected(bitstream_filter.error());
        }

        // av_get_pix_fmt_name may return null for an unknown format.
        const char *pix_fmt_name = av_get_pix_fmt_name(encoder_pix_fmt_);
        spdlog::info(
            "FFMPEG_ENCODER_PATH codec={} device={} encoder={} pix_fmt={}",
            cvmmap_streamer::to_string(codec_),
            device_to_string(config.encoder.device),
            *encoder_name,
            pix_fmt_name != nullptr ? pix_fmt_name : "unknown");
        return {};
    }

    /// Renders an FFmpeg error code as text.
    [[nodiscard]] static std::string av_error_string(int error_code) {
        char buffer[AV_ERROR_MAX_STRING_SIZE]{};
        av_strerror(error_code, buffer, sizeof(buffer));
        return std::string(buffer);
    }

    /// Maps the IPC pixel format onto the matching packed FFmpeg pixel format.
    [[nodiscard]] static std::expected<AVPixelFormat, std::string> to_av_pixel_format(ipc::PixelFormat format) {
        switch (format) {
        case ipc::PixelFormat::BGR:
            return AV_PIX_FMT_BGR24;
        case ipc::PixelFormat::RGB:
            return AV_PIX_FMT_RGB24;
        case ipc::PixelFormat::BGRA:
            return AV_PIX_FMT_BGRA;
        case ipc::PixelFormat::RGBA:
            return AV_PIX_FMT_RGBA;
        case ipc::PixelFormat::GRAY:
            return AV_PIX_FMT_GRAY8;
        default:
            return std::unexpected("unsupported raw pixel format for FFmpeg backend (supported: BGR/RGB/BGRA/RGBA/GRAY)");
        }
    }

    /// NV12 for the NVENC encoders, planar YUV 4:2:0 for the software encoders.
    [[nodiscard]] static AVPixelFormat pick_encoder_pixel_format(bool hardware_encoder) {
        return hardware_encoder ? AV_PIX_FMT_NV12 : AV_PIX_FMT_YUV420P;
    }

    [[nodiscard]] static std::string_view device_to_string(EncoderDeviceType device) {
        switch (device) {
        case EncoderDeviceType::Auto:
            return "auto";
        case EncoderDeviceType::Nvidia:
            return "nvidia";
        case EncoderDeviceType::Software:
            return "software";
        }
        return "unknown";
    }

    /// Selects the encoder for codec_ honoring the requested device.
    ///
    /// NVENC is tried for every device except Software; the software encoder
    /// is tried for Software and Auto. The first one FFmpeg knows about wins.
    [[nodiscard]] std::expected<std::string, std::string> pick_encoder_name(const RuntimeConfig &config) const {
        const bool is_h265 = codec_ == CodecType::H265;
        const char *hardware_name = is_h265 ? "hevc_nvenc" : "h264_nvenc";
        const char *software_name = is_h265 ? "libx265" : "libx264";

        const auto device = config.encoder.device;
        const bool try_hardware = device != EncoderDeviceType::Software;
        const bool try_software = device == EncoderDeviceType::Software || device == EncoderDeviceType::Auto;

        if (try_hardware && avcodec_find_encoder_by_name(hardware_name) != nullptr) {
            return std::string(hardware_name);
        }
        if (try_software && avcodec_find_encoder_by_name(software_name) != nullptr) {
            return std::string(software_name);
        }
        return std::unexpected(
            std::string("no usable FFmpeg encoder found for ") + (is_h265 ? "h265" : "h264") +
            " (looked for " + hardware_name + ", " + software_name + ")");
    }

    /// Applies low-latency private options to the not-yet-opened encoder context.
    ///
    /// Option results are intentionally ignored: options are best-effort and
    /// not every encoder build exposes each of them.
    [[nodiscard]] std::expected<void, std::string> configure_codec(std::string_view encoder_name, const RuntimeConfig &config) {
        const bool is_nvenc = encoder_name.find("nvenc") != std::string_view::npos;
        void *options = context_->priv_data;

        av_opt_set(options, "preset", is_nvenc ? "llhq" : "veryfast", 0);
        if (is_nvenc) {
            av_opt_set(options, "tune", "ull", 0);
            av_opt_set(options, "zerolatency", "1", 0);
            av_opt_set(options, "rc-lookahead", "0", 0);
        } else {
            av_opt_set(options, "tune", "zerolatency", 0);
            if (encoder_name == "libx265") {
                av_opt_set(options, "x265-params", "repeat-headers=1:scenecut=0", 0);
            }
        }

        av_opt_set_int(options, "forced-idr", config.latency.force_idr_on_reset ? 1 : 0, 0);
        return {};
    }

    /// Creates and initializes the `*_mp4toannexb` filter matching codec_.
    /// Requires context_ to be opened so its parameters can be copied.
    [[nodiscard]] std::expected<void, std::string> create_bitstream_filter() {
        const char *filter_name = codec_ == CodecType::H265 ? "hevc_mp4toannexb" : "h264_mp4toannexb";
        const auto *filter = av_bsf_get_by_name(filter_name);
        if (filter == nullptr) {
            return std::unexpected(std::string("required FFmpeg bitstream filter '") + filter_name + "' is unavailable");
        }

        const auto alloc_result = av_bsf_alloc(filter, &bsf_context_);
        if (alloc_result < 0) {
            return std::unexpected("failed to allocate FFmpeg bitstream filter: " + av_error_string(alloc_result));
        }

        const auto copy_result = avcodec_parameters_from_context(bsf_context_->par_in, context_);
        if (copy_result < 0) {
            return std::unexpected("failed to copy codec parameters into bitstream filter: " + av_error_string(copy_result));
        }
        bsf_context_->time_base_in = context_->time_base;

        const auto init_result = av_bsf_init(bsf_context_);
        if (init_result < 0) {
            return std::unexpected("failed to initialize FFmpeg bitstream filter: " + av_error_string(init_result));
        }
        return {};
    }

    /// Pulls all pending packets from the encoder, runs them through the
    /// bitstream filter and converts them to access units.
    ///
    /// Returns an empty list when the backend is not initialized.
    [[nodiscard]] std::expected<std::vector<EncodedAccessUnit>, std::string> drain_packets() {
        std::vector<EncodedAccessUnit> access_units{};
        if (context_ == nullptr || packet_ == nullptr || filtered_packet_ == nullptr || bsf_context_ == nullptr) {
            return access_units;
        }

        while (true) {
            const auto receive_result = avcodec_receive_packet(context_, packet_);
            if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) {
                break;
            }
            if (receive_result < 0) {
                return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result));
            }

            const auto bsf_send_result = av_bsf_send_packet(bsf_context_, packet_);
            if (bsf_send_result < 0) {
                av_packet_unref(packet_);
                return std::unexpected("failed to send packet to bitstream filter: " + av_error_string(bsf_send_result));
            }
            av_packet_unref(packet_);

            while (true) {
                const auto bsf_receive_result = av_bsf_receive_packet(bsf_context_, filtered_packet_);
                if (bsf_receive_result == AVERROR(EAGAIN) || bsf_receive_result == AVERROR_EOF) {
                    break;
                }
                if (bsf_receive_result < 0) {
                    return std::unexpected("failed to receive filtered packet: " + av_error_string(bsf_receive_result));
                }

                EncodedAccessUnit access_unit{};
                access_unit.codec = codec_;
                access_unit.stream_pts_ns =
                    filtered_packet_->pts == AV_NOPTS_VALUE ? 0ull : static_cast<std::uint64_t>(filtered_packet_->pts);
                // pts is relative to the first pushed frame; add the origin
                // back to recover the source timestamp.
                access_unit.source_timestamp_ns =
                    first_source_timestamp_ns_.value_or(0ull) + access_unit.stream_pts_ns;
                access_unit.keyframe = (filtered_packet_->flags & AV_PKT_FLAG_KEY) != 0;
                access_unit.annexb_bytes.assign(
                    filtered_packet_->data,
                    filtered_packet_->data + filtered_packet_->size);
                access_units.push_back(std::move(access_unit));
                av_packet_unref(filtered_packet_);
            }
        }

        return access_units;
    }

    ipc::FrameInfo frame_info_{};
    CodecType codec_{CodecType::H264};
    AVCodecContext *context_{nullptr};
    AVPacket *packet_{nullptr};
    AVPacket *filtered_packet_{nullptr};
    AVFrame *frame_{nullptr};
    SwsContext *scaler_{nullptr};
    AVBSFContext *bsf_context_{nullptr};
    AVPixelFormat input_pix_fmt_{AV_PIX_FMT_NONE};
    AVPixelFormat encoder_pix_fmt_{AV_PIX_FMT_NONE};
    // Source timestamp of the first frame pushed since init(); origin for pts.
    std::optional<std::uint64_t> first_source_timestamp_ns_{};
    bool using_hardware_{false};
};

} // namespace

/// Creates the FFmpeg-based encoder backend.
std::unique_ptr<EncoderBackend> make_ffmpeg_backend() {
    return std::make_unique<FfmpegEncoderBackend>();
}

} // namespace cvmmap_streamer::encode