// C++ source file (434 lines, 14 KiB).
#include "cvmmap_streamer/encode/encoder_backend.hpp"
|
|
|
|
extern "C" {
|
|
#include <libavcodec/avcodec.h>
|
|
#include <libavcodec/bsf.h>
|
|
#include <libavutil/avutil.h>
|
|
#include <libavutil/imgutils.h>
|
|
#include <libavutil/opt.h>
|
|
#include <libavutil/pixfmt.h>
|
|
#include <libswscale/swscale.h>
|
|
}
|
|
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <expected>
|
|
#include <memory>
|
|
#include <optional>
|
|
#include <span>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include <spdlog/spdlog.h>
|
|
|
|
namespace cvmmap_streamer::encode {
|
|
|
|
namespace {
|
|
|
|
class FfmpegEncoderBackend final : public EncoderBackend {
|
|
public:
|
|
// Releases whatever FFmpeg state is still held when the backend is destroyed.
~FfmpegEncoderBackend() override { shutdown(); }
|
|
|
|
[[nodiscard]]
|
|
std::string_view backend_name() const override {
|
|
return "ffmpeg";
|
|
}
|
|
|
|
[[nodiscard]]
|
|
bool using_hardware() const override {
|
|
return using_hardware_;
|
|
}
|
|
|
|
// Builds the complete encoding pipeline for frames described by `frame_info`:
// encoder context, swscale converter, reusable frame/packets and the Annex B
// bitstream filter.
//
// Any state left over from a previous init() is released first. On failure the
// partially built pipeline is torn down again, so the backend is either fully
// initialized or fully shut down when this function returns.
//
// Returns an empty expected on success, or a human-readable error message.
[[nodiscard]]
std::expected<void, std::string> init(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) override {
    shutdown();

    // Single exit for every error: without this, a failure half-way through
    // would leave e.g. context_/frame_/scaler_ set but packet_ null, a state
    // that push_frame() accepts and drain_packets() then dereferences.
    const auto fail = [this](std::string message) -> std::expected<void, std::string> {
        shutdown();
        return std::unexpected(std::move(message));
    };

    // NOTE(review): only the address of `config` is kept; presumably the caller
    // keeps it alive for the lifetime of the backend - confirm.
    config_ = &config;
    frame_info_ = frame_info;
    codec_ = config.encoder.codec;

    auto input_pixel_format = to_av_pixel_format(frame_info.pixel_format);
    if (!input_pixel_format) {
        return fail(input_pixel_format.error());
    }
    input_pix_fmt_ = *input_pixel_format;

    auto encoder_name = pick_encoder_name(config);
    if (!encoder_name) {
        return fail(encoder_name.error());
    }
    using_hardware_ = encoder_name->find("nvenc") != std::string::npos;

    // Choose the encoder input format from the encoder that was actually
    // selected, not from the requested device: with device=Auto the selection
    // may fall back to a software encoder, and libx265 does not accept NV12.
    encoder_pix_fmt_ = pick_encoder_pixel_format(
        using_hardware_ ? EncoderDeviceType::Nvidia : EncoderDeviceType::Software);

    const auto *encoder = avcodec_find_encoder_by_name(encoder_name->c_str());
    if (encoder == nullptr) {
        return fail("FFmpeg encoder '" + *encoder_name + "' is unavailable");
    }

    context_ = avcodec_alloc_context3(encoder);
    if (context_ == nullptr) {
        return fail("failed to allocate FFmpeg encoder context");
    }

    context_->codec_type = AVMEDIA_TYPE_VIDEO;
    context_->codec_id = encoder->id;
    context_->width = static_cast<int>(frame_info.width);
    context_->height = static_cast<int>(frame_info.height);
    context_->pix_fmt = encoder_pix_fmt_;
    // Timestamps are expressed in nanoseconds (see push_frame()).
    context_->time_base = AVRational{1, 1000000000};
    context_->framerate = AVRational{30, 1};
    context_->gop_size = static_cast<int>(config.encoder.gop);
    context_->max_b_frames = static_cast<int>(config.encoder.b_frames);
    context_->thread_count = 1;

    auto codec_setup = configure_codec(*encoder_name, config);
    if (!codec_setup) {
        return fail(codec_setup.error());
    }

    const auto open_result = avcodec_open2(context_, encoder, nullptr);
    if (open_result < 0) {
        return fail("failed to open FFmpeg encoder '" + *encoder_name + "': " + av_error_string(open_result));
    }

    // Same-size conversion: only the pixel format changes between input and encoder.
    scaler_ = sws_getCachedContext(
        nullptr,
        static_cast<int>(frame_info.width),
        static_cast<int>(frame_info.height),
        input_pix_fmt_,
        static_cast<int>(frame_info.width),
        static_cast<int>(frame_info.height),
        encoder_pix_fmt_,
        SWS_BILINEAR,
        nullptr,
        nullptr,
        nullptr);
    if (scaler_ == nullptr) {
        return fail("failed to create swscale conversion context");
    }

    frame_ = av_frame_alloc();
    if (frame_ == nullptr) {
        return fail("failed to allocate FFmpeg frame");
    }
    frame_->format = encoder_pix_fmt_;
    frame_->width = context_->width;
    frame_->height = context_->height;
    const auto frame_buffer = av_frame_get_buffer(frame_, 32);
    if (frame_buffer < 0) {
        return fail("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer));
    }

    packet_ = av_packet_alloc();
    if (packet_ == nullptr) {
        return fail("failed to allocate FFmpeg packet");
    }

    filtered_packet_ = av_packet_alloc();
    if (filtered_packet_ == nullptr) {
        return fail("failed to allocate FFmpeg filtered packet");
    }

    auto bitstream_filter = create_bitstream_filter();
    if (!bitstream_filter) {
        return fail(bitstream_filter.error());
    }

    spdlog::info(
        "FFMPEG_ENCODER_PATH codec={} device={} encoder={} pix_fmt={}",
        cvmmap_streamer::to_string(codec_),
        device_to_string(config.encoder.device),
        *encoder_name,
        av_get_pix_fmt_name(encoder_pix_fmt_));
    return {};
}
|
|
|
|
// This backend has no periodic work to do; polling always succeeds.
[[nodiscard]] std::expected<void, std::string> poll() override {
    return std::expected<void, std::string>{};
}
|
|
|
|
// Converts one raw frame to the encoder pixel format and submits it to the encoder.
//
// An empty `frame.bytes` is treated as "nothing to encode" and succeeds without
// touching the encoder. The presentation timestamp is the frame's source
// timestamp relative to the first frame pushed since init().
//
// Returns an empty expected on success, or an error message when the backend is
// not initialized, the buffer is too small for the configured geometry, the
// conversion fails, or the encoder rejects the frame.
[[nodiscard]]
std::expected<void, std::string> push_frame(const RawVideoFrame &frame) override {
    if (context_ == nullptr || frame_ == nullptr || scaler_ == nullptr) {
        return std::unexpected("FFmpeg backend not initialized");
    }
    if (frame.bytes.empty()) {
        return {};
    }

    const int source_width = static_cast<int>(frame_info_.width);
    const int source_height = static_cast<int>(frame_info_.height);

    // swscale reads a full image worth of bytes from the input pointer, so a
    // short buffer would be read out of bounds. Reject it up front.
    const int required_bytes = av_image_get_buffer_size(input_pix_fmt_, source_width, source_height, 1);
    if (required_bytes < 0) {
        return std::unexpected("failed to compute expected input frame size: " + av_error_string(required_bytes));
    }
    if (frame.bytes.size() < static_cast<std::size_t>(required_bytes)) {
        return std::unexpected(
            "input frame too small for configured geometry: got " + std::to_string(frame.bytes.size()) +
            " bytes, need " + std::to_string(required_bytes));
    }

    const auto make_writable = av_frame_make_writable(frame_);
    if (make_writable < 0) {
        return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(make_writable));
    }

    // Plain plane/stride arrays are all that is needed to describe the input;
    // AVFrame is not meant to be instantiated on the stack.
    std::uint8_t *source_planes[4]{};
    int source_strides[4]{};
    if (av_image_fill_arrays(
            source_planes,
            source_strides,
            // FFmpeg's API is not const-correct here; the input is only read.
            const_cast<std::uint8_t *>(frame.bytes.data()),
            input_pix_fmt_,
            source_width,
            source_height,
            1) < 0) {
        return std::unexpected("failed to map input frame into FFmpeg image arrays");
    }

    const int converted_rows = sws_scale(
        scaler_,
        source_planes,
        source_strides,
        0,
        source_height,
        frame_->data,
        frame_->linesize);
    if (converted_rows <= 0) {
        return std::unexpected("failed to convert input frame to encoder pixel format");
    }

    if (!first_source_timestamp_ns_) {
        first_source_timestamp_ns_ = frame.source_timestamp_ns;
    }

    frame_->pts = static_cast<std::int64_t>(frame.source_timestamp_ns - *first_source_timestamp_ns_);
    const auto send_result = avcodec_send_frame(context_, frame_);
    if (send_result < 0) {
        return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result));
    }
    return {};
}
|
|
|
|
// Collects every access unit the encoder currently has ready.
//
// Like flush(), this returns an empty list when there is no encoder context,
// instead of handing a null context to FFmpeg.
[[nodiscard]]
std::expected<std::vector<EncodedAccessUnit>, std::string> drain() override {
    if (context_ == nullptr) {
        return std::vector<EncodedAccessUnit>{};
    }
    return drain_packets();
}
|
|
|
|
// Sends a null frame to the encoder and returns the access units that can then
// be drained. Without an encoder context there is nothing to flush and the
// result is an empty list.
[[nodiscard]]
std::expected<std::vector<EncodedAccessUnit>, std::string> flush() override {
    if (context_ != nullptr) {
        const int eos_status = avcodec_send_frame(context_, nullptr);
        // AVERROR_EOF is tolerated here and not reported as a failure.
        const bool eos_rejected = eos_status < 0 && eos_status != AVERROR_EOF;
        if (eos_rejected) {
            return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(eos_status));
        }
        return drain_packets();
    }
    return std::vector<EncodedAccessUnit>{};
}
|
|
|
|
void shutdown() override {
|
|
if (bsf_context_ != nullptr) {
|
|
av_bsf_free(&bsf_context_);
|
|
}
|
|
if (filtered_packet_ != nullptr) {
|
|
av_packet_free(&filtered_packet_);
|
|
}
|
|
if (packet_ != nullptr) {
|
|
av_packet_free(&packet_);
|
|
}
|
|
if (frame_ != nullptr) {
|
|
av_frame_free(&frame_);
|
|
}
|
|
if (context_ != nullptr) {
|
|
avcodec_free_context(&context_);
|
|
}
|
|
if (scaler_ != nullptr) {
|
|
sws_freeContext(scaler_);
|
|
scaler_ = nullptr;
|
|
}
|
|
first_source_timestamp_ns_.reset();
|
|
using_hardware_ = false;
|
|
}
|
|
|
|
private:
|
|
[[nodiscard]]
|
|
static std::string av_error_string(int error_code) {
|
|
char buffer[AV_ERROR_MAX_STRING_SIZE]{};
|
|
av_strerror(error_code, buffer, sizeof(buffer));
|
|
return std::string(buffer);
|
|
}
|
|
|
|
// Maps a raw IPC pixel format onto the matching FFmpeg pixel format.
// Formats outside the table below yield an error message.
[[nodiscard]]
static std::expected<AVPixelFormat, std::string> to_av_pixel_format(ipc::PixelFormat format) {
    struct PixelFormatPair {
        ipc::PixelFormat raw;
        AVPixelFormat av;
    };
    static constexpr PixelFormatPair kSupported[] = {
        {ipc::PixelFormat::BGR, AV_PIX_FMT_BGR24},
        {ipc::PixelFormat::RGB, AV_PIX_FMT_RGB24},
        {ipc::PixelFormat::BGRA, AV_PIX_FMT_BGRA},
        {ipc::PixelFormat::RGBA, AV_PIX_FMT_RGBA},
        {ipc::PixelFormat::GRAY, AV_PIX_FMT_GRAY8},
    };

    for (const auto &entry : kSupported) {
        if (entry.raw == format) {
            return entry.av;
        }
    }
    return std::unexpected("unsupported raw pixel format for FFmpeg backend (supported: BGR/RGB/BGRA/RGBA/GRAY)");
}
|
|
|
|
[[nodiscard]]
|
|
static AVPixelFormat pick_encoder_pixel_format(EncoderDeviceType device) {
|
|
if (device == EncoderDeviceType::Software) {
|
|
return AV_PIX_FMT_YUV420P;
|
|
}
|
|
return AV_PIX_FMT_NV12;
|
|
}
|
|
|
|
[[nodiscard]]
|
|
static std::string_view device_to_string(EncoderDeviceType device) {
|
|
switch (device) {
|
|
case EncoderDeviceType::Auto:
|
|
return "auto";
|
|
case EncoderDeviceType::Nvidia:
|
|
return "nvidia";
|
|
case EncoderDeviceType::Software:
|
|
return "software";
|
|
}
|
|
return "unknown";
|
|
}
|
|
|
|
// Chooses the FFmpeg encoder name for the current codec (codec_) and the
// requested device:
//   - Software: the libx264/libx265 encoder only;
//   - Nvidia:   the NVENC encoder only;
//   - Auto:     NVENC if FFmpeg knows it, otherwise the software encoder.
//
// Only the presence of the encoder in the FFmpeg build is checked here (via
// avcodec_find_encoder_by_name); the encoder is not opened.
//
// Returns the encoder name, or an error message when no candidate is present.
[[nodiscard]]
std::expected<std::string, std::string> pick_encoder_name(const RuntimeConfig &config) const {
    const bool is_h265 = codec_ == CodecType::H265;
    const char *hardware_name = is_h265 ? "hevc_nvenc" : "h264_nvenc";
    const char *software_name = is_h265 ? "libx265" : "libx264";

    const auto device = config.encoder.device;
    const bool hardware_allowed = device != EncoderDeviceType::Software;
    const bool software_allowed =
        device == EncoderDeviceType::Software || device == EncoderDeviceType::Auto;

    // Hardware is tried first whenever it is allowed, so no later hardware
    // retry is needed after the software candidate.
    if (hardware_allowed && avcodec_find_encoder_by_name(hardware_name) != nullptr) {
        return std::string(hardware_name);
    }
    if (software_allowed && avcodec_find_encoder_by_name(software_name) != nullptr) {
        return std::string(software_name);
    }

    return std::unexpected(std::string(
        is_h265 ? "no usable FFmpeg encoder found for h265 (looked for hevc_nvenc, libx265)"
                : "no usable FFmpeg encoder found for h264 (looked for h264_nvenc, libx264)"));
}
|
|
|
|
// Applies low-latency private options to the (not yet opened) encoder context.
//
// Options are applied on a best-effort basis: an option the encoder does not
// know or rejects is logged as a warning and otherwise ignored, so that builds
// of FFmpeg with slightly different option sets still work.
//
// Returns an error only when there is no encoder context to configure.
[[nodiscard]]
std::expected<void, std::string> configure_codec(std::string_view encoder_name, const RuntimeConfig &config) {
    if (context_ == nullptr) {
        return std::unexpected("FFmpeg backend not initialized");
    }

    const bool is_nvenc = encoder_name.find("nvenc") != std::string_view::npos;

    // Reports (but does not propagate) a rejected option.
    const auto report = [encoder_name](const char *key, int status) {
        if (status < 0) {
            spdlog::warn(
                "FFmpeg encoder '{}' did not accept option '{}': {}",
                encoder_name,
                key,
                av_error_string(status));
        }
    };
    const auto set_option = [this, &report](const char *key, const char *value) {
        report(key, av_opt_set(context_->priv_data, key, value, 0));
    };

    set_option("preset", is_nvenc ? "llhq" : "veryfast");
    if (is_nvenc) {
        set_option("tune", "ull");
        set_option("zerolatency", "1");
        set_option("rc-lookahead", "0");
    } else {
        set_option("tune", "zerolatency");
        if (encoder_name == "libx265") {
            set_option("x265-params", "repeat-headers=1:scenecut=0");
        }
    }

    report(
        "forced-idr",
        av_opt_set_int(context_->priv_data, "forced-idr", config.latency.force_idr_on_reset ? 1 : 0, 0));
    return {};
}
|
|
|
|
// Allocates and initializes the "*_mp4toannexb" bitstream filter matching the
// current codec, fed with the encoder context's parameters and time base.
// Returns an error message if the filter is missing or any setup step fails.
[[nodiscard]]
std::expected<void, std::string> create_bitstream_filter() {
    const bool is_h265 = codec_ == CodecType::H265;
    const char *bsf_name = is_h265 ? "hevc_mp4toannexb" : "h264_mp4toannexb";

    const auto *bsf = av_bsf_get_by_name(bsf_name);
    if (bsf == nullptr) {
        return std::unexpected(std::string("required FFmpeg bitstream filter '") + bsf_name + "' is unavailable");
    }

    if (const int status = av_bsf_alloc(bsf, &bsf_context_); status < 0) {
        return std::unexpected("failed to allocate FFmpeg bitstream filter: " + av_error_string(status));
    }

    if (const int status = avcodec_parameters_from_context(bsf_context_->par_in, context_); status < 0) {
        return std::unexpected("failed to copy codec parameters into bitstream filter: " + av_error_string(status));
    }
    bsf_context_->time_base_in = context_->time_base;

    if (const int status = av_bsf_init(bsf_context_); status < 0) {
        return std::unexpected("failed to initialize FFmpeg bitstream filter: " + av_error_string(status));
    }
    return {};
}
|
|
|
|
// Pulls every packet the encoder has ready, runs each one through the
// bitstream filter and converts the filtered packets into access units.
//
// Returns the collected access units (possibly none). Without an encoder
// context there is nothing to drain and the list is empty; a context without
// its packets or bitstream filter is reported as "not initialized" instead of
// being dereferenced.
[[nodiscard]]
std::expected<std::vector<EncodedAccessUnit>, std::string> drain_packets() {
    std::vector<EncodedAccessUnit> access_units{};
    if (context_ == nullptr) {
        return access_units;
    }
    if (packet_ == nullptr || filtered_packet_ == nullptr || bsf_context_ == nullptr) {
        return std::unexpected("FFmpeg backend not initialized");
    }

    while (true) {
        const auto receive_result = avcodec_receive_packet(context_, packet_);
        // EAGAIN: encoder wants more input; EOF: encoder fully flushed.
        if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) {
            break;
        }
        if (receive_result < 0) {
            return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result));
        }

        const auto bsf_send_result = av_bsf_send_packet(bsf_context_, packet_);
        // Drop our reference in both outcomes so packet_ can be reused.
        av_packet_unref(packet_);
        if (bsf_send_result < 0) {
            return std::unexpected("failed to send packet to bitstream filter: " + av_error_string(bsf_send_result));
        }

        while (true) {
            const auto bsf_receive_result = av_bsf_receive_packet(bsf_context_, filtered_packet_);
            if (bsf_receive_result == AVERROR(EAGAIN) || bsf_receive_result == AVERROR_EOF) {
                break;
            }
            if (bsf_receive_result < 0) {
                return std::unexpected("failed to receive filtered packet: " + av_error_string(bsf_receive_result));
            }

            EncodedAccessUnit access_unit{};
            access_unit.codec = codec_;
            // A packet without a timestamp is reported at stream time 0.
            access_unit.stream_pts_ns = filtered_packet_->pts == AV_NOPTS_VALUE
                ? 0ull
                : static_cast<std::uint64_t>(filtered_packet_->pts);
            // push_frame() stores pts relative to the first source timestamp;
            // add it back to recover the absolute source time.
            access_unit.source_timestamp_ns = first_source_timestamp_ns_.value_or(0ull) + access_unit.stream_pts_ns;
            access_unit.keyframe = (filtered_packet_->flags & AV_PKT_FLAG_KEY) != 0;
            access_unit.annexb_bytes.assign(filtered_packet_->data, filtered_packet_->data + filtered_packet_->size);
            access_units.push_back(std::move(access_unit));
            av_packet_unref(filtered_packet_);
        }
    }
    return access_units;
}
|
|
|
|
const RuntimeConfig *config_{nullptr};
|
|
ipc::FrameInfo frame_info_{};
|
|
CodecType codec_{CodecType::H264};
|
|
AVCodecContext *context_{nullptr};
|
|
AVPacket *packet_{nullptr};
|
|
AVPacket *filtered_packet_{nullptr};
|
|
AVFrame *frame_{nullptr};
|
|
SwsContext *scaler_{nullptr};
|
|
AVBSFContext *bsf_context_{nullptr};
|
|
AVPixelFormat input_pix_fmt_{AV_PIX_FMT_NONE};
|
|
AVPixelFormat encoder_pix_fmt_{AV_PIX_FMT_NONE};
|
|
std::optional<std::uint64_t> first_source_timestamp_ns_{};
|
|
bool using_hardware_{false};
|
|
};
|
|
|
|
}
|
|
|
|
std::unique_ptr<EncoderBackend> make_ffmpeg_backend() {
|
|
return std::make_unique<FfmpegEncoderBackend>();
|
|
}
|
|
|
|
}
|