feat(streamer): add ffmpeg encoder and mcap recording
This commit is contained in:
@@ -0,0 +1,433 @@
|
||||
#include "cvmmap_streamer/encode/encoder_backend.hpp"
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavcodec/bsf.h>
|
||||
#include <libavutil/avutil.h>
|
||||
#include <libavutil/imgutils.h>
|
||||
#include <libavutil/opt.h>
|
||||
#include <libavutil/pixfmt.h>
|
||||
#include <libswscale/swscale.h>
|
||||
}
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <expected>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
namespace cvmmap_streamer::encode {
|
||||
|
||||
namespace {
|
||||
|
||||
// Software/hardware H.264/H.265 encoder backend built on the FFmpeg libav* APIs.
//
// Per-frame pipeline: raw caller bytes -> swscale pixel-format conversion into
// a reusable AVFrame -> avcodec encode -> {h264,hevc}_mp4toannexb bitstream
// filter -> Annex-B access units returned from drain()/flush().
//
// No internal locking — callers are expected to drive init/push/drain/shutdown
// from a single thread (TODO confirm against the EncoderBackend contract).
class FfmpegEncoderBackend final : public EncoderBackend {
public:
  ~FfmpegEncoderBackend() override {
    // Release all libav resources; shutdown() is safe to call repeatedly.
    shutdown();
  }

  [[nodiscard]]
  std::string_view backend_name() const override {
    return "ffmpeg";
  }

  // True when the encoder chosen in init() is an NVENC hardware encoder
  // (detected by "nvenc" appearing in the encoder name).
  [[nodiscard]]
  bool using_hardware() const override {
    return using_hardware_;
  }

  // Builds the full encode pipeline for the given stream geometry/pixel format.
  // Tears down any previous pipeline first, so init() may be called again to
  // reconfigure. On failure the partially built state is left in place; the
  // next init() or shutdown() cleans it up.
  //
  // @param config      runtime settings (codec, device preference, gop, b-frames).
  // @param frame_info  input geometry and raw pixel format from the IPC producer.
  // @return empty on success, otherwise a human-readable error string.
  [[nodiscard]]
  std::expected<void, std::string> init(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) override {
    shutdown();

    // NOTE(review): stores a raw pointer to the caller-owned config — the
    // RuntimeConfig must outlive this backend; confirm the owner guarantees that.
    config_ = &config;
    frame_info_ = frame_info;
    codec_ = config.encoder.codec;
    // YUV420P for software encoders, NV12 for hardware (NVENC-friendly).
    encoder_pix_fmt_ = pick_encoder_pixel_format(config.encoder.device);

    auto input_pixel_format = to_av_pixel_format(frame_info.pixel_format);
    if (!input_pixel_format) {
      return std::unexpected(input_pixel_format.error());
    }

    input_pix_fmt_ = *input_pixel_format;

    // Resolve a concrete encoder name (nvenc vs libx26x) per device preference.
    auto encoder_name = pick_encoder_name(config);
    if (!encoder_name) {
      return std::unexpected(encoder_name.error());
    }
    using_hardware_ = encoder_name->find("nvenc") != std::string::npos;

    const auto *encoder = avcodec_find_encoder_by_name(encoder_name->c_str());
    if (encoder == nullptr) {
      return std::unexpected("FFmpeg encoder '" + *encoder_name + "' is unavailable");
    }

    context_ = avcodec_alloc_context3(encoder);
    if (context_ == nullptr) {
      return std::unexpected("failed to allocate FFmpeg encoder context");
    }

    context_->codec_type = AVMEDIA_TYPE_VIDEO;
    context_->codec_id = encoder->id;
    context_->width = static_cast<int>(frame_info.width);
    context_->height = static_cast<int>(frame_info.height);
    context_->pix_fmt = encoder_pix_fmt_;
    // Nanosecond time base: push_frame() feeds PTS values that are raw
    // nanosecond offsets from the first source timestamp.
    context_->time_base = AVRational{1, 1000000000};
    // Hard-coded 30 fps hint; not derived from config or the actual source
    // rate — TODO confirm this is intentional (affects rate control only).
    context_->framerate = AVRational{30, 1};
    context_->gop_size = static_cast<int>(config.encoder.gop);
    context_->max_b_frames = static_cast<int>(config.encoder.b_frames);
    // Single-threaded encode — presumably to minimize per-frame latency;
    // confirm before raising for throughput.
    context_->thread_count = 1;

    auto codec_setup = configure_codec(*encoder_name, config);
    if (!codec_setup) {
      return std::unexpected(codec_setup.error());
    }

    const auto open_result = avcodec_open2(context_, encoder, nullptr);
    if (open_result < 0) {
      return std::unexpected("failed to open FFmpeg encoder '" + *encoder_name + "': " + av_error_string(open_result));
    }

    // Same source and destination geometry: the scaler only converts pixel
    // format (e.g. BGR24 -> YUV420P/NV12), no resizing.
    scaler_ = sws_getCachedContext(
      nullptr,
      static_cast<int>(frame_info.width),
      static_cast<int>(frame_info.height),
      input_pix_fmt_,
      static_cast<int>(frame_info.width),
      static_cast<int>(frame_info.height),
      encoder_pix_fmt_,
      SWS_BILINEAR,
      nullptr,
      nullptr,
      nullptr);
    if (scaler_ == nullptr) {
      return std::unexpected("failed to create swscale conversion context");
    }

    // Reusable destination frame for the converted pixels.
    frame_ = av_frame_alloc();
    if (frame_ == nullptr) {
      return std::unexpected("failed to allocate FFmpeg frame");
    }
    frame_->format = encoder_pix_fmt_;
    frame_->width = context_->width;
    frame_->height = context_->height;
    // 32-byte alignment for the frame planes.
    const auto frame_buffer = av_frame_get_buffer(frame_, 32);
    if (frame_buffer < 0) {
      return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer));
    }

    // packet_: raw encoder output; filtered_packet_: post-bitstream-filter output.
    packet_ = av_packet_alloc();
    if (packet_ == nullptr) {
      return std::unexpected("failed to allocate FFmpeg packet");
    }

    filtered_packet_ = av_packet_alloc();
    if (filtered_packet_ == nullptr) {
      return std::unexpected("failed to allocate FFmpeg filtered packet");
    }

    // mp4toannexb filter converts length-prefixed NALs to Annex-B start codes.
    // Must run after avcodec_open2 so codec extradata is available to copy.
    auto bitstream_filter = create_bitstream_filter();
    if (!bitstream_filter) {
      return std::unexpected(bitstream_filter.error());
    }

    spdlog::info(
      "FFMPEG_ENCODER_PATH codec={} device={} encoder={} pix_fmt={}",
      cvmmap_streamer::to_string(codec_),
      device_to_string(config.encoder.device),
      *encoder_name,
      av_get_pix_fmt_name(encoder_pix_fmt_));
    return {};
  }

  // No periodic work needed for this backend; always succeeds.
  [[nodiscard]]
  std::expected<void, std::string> poll() override {
    return {};
  }

  // Converts one raw frame and submits it to the encoder. Encoded output is
  // not produced here — callers retrieve it via drain().
  //
  // PTS is the frame's source timestamp in nanoseconds, rebased to the first
  // frame seen (matching the nanosecond time_base set in init()).
  [[nodiscard]]
  std::expected<void, std::string> push_frame(const RawVideoFrame &frame) override {
    if (context_ == nullptr || frame_ == nullptr || scaler_ == nullptr) {
      return std::unexpected("FFmpeg backend not initialized");
    }
    // Empty payloads are silently ignored rather than treated as errors.
    if (frame.bytes.empty()) {
      return {};
    }

    // The encoder may still hold references to frame_'s buffers; get a
    // writable copy before overwriting them.
    const auto make_writable = av_frame_make_writable(frame_);
    if (make_writable < 0) {
      return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(make_writable));
    }

    // Wrap the caller's packed bytes in a stack AVFrame (no copy) so swscale
    // can read them. const_cast is safe: sws_scale only reads the source.
    AVFrame input_frame{};
    input_frame.format = input_pix_fmt_;
    input_frame.width = static_cast<int>(frame_info_.width);
    input_frame.height = static_cast<int>(frame_info_.height);
    if (av_image_fill_arrays(
          input_frame.data,
          input_frame.linesize,
          const_cast<std::uint8_t *>(frame.bytes.data()),
          input_pix_fmt_,
          input_frame.width,
          input_frame.height,
          1) < 0) {
      return std::unexpected("failed to map input frame into FFmpeg image arrays");
    }

    // Pixel-format conversion into the reusable encoder frame.
    sws_scale(
      scaler_,
      input_frame.data,
      input_frame.linesize,
      0,
      input_frame.height,
      frame_->data,
      frame_->linesize);

    // First frame anchors the PTS timeline at zero.
    if (!first_source_timestamp_ns_) {
      first_source_timestamp_ns_ = frame.source_timestamp_ns;
    }

    // NOTE(review): unsigned subtraction — a source timestamp earlier than the
    // first one would wrap to a huge PTS; confirm timestamps are monotonic.
    frame_->pts = static_cast<std::int64_t>(frame.source_timestamp_ns - *first_source_timestamp_ns_);
    const auto send_result = avcodec_send_frame(context_, frame_);
    if (send_result < 0) {
      return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result));
    }
    return {};
  }

  // Returns all access units the encoder has ready right now (possibly none —
  // encoders buffer frames for lookahead/B-frames).
  [[nodiscard]]
  std::expected<std::vector<EncodedAccessUnit>, std::string> drain() override {
    return drain_packets();
  }

  // Signals end-of-stream to the encoder and returns the remaining buffered
  // access units. A no-op (empty result) if init() never succeeded.
  [[nodiscard]]
  std::expected<std::vector<EncodedAccessUnit>, std::string> flush() override {
    if (context_ == nullptr) {
      return std::vector<EncodedAccessUnit>{};
    }
    // nullptr frame = EOS; AVERROR_EOF just means we already flushed.
    const auto flush_result = avcodec_send_frame(context_, nullptr);
    if (flush_result < 0 && flush_result != AVERROR_EOF) {
      return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result));
    }
    return drain_packets();
  }

  // Frees every libav object and resets per-stream state. Idempotent; called
  // from the destructor and at the top of init().
  void shutdown() override {
    if (bsf_context_ != nullptr) {
      av_bsf_free(&bsf_context_);
    }
    if (filtered_packet_ != nullptr) {
      av_packet_free(&filtered_packet_);
    }
    if (packet_ != nullptr) {
      av_packet_free(&packet_);
    }
    if (frame_ != nullptr) {
      av_frame_free(&frame_);
    }
    if (context_ != nullptr) {
      avcodec_free_context(&context_);
    }
    if (scaler_ != nullptr) {
      sws_freeContext(scaler_);
      scaler_ = nullptr;
    }
    first_source_timestamp_ns_.reset();
    using_hardware_ = false;
    // NOTE(review): config_ and frame_info_ are intentionally (?) left as-is;
    // config_ keeps pointing at the previous RuntimeConfig until the next init().
  }

private:
  // Formats a libav error code as a human-readable string.
  [[nodiscard]]
  static std::string av_error_string(int error_code) {
    char buffer[AV_ERROR_MAX_STRING_SIZE]{};
    av_strerror(error_code, buffer, sizeof(buffer));
    return std::string(buffer);
  }

  // Maps the IPC raw pixel format to the matching libav packed format.
  [[nodiscard]]
  static std::expected<AVPixelFormat, std::string> to_av_pixel_format(ipc::PixelFormat format) {
    switch (format) {
      case ipc::PixelFormat::BGR:
        return AV_PIX_FMT_BGR24;
      case ipc::PixelFormat::RGB:
        return AV_PIX_FMT_RGB24;
      case ipc::PixelFormat::BGRA:
        return AV_PIX_FMT_BGRA;
      case ipc::PixelFormat::RGBA:
        return AV_PIX_FMT_RGBA;
      case ipc::PixelFormat::GRAY:
        return AV_PIX_FMT_GRAY8;
      default:
        return std::unexpected("unsupported raw pixel format for FFmpeg backend (supported: BGR/RGB/BGRA/RGBA/GRAY)");
    }
  }

  // Encoder-side pixel format: planar YUV420P for software encoders,
  // NV12 (interleaved chroma) otherwise — the format NVENC consumes.
  [[nodiscard]]
  static AVPixelFormat pick_encoder_pixel_format(EncoderDeviceType device) {
    if (device == EncoderDeviceType::Software) {
      return AV_PIX_FMT_YUV420P;
    }
    return AV_PIX_FMT_NV12;
  }

  // Device enum -> log-friendly string (used only for the init() log line).
  [[nodiscard]]
  static std::string_view device_to_string(EncoderDeviceType device) {
    switch (device) {
      case EncoderDeviceType::Auto:
        return "auto";
      case EncoderDeviceType::Nvidia:
        return "nvidia";
      case EncoderDeviceType::Software:
        return "software";
    }
    return "unknown";
  }

  // Chooses a concrete FFmpeg encoder name for the configured codec/device:
  //   - device != Software: try NVENC first, fall back to libx26x on Auto,
  //     then retry NVENC before giving up.
  //   - device == Software: only libx26x is considered.
  // Returns an error listing the candidates when none are compiled in.
  [[nodiscard]]
  std::expected<std::string, std::string> pick_encoder_name(const RuntimeConfig &config) const {
    const bool prefer_hardware = config.encoder.device != EncoderDeviceType::Software;
    const bool prefer_software = config.encoder.device == EncoderDeviceType::Software;
    if (codec_ == CodecType::H265) {
      if (prefer_hardware && avcodec_find_encoder_by_name("hevc_nvenc") != nullptr) {
        return std::string("hevc_nvenc");
      }
      if (!prefer_hardware || config.encoder.device == EncoderDeviceType::Auto) {
        if (avcodec_find_encoder_by_name("libx265") != nullptr) {
          return std::string("libx265");
        }
      }
      if (!prefer_software && avcodec_find_encoder_by_name("hevc_nvenc") != nullptr) {
        return std::string("hevc_nvenc");
      }
      return std::unexpected("no usable FFmpeg encoder found for h265 (looked for hevc_nvenc, libx265)");
    }

    // Default codec path: H.264.
    if (prefer_hardware && avcodec_find_encoder_by_name("h264_nvenc") != nullptr) {
      return std::string("h264_nvenc");
    }
    if (!prefer_hardware || config.encoder.device == EncoderDeviceType::Auto) {
      if (avcodec_find_encoder_by_name("libx264") != nullptr) {
        return std::string("libx264");
      }
    }
    if (!prefer_software && avcodec_find_encoder_by_name("h264_nvenc") != nullptr) {
      return std::string("h264_nvenc");
    }
    return std::unexpected("no usable FFmpeg encoder found for h264 (looked for h264_nvenc, libx264)");
  }

  // Applies low-latency private options to the (not-yet-open) codec context.
  // Option-set return values are deliberately ignored: unknown options on a
  // given encoder build are treated as best-effort.
  [[nodiscard]]
  std::expected<void, std::string> configure_codec(std::string_view encoder_name, const RuntimeConfig &config) {
    // NVENC: low-latency HQ preset; x26x: veryfast.
    av_opt_set(context_->priv_data, "preset", encoder_name.find("nvenc") != std::string_view::npos ? "llhq" : "veryfast", 0);
    if (encoder_name.find("nvenc") != std::string_view::npos) {
      av_opt_set(context_->priv_data, "tune", "ull", 0);
      av_opt_set(context_->priv_data, "zerolatency", "1", 0);
      av_opt_set(context_->priv_data, "rc-lookahead", "0", 0);
    } else {
      av_opt_set(context_->priv_data, "tune", "zerolatency", 0);
      if (encoder_name == "libx265") {
        // repeat-headers so every keyframe is self-contained; scenecut=0 keeps
        // keyframe placement on the configured GOP cadence.
        av_opt_set(context_->priv_data, "x265-params", "repeat-headers=1:scenecut=0", 0);
      }
    }

    av_opt_set_int(context_->priv_data, "forced-idr", config.latency.force_idr_on_reset ? 1 : 0, 0);
    return {};
  }

  // Allocates and initializes the mp4toannexb bitstream filter matching the
  // configured codec, copying the opened encoder's parameters (extradata) in.
  [[nodiscard]]
  std::expected<void, std::string> create_bitstream_filter() {
    const char *filter_name = codec_ == CodecType::H265 ? "hevc_mp4toannexb" : "h264_mp4toannexb";
    const auto *filter = av_bsf_get_by_name(filter_name);
    if (filter == nullptr) {
      return std::unexpected(std::string("required FFmpeg bitstream filter '") + filter_name + "' is unavailable");
    }

    const auto alloc_result = av_bsf_alloc(filter, &bsf_context_);
    if (alloc_result < 0) {
      return std::unexpected("failed to allocate FFmpeg bitstream filter: " + av_error_string(alloc_result));
    }

    const auto copy_result = avcodec_parameters_from_context(bsf_context_->par_in, context_);
    if (copy_result < 0) {
      return std::unexpected("failed to copy codec parameters into bitstream filter: " + av_error_string(copy_result));
    }
    bsf_context_->time_base_in = context_->time_base;

    const auto init_result = av_bsf_init(bsf_context_);
    if (init_result < 0) {
      return std::unexpected("failed to initialize FFmpeg bitstream filter: " + av_error_string(init_result));
    }
    return {};
  }

  // Pulls every ready packet out of the encoder, pushes each through the
  // bitstream filter, and collects the Annex-B results as access units.
  //
  // NOTE(review): on AVERROR_EOF from the encoder this does not flush the
  // filter itself (av_bsf_send_packet(bsf_context_, nullptr)); if the
  // mp4toannexb filter buffers data, trailing packets could be dropped at
  // end-of-stream — confirm.
  [[nodiscard]]
  std::expected<std::vector<EncodedAccessUnit>, std::string> drain_packets() {
    std::vector<EncodedAccessUnit> access_units{};
    while (true) {
      const auto receive_result = avcodec_receive_packet(context_, packet_);
      // EAGAIN: encoder needs more input; EOF: fully flushed — both end the drain.
      if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) {
        break;
      }
      if (receive_result < 0) {
        return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result));
      }

      // av_bsf_send_packet takes ownership of packet_'s payload on success;
      // the explicit unrefs below cover the failure path and are harmless
      // (packet_ is already blank) on the success path.
      const auto bsf_send_result = av_bsf_send_packet(bsf_context_, packet_);
      if (bsf_send_result < 0) {
        av_packet_unref(packet_);
        return std::unexpected("failed to send packet to bitstream filter: " + av_error_string(bsf_send_result));
      }
      av_packet_unref(packet_);

      // One input packet may yield zero or more filtered packets.
      while (true) {
        const auto bsf_receive_result = av_bsf_receive_packet(bsf_context_, filtered_packet_);
        if (bsf_receive_result == AVERROR(EAGAIN) || bsf_receive_result == AVERROR_EOF) {
          break;
        }
        if (bsf_receive_result < 0) {
          return std::unexpected("failed to receive filtered packet: " + av_error_string(bsf_receive_result));
        }

        EncodedAccessUnit access_unit{};
        access_unit.codec = codec_;
        // PTS is nanoseconds-since-first-frame (time_base is {1, 1e9}).
        access_unit.stream_pts_ns = filtered_packet_->pts == AV_NOPTS_VALUE ? 0ull : static_cast<std::uint64_t>(filtered_packet_->pts);
        // Reconstruct the absolute source timestamp from the stored anchor.
        access_unit.source_timestamp_ns = first_source_timestamp_ns_.value_or(0ull) + access_unit.stream_pts_ns;
        access_unit.keyframe = (filtered_packet_->flags & AV_PKT_FLAG_KEY) != 0;
        // Copy the Annex-B payload out; the packet buffer is reused next iteration.
        access_unit.annexb_bytes.assign(filtered_packet_->data, filtered_packet_->data + filtered_packet_->size);
        access_units.push_back(std::move(access_unit));
        av_packet_unref(filtered_packet_);
      }
    }
    return access_units;
  }

  // Non-owning pointer to the caller's config (see lifetime note in init()).
  const RuntimeConfig *config_{nullptr};
  // Input geometry/pixel format captured at init().
  ipc::FrameInfo frame_info_{};
  CodecType codec_{CodecType::H264};
  // Owned libav objects; freed in shutdown().
  AVCodecContext *context_{nullptr};
  AVPacket *packet_{nullptr};
  AVPacket *filtered_packet_{nullptr};
  AVFrame *frame_{nullptr};
  SwsContext *scaler_{nullptr};
  AVBSFContext *bsf_context_{nullptr};
  AVPixelFormat input_pix_fmt_{AV_PIX_FMT_NONE};
  AVPixelFormat encoder_pix_fmt_{AV_PIX_FMT_NONE};
  // Anchor for rebasing PTS to zero; set on the first pushed frame.
  std::optional<std::uint64_t> first_source_timestamp_ns_{};
  bool using_hardware_{false};
};
|
||||
|
||||
}
|
||||
|
||||
std::unique_ptr<EncoderBackend> make_ffmpeg_backend() {
|
||||
return std::make_unique<FfmpegEncoderBackend>();
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user