feat(encode): add Jetson Multimedia API encoder backend

Integrate a native Jetson Multimedia API encoder path and keep the existing encoded access-unit contract for RTMP, RTP, and recording consumers.

This adds conditional Jetson MMAPI detection in CMake, builds the required NVIDIA sample common classes into a dedicated support library, and compiles the new backend only when the platform dependencies are present.

The runtime selector now lets encoder.backend=auto prefer Jetson MM for NVIDIA hardware requests while keeping encoder.backend=ffmpeg as an explicit FFmpeg path. Device selection semantics are updated so nvidia requires the Jetson backend, auto can fall back to FFmpeg software, and software remains FFmpeg-only.

The Jetson backend converts supported raw inputs through swscale, feeds NvVideoEncoder in YUV420M, emits Annex-B access units, and harvests decoder configuration from a warmup keyframe so downstream packetizers keep their existing contract.

This also splits FFmpeg encoder option handling into a shared header, updates runtime config/help text and tester defaults, and refreshes compatibility/caveat documentation to reflect the new selection behavior.

Build-tree runtime RPATH handling is tightened so GCC 15 builds keep the matching libstdc++ visible locally. Verification covered GCC 15 builds, RTP H.264/H.265 tester runs, RTMP H.264 stub publish, local live SRS smoke with cvmmap://zed1, and remote execution on 192.168.2.155 using the deployed bundle lib directory for the GCC 15 runtime.
This commit is contained in:
2026-04-15 18:37:44 +08:00
parent 16a1a38645
commit 51d03d4279
12 changed files with 1538 additions and 323 deletions
+101 -6
View File
@@ -8,6 +8,9 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
include(GNUInstallDirs)
set(CVMMAP_STREAMER_INSTALL_RPATH "$ORIGIN/../${CMAKE_INSTALL_LIBDIR}")
function(cvmmap_streamer_append_runtime_dir out_var runtime_library)
execute_process(
COMMAND "${CMAKE_CXX_COMPILER}" -print-file-name=${runtime_library}
@@ -69,6 +72,17 @@ set(
"${CMAKE_CURRENT_LIST_DIR}/third_party/mcap/include"
CACHE PATH
"Path to MCAP headers")
set(
CVMMAP_STREAMER_ENABLE_JETSON_MM
"AUTO"
CACHE STRING
"Enable Jetson Multimedia API encoder backend: AUTO, ON, or OFF")
set_property(CACHE CVMMAP_STREAMER_ENABLE_JETSON_MM PROPERTY STRINGS AUTO ON OFF)
set(
CVMMAP_STREAMER_JETSON_MMAPI_ROOT
"/usr/src/jetson_multimedia_api"
CACHE PATH
"Path to the Jetson Multimedia API root")
find_package(Threads REQUIRED)
find_package(OpenSSL REQUIRED)
@@ -117,7 +131,7 @@ if (CVMMAP_CNATS_PROVIDER STREQUAL "system")
find_package(cnats CONFIG REQUIRED)
find_package(cvmmap-core CONFIG QUIET PATHS "${CVMMAP_LOCAL_CORE_DIR}" NO_DEFAULT_PATH)
if (NOT TARGET cvmmap::client)
find_package(cvmmap-core CONFIG QUIET)
find_package(cvmmap-core CONFIG REQUIRED PATHS "${CVMMAP_LOCAL_CORE_DIR}" NO_DEFAULT_PATH)
endif()
else()
if (NOT EXISTS "${CVMMAP_LOCAL_NATS_STATIC}")
@@ -240,6 +254,78 @@ else()
set(CVMMAP_STREAMER_HAS_MCAP_DEPTH 0)
endif()
# --- Jetson Multimedia API encoder backend detection -------------------------
# Locates the MMAPI headers and NVIDIA sample common classes under
# CVMMAP_STREAMER_JETSON_MMAPI_ROOT, plus the runtime libraries the backend
# links against. Any missing piece is recorded in
# _CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS so AUTO mode can degrade gracefully
# while ON mode can fail with a precise reason.
set(_CVMMAP_STREAMER_JETSON_MM_INCLUDE_DIR "${CVMMAP_STREAMER_JETSON_MMAPI_ROOT}/include")
set(_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR "${CVMMAP_STREAMER_JETSON_MMAPI_ROOT}/samples/common/classes")
# NVIDIA ships these helper classes as sources, not a prebuilt library, so we
# compile them ourselves into cvmmap_streamer_jetson_mmapi below.
set(_CVMMAP_STREAMER_JETSON_MM_COMMON_SOURCES
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvBuffer.cpp"
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvElement.cpp"
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvElementProfiler.cpp"
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvLogging.cpp"
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvV4l2Element.cpp"
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvV4l2ElementPlane.cpp"
"${_CVMMAP_STREAMER_JETSON_MM_COMMON_DIR}/NvVideoEncoder.cpp")
set(_CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS)
# NvVideoEncoder.h is used as the sentinel header for the whole include tree.
if (NOT EXISTS "${_CVMMAP_STREAMER_JETSON_MM_INCLUDE_DIR}/NvVideoEncoder.h")
list(APPEND _CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS
"Jetson Multimedia API headers at ${_CVMMAP_STREAMER_JETSON_MM_INCLUDE_DIR}")
endif()
# Every sample source must exist; a partial MMAPI install is treated as absent.
foreach(_cvmmap_streamer_jetson_mm_source IN LISTS _CVMMAP_STREAMER_JETSON_MM_COMMON_SOURCES)
if (NOT EXISTS "${_cvmmap_streamer_jetson_mm_source}")
list(APPEND _CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS
"Jetson Multimedia API common source ${_cvmmap_streamer_jetson_mm_source}")
endif()
endforeach()
# libnvbufsurface lives in distro-specific nvidia/tegra subdirectories on
# Jetson images, so both PATHS and PATH_SUFFIXES enumerate the known layouts.
find_library(
CVMMAP_STREAMER_NVBUFSURFACE_LIBRARY
NAMES nvbufsurface
PATHS
/usr/lib/aarch64-linux-gnu/nvidia
/usr/lib/aarch64-linux-gnu/tegra
/usr/lib/aarch64-linux-gnu
PATH_SUFFIXES nvidia tegra)
if (NOT CVMMAP_STREAMER_NVBUFSURFACE_LIBRARY)
list(APPEND _CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS "libnvbufsurface")
endif()
# The MMAPI sample classes call through libv4l2 for the V4L2 encoder device.
find_library(CVMMAP_STREAMER_LIBV4L2_LIBRARY NAMES v4l2)
if (NOT CVMMAP_STREAMER_LIBV4L2_LIBRARY)
list(APPEND _CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS "libv4l2")
endif()
# Collapse the missing-dependency list into a single human-readable reason
# string that the feature-mode resolver can surface in diagnostics.
if (_CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS)
list(JOIN _CVMMAP_STREAMER_JETSON_MM_MISSING_DEPS ", " _CVMMAP_STREAMER_JETSON_MM_UNAVAILABLE_REASON)
set(_CVMMAP_STREAMER_JETSON_MM_AVAILABLE FALSE)
else()
set(_CVMMAP_STREAMER_JETSON_MM_UNAVAILABLE_REASON "Jetson Multimedia API dependencies")
set(_CVMMAP_STREAMER_JETSON_MM_AVAILABLE TRUE)
endif()
# Resolve AUTO/ON/OFF against actual availability (helper defined elsewhere in
# this file); the result lands in CVMMAP_STREAMER_HAS_JETSON_MM_BOOL.
cvmmap_streamer_resolve_feature_mode(
CVMMAP_STREAMER_HAS_JETSON_MM_BOOL
CVMMAP_STREAMER_ENABLE_JETSON_MM
"${CVMMAP_STREAMER_ENABLE_JETSON_MM}"
${_CVMMAP_STREAMER_JETSON_MM_AVAILABLE}
"${_CVMMAP_STREAMER_JETSON_MM_UNAVAILABLE_REASON}")
if (CVMMAP_STREAMER_HAS_JETSON_MM_BOOL)
# 0/1 form is consumed as a C++ preprocessor definition by the feature-flags
# interface target elsewhere in this file.
set(CVMMAP_STREAMER_HAS_JETSON_MM 1)
# Dedicated static support library wrapping the NVIDIA sample classes so the
# encoder backend links one target instead of loose sources.
add_library(cvmmap_streamer_jetson_mmapi STATIC
${_CVMMAP_STREAMER_JETSON_MM_COMMON_SOURCES})
target_include_directories(cvmmap_streamer_jetson_mmapi
PUBLIC
"${_CVMMAP_STREAMER_JETSON_MM_INCLUDE_DIR}")
target_link_libraries(cvmmap_streamer_jetson_mmapi
PUBLIC
Threads::Threads
${CVMMAP_STREAMER_LIBV4L2_LIBRARY}
${CVMMAP_STREAMER_NVBUFSURFACE_LIBRARY})
else()
set(CVMMAP_STREAMER_HAS_JETSON_MM 0)
endif()
if (NOT TARGET cvmmap::client)
if (
EXISTS "${CVMMAP_LOCAL_ROOT}/core/include/cvmmap/client.hpp"
@@ -344,13 +430,17 @@ add_library(cvmmap_streamer_feature_flags INTERFACE)
target_compile_definitions(cvmmap_streamer_feature_flags
INTERFACE
CVMMAP_STREAMER_HAS_MCAP=${CVMMAP_STREAMER_HAS_MCAP}
CVMMAP_STREAMER_HAS_MCAP_DEPTH=${CVMMAP_STREAMER_HAS_MCAP_DEPTH})
CVMMAP_STREAMER_HAS_MCAP_DEPTH=${CVMMAP_STREAMER_HAS_MCAP_DEPTH}
CVMMAP_STREAMER_HAS_JETSON_MM=${CVMMAP_STREAMER_HAS_JETSON_MM})
add_library(cvmmap_streamer_record_support STATIC
src/encode/encoder_backend.cpp
src/encode/ffmpeg_encoder_backend.cpp
src/record/protobuf_descriptor.cpp
src/record/mp4_record_writer.cpp)
if (CVMMAP_STREAMER_HAS_JETSON_MM_BOOL)
target_sources(cvmmap_streamer_record_support PRIVATE src/encode/jetson_mm_encoder_backend.cpp)
endif()
target_include_directories(cvmmap_streamer_record_support
PUBLIC
"${CMAKE_CURRENT_LIST_DIR}/include"
@@ -369,6 +459,9 @@ endif()
if (TARGET PkgConfig::PROTOBUF_PKG)
target_link_libraries(cvmmap_streamer_record_support PUBLIC PkgConfig::PROTOBUF_PKG)
endif()
if (CVMMAP_STREAMER_HAS_JETSON_MM_BOOL)
target_link_libraries(cvmmap_streamer_record_support PUBLIC cvmmap_streamer_jetson_mmapi)
endif()
if (CVMMAP_STREAMER_HAS_MCAP)
add_library(cvmmap_streamer_mcap_runtime STATIC
@@ -465,7 +558,9 @@ endif()
target_link_libraries(cvmmap_streamer_common PUBLIC ${CVMMAP_STREAMER_LINK_DEPS})
function(cvmmap_streamer_apply_build_rpath target)
function(cvmmap_streamer_apply_runtime_rpath target)
set_target_properties(${target} PROPERTIES
INSTALL_RPATH "${CVMMAP_STREAMER_INSTALL_RPATH}")
if (CVMMAP_STREAMER_BUILD_RPATH)
set_target_properties(${target} PROPERTIES
BUILD_RPATH "${CVMMAP_STREAMER_BUILD_RPATH}")
@@ -484,7 +579,7 @@ function(add_cvmmap_binary target source)
set_target_properties(${target} PROPERTIES
OUTPUT_NAME "${target}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
cvmmap_streamer_apply_build_rpath(${target})
cvmmap_streamer_apply_runtime_rpath(${target})
endfunction()
add_cvmmap_binary(cvmmap_streamer src/main_streamer.cpp)
@@ -522,7 +617,7 @@ if (CVMMAP_STREAMER_HAS_MCAP)
set_target_properties(mcap_reader_tester PROPERTIES
OUTPUT_NAME "mcap_reader_tester"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
cvmmap_streamer_apply_build_rpath(mcap_reader_tester)
cvmmap_streamer_apply_runtime_rpath(mcap_reader_tester)
add_executable(mcap_replay_tester src/testers/mcap_replay_tester.cpp)
target_include_directories(mcap_replay_tester
@@ -548,7 +643,7 @@ if (CVMMAP_STREAMER_HAS_MCAP)
set_target_properties(mcap_replay_tester PROPERTIES
OUTPUT_NAME "mcap_replay_tester"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
cvmmap_streamer_apply_build_rpath(mcap_replay_tester)
cvmmap_streamer_apply_runtime_rpath(mcap_replay_tester)
endif()
if (CVMMAP_STREAMER_HAS_MCAP_DEPTH)
+19 -13
View File
@@ -16,30 +16,36 @@ Legacy flags such as `--shm-name`, `--zmq-endpoint`, `--input-mode`, and the dum
## Encoder Path
### FFmpeg Is The Only Encoder Backend
### Jetson Hardware Encode Uses The Jetson Multimedia API Backend
The public backend surface is:
The public backend surface is still:
- `--encoder-backend auto`
- `--encoder-backend ffmpeg`
Both resolve to the FFmpeg encoder path. The removed GStreamer backend is no longer available.
`--encoder-backend ffmpeg` keeps the existing FFmpeg encoder path.
`--encoder-backend auto` may now select the in-repo Jetson Multimedia API backend on Jetson builds when hardware encoding is requested.
### NVENC Is Optional
The removed GStreamer backend is still unavailable.
When `--encoder-device nvidia` is selected, FFmpeg must expose `h264_nvenc` and `hevc_nvenc`.
### NVIDIA Device Mode On Jetson No Longer Depends On FFmpeg Hardware Encoders
Useful local checks:
On Jetson builds with `CVMMAP_STREAMER_ENABLE_JETSON_MM` enabled, `--encoder-device nvidia` uses the Jetson Multimedia API encoder directly rather than desktop `*_nvenc` or Jetson FFmpeg `*_v4l2m2m` encoder exposure.
Selection behavior is now:
- `--encoder-device software` -> FFmpeg software encode (`libx264`/`libx265`)
- `--encoder-device nvidia` -> Jetson Multimedia API backend only; fail if unavailable
- `--encoder-device auto` -> try Jetson Multimedia API first on Jetson builds, then fall back to FFmpeg software
Useful local checks on Jetson:
```bash
ffmpeg -hide_banner -encoders | rg 'nvenc|libx264|libx265'
test -e /dev/v4l2-nvenc
gst-inspect-1.0 nvv4l2h264enc nvv4l2h265enc
```
If NVENC is unavailable, use:
```bash
--encoder-device software
```
FFmpeg encoder enumeration is no longer the authoritative Jetson hardware-encode check for this repo.
### Low-Latency Defaults
@@ -47,7 +53,7 @@ The current low-latency defaults are:
- `gop=30`
- `b_frames=0`
- NVENC preset/tune tuned for low latency
- encoder-family-specific low-latency options where FFmpeg exposes them
This favors immediacy over compression efficiency.
+2 -2
View File
@@ -27,7 +27,7 @@ Notes:
- RTMP is Enhanced RTMP only.
- The custom RTMP packetizer and domestic mode are removed.
- `encoder.backend` remains `auto|ffmpeg`; both resolve to FFmpeg.
- `encoder.backend` remains `auto|ffmpeg`; `auto` may select Jetson Multimedia API on Jetson builds, while `ffmpeg` forces the FFmpeg encoder path.
## Optional Checks (Non-Blocking)
@@ -60,7 +60,7 @@ Current recording scope:
| Setting | Value |
|---------|-------|
| Encoder backend | `auto` -> FFmpeg |
| Encoder backend | `auto` -> Jetson MM on Jetson hardware requests, else FFmpeg |
| RTMP transport | `libavformat` |
| RTMP mode | Enhanced only |
| Encoder device | `auto` |
+110 -66
View File
@@ -263,6 +263,16 @@ CLI::Validator validate_rtp_endpoint() {
std::string{});
}
// Matches a command-line argument against a flag name. Accepts either the
// bare flag ("--mcap") or an assignment form ("--mcap=value"); returns the
// flag on a match and std::nullopt otherwise.
std::optional<std::string_view> match_cli_flag(std::string_view arg, std::string_view flag) {
const bool exact_match = (arg == flag);
const bool assignment_match =
arg.size() > flag.size() &&
arg.substr(0, flag.size()) == flag &&
arg[flag.size()] == '=';
if (exact_match || assignment_match) {
return flag;
}
return std::nullopt;
}
std::optional<std::string> find_disallowed_boolean_assignment(int argc, char **argv) {
struct FlagPair {
std::string_view positive;
@@ -293,6 +303,58 @@ std::optional<std::string> find_disallowed_boolean_assignment(int argc, char **a
return std::nullopt;
}
// Scans argv for MCAP-related flags that this build cannot honor.
// Returns a diagnostic string for the first unsupported argument found:
// any MCAP flag when MCAP support is compiled out, or a depth-specific
// flag when only depth support is compiled out. Returns std::nullopt when
// every argument is acceptable.
std::optional<std::string> find_unsupported_mcap_argument(int argc, char **argv) {
constexpr std::array<std::string_view, 10> kMcapFlags{{
"--mcap",
"--no-mcap",
"--mcap-path",
"--mcap-topic",
"--mcap-calibration-topic",
"--mcap-pose-topic",
"--mcap-body-topic",
"--mcap-frame-id",
"--mcap-compression",
"--mcap-depth-topic",
}};
constexpr std::array<std::string_view, 3> kMcapDepthFlags{{
"--mcap-depth",
"--no-mcap-depth",
"--mcap-depth-calibration-topic",
}};
constexpr bool mcap_supported = CVMMAP_STREAMER_HAS_MCAP != 0;
constexpr bool mcap_depth_supported = CVMMAP_STREAMER_HAS_MCAP_DEPTH != 0;
// Builds the diagnostic for the first flag in `flags` matching `arg`.
const auto first_match = [](std::string_view arg, const auto &flags,
std::string_view reason) -> std::optional<std::string> {
for (const auto flag : flags) {
if (match_cli_flag(arg, flag)) {
return "unsupported argument in this build: " + std::string(flag) + std::string(reason);
}
}
return std::nullopt;
};
for (int i = 1; i < argc; ++i) {
const std::string_view arg{argv[i]};
if (!mcap_supported) {
// Without MCAP support, both the base flags and the depth flags are
// rejected with the same "not compiled in" reason.
if (auto hit = first_match(arg, kMcapFlags, " (MCAP recording support is not compiled in)")) {
return hit;
}
if (auto hit = first_match(arg, kMcapDepthFlags, " (MCAP recording support is not compiled in)")) {
return hit;
}
continue;
}
if (!mcap_depth_supported) {
// MCAP is available but depth is not: only depth flags are rejected.
if (auto hit = first_match(arg, kMcapDepthFlags, " (MCAP depth support is not compiled in)")) {
return hit;
}
}
}
return std::nullopt;
}
// True when MCAP recording support was compiled into this binary.
bool runtime_supports_mcap() {
constexpr bool compiled_in = (CVMMAP_STREAMER_HAS_MCAP != 0);
return compiled_in;
}
@@ -301,23 +363,6 @@ bool runtime_supports_mcap_depth() {
return CVMMAP_STREAMER_HAS_MCAP_DEPTH != 0;
}
// Returns `description`, appending an availability caveat when MCAP support
// is not compiled into this build.
std::string describe_mcap_option(std::string_view description) {
std::string text{description};
if (!runtime_supports_mcap()) {
text += " (unavailable in this build; MCAP requests will fail)";
}
return text;
}
// Returns `description`, appending the most specific availability caveat:
// MCAP entirely absent takes precedence over depth-only absence.
std::string describe_mcap_depth_option(std::string_view description) {
std::string text{description};
if (!runtime_supports_mcap()) {
text += " (unavailable in this build; MCAP support is not compiled in)";
} else if (!runtime_supports_mcap_depth()) {
text += " (unavailable in this build; MCAP depth requests will fail)";
}
return text;
}
std::expected<void, std::string> validate_mcap_capability_request(const McapRecordConfig &config) {
if (!config.enabled) {
return {};
@@ -879,92 +924,87 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
->check(require_non_empty("--sdp"))
->excludes(rtp_sdp);
const std::string mcap_group = runtime_supports_mcap()
? "MCAP Record"
: "MCAP Record (unsupported in this build; requests will fail)";
const std::string mcap_depth_group = !runtime_supports_mcap()
? "MCAP Depth Record (unsupported in this build; MCAP support is not compiled in)"
: runtime_supports_mcap_depth()
? "MCAP Depth Record"
: "MCAP Depth Record (unsupported in this build; requests will fail)";
app.add_flag("--mcap,!--no-mcap", mcap_enabled_override, describe_mcap_option("Enable or disable MCAP recording"))
->group(mcap_group)
#if CVMMAP_STREAMER_HAS_MCAP
app.add_flag("--mcap,!--no-mcap", mcap_enabled_override, "Enable or disable MCAP recording")
->group("MCAP Record")
->default_str(defaults.record.mcap.enabled ? "true" : "false")
->disable_flag_override();
app.add_flag(
"--mcap-depth,!--no-mcap-depth",
mcap_depth_enabled_override,
describe_mcap_depth_option("Enable or disable MCAP depth recording"))
->group(mcap_depth_group)
->default_str(defaults.record.mcap.depth_enabled ? "true" : "false")
->disable_flag_override();
app.add_option("--mcap-path", mcap_path_override, describe_mcap_option("MCAP output file path"))
->group(mcap_group)
app.add_option("--mcap-path", mcap_path_override, "MCAP output file path")
->group("MCAP Record")
->type_name("PATH")
->check(require_non_empty("--mcap-path"))
->default_str(defaults.record.mcap.path);
app.add_option(
"--mcap-topic",
mcap_topic_override,
describe_mcap_option("Foxglove compressed video topic name"))
->group(mcap_group)
"Foxglove compressed video topic name")
->group("MCAP Record")
->type_name("TOPIC")
->check(require_non_empty("--mcap-topic"))
->default_str(defaults.record.mcap.topic);
app.add_option(
"--mcap-depth-topic",
mcap_depth_topic_override,
describe_mcap_depth_option("Depth image topic name"))
->group(mcap_depth_group)
->type_name("TOPIC")
->check(require_non_empty("--mcap-depth-topic"))
->default_str(defaults.record.mcap.depth_topic);
app.add_option(
"--mcap-calibration-topic",
mcap_calibration_topic_override,
describe_mcap_option("RGB camera calibration topic name"))
->group(mcap_group)
"RGB camera calibration topic name")
->group("MCAP Record")
->type_name("TOPIC")
->check(require_non_empty("--mcap-calibration-topic"))
->default_str(defaults.record.mcap.calibration_topic);
app.add_option(
"--mcap-depth-calibration-topic",
mcap_depth_calibration_topic_override,
describe_mcap_depth_option("Depth camera calibration topic name"))
->group(mcap_depth_group)
->type_name("TOPIC")
->check(require_non_empty("--mcap-depth-calibration-topic"))
->default_str(defaults.record.mcap.depth_calibration_topic);
app.add_option("--mcap-pose-topic", mcap_pose_topic_override, describe_mcap_option("Pose topic name"))
->group(mcap_group)
app.add_option("--mcap-pose-topic", mcap_pose_topic_override, "Pose topic name")
->group("MCAP Record")
->type_name("TOPIC")
->check(require_non_empty("--mcap-pose-topic"))
->default_str(defaults.record.mcap.pose_topic);
app.add_option(
"--mcap-body-topic",
mcap_body_topic_override,
describe_mcap_option("Body tracking topic name"))
->group(mcap_group)
"Body tracking topic name")
->group("MCAP Record")
->type_name("TOPIC")
->check(require_non_empty("--mcap-body-topic"))
->default_str(defaults.record.mcap.body_topic);
app.add_option(
"--mcap-frame-id",
mcap_frame_id_override,
describe_mcap_option("Frame ID written into MCAP messages"))
->group(mcap_group)
"Frame ID written into MCAP messages")
->group("MCAP Record")
->type_name("ID")
->check(require_non_empty("--mcap-frame-id"))
->default_str(defaults.record.mcap.frame_id);
app.add_option(
"--mcap-compression",
mcap_compression_override,
describe_mcap_option("MCAP chunk compression mode"))
->group(mcap_group)
"MCAP chunk compression mode")
->group("MCAP Record")
->type_name("MODE")
->transform(canonicalize_option(canonicalize_mcap_compression))
->default_str(std::string(to_string(defaults.record.mcap.compression)));
#if CVMMAP_STREAMER_HAS_MCAP_DEPTH
app.add_flag(
"--mcap-depth,!--no-mcap-depth",
mcap_depth_enabled_override,
"Enable or disable MCAP depth recording")
->group("MCAP Depth Record")
->default_str(defaults.record.mcap.depth_enabled ? "true" : "false")
->disable_flag_override();
app.add_option(
"--mcap-depth-topic",
mcap_depth_topic_override,
"Depth image topic name")
->group("MCAP Depth Record")
->type_name("TOPIC")
->check(require_non_empty("--mcap-depth-topic"))
->default_str(defaults.record.mcap.depth_topic);
app.add_option(
"--mcap-depth-calibration-topic",
mcap_depth_calibration_topic_override,
"Depth camera calibration topic name")
->group("MCAP Depth Record")
->type_name("TOPIC")
->check(require_non_empty("--mcap-depth-calibration-topic"))
->default_str(defaults.record.mcap.depth_calibration_topic);
#endif
#endif
app.add_option("--queue-size", queue_size_override, "Pipeline queue depth")
->group("Latency")
@@ -1018,6 +1058,10 @@ std::expected<RuntimeConfig, std::string> parse_runtime_config(int argc, char **
->check(CLI::NonNegativeNumber)
->default_str(std::to_string(defaults.latency.emit_stall_ms));
if (auto unsupported_mcap_argument = find_unsupported_mcap_argument(argc, argv)) {
return std::unexpected(*unsupported_mcap_argument);
}
if (auto invalid_boolean_assignment = find_disallowed_boolean_assignment(argc, argv)) {
return std::unexpected(*invalid_boolean_assignment);
}
@@ -1217,7 +1261,7 @@ std::expected<void, std::string> validate_runtime_config(const RuntimeConfig &co
}
if (config.outputs.rtmp.enabled) {
if (config.encoder.backend == EncoderBackendType::Auto) {
// auto resolves to FFmpeg; nothing else is supported.
// auto may select the Jetson Multimedia API backend on Jetson before falling back to FFmpeg software.
} else if (config.encoder.backend != EncoderBackendType::FFmpeg) {
return std::unexpected("invalid backend/output matrix: RTMP requires encoder.backend=ffmpeg or auto");
}
+125 -1
View File
@@ -1,14 +1,138 @@
#include "cvmmap_streamer/encode/encoder_backend.hpp"
#ifndef CVMMAP_STREAMER_HAS_JETSON_MM
#define CVMMAP_STREAMER_HAS_JETSON_MM 0
#endif
#include <optional>
#include <spdlog/spdlog.h>
namespace cvmmap_streamer::encode {
EncoderBackend make_ffmpeg_backend();
#if CVMMAP_STREAMER_HAS_JETSON_MM
EncoderBackend make_jetson_mm_backend();
#endif
namespace {
// Encoder backend used for encoder.backend=auto: defers the concrete backend
// choice to init(), preferring the Jetson Multimedia API backend (when
// compiled in) for hardware-capable device modes and falling back to the
// FFmpeg software path. All other calls delegate to the selected backend.
class SelectingEncoderBackend {
public:
[[nodiscard]]
// Reports the active backend's name; "selecting" before init() succeeds.
std::string_view backend_name() const {
return active_backend_ ? (*active_backend_)->backend_name() : std::string_view{"selecting"};
}
[[nodiscard]]
// Hardware usage of the active backend; false until a backend is selected.
bool using_hardware() const {
return active_backend_ ? (*active_backend_)->using_hardware() : false;
}
[[nodiscard]]
// Selects and initializes a concrete backend for this config/frame geometry.
// Selection order: Jetson MM for non-software device modes (when built in),
// then FFmpeg. device=nvidia is Jetson-only and fails hard; device=auto is
// downgraded to software before the FFmpeg fallback.
Status init(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) {
// Re-init replaces any previously selected backend.
shutdown();
#if CVMMAP_STREAMER_HAS_JETSON_MM
if (config.encoder.device != EncoderDeviceType::Software) {
auto jetson_backend = make_jetson_mm_backend();
auto jetson_init = jetson_backend->init(config, frame_info);
if (jetson_init) {
active_backend_.emplace(std::move(jetson_backend));
return {};
}
// nvidia explicitly requires the Jetson backend: propagate its error.
if (config.encoder.device == EncoderDeviceType::Nvidia) {
return std::unexpected(jetson_init.error());
}
// auto: log why Jetson MM was rejected, then try FFmpeg software below.
spdlog::warn(
"JETSON_MM_ENCODER_FALLBACK reason='{}' fallback=ffmpeg_software",
format_error(jetson_init.error()));
}
// Defensive: Nvidia requests are handled above; this also mirrors the
// error produced when the Jetson backend is compiled out.
if (config.encoder.device == EncoderDeviceType::Nvidia) {
return unexpected_error(
ERR_BACKEND_UNAVAILABLE,
"Jetson hardware encoder backend is unavailable in this build");
}
#else
// Build without Jetson MM: nvidia cannot be satisfied at all.
if (config.encoder.device == EncoderDeviceType::Nvidia) {
return unexpected_error(
ERR_BACKEND_UNAVAILABLE,
"Jetson hardware encoder backend is unavailable in this build");
}
#endif
auto ffmpeg_backend = make_ffmpeg_backend();
auto ffmpeg_config = config;
// auto reaching the FFmpeg path means hardware was unavailable, so pin the
// FFmpeg backend to software encoders explicitly.
if (config.encoder.device == EncoderDeviceType::Auto) {
ffmpeg_config.encoder.device = EncoderDeviceType::Software;
}
auto ffmpeg_init = ffmpeg_backend->init(ffmpeg_config, frame_info);
if (!ffmpeg_init) {
return std::unexpected(ffmpeg_init.error());
}
active_backend_.emplace(std::move(ffmpeg_backend));
return {};
}
[[nodiscard]]
// Stream info requires an initialized backend; errors out otherwise.
Result<EncodedStreamInfo> stream_info() const {
if (!active_backend_) {
return unexpected_error(ERR_NOT_READY, "encoder backend is unavailable before initialization");
}
return (*active_backend_)->stream_info();
}
[[nodiscard]]
// Polling before init is a benign no-op.
Status poll() {
if (!active_backend_) {
return {};
}
return (*active_backend_)->poll();
}
[[nodiscard]]
// Frames cannot be accepted until a backend has been selected.
Status push_frame(const RawVideoFrame &frame) {
if (!active_backend_) {
return unexpected_error(ERR_NOT_READY, "encoder backend not initialized");
}
return (*active_backend_)->push_frame(frame);
}
[[nodiscard]]
// Draining with no backend yields an empty batch rather than an error.
Result<std::vector<EncodedAccessUnit>> drain() {
if (!active_backend_) {
return std::vector<EncodedAccessUnit>{};
}
return (*active_backend_)->drain();
}
[[nodiscard]]
// Flushing with no backend likewise yields an empty batch.
Result<std::vector<EncodedAccessUnit>> flush() {
if (!active_backend_) {
return std::vector<EncodedAccessUnit>{};
}
return (*active_backend_)->flush();
}
// Shuts down and releases the selected backend; safe to call repeatedly.
void shutdown() {
if (active_backend_) {
(*active_backend_)->shutdown();
active_backend_.reset();
}
}
private:
// The chosen backend; empty until init() succeeds and after shutdown().
std::optional<EncoderBackend> active_backend_{};
};
} // namespace
Result<EncoderBackend> make_encoder_backend(const RuntimeConfig &config) {
switch (config.encoder.backend) {
case EncoderBackendType::FFmpeg:
case EncoderBackendType::Auto:
return make_ffmpeg_backend();
case EncoderBackendType::Auto:
return pro::make_proxy<EncoderBackendFacade, SelectingEncoderBackend>();
}
return unexpected_error(ERR_INTERNAL, "unknown encoder backend");
+132 -94
View File
@@ -1,4 +1,5 @@
#include "cvmmap_streamer/encode/encoder_backend.hpp"
#include "ffmpeg_encoder_options.hpp"
extern "C" {
#include <libavcodec/avcodec.h>
@@ -52,10 +53,9 @@ public:
Status init(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) {
shutdown();
config_ = &config;
frame_info_ = frame_info;
codec_ = config.encoder.codec;
encoder_pix_fmt_ = pick_encoder_pixel_format(config.encoder.device);
config_ = &config;
frame_info_ = frame_info;
codec_ = config.encoder.codec;
auto input_pixel_format = to_av_pixel_format(frame_info.pixel_format);
if (!input_pixel_format) {
@@ -63,45 +63,15 @@ public:
}
input_pix_fmt_ = *input_pixel_format;
auto encoder_name = pick_encoder_name(config);
if (!encoder_name) {
return std::unexpected(encoder_name.error());
}
using_hardware_ = encoder_name->find("nvenc") != std::string::npos;
const auto *encoder = avcodec_find_encoder_by_name(encoder_name->c_str());
if (encoder == nullptr) {
return unexpected_error(ERR_BACKEND_UNAVAILABLE, "FFmpeg encoder '" + *encoder_name + "' is unavailable");
auto opened_encoder = open_encoder(config);
if (!opened_encoder) {
return std::unexpected(opened_encoder.error());
}
context_ = avcodec_alloc_context3(encoder);
if (context_ == nullptr) {
return unexpected_error(ERR_ALLOCATION_FAILED, "failed to allocate FFmpeg encoder context");
}
context_->codec_type = AVMEDIA_TYPE_VIDEO;
context_->codec_id = encoder->id;
context_->width = static_cast<int>(frame_info.width);
context_->height = static_cast<int>(frame_info.height);
context_->pix_fmt = encoder_pix_fmt_;
context_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
context_->time_base = AVRational{1, 1000000000};
context_->framerate = AVRational{30, 1};
context_->gop_size = static_cast<int>(config.encoder.gop);
context_->max_b_frames = static_cast<int>(config.encoder.b_frames);
context_->thread_count = 1;
auto codec_setup = configure_codec(*encoder_name, config);
if (!codec_setup) {
return codec_setup;
}
const auto open_result = avcodec_open2(context_, encoder, nullptr);
if (open_result < 0) {
return unexpected_error(
ERR_ENCODER,
"failed to open FFmpeg encoder '" + *encoder_name + "': " + av_error_string(open_result));
}
context_ = opened_encoder->context;
encoder_name_ = std::string(opened_encoder->candidate.name);
encoder_pix_fmt_ = opened_encoder->candidate.pixel_format;
using_hardware_ = opened_encoder->candidate.using_hardware;
scaler_ = sws_getCachedContext(
nullptr,
@@ -151,10 +121,11 @@ public:
stream_info_ = build_stream_info();
spdlog::info(
"FFMPEG_ENCODER_PATH codec={} device={} encoder={} pix_fmt={}",
"FFMPEG_ENCODER_PATH codec={} device={} encoder={} hardware={} pix_fmt={}",
cvmmap_streamer::to_string(codec_),
device_to_string(config.encoder.device),
*encoder_name,
encoder_name_,
using_hardware_,
av_get_pix_fmt_name(encoder_pix_fmt_));
return {};
}
@@ -274,6 +245,7 @@ public:
}
first_source_timestamp_ns_.reset();
stream_info_.reset();
encoder_name_.clear();
using_hardware_ = false;
}
@@ -298,14 +270,6 @@ private:
}
}
[[nodiscard]]
static AVPixelFormat pick_encoder_pixel_format(EncoderDeviceType device) {
if (device == EncoderDeviceType::Software) {
return AV_PIX_FMT_YUV420P;
}
return AV_PIX_FMT_NV12;
}
[[nodiscard]]
static std::string_view device_to_string(EncoderDeviceType device) {
switch (device) {
@@ -320,57 +284,130 @@ private:
}
[[nodiscard]]
Result<std::string> pick_encoder_name(const RuntimeConfig &config) const {
const bool prefer_hardware = config.encoder.device != EncoderDeviceType::Software;
const bool prefer_software = config.encoder.device == EncoderDeviceType::Software;
if (codec_ == CodecType::H265) {
if (prefer_hardware && avcodec_find_encoder_by_name("hevc_nvenc") != nullptr) {
return std::string("hevc_nvenc");
}
if (!prefer_hardware || config.encoder.device == EncoderDeviceType::Auto) {
if (avcodec_find_encoder_by_name("libx265") != nullptr) {
return std::string("libx265");
}
}
if (!prefer_software && avcodec_find_encoder_by_name("hevc_nvenc") != nullptr) {
return std::string("hevc_nvenc");
}
static Result<void> set_string_option(AVCodecContext *context, const char *key, std::string_view value) {
const auto result = av_opt_set(context->priv_data, key, std::string(value).c_str(), 0);
if (result < 0) {
return unexpected_error(
ERR_BACKEND_UNAVAILABLE,
"no usable FFmpeg encoder found for h265 (looked for hevc_nvenc, libx265)");
ERR_ENCODER,
"failed to set FFmpeg encoder option '" + std::string(key) + "=" + std::string(value) + "': " + av_error_string(result));
}
if (prefer_hardware && avcodec_find_encoder_by_name("h264_nvenc") != nullptr) {
return std::string("h264_nvenc");
}
if (!prefer_hardware || config.encoder.device == EncoderDeviceType::Auto) {
if (avcodec_find_encoder_by_name("libx264") != nullptr) {
return std::string("libx264");
}
}
if (!prefer_software && avcodec_find_encoder_by_name("h264_nvenc") != nullptr) {
return std::string("h264_nvenc");
}
return unexpected_error(
ERR_BACKEND_UNAVAILABLE,
"no usable FFmpeg encoder found for h264 (looked for h264_nvenc, libx264)");
return {};
}
[[nodiscard]]
Status configure_codec(std::string_view encoder_name, const RuntimeConfig &config) {
av_opt_set(context_->priv_data, "preset", encoder_name.find("nvenc") != std::string_view::npos ? "p1" : "veryfast", 0);
if (encoder_name.find("nvenc") != std::string_view::npos) {
av_opt_set(context_->priv_data, "tune", "ull", 0);
av_opt_set(context_->priv_data, "zerolatency", "1", 0);
av_opt_set(context_->priv_data, "rc-lookahead", "0", 0);
} else {
av_opt_set(context_->priv_data, "tune", "zerolatency", 0);
if (encoder_name == "libx265") {
av_opt_set(context_->priv_data, "x265-params", "repeat-headers=1:scenecut=0", 0);
// Sets an integer option on the encoder's private option dictionary,
// converting AVERROR failures into an ERR_ENCODER status that names the
// offending key/value pair.
static Result<void> set_int_option(AVCodecContext *context, const char *key, std::int64_t value) {
const auto rc = av_opt_set_int(context->priv_data, key, value, 0);
if (rc >= 0) {
return {};
}
return unexpected_error(
ERR_ENCODER,
"failed to set FFmpeg encoder option '" + std::string(key) + "=" + std::to_string(value) + "': " + av_error_string(rc));
}
// Result of a successful encoder-open attempt: the configured, opened
// AVCodecContext (ownership passes to the caller) paired with the candidate
// description it was opened from.
struct OpenedEncoder {
AVCodecContext *context{nullptr};
FfmpegEncoderCandidate candidate{};
};
[[nodiscard]]
Result<OpenedEncoder> open_encoder(const RuntimeConfig &config) const {
const auto candidates = ffmpeg_encoder_candidates(codec_, config.encoder.device);
const auto attempted_candidates = ffmpeg_encoder_candidate_list(candidates);
std::string last_error{};
for (const auto &candidate : candidates) {
const auto *encoder = avcodec_find_encoder_by_name(candidate.name.data());
if (encoder == nullptr) {
last_error = "FFmpeg encoder '" + std::string(candidate.name) + "' is unavailable";
spdlog::warn(
"FFmpeg encoder '{}' unavailable in {} mode, trying next candidate",
candidate.name,
device_to_string(config.encoder.device));
continue;
}
auto *context = avcodec_alloc_context3(encoder);
if (context == nullptr) {
return unexpected_error(ERR_ALLOCATION_FAILED, "failed to allocate FFmpeg encoder context");
}
context->codec_type = AVMEDIA_TYPE_VIDEO;
context->codec_id = encoder->id;
context->width = static_cast<int>(frame_info_.width);
context->height = static_cast<int>(frame_info_.height);
context->pix_fmt = candidate.pixel_format;
context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
context->time_base = AVRational{1, 1000000000};
context->framerate = AVRational{30, 1};
context->gop_size = static_cast<int>(config.encoder.gop);
context->max_b_frames = static_cast<int>(config.encoder.b_frames);
context->thread_count = 1;
auto codec_setup = configure_codec(context, candidate, config);
if (!codec_setup) {
last_error = codec_setup.error().detail;
avcodec_free_context(&context);
spdlog::warn(
"FFmpeg encoder '{}' configuration failed in {} mode: {}. trying next candidate",
candidate.name,
device_to_string(config.encoder.device),
codec_setup.error().detail);
continue;
}
const auto open_result = avcodec_open2(context, encoder, nullptr);
if (open_result < 0) {
last_error = "failed to open FFmpeg encoder '" + std::string(candidate.name) + "': " + av_error_string(open_result);
avcodec_free_context(&context);
spdlog::warn(
"FFmpeg encoder '{}' failed to open in {} mode: {}. trying next candidate",
candidate.name,
device_to_string(config.encoder.device),
av_error_string(open_result));
continue;
}
return OpenedEncoder{.context = context, .candidate = candidate};
}
av_opt_set_int(context_->priv_data, "forced-idr", config.latency.force_idr_on_reset ? 1 : 0, 0);
if (last_error.empty()) {
last_error = "no usable FFmpeg encoder found";
}
const auto error_code = config.encoder.device == EncoderDeviceType::Auto ? ERR_ENCODER : ERR_BACKEND_UNAVAILABLE;
return unexpected_error(error_code, last_error + " (attempted: " + attempted_candidates + ")");
}
[[nodiscard]]
static Status configure_codec(AVCodecContext *context, const FfmpegEncoderCandidate &candidate, const RuntimeConfig &config) {
    // Apply the per-family private options (preset/tune/x265-params plus the
    // NVENC low-latency and forced-IDR knobs) before the encoder is opened.
    // Each family helper returns nullopt when the option does not apply.
    const auto apply_optional = [context](const char *key, const std::optional<std::string_view> value) -> Status {
        if (!value) {
            return {};
        }
        return set_string_option(context, key, *value);
    };
    if (auto status = apply_optional("preset", ffmpeg_encoder_preset(candidate)); !status) {
        return status;
    }
    if (auto status = apply_optional("tune", ffmpeg_encoder_tune(candidate)); !status) {
        return status;
    }
    if (auto status = apply_optional("x265-params", ffmpeg_encoder_x265_params(candidate)); !status) {
        return status;
    }
    if (ffmpeg_encoder_supports_nvenc_latency_flags(candidate)) {
        // NVENC-only: disable lookahead and enable zero-latency operation.
        if (auto status = set_string_option(context, "zerolatency", "1"); !status) {
            return status;
        }
        if (auto status = set_string_option(context, "rc-lookahead", "0"); !status) {
            return status;
        }
    }
    if (ffmpeg_encoder_supports_forced_idr_option(candidate)) {
        if (auto status = set_int_option(context, "forced-idr", config.latency.force_idr_on_reset ? 1 : 0); !status) {
            return status;
        }
    }
    return {};
}
@@ -482,6 +519,7 @@ private:
AVPixelFormat encoder_pix_fmt_{AV_PIX_FMT_NONE};
std::optional<std::uint64_t> first_source_timestamp_ns_{};
std::optional<EncodedStreamInfo> stream_info_{};
std::string encoder_name_{};
bool using_hardware_{false};
};
+197
View File
@@ -0,0 +1,197 @@
#pragma once
#include "cvmmap_streamer/config/runtime_config.hpp"
extern "C" {
#include <libavutil/pixfmt.h>
}
#include <optional>
#include <string>
#include <string_view>
#include <vector>
namespace cvmmap_streamer::encode {
// Families of FFmpeg encoder implementations; the option helpers below key
// their behavior on the family rather than on individual encoder names.
enum class FfmpegEncoderFamily {
    // NVIDIA NVENC hardware wrappers (h264_nvenc / hevc_nvenc).
    Nvenc,
    // V4L2 memory-to-memory hardware wrappers (h264_v4l2m2m / hevc_v4l2m2m).
    V4l2M2m,
    // OpenMAX hardware wrapper (h264_omx).
    Omx,
    // libx264 software encoder.
    LibX264,
    // libx265 software encoder.
    LibX265,
};
// One FFmpeg encoder to try when opening a codec; candidates are attempted
// in the order produced by ffmpeg_encoder_candidates().
struct FfmpegEncoderCandidate {
    // Encoder name as accepted by avcodec_find_encoder_by_name().
    std::string_view name{};
    // Family used to pick preset/tune/latency options for this encoder.
    FfmpegEncoderFamily family{FfmpegEncoderFamily::LibX264};
    // True when the candidate encodes in hardware.
    bool using_hardware{false};
    // Pixel format the encoder is fed (NV12 for hardware, YUV420P for software).
    AVPixelFormat pixel_format{AV_PIX_FMT_NONE};
};
[[nodiscard]]
inline std::vector<FfmpegEncoderCandidate> ffmpeg_encoder_candidates(const CodecType codec, const EncoderDeviceType device) {
std::vector<FfmpegEncoderCandidate> candidates{};
auto append_hardware_candidates = [&] {
if (codec == CodecType::H265) {
candidates.push_back(FfmpegEncoderCandidate{
.name = "hevc_nvenc",
.family = FfmpegEncoderFamily::Nvenc,
.using_hardware = true,
.pixel_format = AV_PIX_FMT_NV12,
});
candidates.push_back(FfmpegEncoderCandidate{
.name = "hevc_v4l2m2m",
.family = FfmpegEncoderFamily::V4l2M2m,
.using_hardware = true,
.pixel_format = AV_PIX_FMT_NV12,
});
return;
}
candidates.push_back(FfmpegEncoderCandidate{
.name = "h264_nvenc",
.family = FfmpegEncoderFamily::Nvenc,
.using_hardware = true,
.pixel_format = AV_PIX_FMT_NV12,
});
candidates.push_back(FfmpegEncoderCandidate{
.name = "h264_v4l2m2m",
.family = FfmpegEncoderFamily::V4l2M2m,
.using_hardware = true,
.pixel_format = AV_PIX_FMT_NV12,
});
candidates.push_back(FfmpegEncoderCandidate{
.name = "h264_omx",
.family = FfmpegEncoderFamily::Omx,
.using_hardware = true,
.pixel_format = AV_PIX_FMT_NV12,
});
};
const auto append_software_candidate = [&] {
if (codec == CodecType::H265) {
candidates.push_back(FfmpegEncoderCandidate{
.name = "libx265",
.family = FfmpegEncoderFamily::LibX265,
.using_hardware = false,
.pixel_format = AV_PIX_FMT_YUV420P,
});
return;
}
candidates.push_back(FfmpegEncoderCandidate{
.name = "libx264",
.family = FfmpegEncoderFamily::LibX264,
.using_hardware = false,
.pixel_format = AV_PIX_FMT_YUV420P,
});
};
switch (device) {
case EncoderDeviceType::Auto:
append_hardware_candidates();
append_software_candidate();
break;
case EncoderDeviceType::Nvidia:
append_hardware_candidates();
break;
case EncoderDeviceType::Software:
append_software_candidate();
break;
}
return candidates;
}
[[nodiscard]]
inline std::string ffmpeg_encoder_candidate_list(const std::vector<FfmpegEncoderCandidate> &candidates) {
    // Render candidate names as a comma-separated list for diagnostics.
    std::string result{};
    bool first = true;
    for (const auto &candidate : candidates) {
        if (!first) {
            result += ", ";
        }
        result += candidate.name;
        first = false;
    }
    return result;
}
[[nodiscard]]
inline std::optional<std::string_view> ffmpeg_encoder_preset(const FfmpegEncoderCandidate &candidate) {
    // Speed-oriented presets: NVENC's fastest preset "p1" and the x264/x265
    // "veryfast" preset. V4L2 M2M and OMX take no preset option.
    if (candidate.family == FfmpegEncoderFamily::Nvenc) {
        return "p1";
    }
    if (candidate.family == FfmpegEncoderFamily::LibX264 ||
        candidate.family == FfmpegEncoderFamily::LibX265) {
        return "veryfast";
    }
    return std::nullopt;
}
[[nodiscard]]
inline std::optional<std::string_view> ffmpeg_encoder_tune(const FfmpegEncoderCandidate &candidate) {
    // Latency-oriented tune values; only NVENC ("ull") and libx264
    // ("zerolatency") accept a tune option here.
    if (candidate.family == FfmpegEncoderFamily::Nvenc) {
        return "ull";
    }
    if (candidate.family == FfmpegEncoderFamily::LibX264) {
        return "zerolatency";
    }
    return std::nullopt;
}
[[nodiscard]]
inline std::optional<std::string_view> ffmpeg_encoder_rate_control_mode(const FfmpegEncoderCandidate &candidate) {
    // Only NVENC takes an explicit rate-control mode ("vbr") here.
    return candidate.family == FfmpegEncoderFamily::Nvenc
               ? std::optional<std::string_view>{"vbr"}
               : std::nullopt;
}
[[nodiscard]]
inline std::optional<std::string_view> ffmpeg_encoder_quality_key(const FfmpegEncoderCandidate &candidate) {
    // Option name used to express constant-quality targets: "cq" for NVENC,
    // "crf" for the software encoders; the V4L2/OMX wrappers have none.
    if (candidate.family == FfmpegEncoderFamily::Nvenc) {
        return "cq";
    }
    if (candidate.family == FfmpegEncoderFamily::LibX264 ||
        candidate.family == FfmpegEncoderFamily::LibX265) {
        return "crf";
    }
    return std::nullopt;
}
[[nodiscard]]
inline std::optional<std::string_view> ffmpeg_encoder_x265_params(const FfmpegEncoderCandidate &candidate) {
    // libx265 only: repeat parameter sets on keyframes and disable scenecut.
    return candidate.family == FfmpegEncoderFamily::LibX265
               ? std::optional<std::string_view>{"repeat-headers=1:scenecut=0"}
               : std::nullopt;
}
[[nodiscard]]
inline bool ffmpeg_encoder_supports_nvenc_latency_flags(const FfmpegEncoderCandidate &candidate) {
    // Only the NVENC wrappers expose the zerolatency/rc-lookahead options.
    switch (candidate.family) {
    case FfmpegEncoderFamily::Nvenc:
        return true;
    default:
        return false;
    }
}
[[nodiscard]]
inline bool ffmpeg_encoder_supports_forced_idr_option(const FfmpegEncoderCandidate &candidate) {
    // NVENC, libx264, and libx265 accept the "forced-idr" private option;
    // the V4L2 M2M and OMX wrappers do not.
    const auto family = candidate.family;
    return family == FfmpegEncoderFamily::Nvenc ||
           family == FfmpegEncoderFamily::LibX264 ||
           family == FfmpegEncoderFamily::LibX265;
}
} // namespace cvmmap_streamer::encode
+750
View File
@@ -0,0 +1,750 @@
#include "cvmmap_streamer/encode/encoder_backend.hpp"

extern "C" {
#include <libavutil/frame.h>
#include <libavutil/imgutils.h>
#include <libavutil/pixfmt.h>
#include <libswscale/swscale.h>
}

#include <NvBuffer.h>
#include <NvVideoEncoder.h>
#include <nvbufsurface.h>

#include <linux/videodev2.h>
#include <sys/time.h>

#include <algorithm>
#include <array>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

#include <spdlog/spdlog.h>
namespace cvmmap_streamer::encode {
namespace {
// Number of V4L2 buffers allocated on each plane (output and capture).
constexpr std::uint32_t kEncoderBufferCount = 6;
// sizeimage hint for encoded capture buffers: 2 MiB per access unit.
constexpr std::uint32_t kCapturePlaneSizeImage = 2u * 1024u * 1024u;
// Frame rate assumed for encoder configuration and bitrate estimation.
constexpr std::uint32_t kDefaultFrameRateNum = 30;
constexpr std::uint32_t kDefaultFrameRateDen = 1;
// How long init() waits for the warmup keyframe's decoder configuration.
constexpr auto kWarmupTimeout = std::chrono::seconds(2);
// How long flush() waits for the encoder's end-of-stream signal.
constexpr auto kFlushTimeout = std::chrono::seconds(2);
// Four-byte Annex-B start code prepended to harvested parameter sets.
constexpr std::uint8_t kAnnexBStartCode[4]{0x00, 0x00, 0x00, 0x01};
[[nodiscard]]
Result<AVPixelFormat> to_av_pixel_format(ipc::PixelFormat format) {
    // Map the shared-memory raw pixel formats onto their FFmpeg equivalents;
    // anything else is rejected as unsupported for the Jetson backend.
    if (format == ipc::PixelFormat::BGR) {
        return AV_PIX_FMT_BGR24;
    }
    if (format == ipc::PixelFormat::RGB) {
        return AV_PIX_FMT_RGB24;
    }
    if (format == ipc::PixelFormat::BGRA) {
        return AV_PIX_FMT_BGRA;
    }
    if (format == ipc::PixelFormat::RGBA) {
        return AV_PIX_FMT_RGBA;
    }
    if (format == ipc::PixelFormat::GRAY) {
        return AV_PIX_FMT_GRAY8;
    }
    return unexpected_error(
        ERR_UNSUPPORTED,
        "unsupported raw pixel format for Jetson backend (supported: BGR/RGB/BGRA/RGBA/GRAY)");
}
[[nodiscard]]
std::uint32_t codec_capture_plane_format(CodecType codec) {
    // V4L2 fourcc of the encoded capture plane for the requested codec.
    if (codec == CodecType::H265) {
        return V4L2_PIX_FMT_H265;
    }
    return V4L2_PIX_FMT_H264;
}
[[nodiscard]]
std::uint32_t default_bitrate_bits_per_second(const ipc::FrameInfo &frame_info) {
    // Heuristic default target bitrate: one bit per eight pixels per second
    // at the default frame rate, clamped to a [2, 25] Mbps operating range.
    constexpr std::uint64_t kMinBitrateBps = 2'000'000u;
    constexpr std::uint64_t kMaxBitrateBps = 25'000'000u;
    const auto pixels_per_second =
        static_cast<std::uint64_t>(frame_info.width) *
        static_cast<std::uint64_t>(frame_info.height) *
        static_cast<std::uint64_t>(kDefaultFrameRateNum);
    // Clamp manually instead of std::clamp: this translation unit did not
    // include <algorithm>, so the original silently relied on a transitive
    // include for std::clamp.
    std::uint64_t estimated = pixels_per_second / 8u;
    if (estimated < kMinBitrateBps) {
        estimated = kMinBitrateBps;
    } else if (estimated > kMaxBitrateBps) {
        estimated = kMaxBitrateBps;
    }
    return static_cast<std::uint32_t>(estimated);
}
[[nodiscard]]
std::uint64_t timeval_to_token_us(const timeval &timestamp) {
    // Collapse a V4L2 buffer timestamp into the single microsecond "ticket"
    // value used to key pending frame metadata.
    const auto seconds_as_us = static_cast<std::uint64_t>(timestamp.tv_sec) * 1'000'000ull;
    return seconds_as_us + static_cast<std::uint64_t>(timestamp.tv_usec);
}
[[nodiscard]]
timeval token_us_to_timeval(const std::uint64_t token_us) {
    // Inverse of timeval_to_token_us: spread a microsecond ticket across the
    // tv_sec / tv_usec fields of a V4L2 buffer timestamp.
    return timeval{
        .tv_sec = static_cast<time_t>(token_us / 1'000'000ull),
        .tv_usec = static_cast<suseconds_t>(token_us % 1'000'000ull),
    };
}
[[nodiscard]]
std::optional<std::pair<std::size_t, std::size_t>> next_start_code(std::span<const std::uint8_t> bytes, std::size_t offset) {
    // Find the next Annex-B start code at or after `offset`. Returns the
    // start-code position and its length (3 for 00 00 01, 4 for 00 00 00 01),
    // or nullopt when no further start code exists.
    for (std::size_t pos = offset; pos + 3 <= bytes.size(); ++pos) {
        if (bytes[pos] != 0x00 || bytes[pos + 1] != 0x00) {
            continue;
        }
        if (bytes[pos + 2] == 0x01) {
            return std::pair{pos, static_cast<std::size_t>(3)};
        }
        if (pos + 4 <= bytes.size() && bytes[pos + 2] == 0x00 && bytes[pos + 3] == 0x01) {
            return std::pair{pos, static_cast<std::size_t>(4)};
        }
    }
    return std::nullopt;
}
[[nodiscard]]
std::vector<std::span<const std::uint8_t>> split_annexb_nalus(std::span<const std::uint8_t> access_unit) {
    // Split an Annex-B access unit into its NAL-unit payloads. Start codes
    // are excluded from the returned spans; empty payloads are dropped.
    std::vector<std::span<const std::uint8_t>> nalus{};
    auto marker = next_start_code(access_unit, 0);
    while (marker) {
        const auto payload_begin = marker->first + marker->second;
        const auto following = next_start_code(access_unit, payload_begin);
        const auto payload_end = following ? following->first : access_unit.size();
        if (payload_begin < payload_end) {
            nalus.push_back(access_unit.subspan(payload_begin, payload_end - payload_begin));
        }
        marker = following;
    }
    return nalus;
}
[[nodiscard]]
bool is_parameter_set_nalu(const CodecType codec, std::span<const std::uint8_t> nalu) {
    // True when the NAL unit carries decoder configuration:
    // H.265 VPS(32)/SPS(33)/PPS(34) or H.264 SPS(7)/PPS(8).
    if (codec == CodecType::H265) {
        if (nalu.size() < 2) {
            return false;
        }
        switch ((nalu[0] >> 1) & 0x3fu) {
        case 32:
        case 33:
        case 34:
            return true;
        default:
            return false;
        }
    }
    if (nalu.empty()) {
        return false;
    }
    const auto nal_type = static_cast<std::uint8_t>(nalu[0] & 0x1fu);
    return nal_type == 7 || nal_type == 8;
}
[[nodiscard]]
std::vector<std::uint8_t> extract_decoder_config_annexb(const CodecType codec, std::span<const std::uint8_t> access_unit) {
    // Collect the parameter-set NAL units from an access unit, each
    // re-prefixed with a 4-byte start code, to serve as the decoder config.
    std::vector<std::uint8_t> decoder_config{};
    for (const auto nalu : split_annexb_nalus(access_unit)) {
        if (is_parameter_set_nalu(codec, nalu)) {
            decoder_config.insert(decoder_config.end(), std::begin(kAnnexBStartCode), std::end(kAnnexBStartCode));
            decoder_config.insert(decoder_config.end(), nalu.begin(), nalu.end());
        }
    }
    return decoder_config;
}
// Jetson Multimedia API encoder backend.
//
// Raw frames are converted to planar YUV420 with swscale on the caller's
// thread, copied into V4L2 output-plane buffers, and encoded by
// NvVideoEncoder. Encoded access units are harvested on the encoder's
// capture dequeue thread and handed back through drain()/flush(). A
// per-frame "ticket" (a microsecond token carried in the V4L2 buffer
// timestamp) correlates capture-side buffers with submit-side metadata.
//
// Threading: mutex_ guards pending_tickets_, ready_access_units_,
// async_error_, capture_eos_, and the decoder_config harvested into
// stream_info_; condition_ wakes the warmup/flush waiters.
class JetsonMmEncoderBackend {
public:
    ~JetsonMmEncoderBackend() {
        shutdown();
    }

    [[nodiscard]]
    std::string_view backend_name() const {
        return "jetson_mm";
    }

    [[nodiscard]]
    bool using_hardware() const {
        return true;
    }

    // Configures and starts the encoder, then submits a black warmup frame so
    // the decoder configuration (parameter sets) is present in stream_info()
    // before the first real access unit is produced.
    [[nodiscard]]
    Status init(const RuntimeConfig &config, const ipc::FrameInfo &frame_info) {
        shutdown();
        frame_info_ = frame_info;
        codec_ = config.encoder.codec;
        config_ = &config;
        auto input_pixel_format = to_av_pixel_format(frame_info.pixel_format);
        if (!input_pixel_format) {
            return std::unexpected(input_pixel_format.error());
        }
        input_pix_fmt_ = *input_pixel_format;
        // Same-size conversion only: the scaler performs pixel-format
        // conversion (input format -> YUV420P), not resizing.
        scaler_ = sws_getCachedContext(
            nullptr,
            static_cast<int>(frame_info.width),
            static_cast<int>(frame_info.height),
            input_pix_fmt_,
            static_cast<int>(frame_info.width),
            static_cast<int>(frame_info.height),
            AV_PIX_FMT_YUV420P,
            SWS_BILINEAR,
            nullptr,
            nullptr,
            nullptr);
        if (scaler_ == nullptr) {
            return unexpected_error(ERR_EXTERNAL_LIBRARY, "failed to create Jetson swscale conversion context");
        }
        converted_frame_ = av_frame_alloc();
        if (converted_frame_ == nullptr) {
            return unexpected_error(ERR_ALLOCATION_FAILED, "failed to allocate Jetson conversion frame");
        }
        converted_frame_->format = AV_PIX_FMT_YUV420P;
        converted_frame_->width = static_cast<int>(frame_info.width);
        converted_frame_->height = static_cast<int>(frame_info.height);
        const auto frame_buffer = av_frame_get_buffer(converted_frame_, 32);
        if (frame_buffer < 0) {
            return unexpected_error(ERR_ALLOCATION_FAILED, "failed to allocate Jetson conversion frame buffer");
        }
        encoder_ = NvVideoEncoder::createVideoEncoder("cvmmap_streamer_jetson_mm");
        if (encoder_ == nullptr) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to create Jetson NvVideoEncoder");
        }
        // Capture plane carries the encoded bitstream; output plane takes the
        // raw YUV420M frames.
        if (encoder_->setCapturePlaneFormat(
                codec_capture_plane_format(codec_),
                frame_info.width,
                frame_info.height,
                kCapturePlaneSizeImage) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder capture plane format");
        }
        if (encoder_->setOutputPlaneFormat(V4L2_PIX_FMT_YUV420M, frame_info.width, frame_info.height) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder output plane format");
        }
        if (encoder_->setBitrate(default_bitrate_bits_per_second(frame_info)) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder bitrate");
        }
        if (codec_ == CodecType::H264) {
            if (encoder_->setProfile(V4L2_MPEG_VIDEO_H264_PROFILE_MAIN) < 0) {
                return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson H.264 profile");
            }
            if (encoder_->setLevel(V4L2_MPEG_VIDEO_H264_LEVEL_5_1) < 0) {
                return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson H.264 level");
            }
        } else {
            if (encoder_->setProfile(V4L2_MPEG_VIDEO_H265_PROFILE_MAIN) < 0) {
                return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson H.265 profile");
            }
        }
        if (encoder_->setRateControlMode(V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder rate control mode");
        }
        if (encoder_->setIDRInterval(config.encoder.gop) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder IDR interval");
        }
        if (encoder_->setIFrameInterval(config.encoder.gop) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder I-frame interval");
        }
        if (encoder_->setNumBFrames(config.encoder.b_frames) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder B-frame count");
        }
        // Repeating SPS/PPS on every IDR lets the warmup harvest the
        // parameter sets and lets consumers join mid-stream.
        if (encoder_->setInsertSpsPpsAtIdrEnabled(true) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to enable Jetson SPS/PPS insertion at IDR");
        }
        if (encoder_->setFrameRate(kDefaultFrameRateNum, kDefaultFrameRateDen) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set Jetson encoder frame rate");
        }
        if (encoder_->setMaxPerfMode(1) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to enable Jetson max performance mode");
        }
        if (encoder_->output_plane.setupPlane(V4L2_MEMORY_MMAP, kEncoderBufferCount, true, false) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set up Jetson output plane buffers");
        }
        if (encoder_->capture_plane.setupPlane(V4L2_MEMORY_MMAP, kEncoderBufferCount, true, false) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to set up Jetson capture plane buffers");
        }
        if (!encoder_->capture_plane.setDQThreadCallback(&JetsonMmEncoderBackend::capture_plane_dq_callback)) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to register Jetson capture dequeue callback");
        }
        if (encoder_->output_plane.setStreamStatus(true) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to start Jetson output plane");
        }
        if (encoder_->capture_plane.setStreamStatus(true) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to start Jetson capture plane");
        }
        if (encoder_->capture_plane.startDQThread(this) < 0) {
            return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to start Jetson capture dequeue thread");
        }
        capture_thread_started_ = true;
        // Pre-queue every (empty) capture buffer so the encoder has somewhere
        // to write encoded output.
        for (std::uint32_t i = 0; i < encoder_->capture_plane.getNumBuffers(); ++i) {
            v4l2_buffer capture_buffer{};
            std::array<v4l2_plane, MAX_PLANES> capture_planes{};
            capture_buffer.index = i;
            capture_buffer.m.planes = capture_planes.data();
            if (encoder_->capture_plane.qBuffer(capture_buffer, nullptr) < 0) {
                return unexpected_error(ERR_BACKEND_UNAVAILABLE, "failed to queue empty Jetson capture buffer");
            }
        }
        stream_info_ = EncodedStreamInfo{
            .codec = codec_,
            .width = frame_info.width,
            .height = frame_info.height,
            .time_base_num = 1,
            .time_base_den = 1'000'000'000u,
            .frame_rate_num = kDefaultFrameRateNum,
            .frame_rate_den = kDefaultFrameRateDen,
            .bitstream_format = EncodedBitstreamFormat::AnnexB,
            .decoder_config = {},
        };
        auto warmup = run_warmup();
        if (!warmup) {
            return warmup;
        }
        spdlog::info(
            "JETSON_MM_ENCODER_READY codec={} width={} height={} gop={} b_frames={}",
            cvmmap_streamer::to_string(codec_),
            frame_info.width,
            frame_info.height,
            config.encoder.gop,
            config.encoder.b_frames);
        return {};
    }

    // Stream metadata including the harvested decoder configuration; valid
    // only after a successful init().
    [[nodiscard]]
    Result<EncodedStreamInfo> stream_info() const {
        if (!stream_info_) {
            return unexpected_error(ERR_NOT_READY, "Jetson backend stream info is unavailable before initialization");
        }
        return *stream_info_;
    }

    // Surfaces any error recorded by the capture thread.
    [[nodiscard]]
    Status poll() {
        return check_async_error();
    }

    // Converts and submits one raw frame; encoded output is collected
    // asynchronously and returned by drain().
    [[nodiscard]]
    Status push_frame(const RawVideoFrame &frame) {
        auto status = check_async_error();
        if (!status) {
            return status;
        }
        if (encoder_ == nullptr || converted_frame_ == nullptr || scaler_ == nullptr) {
            return unexpected_error(ERR_NOT_READY, "Jetson backend not initialized");
        }
        if (frame.bytes.empty()) {
            return {};
        }
        const auto ticket_us = next_ticket_us_++;
        const auto stream_pts_ns = ensure_stream_pts(frame.source_timestamp_ns);
        auto converted = convert_frame(frame);
        if (!converted) {
            return converted;
        }
        return submit_converted_frame(ticket_us, frame.source_timestamp_ns, stream_pts_ns, frame.force_keyframe, false);
    }

    // Returns all access units produced since the previous drain() call.
    [[nodiscard]]
    Result<std::vector<EncodedAccessUnit>> drain() {
        auto status = check_async_error();
        if (!status) {
            return std::unexpected(status.error());
        }
        std::lock_guard lock(mutex_);
        // Hand the whole ready queue over with one move instead of erasing
        // from the vector front element-by-element (which is quadratic).
        std::vector<EncodedAccessUnit> access_units = std::move(ready_access_units_);
        ready_access_units_.clear();
        return access_units;
    }

    // Sends end-of-stream, waits for the encoder to confirm it, and returns
    // any remaining access units.
    [[nodiscard]]
    Result<std::vector<EncodedAccessUnit>> flush() {
        auto status = check_async_error();
        if (!status) {
            return std::unexpected(status.error());
        }
        if (encoder_ == nullptr || eos_sent_) {
            return drain();
        }
        auto eos_submit = submit_end_of_stream();
        if (!eos_submit) {
            return std::unexpected(eos_submit.error());
        }
        std::unique_lock lock(mutex_);
        const auto completed = condition_.wait_for(lock, kFlushTimeout, [&] {
            return capture_eos_ || async_error_.has_value();
        });
        if (!completed && !capture_eos_) {
            return unexpected_error(ERR_ENCODER, "timed out waiting for Jetson encoder EOS");
        }
        if (async_error_) {
            return std::unexpected(*async_error_);
        }
        lock.unlock();
        return drain();
    }

    // Stops the capture thread, tears down encoder/scaler/frame resources,
    // and resets all bookkeeping. Safe to call repeatedly.
    void shutdown() {
        if (encoder_ != nullptr) {
            // Stop the dequeue thread before touching shared state so no
            // capture callback runs concurrently with the teardown below.
            if (capture_thread_started_) {
                encoder_->capture_plane.stopDQThread();
                encoder_->capture_plane.waitForDQThread(1000);
                capture_thread_started_ = false;
            }
            encoder_->output_plane.setStreamStatus(false);
            encoder_->capture_plane.setStreamStatus(false);
            delete encoder_;
            encoder_ = nullptr;
        }
        if (converted_frame_ != nullptr) {
            av_frame_free(&converted_frame_);
        }
        if (scaler_ != nullptr) {
            sws_freeContext(scaler_);
            scaler_ = nullptr;
        }
        first_real_source_timestamp_ns_.reset();
        stream_info_.reset();
        eos_sent_ = false;
        capture_eos_ = false;
        next_output_buffer_index_ = 0;
        next_ticket_us_ = 1;
        input_pix_fmt_ = AV_PIX_FMT_NONE;
        config_ = nullptr;
        std::lock_guard lock(mutex_);
        pending_tickets_.clear();
        ready_access_units_.clear();
        async_error_.reset();
    }

private:
    // Metadata remembered at submit time, re-attached when the matching
    // encoded buffer is dequeued on the capture thread.
    struct TicketMetadata {
        std::uint64_t source_timestamp_ns{0};
        std::uint64_t stream_pts_ns{0};
        // Warmup frames are encoded only to harvest parameter sets and are
        // not emitted as access units.
        bool warmup{false};
    };

    // Trampoline from the NvVideoEncoder C-style callback into the instance.
    [[nodiscard]]
    static bool capture_plane_dq_callback(
        v4l2_buffer *v4l2_buf,
        NvBuffer *buffer,
        NvBuffer * /*shared_buffer*/,
        void *data) {
        auto *self = static_cast<JetsonMmEncoderBackend *>(data);
        return self != nullptr ? self->handle_capture_buffer(v4l2_buf, buffer) : false;
    }

    // Runs on the capture dequeue thread for every encoded buffer. Returning
    // false stops the dequeue thread.
    [[nodiscard]]
    bool handle_capture_buffer(v4l2_buffer *v4l2_buf, NvBuffer *buffer) {
        if (v4l2_buf == nullptr || buffer == nullptr) {
            store_async_error(unexpected_error(ERR_ENCODER, "Jetson capture dequeue callback received a null buffer").error());
            return false;
        }
        // A zero-sized buffer signals end-of-stream after an EOS submit.
        if (buffer->planes[0].bytesused == 0) {
            std::lock_guard lock(mutex_);
            capture_eos_ = true;
            condition_.notify_all();
            return false;
        }
        v4l2_ctrl_videoenc_outputbuf_metadata metadata{};
        if (encoder_->getMetadata(v4l2_buf->index, metadata) < 0) {
            store_async_error(unexpected_error(ERR_ENCODER, "failed to read Jetson encoder output metadata").error());
            return false;
        }
        TicketMetadata ticket{};
        {
            std::lock_guard lock(mutex_);
            const auto ticket_it = pending_tickets_.find(timeval_to_token_us(v4l2_buf->timestamp));
            if (ticket_it == pending_tickets_.end()) {
                store_async_error(unexpected_error(ERR_PROTOCOL, "Jetson encoder returned an unknown frame ticket").error());
                return false;
            }
            ticket = ticket_it->second;
            pending_tickets_.erase(ticket_it);
        }
        std::vector<std::uint8_t> annexb_bytes(
            buffer->planes[0].data,
            buffer->planes[0].data + buffer->planes[0].bytesused);
        const auto keyframe = metadata.KeyFrame != 0;
        {
            // run_warmup() polls decoder_config under mutex_ from the init
            // thread; publish the harvest (and any access unit) under the
            // same lock. The original wrote decoder_config without locking,
            // which raced with that reader.
            std::lock_guard lock(mutex_);
            if (keyframe && stream_info_ && stream_info_->decoder_config.empty()) {
                stream_info_->decoder_config = extract_decoder_config_annexb(codec_, annexb_bytes);
            }
            if (!ticket.warmup) {
                EncodedAccessUnit access_unit{};
                access_unit.codec = codec_;
                access_unit.source_timestamp_ns = ticket.source_timestamp_ns;
                access_unit.stream_pts_ns = ticket.stream_pts_ns;
                access_unit.keyframe = keyframe;
                access_unit.annexb_bytes = std::move(annexb_bytes);
                ready_access_units_.push_back(std::move(access_unit));
            }
        }
        condition_.notify_all();
        if (encoder_->capture_plane.qBuffer(*v4l2_buf, nullptr) < 0) {
            store_async_error(unexpected_error(ERR_ENCODER, "failed to requeue Jetson capture buffer").error());
            return false;
        }
        return true;
    }

    // Records the first capture-thread error; later errors are dropped.
    void store_async_error(Error error) {
        std::lock_guard lock(mutex_);
        if (!async_error_) {
            async_error_ = std::move(error);
        }
        condition_.notify_all();
    }

    [[nodiscard]]
    Status check_async_error() const {
        std::lock_guard lock(mutex_);
        if (async_error_) {
            return std::unexpected(*async_error_);
        }
        return {};
    }

    // Rebases stream PTS to zero at the first real frame's source timestamp.
    [[nodiscard]]
    std::uint64_t ensure_stream_pts(const std::uint64_t source_timestamp_ns) {
        if (!first_real_source_timestamp_ns_) {
            first_real_source_timestamp_ns_ = source_timestamp_ns;
        }
        return source_timestamp_ns - *first_real_source_timestamp_ns_;
    }

    // Converts the raw input frame into converted_frame_ (YUV420P) via swscale.
    [[nodiscard]]
    Status convert_frame(const RawVideoFrame &frame) {
        const auto make_writable = av_frame_make_writable(converted_frame_);
        if (make_writable < 0) {
            return unexpected_error(ERR_EXTERNAL_LIBRARY, "failed to make Jetson conversion frame writable");
        }
        AVFrame input_frame{};
        input_frame.format = input_pix_fmt_;
        input_frame.width = static_cast<int>(frame_info_.width);
        input_frame.height = static_cast<int>(frame_info_.height);
        if (av_image_fill_arrays(
                input_frame.data,
                input_frame.linesize,
                const_cast<std::uint8_t *>(frame.bytes.data()),
                input_pix_fmt_,
                input_frame.width,
                input_frame.height,
                1) < 0) {
            return unexpected_error(ERR_INVALID_ARGUMENT, "failed to map input frame into Jetson conversion image arrays");
        }
        // Honor the caller-provided row stride when it differs from the
        // tightly-packed default computed above.
        if (frame.row_stride_bytes != 0) {
            input_frame.linesize[0] = static_cast<int>(frame.row_stride_bytes);
        }
        sws_scale(
            scaler_,
            input_frame.data,
            input_frame.linesize,
            0,
            input_frame.height,
            converted_frame_->data,
            converted_frame_->linesize);
        return {};
    }

    // Fills converted_frame_ with black (Y=16, U=V=128) for the warmup frame.
    void fill_black_frame() {
        // Best-effort: the frame was just allocated by init(), so a failure
        // to make it writable is not expected here.
        av_frame_make_writable(converted_frame_);
        for (int y = 0; y < converted_frame_->height; ++y) {
            std::memset(converted_frame_->data[0] + y * converted_frame_->linesize[0], 16, static_cast<std::size_t>(converted_frame_->width));
        }
        for (int y = 0; y < converted_frame_->height / 2; ++y) {
            std::memset(converted_frame_->data[1] + y * converted_frame_->linesize[1], 128, static_cast<std::size_t>(converted_frame_->width / 2));
            std::memset(converted_frame_->data[2] + y * converted_frame_->linesize[2], 128, static_cast<std::size_t>(converted_frame_->width / 2));
        }
    }

    // Flushes CPU writes on each plane so the encoder sees the frame data.
    [[nodiscard]]
    Status sync_output_buffer_for_device(NvBuffer &buffer) const {
        for (std::uint32_t plane = 0; plane < buffer.n_planes; ++plane) {
            NvBufSurface *surface = nullptr;
            if (NvBufSurfaceFromFd(buffer.planes[plane].fd, reinterpret_cast<void **>(&surface)) != 0 || surface == nullptr) {
                return unexpected_error(ERR_EXTERNAL_LIBRARY, "failed to resolve Jetson output plane surface");
            }
            if (NvBufSurfaceSyncForDevice(surface, 0, static_cast<int>(plane)) != 0) {
                return unexpected_error(ERR_EXTERNAL_LIBRARY, "failed to sync Jetson output plane buffer for device");
            }
        }
        return {};
    }

    // Hands out output-plane buffers: the first kEncoderBufferCount come from
    // the initial pool, afterwards buffers are dequeued back from the driver.
    [[nodiscard]]
    Result<NvBuffer *> acquire_output_buffer(v4l2_buffer &v4l2_buf, std::array<v4l2_plane, MAX_PLANES> &planes) {
        planes.fill(v4l2_plane{});
        std::memset(&v4l2_buf, 0, sizeof(v4l2_buf));
        v4l2_buf.m.planes = planes.data();
        NvBuffer *buffer = nullptr;
        if (next_output_buffer_index_ < encoder_->output_plane.getNumBuffers()) {
            buffer = encoder_->output_plane.getNthBuffer(next_output_buffer_index_);
            v4l2_buf.index = next_output_buffer_index_++;
        } else if (encoder_->output_plane.dqBuffer(v4l2_buf, &buffer, nullptr, 1000) < 0) {
            return unexpected_error(ERR_ENCODER, "failed to dequeue Jetson output plane buffer");
        }
        if (buffer == nullptr) {
            return unexpected_error(ERR_ENCODER, "Jetson output plane returned a null buffer");
        }
        return buffer;
    }

    // Copies converted_frame_ into an output-plane buffer, records the ticket
    // metadata, and queues the buffer for encoding.
    [[nodiscard]]
    Status submit_converted_frame(
        const std::uint64_t ticket_us,
        const std::uint64_t source_timestamp_ns,
        const std::uint64_t stream_pts_ns,
        const bool force_keyframe,
        const bool warmup) {
        std::array<v4l2_plane, MAX_PLANES> planes{};
        v4l2_buffer v4l2_buf{};
        auto acquired = acquire_output_buffer(v4l2_buf, planes);
        if (!acquired) {
            return std::unexpected(acquired.error());
        }
        NvBuffer *buffer = *acquired;
        if (force_keyframe && encoder_->forceIDR() < 0) {
            return unexpected_error(ERR_ENCODER, "failed to force a Jetson IDR frame");
        }
        // Copy row-by-row: plane 0 is full-resolution luma, planes 1/2 are
        // half-resolution chroma; driver stride may exceed the frame width.
        for (std::uint32_t plane = 0; plane < buffer->n_planes; ++plane) {
            auto &output_plane = buffer->planes[plane];
            const auto plane_width = plane == 0 ? frame_info_.width : frame_info_.width / 2;
            const auto plane_height = plane == 0 ? frame_info_.height : frame_info_.height / 2;
            for (std::uint32_t row = 0; row < plane_height; ++row) {
                std::memcpy(
                    output_plane.data + row * output_plane.fmt.stride,
                    converted_frame_->data[plane] + row * converted_frame_->linesize[plane],
                    plane_width);
            }
            output_plane.bytesused = output_plane.fmt.stride * plane_height;
            v4l2_buf.m.planes[plane].bytesused = output_plane.bytesused;
        }
        // The V4L2 timestamp carries the ticket so the capture callback can
        // look the metadata back up.
        v4l2_buf.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY;
        v4l2_buf.timestamp = token_us_to_timeval(ticket_us);
        auto sync = sync_output_buffer_for_device(*buffer);
        if (!sync) {
            return sync;
        }
        // Register the ticket before queueing so the capture thread can never
        // observe a completed frame without its metadata.
        {
            std::lock_guard lock(mutex_);
            pending_tickets_[ticket_us] = TicketMetadata{
                .source_timestamp_ns = source_timestamp_ns,
                .stream_pts_ns = stream_pts_ns,
                .warmup = warmup,
            };
        }
        if (encoder_->output_plane.qBuffer(v4l2_buf, nullptr) < 0) {
            std::lock_guard lock(mutex_);
            pending_tickets_.erase(ticket_us);
            return unexpected_error(ERR_ENCODER, "failed to queue Jetson output frame");
        }
        return {};
    }

    // Queues a zero-sized output buffer, which the driver treats as EOS.
    [[nodiscard]]
    Status submit_end_of_stream() {
        std::array<v4l2_plane, MAX_PLANES> planes{};
        v4l2_buffer v4l2_buf{};
        auto acquired = acquire_output_buffer(v4l2_buf, planes);
        if (!acquired) {
            return std::unexpected(acquired.error());
        }
        v4l2_buf.m.planes[0].bytesused = 0;
        if (encoder_->output_plane.qBuffer(v4l2_buf, nullptr) < 0) {
            return unexpected_error(ERR_ENCODER, "failed to queue Jetson encoder EOS");
        }
        eos_sent_ = true;
        return {};
    }

    // Encodes one forced-IDR black frame and waits until the capture thread
    // has harvested the decoder configuration into stream_info_.
    [[nodiscard]]
    Status run_warmup() {
        fill_black_frame();
        const auto warmup_ticket = next_ticket_us_++;
        auto warmup_submit = submit_converted_frame(warmup_ticket, 0, 0, true, true);
        if (!warmup_submit) {
            return warmup_submit;
        }
        std::unique_lock lock(mutex_);
        const auto completed = condition_.wait_for(lock, kWarmupTimeout, [&] {
            return async_error_.has_value() || (stream_info_ && !stream_info_->decoder_config.empty());
        });
        // Report a concrete capture-side failure before the generic harvest
        // error; the original checked in the opposite order and masked it.
        if (async_error_) {
            return std::unexpected(*async_error_);
        }
        if (!completed || !stream_info_ || stream_info_->decoder_config.empty()) {
            return unexpected_error(ERR_ENCODER, "failed to harvest Jetson decoder configuration from warmup frame");
        }
        return {};
    }

    const RuntimeConfig *config_{nullptr};
    ipc::FrameInfo frame_info_{};
    CodecType codec_{CodecType::H264};
    NvVideoEncoder *encoder_{nullptr};
    SwsContext *scaler_{nullptr};
    AVFrame *converted_frame_{nullptr};
    AVPixelFormat input_pix_fmt_{AV_PIX_FMT_NONE};
    std::optional<std::uint64_t> first_real_source_timestamp_ns_{};
    std::optional<EncodedStreamInfo> stream_info_{};
    std::uint32_t next_output_buffer_index_{0};
    std::uint64_t next_ticket_us_{1};
    bool capture_thread_started_{false};
    bool eos_sent_{false};
    mutable std::mutex mutex_{};
    std::condition_variable condition_{};
    std::unordered_map<std::uint64_t, TicketMetadata> pending_tickets_{};
    std::vector<EncodedAccessUnit> ready_access_units_{};
    std::optional<Error> async_error_{};
    bool capture_eos_{false};
};
} // namespace
// Factory: wraps a JetsonMmEncoderBackend in the EncoderBackend proxy facade.
EncoderBackend make_jetson_mm_backend() {
    return pro::make_proxy<EncoderBackendFacade, JetsonMmEncoderBackend>();
}
} // namespace cvmmap_streamer::encode
+42 -37
View File
@@ -9,43 +9,48 @@ namespace cvmmap_streamer {
namespace {
constexpr std::array<std::string_view, 36> kHelpLines{
"Usage:",
" --help, -h\tshow this message",
"",
"Options:",
" --version\tprint version information",
" --config <path>\tload runtime config from TOML",
" --input-uri <uri>\tcvmmap source URI (example: cvmmap://default)",
" --run-mode <mode>\tpipeline|ingest",
" --codec <codec>\th264|h265",
" --encoder-backend <backend>\tauto|ffmpeg",
" --encoder-device <device>\tauto|nvidia|software",
" --gop <frames>\tencoder GOP length",
" --b-frames <count>\tencoder B-frame count",
" --keep-stream-on-reset <bool>\tkeep RTMP/RTP sessions alive across upstream stream_reset events",
" --rtp\t\tenable RTP output",
" --rtp-endpoint <host:port>\tRTP destination",
" --rtp-payload-type <pt>\tRTP payload type (96-127)",
" --rtp-sdp <path>\twrite optional SDP sidecar",
" --rtmp\t\tenable RTMP output",
" --rtmp-url <url>\tadd RTMP destination (repeatable)",
" --rtmp-transport <mode>\tlibavformat|ffmpeg_process",
" --rtmp-ffmpeg <path>\tffmpeg binary for ffmpeg_process transport",
" --mcap\t\tenable MCAP recording",
" --mcap-depth\t\tenable MCAP depth recording",
" --mcap-path <path>\tMCAP output file",
" --mcap-topic <topic>\tMCAP topic name",
" --mcap-depth-topic <topic>\tMCAP depth topic name (implies --mcap)",
" --mcap-body-topic <topic>\tMCAP body topic name",
" --mcap-frame-id <id>\tFoxglove CompressedVideo frame_id",
" --mcap-compression <mode>\tnone|lz4|zstd",
" record.mcap.depth_enabled\tTOML toggle for optional depth recording",
"",
"Examples:",
" cvmmap_streamer --help",
" cvmmap_streamer --run-mode pipeline --input-uri cvmmap://default --help",
" rtp_receiver_tester --help"};
// CLI help text. MCAP entries are compiled in only when recording support is
// built; the depth entries additionally require depth recording support.
constexpr auto kHelpLines = std::to_array<std::string_view>({
    "Usage:",
    "  --help, -h\tshow this message",
    "",
    "Options:",
    "  --version\tprint version information",
    "  --config <path>\tload runtime config from TOML",
    "  --input-uri <uri>\tcvmmap source URI (example: cvmmap://default)",
    "  --run-mode <mode>\tpipeline|ingest",
    "  --codec <codec>\th264|h265",
    "  --encoder-backend <backend>\tauto|ffmpeg",
    "  --encoder-device <device>\tauto|nvidia|software",
    "  --gop <frames>\tencoder GOP length",
    "  --b-frames <count>\tencoder B-frame count",
    "  --keep-stream-on-reset <bool>\tkeep RTMP/RTP sessions alive across upstream stream_reset events",
    "  --rtp\t\tenable RTP output",
    "  --rtp-endpoint <host:port>\tRTP destination",
    "  --rtp-payload-type <pt>\tRTP payload type (96-127)",
    "  --rtp-sdp <path>\twrite optional SDP sidecar",
    "  --rtmp\t\tenable RTMP output",
    "  --rtmp-url <url>\tadd RTMP destination (repeatable)",
    "  --rtmp-transport <mode>\tlibavformat|ffmpeg_process",
    "  --rtmp-ffmpeg <path>\tffmpeg binary for ffmpeg_process transport",
#if CVMMAP_STREAMER_HAS_MCAP
    "  --mcap\t\tenable MCAP recording",
    "  --mcap-path <path>\tMCAP output file",
    "  --mcap-topic <topic>\tMCAP topic name",
    "  --mcap-body-topic <topic>\tMCAP body topic name",
    "  --mcap-frame-id <id>\tFoxglove CompressedVideo frame_id",
    "  --mcap-compression <mode>\tnone|lz4|zstd",
#if CVMMAP_STREAMER_HAS_MCAP_DEPTH
    "  --mcap-depth\t\tenable MCAP depth recording",
    "  --mcap-depth-topic <topic>\tMCAP depth topic name (implies --mcap)",
    "  record.mcap.depth_enabled\tTOML toggle for optional depth recording",
#endif
#endif
    "",
    "Examples:",
    "  cvmmap_streamer --help",
    "  cvmmap_streamer --run-mode pipeline --input-uri cvmmap://default --help",
    "  rtp_receiver_tester --help"
});
}
+56 -100
View File
@@ -1,4 +1,5 @@
#include "cvmmap_streamer/record/mp4_record_writer.hpp"
#include "../encode/ffmpeg_encoder_options.hpp"
extern "C" {
#include <libavcodec/avcodec.h>
@@ -23,11 +24,6 @@ namespace {
inline constexpr std::uint64_t kNanosPerSecond = 1'000'000'000ull;
// One FFmpeg encoder implementation to attempt, in fallback order: the
// avcodec encoder name, whether it is an NVENC hardware encoder, and the
// input pixel format it is configured with (NV12 for NVENC, YUV420P for the
// software encoders — see encoder_candidates()).
struct EncoderCandidate {
std::string name{};
bool using_hardware{false};
AVPixelFormat pixel_format{AV_PIX_FMT_NONE};
};
struct ResolvedEncoderSettings {
std::string mapped_preset{};
@@ -99,52 +95,6 @@ std::uint64_t frame_period_ns(const AVRational frame_rate) {
}
// Builds the ordered list of FFmpeg encoder candidates for the requested
// codec/device combination. Auto tries the NVENC hardware encoder first and
// falls back to the software encoder; Nvidia and Software each yield exactly
// one candidate. Hardware candidates are fed NV12, software candidates
// YUV420P.
[[nodiscard]]
std::vector<EncoderCandidate> encoder_candidates(const CodecType codec, const EncoderDeviceType device) {
const std::string hardware_name = codec == CodecType::H265 ? "hevc_nvenc" : "h264_nvenc";
const std::string software_name = codec == CodecType::H265 ? "libx265" : "libx264";
switch (device) {
case EncoderDeviceType::Auto:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
case EncoderDeviceType::Nvidia:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
};
case EncoderDeviceType::Software:
return {
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
}
// Unreachable for valid enum values; defensive default if the enum grows.
return {};
}
// Returns the encoder preset for this candidate: NVENC hardware encoders use
// their fastest preset "p1", software x264/x265 use "veryfast".
[[nodiscard]]
std::string mapped_preset_value(const EncoderCandidate &candidate) {
if (candidate.using_hardware) {
return "p1";
}
return "veryfast";
}
// Returns the encoder "tune" option for this candidate, if any: NVENC
// hardware encoders take "ull" (ultra-low latency), libx264 takes
// "zerolatency". libx265 has no tune value here, so nullopt is returned.
[[nodiscard]]
std::optional<std::string> mapped_tune_value(const EncoderCandidate &candidate) {
if (candidate.using_hardware) {
return std::make_optional<std::string>("ull");
}
if (candidate.name == "libx264") {
return std::make_optional<std::string>("zerolatency");
}
return std::nullopt;
}
// Returns the x265-params string for libx265, nullopt for every other
// encoder. repeat-headers=1 re-emits parameter sets on keyframes and
// scenecut=0 disables scene-cut keyframe insertion.
[[nodiscard]]
std::optional<std::string> x265_params_value(const EncoderCandidate &candidate) {
if (candidate.name == "libx265") {
return std::make_optional<std::string>("repeat-headers=1:scenecut=0");
}
return std::nullopt;
}
[[nodiscard]]
std::expected<void, std::string> set_string_option(AVCodecContext *context, const char *key, const std::string &value) {
@@ -167,7 +117,7 @@ std::expected<void, std::string> set_int_option(AVCodecContext *context, const c
[[nodiscard]]
std::expected<ResolvedEncoderSettings, std::string> configure_codec_context(
AVCodecContext *context,
const EncoderCandidate &candidate,
const encode::FfmpegEncoderCandidate &candidate,
const CodecType codec,
const std::uint32_t width,
const std::uint32_t height,
@@ -186,51 +136,52 @@ std::expected<ResolvedEncoderSettings, std::string> configure_codec_context(
context->thread_count = 1;
ResolvedEncoderSettings resolved{
.mapped_preset = mapped_preset_value(candidate),
.mapped_tune = mapped_tune_value(candidate),
.quality_value = tuning.quality,
.gop = tuning.gop,
.b_frames = tuning.b_frames,
};
if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) {
return std::unexpected(set.error());
if (const auto preset = encode::ffmpeg_encoder_preset(candidate); preset) {
resolved.mapped_preset = std::string(*preset);
if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) {
return std::unexpected(set.error());
}
}
if (resolved.mapped_tune) {
if (const auto tune = encode::ffmpeg_encoder_tune(candidate); tune) {
resolved.mapped_tune = std::string(*tune);
if (auto set = set_string_option(context, "tune", *resolved.mapped_tune); !set) {
return std::unexpected(set.error());
}
}
if (candidate.using_hardware) {
resolved.rate_control_mode = "vbr";
resolved.quality_key = "cq";
if (const auto rc_mode = encode::ffmpeg_encoder_rate_control_mode(candidate); rc_mode) {
resolved.rate_control_mode = std::string(*rc_mode);
if (auto set = set_string_option(context, "rc", *resolved.rate_control_mode); !set) {
return std::unexpected(set.error());
}
if (auto set = set_int_option(context, "cq", resolved.quality_value); !set) {
}
if (const auto quality_key = encode::ffmpeg_encoder_quality_key(candidate); quality_key) {
resolved.quality_key = std::string(*quality_key);
if (auto set = set_int_option(context, resolved.quality_key.c_str(), resolved.quality_value); !set) {
return std::unexpected(set.error());
}
}
if (const auto x265_params = encode::ffmpeg_encoder_x265_params(candidate); x265_params) {
if (auto set = set_string_option(context, "x265-params", std::string(*x265_params)); !set) {
return std::unexpected(set.error());
}
}
if (encode::ffmpeg_encoder_supports_nvenc_latency_flags(candidate)) {
if (auto set = set_string_option(context, "zerolatency", "1"); !set) {
return std::unexpected(set.error());
}
if (auto set = set_string_option(context, "rc-lookahead", "0"); !set) {
return std::unexpected(set.error());
}
} else {
resolved.quality_key = "crf";
if (auto set = set_int_option(context, "crf", resolved.quality_value); !set) {
}
if (encode::ffmpeg_encoder_supports_forced_idr_option(candidate)) {
if (auto set = set_int_option(context, "forced-idr", 1); !set) {
return std::unexpected(set.error());
}
if (const auto x265_params = x265_params_value(candidate); x265_params) {
if (auto set = set_string_option(context, "x265-params", *x265_params); !set) {
return std::unexpected(set.error());
}
}
}
if (auto set = set_int_option(context, "forced-idr", 1); !set) {
return std::unexpected(set.error());
}
return resolved;
@@ -238,7 +189,7 @@ std::expected<ResolvedEncoderSettings, std::string> configure_codec_context(
struct OpenedEncoder {
AVCodecContext *context{nullptr};
EncoderCandidate candidate{};
encode::FfmpegEncoderCandidate candidate{};
ResolvedEncoderSettings resolved{};
};
@@ -250,19 +201,19 @@ std::expected<OpenedEncoder, std::string> open_encoder(
const std::uint32_t height,
const AVRational framerate,
const Mp4EncodeTuning &tuning) {
const auto candidates = encode::ffmpeg_encoder_candidates(codec, device);
const auto attempted_candidates = encode::ffmpeg_encoder_candidate_list(candidates);
std::string last_error{};
for (const auto &candidate : encoder_candidates(codec, device)) {
const auto *encoder = avcodec_find_encoder_by_name(candidate.name.c_str());
for (const auto &candidate : candidates) {
const auto *encoder = avcodec_find_encoder_by_name(candidate.name.data());
if (encoder == nullptr) {
last_error = "FFmpeg encoder '" + candidate.name + "' is unavailable";
if (device == EncoderDeviceType::Auto) {
spdlog::warn(
"MP4 encoder '{}' unavailable in auto mode, trying next candidate",
candidate.name);
continue;
}
return std::unexpected(last_error);
last_error = "FFmpeg encoder '" + std::string(candidate.name) + "' is unavailable";
spdlog::warn(
"MP4 encoder '{}' unavailable in {} mode, trying next candidate",
candidate.name,
to_string(device));
continue;
}
auto *context = avcodec_alloc_context3(encoder);
@@ -272,22 +223,26 @@ std::expected<OpenedEncoder, std::string> open_encoder(
auto resolved = configure_codec_context(context, candidate, codec, width, height, framerate, tuning);
if (!resolved) {
last_error = resolved.error();
avcodec_free_context(&context);
return std::unexpected(resolved.error());
spdlog::warn(
"MP4 encoder '{}' configuration failed in {} mode: {}. trying next candidate",
candidate.name,
to_string(device),
resolved.error());
continue;
}
const auto open_result = avcodec_open2(context, encoder, nullptr);
if (open_result < 0) {
last_error = "failed to open FFmpeg encoder '" + candidate.name + "': " + av_error_string(open_result);
last_error = "failed to open FFmpeg encoder '" + std::string(candidate.name) + "': " + av_error_string(open_result);
avcodec_free_context(&context);
if (device == EncoderDeviceType::Auto) {
spdlog::warn(
"MP4 encoder '{}' failed to open in auto mode: {}. trying software fallback",
candidate.name,
av_error_string(open_result));
continue;
}
return std::unexpected(last_error);
spdlog::warn(
"MP4 encoder '{}' failed to open in {} mode: {}. trying next candidate",
candidate.name,
to_string(device),
av_error_string(open_result));
continue;
}
return OpenedEncoder{
@@ -298,9 +253,9 @@ std::expected<OpenedEncoder, std::string> open_encoder(
}
if (last_error.empty()) {
last_error = "no usable FFmpeg MP4 encoder candidates were configured";
last_error = "no usable FFmpeg encoder found";
}
return std::unexpected(last_error);
return std::unexpected(last_error + " (attempted: " + attempted_candidates + ")");
}
} // namespace
@@ -414,8 +369,10 @@ struct Mp4RecordWriter::Impl {
return std::unexpected("failed to write MP4 header: " + av_error_string(header_result));
}
const auto quality_key = resolved_settings.quality_key.empty() ? std::string("auto") : resolved_settings.quality_key;
const auto quality_value = resolved_settings.quality_key.empty() ? std::string("n/a") : std::to_string(resolved_settings.quality_value);
spdlog::info(
"MP4_RECORD_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} rc={} {}={} gop={} b_frames={} input={} output={}",
"MP4_RECORD_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} rc={} quality={} gop={} b_frames={} input={} output={}",
cvmmap_streamer::to_string(codec),
encoder_name,
using_hardware,
@@ -424,8 +381,7 @@ struct Mp4RecordWriter::Impl {
frame_rate.num,
frame_rate.den,
resolved_settings.rate_control_mode.value_or("auto"),
resolved_settings.quality_key,
resolved_settings.quality_value,
quality_key + "=" + quality_value,
resolved_settings.gop,
resolved_settings.b_frames,
input_pixel_format_name(input_pixel_format),
+2 -2
View File
@@ -52,7 +52,7 @@ struct Config {
[[nodiscard]]
std::expected<Config, int> parse_args(int argc, char **argv) {
Config config{};
CLI::App app{"rtmp_output_tester - publish synthetic encoded video to RTMP using the configured sink"};
CLI::App app{"rtmp_output_tester - publish synthetic encoded video to RTMP using the runtime encoder selection path"};
app.add_option("--rtmp-url", config.rtmp_url, "RTMP destination URL")->required();
app.add_option("--transport", config.transport, "RTMP transport backend (libavformat|ffmpeg_process)")
->check(CLI::IsMember({"libavformat", "ffmpeg_process"}));
@@ -150,7 +150,7 @@ int main(int argc, char **argv) {
}
cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
config.encoder.backend = cvmmap_streamer::EncoderBackendType::Auto;
config.encoder.device = *encoder_device;
config.encoder.codec = *codec;
config.encoder.gop = 15;
+2 -2
View File
@@ -48,7 +48,7 @@ struct Config {
[[nodiscard]]
std::expected<Config, int> parse_args(int argc, char **argv) {
Config config{};
CLI::App app{"rtp_output_tester - publish synthetic encoded video to RTP using the FFmpeg encoder path"};
CLI::App app{"rtp_output_tester - publish synthetic encoded video to RTP using the runtime encoder selection path"};
app.add_option("--host", config.host, "RTP destination host")->required();
app.add_option("--port", config.port, "RTP destination port")->required()->check(CLI::Range(1, 65535));
app.add_option("--payload-type", config.payload_type, "RTP payload type (96-127)")->check(CLI::Range(96, 127));
@@ -128,7 +128,7 @@ int main(int argc, char **argv) {
}
cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
config.encoder.backend = cvmmap_streamer::EncoderBackendType::Auto;
config.encoder.device = *encoder_device;
config.encoder.codec = *codec;
config.encoder.gop = 15;