diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fd5dd5..f0afc0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,11 +8,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include(GNUInstallDirs) -option( - CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4 - "Build the OpenCV-based zed_svo_grid_to_mp4 tool" - ON) - find_package(Threads REQUIRED) find_package(OpenSSL REQUIRED) if (NOT TARGET OpenSSL::SSL AND DEFINED OPENSSL_SSL_LIBRARY) @@ -80,34 +75,6 @@ find_package(spdlog REQUIRED) find_package(Protobuf REQUIRED) find_package(PkgConfig REQUIRED) find_package(rvl CONFIG QUIET) -set(ZED_DIR "/usr/local/zed" CACHE PATH "Path to the local ZED SDK") -find_package(ZED QUIET) -set(CVMMAP_HAS_ZED_SDK OFF) -if (ZED_FOUND) - find_package(CUDA ${ZED_CUDA_VERSION} REQUIRED) - find_library(CVMMAP_STREAMER_LIBUSB_LIBRARY NAMES usb-1.0 libusb-1.0) - if (CVMMAP_STREAMER_LIBUSB_LIBRARY) - set(_CVMMAP_STREAMER_ZED_LIBRARIES "") - foreach(_zed_lib IN LISTS ZED_LIBRARIES) - if (_zed_lib STREQUAL "/usr/lib/x86_64-linux-gnu/libusb-1.0.so") - list(APPEND _CVMMAP_STREAMER_ZED_LIBRARIES "${CVMMAP_STREAMER_LIBUSB_LIBRARY}") - else() - list(APPEND _CVMMAP_STREAMER_ZED_LIBRARIES "${_zed_lib}") - endif() - endforeach() - set(ZED_LIBRARIES "${_CVMMAP_STREAMER_ZED_LIBRARIES}") - endif() - set(CVMMAP_HAS_ZED_SDK ON) - message(STATUS "ZED SDK found: enabling zed_svo_to_mcap and zed_svo_to_mp4") -else() - message(STATUS "ZED SDK not found: skipping ZED conversion tools") -endif() - -if (CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4 AND CVMMAP_HAS_ZED_SDK) - find_package(OpenCV REQUIRED COMPONENTS core imgproc) -elseif (CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4) - message(STATUS "CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4=ON but ZED SDK is unavailable; zed_svo_grid_to_mp4 will not be built") -endif() add_subdirectory(third_party) @@ -396,160 +363,7 @@ set_target_properties(mcap_replay_tester PROPERTIES OUTPUT_NAME "mcap_replay_tester" RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") -add_executable(mcap_video_bounds src/tools/mcap_video_bounds.cpp) -target_include_directories(mcap_video_bounds - PRIVATE - "${CMAKE_CURRENT_LIST_DIR}/include" - "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(mcap_video_bounds - PRIVATE - CLI11::CLI11 - cvmmap_streamer_foxglove_proto - cvmmap_streamer_mcap_runtime - mcap::mcap - PkgConfig::ZSTD - PkgConfig::LZ4) -if (TARGET spdlog::spdlog) - target_link_libraries(mcap_video_bounds PRIVATE spdlog::spdlog) -elseif (TARGET spdlog) - target_link_libraries(mcap_video_bounds PRIVATE spdlog) -endif() -target_link_libraries(mcap_video_bounds PRIVATE cvmmap_streamer_protobuf) -if (TARGET PkgConfig::PROTOBUF_PKG) - target_link_libraries(mcap_video_bounds PRIVATE PkgConfig::PROTOBUF_PKG) -endif() -set_target_properties(mcap_video_bounds PROPERTIES - OUTPUT_NAME "mcap_video_bounds" - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") - set(CVMMAP_STREAMER_INSTALL_TARGETS cvmmap_streamer) -list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS mcap_video_bounds) - -if (CVMMAP_HAS_ZED_SDK) - add_library( - cvmmap_streamer_zed_progress_support - STATIC - src/tools/zed_progress_bar.cpp) - target_include_directories(cvmmap_streamer_zed_progress_support - PUBLIC - "${CMAKE_CURRENT_LIST_DIR}/include" - "${CMAKE_CURRENT_BINARY_DIR}") - add_executable( - zed_svo_to_mcap - src/tools/zed_svo_to_mcap.cpp - src/config/runtime_config.cpp) - target_include_directories(zed_svo_to_mcap - PRIVATE - "${CMAKE_CURRENT_LIST_DIR}/include" - "${CMAKE_CURRENT_BINARY_DIR}" - ${ZED_INCLUDE_DIRS} - ${CUDA_INCLUDE_DIRS}) - target_link_directories(zed_svo_to_mcap - 
PRIVATE - ${ZED_LIBRARY_DIR} - ${CUDA_LIBRARY_DIRS}) - target_link_libraries(zed_svo_to_mcap - PRIVATE - cvmmap_streamer_zed_progress_support - cvmmap_streamer_record_support - CLI11::CLI11 - tomlplusplus::tomlplusplus - ${ZED_LIBRARIES} - ${CUDA_CUDA_LIBRARY} - ${CUDA_CUDART_LIBRARY}) - if (TARGET spdlog::spdlog) - target_link_libraries(zed_svo_to_mcap PRIVATE spdlog::spdlog) - elseif (TARGET spdlog) - target_link_libraries(zed_svo_to_mcap PRIVATE spdlog) - endif() - set_target_properties(zed_svo_to_mcap PROPERTIES - OUTPUT_NAME "zed_svo_to_mcap" - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") - list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS zed_svo_to_mcap) - - add_library( - cvmmap_streamer_zed_svo_mp4_support - STATIC - src/tools/zed_svo_mp4_support.cpp) - target_include_directories(cvmmap_streamer_zed_svo_mp4_support - PUBLIC - "${CMAKE_CURRENT_LIST_DIR}/include" - "${CMAKE_CURRENT_BINARY_DIR}") - target_link_libraries(cvmmap_streamer_zed_svo_mp4_support - PUBLIC - PkgConfig::FFMPEG) - if (TARGET spdlog::spdlog) - target_link_libraries(cvmmap_streamer_zed_svo_mp4_support PUBLIC spdlog::spdlog) - elseif (TARGET spdlog) - target_link_libraries(cvmmap_streamer_zed_svo_mp4_support PUBLIC spdlog) - endif() - - add_executable( - zed_svo_to_mp4 - src/tools/zed_svo_to_mp4.cpp) - target_include_directories(zed_svo_to_mp4 - PRIVATE - "${CMAKE_CURRENT_LIST_DIR}/include" - "${CMAKE_CURRENT_BINARY_DIR}" - ${ZED_INCLUDE_DIRS} - ${CUDA_INCLUDE_DIRS}) - target_link_directories(zed_svo_to_mp4 - PRIVATE - ${ZED_LIBRARY_DIR} - ${CUDA_LIBRARY_DIRS}) - target_link_libraries(zed_svo_to_mp4 - PRIVATE - CLI11::CLI11 - cvmmap_streamer_zed_progress_support - cvmmap_streamer_zed_svo_mp4_support - ${ZED_LIBRARIES} - ${CUDA_CUDA_LIBRARY} - ${CUDA_CUDART_LIBRARY}) - if (TARGET spdlog::spdlog) - target_link_libraries(zed_svo_to_mp4 PRIVATE spdlog::spdlog) - elseif (TARGET spdlog) - target_link_libraries(zed_svo_to_mp4 PRIVATE spdlog) - endif() - set_target_properties(zed_svo_to_mp4 PROPERTIES - OUTPUT_NAME "zed_svo_to_mp4" - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") - list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS zed_svo_to_mp4) - - if (CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4) - add_executable( - zed_svo_grid_to_mp4 - src/tools/zed_svo_grid_to_mp4.cpp) - target_include_directories(zed_svo_grid_to_mp4 - PRIVATE - "${CMAKE_CURRENT_LIST_DIR}/include" - "${CMAKE_CURRENT_BINARY_DIR}" - ${ZED_INCLUDE_DIRS} - ${CUDA_INCLUDE_DIRS} - ${OpenCV_INCLUDE_DIRS}) - target_link_directories(zed_svo_grid_to_mp4 - PRIVATE - ${ZED_LIBRARY_DIR} - ${CUDA_LIBRARY_DIRS}) - target_link_libraries(zed_svo_grid_to_mp4 - PRIVATE - CLI11::CLI11 - cvmmap_streamer_zed_progress_support - cvmmap_streamer_zed_svo_mp4_support - ${ZED_LIBRARIES} - ${CUDA_CUDA_LIBRARY} - ${CUDA_CUDART_LIBRARY} - ${OpenCV_LIBS}) - if (TARGET spdlog::spdlog) - target_link_libraries(zed_svo_grid_to_mp4 PRIVATE spdlog::spdlog) - elseif (TARGET spdlog) - target_link_libraries(zed_svo_grid_to_mp4 PRIVATE spdlog) - endif() - set_target_properties(zed_svo_grid_to_mp4 PROPERTIES - OUTPUT_NAME "zed_svo_grid_to_mp4" - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin") - list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS zed_svo_grid_to_mp4) - endif() -endif() install( TARGETS ${CVMMAP_STREAMER_INSTALL_TARGETS} diff --git a/README.md b/README.md index 2a10425..0d18047 100644 --- a/README.md +++ b/README.md @@ -45,17 +45,6 @@ cmake -B build -S . 
cmake --build build
```
-When the ZED SDK is available, the build also enables `zed_svo_to_mcap` and
-`zed_svo_to_mp4` automatically. When the SDK is absent, those tools are skipped
-and the main streamer plus non-ZED testers still build normally.
-
-`zed_svo_grid_to_mp4` remains optional and additionally requires OpenCV. Disable
-it explicitly with:
-
-```bash
-cmake -B build -S . -DCVMMAP_BUILD_ZED_SVO_GRID_TO_MP4=OFF
-```
-
 ```bash
 # Use a local cv-mmap build tree
 cmake -B build -S . \
@@ -69,300 +58,25 @@ cmake --build build
 ls -la build/{cvmmap_streamer,rtp_receiver_tester,rtmp_stub_tester}
 ```
-### ZED SVO/SVO2 To MP4
+### Offline ZED Tooling
-This tool is only built when the ZED SDK is detected during CMake configure.
+Offline ZED conversion, batch wrappers, dataset indexing, and MCAP inspection helpers have moved to the sibling repository `../zed-offline-tools`.
-The repo also includes an offline conversion tool for the left ZED color stream:
+Use that repo for:
-
-```bash
-CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
-./build/bin/zed_svo_to_mp4 \
-  --input <input.svo2> \
-  --encoder-device auto \
-  --preset balanced \
-  --quality 20 \
-  --start-frame 0 \
-  --end-frame 89
-```
+- `zed_svo_to_mcap`
+- `zed_svo_to_mp4`
+- `zed_svo_grid_to_mp4`
+- `mcap_video_bounds`
+- `scripts/zed_batch_*`
+- `scripts/zed_segment_time_index.py`
+- `scripts/generate_playlist_config.py`
+- `scripts/mcap_bundle_validator.py`
+- `scripts/mcap_rgbd_example.py`
+- `scripts/mcap_rgbd_viewer.py`
+- `scripts/mcap_depth_alignment.py`
-
-By default the tool writes `foo.mp4` next to `foo.svo` or `foo.svo2`, defaults to `h265`, and shows a tqdm-like progress bar when stderr is attached to a TTY. `--encoder-device auto` tries NVENC first and falls back to software (`libx264` or `libx265`) if the hardware encoder is unavailable or cannot be opened.
-
-### Batch ZED SVO2 To MP4
-
-Python dependencies for the batch wrapper are managed with `uv`:
-
-```bash
-uv sync
-```
-
-Expected multi-camera dataset layout:
-
-```text
-<dataset-root>/
-├── svo2_segments_sorted.csv
-├── bar/
-│   └── 2026-03-18T11-59-41/
-│       ├── 2026-03-18T11-59-41_zed1.svo2
-│       ├── 2026-03-18T11-59-41_zed2.svo2
-│       ├── 2026-03-18T11-59-41_zed3.svo2
-│       └── 2026-03-18T11-59-41_zed4.svo2
-└── jump/
-    └── experiment/
-        └── 1/
-            └── 2026-03-18T11-26-23/
-                ├── 2026-03-18T11-26-23_zed1.svo2
-                ├── 2026-03-18T11-26-23_zed2.svo2
-                ├── 2026-03-18T11-26-23_zed3.svo2
-                └── 2026-03-18T11-26-23_zed4.svo2
-```
-
-Placeholders used below:
-- `<dataset-root>`: dataset root containing multi-camera segment directories
-- `<segment-dir>`: one multi-camera segment directory containing `*_zedN.svo` or `*_zedN.svo2`
-- `<segment-dir-a>`, `<segment-dir-b>`: explicit segment directories
-- `<segments-csv>`: CSV file with a `segment_dir` column, for example `config/svo2_segments_sorted.sample.csv`
-- `<input.svo2>`: one single-camera `.svo` or `.svo2` file
-- `<pose-config.toml>`: TOML file such as `config/zed_pose_config.toml`
-
-Use the wrapper to recurse through a folder, run `zed_svo_to_mp4` on every matched `.svo2`, and show one aggregate tqdm progress bar:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mp4.py \
-  <dataset-root>/bar \
-  --pattern '*.svo2' \
-  --recursive \
-  --jobs 2 \
-  --encoder-device auto \
-  --start-frame 0 \
-  --end-frame 29 \
-  --cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6
-```
-
-The batch tool mirrors the common encoder options from `zed_svo_to_mp4`, skips existing sibling `.mp4` outputs by default, and continues after failures while returning a nonzero exit code if any conversion fails.
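As a reading aid for the skip rule described in the removed paragraph above, here is a minimal sketch of the sibling-output check; `sibling_mp4` and `should_convert` are illustrative names, not the wrapper's actual API:

```python
from pathlib import Path


def sibling_mp4(svo_path: Path) -> Path:
    # foo.svo or foo.svo2 -> foo.mp4 next to the input file.
    return svo_path.with_suffix(".mp4")


def should_convert(svo_path: Path, overwrite: bool = False) -> bool:
    # Existing sibling outputs are skipped unless explicitly overwritten;
    # a failed conversion elsewhere in the batch does not affect this check.
    return overwrite or not sibling_mp4(svo_path).exists()
```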
-
-### ZED SVO Grid To MP4
-
-This tool is only built when the ZED SDK is detected and
-`CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4=ON`.
-
-Use the grid converter to merge four synced ZED recordings into a 2x2 CCTV-style MP4 with a Unix timestamp overlay in the top-left corner:
-
-```bash
-./build/bin/zed_svo_grid_to_mp4 \
-  --segment-dir <segment-dir> \
-  --encoder-device auto \
-  --codec h265 \
-  --duration-seconds 2
-```
-
-The tool syncs the four inputs using the same common-start timestamp rule as the ZED multi-camera playback sample, defaults to a 2x2 layout ordered as `zed1 zed2 / zed3 zed4`, and writes `<segment-dir>/<segment-name>_grid.mp4` unless `--output` is provided. By default each tile is scaled to `0.5x`, so a four-camera 1920x1200 segment produces a 1920x1200 composite. Use repeated `--input` flags instead of `--segment-dir` when you want explicit row-major ordering.
-
-Use the batch wrapper to run `zed_svo_grid_to_mp4` over many segment directories with one aggregate progress bar:
-
-```bash
-uv run python scripts/zed_batch_svo_grid_to_mp4.py \
-  --dataset-root <dataset-root> \
-  --recursive \
-  --jobs 2 \
-  --encoder-device auto \
-  --duration-seconds 2
-```
-
-You can also provide the exact segments to convert:
-
-```bash
-uv run python scripts/zed_batch_svo_grid_to_mp4.py \
-  --segment <segment-dir-a> \
-  --segment <segment-dir-b> \
-  --jobs 2
-```
-
-Or preserve a precomputed CSV ordering:
-
-```bash
-uv run python scripts/zed_batch_svo_grid_to_mp4.py \
-  --segments-csv <segments-csv> \
-  --jobs 2 \
-  --duration-seconds 2
-```
-
-The batch grid wrapper mirrors the grid encoder options, skips existing `<segment-dir>/<segment-name>_grid.mp4` outputs by default, and returns a nonzero exit code if any segment fails.
-
-When you suspect a previous run left behind partial MP4 files, opt into `ffprobe` validation so broken existing outputs are treated as missing instead of skipped:
-
-```bash
-uv run python scripts/zed_batch_svo_grid_to_mp4.py \
-  --dataset-root <dataset-root> \
-  --probe-existing \
-  --jobs 2
-```
-
-Use `--report-existing` to audit existing outputs without launching conversions. The report prints invalid existing files only, while the summary still includes valid and missing counts. This is useful for the partial-write failure mode currently seen as `moov atom not found` in some kindergarten grid MP4s:
-
-```bash
-uv run python scripts/zed_batch_svo_grid_to_mp4.py \
-  --dataset-root <dataset-root> \
-  --report-existing
-```
-
-Use `--dry-run` to preview what the batch wrapper would convert after applying skip logic. Combine it with `--probe-existing` when you want to see which broken existing outputs would be requeued:
-
-```bash
-uv run python scripts/zed_batch_svo_grid_to_mp4.py \
-  <dataset-root> \
-  --probe-existing \
-  --dry-run
-```
-
-#### Expected CSV Input Format
-
-The `--segments-csv` input expects a header row with at least a `segment_dir` column. Extra columns are allowed and ignored by the batch wrapper. `segment_dir` values may be absolute paths or paths relative to the CSV file's parent directory. Use `--csv-root` to override that base directory.
-
-Repeated rows for the same `segment_dir` are allowed; the wrapper converts each unique segment once, preserving the first-seen CSV order.
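The resolve-and-dedupe behavior described just above is small enough to sketch; this is an illustrative reimplementation under those stated rules, not the wrapper's actual code, and `load_segments` is a hypothetical name:

```python
import csv
from pathlib import Path


def load_segments(csv_path: Path, csv_root: Path | None = None) -> list[Path]:
    # Relative segment_dir values resolve against the CSV's parent
    # directory unless a --csv-root style override is supplied.
    base = csv_root if csv_root is not None else csv_path.parent
    ordered: dict[Path, None] = {}  # insertion order == first-seen CSV order
    with csv_path.open(newline="") as stream:
        for row in csv.DictReader(stream):
            segment = Path(row["segment_dir"])
            if not segment.is_absolute():
                segment = base / segment
            ordered.setdefault(segment, None)  # repeated rows convert once
    return list(ordered)
```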
-The repo includes a small example at `config/svo2_segments_sorted.sample.csv`:
-
-```csv
-timestamp,activity,group_path,segment_dir,camera,relative_path
-2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed1,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed1.svo2
-2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed2,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed2.svo2
-```
-
-### Batch ZED Segments To MCAP
-
-This workflow depends on the `zed_svo_to_mcap` binary, which is only built when
-the ZED SDK is detected during CMake configure.
-
-Use the wrapper to recurse through a dataset root, run `zed_svo_to_mcap --segment-dir` on every matched multi-camera segment, and show interactive table progress on TTYs with durable text logging elsewhere:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --dataset-root <dataset-root> \
-  --recursive \
-  --jobs 2 \
-  --cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
-  --start-frame 10 \
-  --end-frame 29
-```
-
-You can also preserve the precomputed kindergarten CSV ordering:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --segments-csv <segments-csv> \
-  --jobs 2 \
-  --start-frame 10 \
-  --end-frame 29
-```
-
-Enable per-camera pose export when the segment has valid tracking:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --segment <segment-dir> \
-  --with-pose \
-  --pose-config <pose-config.toml>
-```
-
-The batch MCAP wrapper writes `<segment-dir>/<segment-name>.mcap` by default, skips existing outputs unless told otherwise, and returns a nonzero exit code if any segment fails.
-The repo includes a minimal pose config at `config/zed_pose_config.toml` so MCAP conversion does not depend on a separate `cv-mmap` checkout.
-In bundled multi-camera timeline mode, `--start-frame` and `--end-frame` mean the first and last emitted bundle indices from the common start timestamp, inclusive.
-When stderr is attached to a TTY, `zed_batch_svo_to_mcap.py` uses a `progress-table` view by default; otherwise it emits line-oriented start/completion/failure logs plus periodic heartbeat summaries. Use `--progress-ui table` or `--progress-ui text` to override the automatic mode selection.
-
-Bundled MCAP export now defaults to `--bundle-policy nearest`. That mode emits one `/bundle` manifest message per bundle timestamp on the common timeline and keeps the original per-camera timestamps on `/zedN/video`, `/zedN/depth`, and optional `/zedN/pose`. Faster cameras are sampled onto the common timeline set by the slowest camera, so they can end up with the same message count as slower cameras. Consumers that care about grouping should follow `/bundle` instead of inferring bundle membership from identical message timestamps.
-
-Use `--bundle-policy strict` when you want thresholded grouping; `--sync-tolerance-ms` only applies in that strict mode. Use `--bundle-policy copy` when you want one MCAP containing all camera namespaces with their original per-camera cadence and no `/bundle` manifest. `copy` disables `--start-frame`, `--end-frame`, and `--sync-tolerance-ms`; `--copy-range common|full` controls whether it trims to the overlap window or preserves each camera's full timestamp range.
-Single-source `zed_svo_to_mcap` now writes the one-camera `copy` shape by default, so `foo_zed4.svo2` exports namespaced topics like `/zed4/video` and `/zed4/depth` with no `/bundle`.
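A quick sketch of that filename-to-topic rule, assuming the `_zedN` suffix convention shown in the dataset layout earlier; the helper names are hypothetical, not the converter's API:

```python
import re
from pathlib import Path

_CAMERA_SUFFIX = re.compile(r"_(zed\d+)$")


def camera_label(svo_path: Path) -> str:
    # 2026-03-18T11-26-23_zed4.svo2 -> "zed4"
    match = _CAMERA_SUFFIX.search(svo_path.stem)
    if match is None:
        raise ValueError(f"no _zedN suffix in {svo_path.name}")
    return match.group(1)


def copy_shape_topics(label: str) -> tuple[str, str]:
    # One-camera copy shape: namespaced topics, no /bundle manifest.
    return f"/{label}/video", f"/{label}/depth"


assert copy_shape_topics(camera_label(Path("foo_zed4.svo2"))) == ("/zed4/video", "/zed4/depth")
```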
See [docs/mcap_layout.md](./docs/mcap_layout.md) for the current bundled/copy contract and [docs/mcap_legacy_single_camera_layout.md](./docs/mcap_legacy_single_camera_layout.md) for the separate legacy `/camera/*` reference. - -For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`. - -For calibration-based depth/RGB mapping, use `scripts/mcap_depth_alignment.py` and [docs/depth_alignment.md](./docs/depth_alignment.md). That helper explains the current affine mapping implied by the exported calibration topics and can export example aligned-depth and overlay PNGs from a chosen MCAP frame. - -### MCAP RGBD Viewer - -The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`. - -Install the optional viewer dependencies first: - -```bash -uv sync --extra viewer -``` - -Then launch the interactive viewer: - -```bash -uv run --extra viewer python scripts/mcap_rgbd_viewer.py \ - /workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap \ - --camera-label zed1 -``` - -You can also use the same script without a GUI to inspect metadata or render a preview PNG: - -```bash -uv run --extra viewer python scripts/mcap_rgbd_viewer.py \ - --summary-only \ - /workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap -``` - -```bash -uv run --extra viewer python scripts/mcap_rgbd_viewer.py \ - --camera-label zed2 \ - --frame-index 150 \ - --export-preview /tmp/mcap_bundled_gap_preview.png \ - /workspaces/data/kindergarten/throw/2026-03-18T12-58-13/2026-03-18T12-58-13.mcap -``` - -The viewer depends on `ffmpeg` being on `PATH` so it can build a seek-friendly preview cache for H.264/H.265 MCAP video streams. -This is intentionally a simple preview script: it transcodes only the RGB video stream into a temporary intra-frame `mjpeg` cache and then uses that same cache for both scrubbing and normal playback. Depth data is not transcoded to `mjpeg`; it stays in the temporary raw depth cache and is decoded and color-mapped on demand. - -### Why Mixed Hardware/Software Mode Exists - -Bundled MCAP export opens one video encoder per camera stream. A four-camera segment therefore consumes four H.264/H.265 encoder sessions at once. - -This matters because NVIDIA's NVENC session limit is separate from raw CUDA utilization. In NVIDIA's Video Codec SDK documentation, non-qualified systems are capped at 8 concurrent encode sessions across all non-qualified GPUs in the system, and NVIDIA's SDK readme still calls out a 5-session GeForce limit in some contexts. In practice, consumer/GeForce hosts often hit NVENC session-init failures before the GPUs look "full" in `nvidia-smi`. 
-
-That is why the batch wrapper supports mixed pools such as two NVENC workers plus two software-encoded workers:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --dataset-root <dataset-root> \
-  --recursive \
-  --overwrite \
-  --hardware-jobs 2 \
-  --hardware-cuda-visible-devices 0,1 \
-  --software-jobs 2 \
-  --software-cuda-visible-devices 0,1 \
-  --depth-mode neural_plus
-```
-
-With bundled four-camera segments, `4` all-hardware jobs would try to open about `16` NVENC sessions, which is why mixed mode is the safe default for high-throughput rebuilds on GeForce-class machines. The software workers still use the GPUs for ZED neural depth; only video encoding moves to CPU.
-
-If you intentionally want to bypass NVIDIA's consumer NVENC session cap, there is an unofficial driver patch at [`keylase/nvidia-patch`](https://github.com/keylase/nvidia-patch). That can make larger all-hardware batches viable, but it is not NVIDIA-supported and should be treated as an explicit ops decision rather than a project requirement.
-
-Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid outputs are treated as missing and requeued:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --dataset-root <dataset-root> \
-  --probe-existing \
-  --jobs 2
-```
-
-Use `--report-existing` to audit existing MCAPs without launching conversions:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --dataset-root <dataset-root> \
-  --report-existing
-```
-
-Use `--dry-run` to preview what would be converted after applying skip or probe logic:
-
-```bash
-uv run python scripts/zed_batch_svo_to_mcap.py \
-  --segments-csv <segments-csv> \
-  --probe-existing \
-  --dry-run
-```
+This repo keeps the live downstream streamer/runtime plus the MCAP contract docs such as [docs/mcap_layout.md](./docs/mcap_layout.md), [docs/mcap_legacy_single_camera_layout.md](./docs/mcap_legacy_single_camera_layout.md), and [docs/mcap_body_tracking.md](./docs/mcap_body_tracking.md).
 
 ### Mandatory Acceptance (Standalone)
diff --git a/config/svo2_segments_sorted.sample.csv b/config/svo2_segments_sorted.sample.csv
deleted file mode 100644
index 66ab3d0..0000000
--- a/config/svo2_segments_sorted.sample.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-timestamp,activity,group_path,segment_dir,camera,relative_path
-2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed1,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed1.svo2
-2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed2,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed2.svo2
-2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed3,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed3.svo2
-2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed4,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed4.svo2
-2026-03-18T11-26-23,jump,jump/experiment/1,jump/experiment/1/2026-03-18T11-26-23,zed1,jump/experiment/1/2026-03-18T11-26-23/2026-03-18T11-26-23_zed1.svo2
-2026-03-18T11-26-23,jump,jump/experiment/1,jump/experiment/1/2026-03-18T11-26-23,zed2,jump/experiment/1/2026-03-18T11-26-23/2026-03-18T11-26-23_zed2.svo2
diff --git a/config/zed_pose_config.toml b/config/zed_pose_config.toml
deleted file mode 100644
index 00e3ec1..0000000
--- a/config/zed_pose_config.toml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Minimal pose-tracking config for zed_svo_to_mcap.
-# The converter currently reads only: -# - zed.coordinate_system -# - zed.body_tracking.reference_frame -# - zed.body_tracking.set_floor_as_origin - -[zed] -# Native ZED 3D/body coordinate system used when reading positional tracking. -# Supported values in this repo are IMAGE and RIGHT_HANDED_Y_UP. -coordinate_system = "IMAGE" - -[zed.body_tracking] -# Reference frame used for per-camera pose estimation. -# Supported values are CAMERA and WORLD. -reference_frame = "CAMERA" - -# When true, WORLD origin is placed on the floor during positional tracking. -set_floor_as_origin = false diff --git a/docs/depth_alignment.md b/docs/depth_alignment.md deleted file mode 100644 index 782422d..0000000 --- a/docs/depth_alignment.md +++ /dev/null @@ -1,86 +0,0 @@ -# Depth Alignment - -Exported ZED MCAP files can carry RGB video and depth at different raster sizes. - -For the current kindergarten `zed4` exports, the common pair is: - -- video: `1920x1200` -- depth: `960x512` - -That means RGB and depth do not share aspect ratio. The files stay alignable because the exporter writes two separate calibration topics: - -- `/{label}/calibration` for video -- `/{label}/depth_calibration` for depth - -See [mcap_layout.md](./mcap_layout.md) for the topic contract. - -## What The Mapping Means - -The correct way to align depth onto RGB is to use the two calibration matrices, not to assume matching pixel grids. - -For the same camera, with zero distortion and identity rectification, the mapping reduces to a 2D affine transform: - -```text -u_rgb = (fx_rgb / fx_depth) * u_depth + (cx_rgb - (fx_rgb / fx_depth) * cx_depth) -v_rgb = (fy_rgb / fy_depth) * v_depth + (cy_rgb - (fy_rgb / fy_depth) * cy_depth) -``` - -and the inverse: - -```text -u_depth = (fx_depth / fx_rgb) * u_rgb + (cx_depth - (fx_depth / fx_rgb) * cx_rgb) -v_depth = (fy_depth / fy_rgb) * v_rgb + (cy_depth - (fy_depth / fy_rgb) * cy_rgb) -``` - -For the sampled kindergarten `zed4` files, those offsets are effectively zero, so the mapping becomes an anisotropic resize: - -```text -u_rgb ~= 2.0 * u_depth -v_rgb ~= 2.34375 * v_depth -``` - -This is why the practical overlay behavior is a stretch, not a crop. - -It is still better to derive the mapping from the two calibration topics than to hardcode `2.0` and `2.34375`, because the exact calibration can vary by camera and export settings. - -## Helper Script - -Use the alignment helper to inspect the calibration pair and optionally export an example overlay: - -```bash -uv run --extra viewer python scripts/mcap_depth_alignment.py \ - /workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41_zed4.mcap \ - --camera-label zed4 -``` - -To export example images: - -```bash -uv run --extra viewer python scripts/mcap_depth_alignment.py \ - /workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41_zed4.mcap \ - --camera-label zed4 \ - --frame-index 400 \ - --output-dir /tmp/zed4_alignment_demo -``` - -That command writes: - -- `rgb_frame.png` -- `depth_native_colorized.png` -- `depth_aligned_to_rgb_colorized.png` -- `depth_overlay_on_rgb.png` -- `rgb_aligned_to_depth.png` - -## What The Helper Actually Does - -The script: - -1. reads `/{label}/calibration` and `/{label}/depth_calibration` -2. computes the affine mapping implied by the two intrinsic matrices -3. decodes one RGB frame and one depth frame from the MCAP -4. warps depth into RGB space with `cv2.warpAffine` -5. 
optionally warps RGB into depth space with the inverse mapping - -For the current exported ZED MCAP contract, that is the right simple alignment path. - -If a future export starts carrying non-zero distortion or non-identity rectification, consumers should switch from this affine shortcut to a full camera-model reprojection path. diff --git a/docs/mcap_layout.md b/docs/mcap_layout.md index 6636d59..e08be5e 100644 --- a/docs/mcap_layout.md +++ b/docs/mcap_layout.md @@ -137,4 +137,4 @@ For multi-camera `copy` MCAP files, the current validation contract is: Legacy `/camera/*` validation expectations are documented in [mcap_legacy_single_camera_layout.md](./mcap_legacy_single_camera_layout.md). -The repository-level Python helper [scripts/mcap_bundle_validator.py](../scripts/mcap_bundle_validator.py) understands bundled, copy, and legacy `/camera/*` layouts and reports which one it found before applying the corresponding validation rules. +The standalone helper [zed-offline-tools/scripts/mcap_bundle_validator.py](../../zed-offline-tools/scripts/mcap_bundle_validator.py) understands bundled, copy, and legacy `/camera/*` layouts and reports which one it found before applying the corresponding validation rules. diff --git a/docs/mcap_recipes.md b/docs/mcap_recipes.md deleted file mode 100644 index 0eb4cff..0000000 --- a/docs/mcap_recipes.md +++ /dev/null @@ -1,179 +0,0 @@ -# MCAP Recipes - -This guide is the simple, non-GUI path for inspecting RGB+depth MCAP files. - -Use it when you want to: - -- confirm whether an MCAP is bundled, `copy`, or legacy `/camera/*` -- inspect camera labels, message counts, and timestamp ranges -- export one RGB frame and one decoded depth sample as a concrete example -- understand how `/bundle` changes the meaning of timestamps and sample grouping - -For the current bundled/copy layout contract, see [mcap_layout.md](./mcap_layout.md). The older `/camera/*` wire shape is documented separately in [mcap_legacy_single_camera_layout.md](./mcap_legacy_single_camera_layout.md). - -## Quick Summary - -The repository includes a small example helper: - -```bash -uv run python scripts/mcap_rgbd_example.py --help -``` - -It has two commands: - -- `summary`: print layout, per-camera counts, and timestamp ranges -- `export-sample`: write one RGB image plus one depth array/preview - -`summary` works with the base Python dependencies: - -```bash -uv sync -``` - -`export-sample` also needs: - -- `ffmpeg` on `PATH` -- the optional depth decoder binding: - -```bash -uv sync --extra viewer -``` - -## The Practical Cases - -For this helper, there are really two operational cases: - -- `bundled`: multiple namespaced camera topics plus `/bundle` -- single-camera stream with no `/bundle` - -That second case can appear in two wire shapes: - -- `copy`: namespaced topics such as `/zed4/video` -- legacy single-camera: `/camera/video` - -Current single-source `zed_svo_to_mcap` output uses the one-camera `copy` shape by default, so even a one-camera file usually looks like namespaced `/{label}/*` topics with no `/bundle`. - -The helper treats legacy `/camera/*` as compatible with `copy` by using the implicit camera label `camera`. 
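The case analysis above reduces to a small topic-based classifier. This is a hedged sketch of the idea, not the helper's actual detection code (which, per the validator later in this diff, also flags mixed layouts as invalid):

```python
def classify_layout(topics: set[str]) -> str:
    # Decide the layout family from channel topic names alone.
    if "/bundle" in topics:
        return "bundled"
    if any(topic.startswith("/camera/") for topic in topics):
        return "legacy-single-camera"
    if any(topic.count("/") == 2 for topic in topics):  # e.g. /zed4/video
        return "copy"
    return "unknown"


assert classify_layout({"/zed4/video", "/zed4/depth"}) == "copy"
assert classify_layout({"/camera/video", "/camera/depth"}) == "legacy-single-camera"
```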
-
-## Recipe: Summarize One MCAP
-
-```bash
-uv run python scripts/mcap_rgbd_example.py summary <input.mcap>
-```
-
-What the summary prints:
-
-- layout and validation status
-- camera labels
-- per-camera `video`, `depth`, `pose`, `calibration`, `depth_calibration`, and `body` counts
-- per-camera video/depth timestamp ranges
-- for bundled files only:
-  - bundle count
-  - bundle timestamp range
-  - bundle policy counts
-  - per-camera present/corrupted-gap/unknown bundle-member counts
-
-This is the fastest way to answer:
-
-- “is this file bundled, copy, or legacy single-camera?”
-- “which camera labels are inside?”
-- “do video and depth counts match?”
-- “what timestamp range does each camera cover?”
-
-## Recipe: Export One RGB + Depth Sample
-
-```bash
-uv run python scripts/mcap_rgbd_example.py export-sample \
-  <input.mcap> \
-  --output-dir /tmp/mcap_sample
-```
-
-For multi-camera or namespaced one-camera files, choose the camera explicitly when needed:
-
-```bash
-uv run python scripts/mcap_rgbd_example.py export-sample \
-  <input.mcap> \
-  --camera-label zed2 \
-  --sample-index 25 \
-  --output-dir /tmp/mcap_sample_zed2
-```
-
-Outputs:
-
-- `rgb.png`
-- `depth.npy`
-- `depth_preview.png`
-- `sample_metadata.json`
-
-`sample_index` is always zero-based per-camera RGB+depth sample order.
-
-That means:
-
-- legacy `/camera/*`: sample `N` is `/camera/video[N]` + `/camera/depth[N]`
-- `copy`: sample `N` is `/{label}/video[N]` + `/{label}/depth[N]`
-- `bundled`: sample `N` is the `N`th present sample for that camera, not bundle index `N`
-
-In bundled files, `sample_metadata.json` also records the matched `/bundle` member metadata for the selected camera sample.
-
-## Recipe: Understand Bundled vs Non-Bundled Timing
-
-Bundled files intentionally separate bundle time from camera sample time:
-
-- `/bundle.timestamp` is the nominal common-timeline bundle timestamp
-- `/zedN/video` and `/zedN/depth` keep the original per-camera sample timestamps
-
-Copy and legacy single-camera files do not have bundle time at all:
-
-- there is no `/bundle`
-- each camera topic keeps its own original cadence and timestamps
-
-If you care about grouping, use `/bundle` in bundled files.
-For `copy` and legacy single-camera files, treat each camera stream independently.
-
-## Recipe: Inspect `/bundle` In Python
-
-The helper script is intentionally small, but sometimes it is easier to inspect `/bundle` directly.
-This snippet shows how to print bundle membership for one camera:
-
-```python
-from pathlib import Path
-
-import zed_batch_svo_to_mcap as batch
-
-
-path = Path("<input.mcap>").expanduser().resolve()
-camera_label = "zed1"
-reader_module = batch.load_mcap_reader()
-
-with path.open("rb") as stream:
-    reader = reader_module.make_reader(stream)
-    for schema, channel, message in reader.iter_messages():
-        if channel.topic != "/bundle":
-            continue
-        if schema is None or schema.name != "cvmmap_streamer.BundleManifest":
-            continue
-
-        bundle_class, present_value = batch.load_bundle_manifest_type(schema.data)
-        bundle = bundle_class()
-        bundle.ParseFromString(message.data)
-
-        for member in bundle.members:
-            if str(member.camera_label) != camera_label:
-                continue
-            status_value = int(getattr(member, "status", 0))
-            status_field = member.DESCRIPTOR.fields_by_name.get("status")
-            status_enum = status_field.enum_type if status_field is not None else None
-            status_name = (
-                status_enum.values_by_number.get(status_value).name
-                if status_enum is not None and status_enum.values_by_number.get(status_value) is not None
-                else str(status_value)
-            )
-            print(bundle.bundle_index, status_name)
-            break
-```
-
-This is the important mental model:
-
-- `bundled`: follow `/bundle` for grouping
-- `copy`: treat each namespaced camera as an independent stream
-- legacy `/camera/*`: same model as one-camera `copy`, with the implicit label `camera`
diff --git a/docs/zed_segment_time_index.md b/docs/zed_segment_time_index.md
deleted file mode 100644
index 22b5744..0000000
--- a/docs/zed_segment_time_index.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# ZED Segment Time Index
-
-`scripts/zed_segment_time_index.py` builds and queries an embedded DuckDB index for bundled ZED segment folders.
-
-Default artifact name:
-
-```text
-<dataset-root>/segment_time_index.duckdb
-```
-
-Primary commands:
-
-```bash
-uv run python scripts/zed_segment_time_index.py build <dataset-root>
-uv run python scripts/zed_segment_time_index.py query <dataset-root> --at 2026-03-18T12-00-23
-uv run python scripts/zed_segment_time_index.py query <dataset-root> --start 2026-03-18T12-00-23 --end 2026-03-18T12-00-30
-```
-
-## Data Source Rules
-
-- Segment discovery is recursive and follows the same multi-camera layout assumptions as the batch ZED tooling.
-- A directory is considered a valid segment when it contains at least two unique `*_zedN.svo` or `*_zedN.svo2` files and no duplicate camera labels.
-- Timing is sourced from the segment MCAP, not from the SVO/SVO2 files.
-- A valid segment is skipped when it has no `.mcap` file or more than one `.mcap` file in the segment directory.
-
-## MCAP Bounds Extraction
-
-`build/bin/mcap_video_bounds` scans `foxglove.CompressedVideo` messages in one MCAP and emits:
-
-- `start_ns`
-- `end_ns`
-- `duration_ns`
-- `video_message_count`
-- `start_iso_utc`
-- `end_iso_utc`
-
-The helper prefers the protobuf `CompressedVideo.timestamp` field and falls back to MCAP `logTime` when that field is zero.
-
-## DuckDB Layout
-
-The database contains two tables: `meta` and `segments`.
-
-### `meta`
-
-Key-value metadata for the index:
-
-- `schema_version`: current schema version, currently `1`
-- `dataset_root`: absolute dataset root used when the index was built
-- `built_at_utc`: build timestamp in UTC
-- `default_timezone`: inferred dataset wall-clock timezone used when querying with `--timezone dataset`
-
-### `segments`
-
-One row per indexed segment.
-
-| Column | Type | Meaning |
-|---|---|---|
-| `segment_dir` | `VARCHAR` | Absolute path to the segment directory |
-| `relative_segment_dir` | `VARCHAR` | Path relative to the dataset root |
-| `group_path` | `VARCHAR` | Parent path of the segment within the dataset |
-| `activity` | `VARCHAR` | First path component under the dataset root |
-| `segment_name` | `VARCHAR` | Segment directory basename |
-| `mcap_path` | `VARCHAR` | Absolute MCAP path used for timing |
-| `start_ns` | `BIGINT` | Earliest video timestamp in nanoseconds since Unix epoch |
-| `end_ns` | `BIGINT` | Latest video timestamp in nanoseconds since Unix epoch |
-| `duration_ns` | `BIGINT` | `end_ns - start_ns` |
-| `start_iso_utc` | `VARCHAR` | UTC rendering of `start_ns` |
-| `end_iso_utc` | `VARCHAR` | UTC rendering of `end_ns` |
-| `camera_count` | `INTEGER` | Number of discovered camera inputs in the segment directory |
-| `camera_labels` | `VARCHAR` | Comma-separated camera labels, for example `zed1,zed2,zed3,zed4` |
-| `video_message_count` | `BIGINT` | Number of `foxglove.CompressedVideo` messages observed in the MCAP |
-| `index_source` | `VARCHAR` | Current extractor label, currently `mcap_video_bounds` |
-
-Indexes are created on `start_ns` and `end_ns`.
-
-## Query Semantics
-
-- `--at` performs an overlap lookup, not just an exact nanosecond equality check.
-- Query precision follows the precision supplied by the user.
-- A second-precision value like `2026-03-18T12-00-23` is treated as the whole second `[12:00:23.000, 12:00:23.999999999]`.
-- Integer epochs are widened similarly by their apparent unit:
-  - 10 digits or fewer: seconds
-  - 11-13 digits: milliseconds
-  - 14-16 digits: microseconds
-  - 17+ digits: nanoseconds
-- `--start/--end` returns every segment whose `[start_ns, end_ns]` overlaps the requested interval.
-
-## Timezone Behavior
-
-- Query default is `--timezone dataset`.
-- `dataset` resolves to the `default_timezone` stored in `meta`.
-- If inference is unavailable, the script falls back to `local`.
-- Explicit values are also accepted:
-  - `local`
-  - `UTC`
-  - fixed offsets such as `UTC+08:00`
-  - IANA zone names such as `Asia/Shanghai`
diff --git a/include/cvmmap_streamer/tools/zed_progress_bar.hpp b/include/cvmmap_streamer/tools/zed_progress_bar.hpp
deleted file mode 100644
index 021fe7d..0000000
--- a/include/cvmmap_streamer/tools/zed_progress_bar.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string_view>
-
-namespace cvmmap_streamer::zed_tools {
-
-[[nodiscard]]
-bool stderr_supports_progress_bar();
-
-class ProgressBar {
-public:
-  explicit ProgressBar(std::uint64_t total_frames);
-  ~ProgressBar();
-
-  [[nodiscard]]
-  bool enabled() const;
-
-  void update(std::uint64_t completed_frames);
-  void update_fraction(double fraction, std::string_view detail = {});
-  void finish(std::uint64_t completed_frames, bool success);
-  void finish_fraction(double fraction, bool success, std::string_view detail = {});
-
-private:
-  struct Impl;
-  std::unique_ptr<Impl> impl_{};
-};
-
-} // namespace cvmmap_streamer::zed_tools
diff --git a/include/cvmmap_streamer/tools/zed_svo_mp4_support.hpp b/include/cvmmap_streamer/tools/zed_svo_mp4_support.hpp
deleted file mode 100644
index 59a7f7d..0000000
--- a/include/cvmmap_streamer/tools/zed_svo_mp4_support.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#pragma once
-
-#include "cvmmap_streamer/config/runtime_config.hpp"
-
-#include <cstdint>
-#include <expected>
-#include <filesystem>
-#include <memory>
-#include <string>
-#include <string_view>
-
-namespace cvmmap_streamer::zed_tools {
-
-using cvmmap_streamer::CodecType;
-using cvmmap_streamer::EncoderDeviceType;
-
-inline constexpr std::uint32_t kDefaultGopSize = 30;
-inline constexpr std::uint32_t kDefaultBFrames = 0;
-inline constexpr int kDefaultQuality = 23;
-inline constexpr std::uint64_t kNanosPerSecond = 1'000'000'000ull;
-
-enum class PresetKind : std::uint8_t {
-  Fast,
-  Balanced,
-  Quality,
-};
-
-enum class TuneKind : std::uint8_t {
-  LowLatency,
-  Balanced,
-};
-
-struct EncodeTuning {
-  PresetKind preset{PresetKind::Fast};
-  TuneKind tune{TuneKind::LowLatency};
-  int quality{kDefaultQuality};
-  std::uint32_t gop{kDefaultGopSize};
-  std::uint32_t b_frames{kDefaultBFrames};
-};
-
-[[nodiscard]]
-std::expected<CodecType, std::string> parse_codec(std::string_view raw);
-
-[[nodiscard]]
-std::expected<EncoderDeviceType, std::string> parse_encoder_device(std::string_view raw);
-
-[[nodiscard]]
-std::expected<PresetKind, std::string> parse_preset(std::string_view raw);
-
-[[nodiscard]]
-std::expected<TuneKind, std::string> parse_tune(std::string_view raw);
-
-[[nodiscard]]
-std::string_view codec_name(CodecType codec);
-
-[[nodiscard]]
-std::string_view preset_name(PresetKind preset);
-
-[[nodiscard]]
-std::string_view tune_name(TuneKind tune);
-
-[[nodiscard]]
-std::uint64_t frame_period_ns(float fps);
-
-[[nodiscard]]
-std::filesystem::path derive_output_path(const std::filesystem::path &input_path);
-
-class Mp4Writer {
-public:
-  Mp4Writer();
-  Mp4Writer(const Mp4Writer &) = delete;
-  Mp4Writer &operator=(const Mp4Writer &) = delete;
-  Mp4Writer(Mp4Writer &&) noexcept;
-  Mp4Writer &operator=(Mp4Writer &&) noexcept;
-  ~Mp4Writer();
-
-  [[nodiscard]]
-  std::expected<void, std::string> open(
-      const std::filesystem::path &output_path,
-      CodecType codec,
-      EncoderDeviceType encoder_device,
-      std::uint32_t width,
-      std::uint32_t height,
-      float fps,
-      const EncodeTuning &tuning);
-
-  [[nodiscard]]
-  std::expected<void, std::string> write_bgr_frame(
-      const std::uint8_t *data,
-      std::size_t row_stride_bytes,
-      std::uint64_t relative_timestamp_ns);
-
-  [[nodiscard]]
-  std::expected<void, std::string> flush();
-
-  [[nodiscard]]
-  bool using_hardware() const;
-
-private:
-  struct Impl;
-  std::unique_ptr<Impl> impl_{};
-};
- -} // namespace cvmmap_streamer::zed_tools diff --git a/scripts/mcap_bundle_validator.py b/scripts/mcap_bundle_validator.py deleted file mode 100644 index 9654d3b..0000000 --- a/scripts/mcap_bundle_validator.py +++ /dev/null @@ -1,362 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -from collections import Counter -from dataclasses import dataclass, field -from pathlib import Path -import re - -import click - -import zed_batch_svo_to_mcap as batch - - -BUNDLE_TOPIC = "/bundle" -CAMERA_PREFIX = "/camera/" -NAMESPACED_TOPIC_PATTERN = re.compile(r"^/([^/]+)/([^/]+)$") - -SINGLE_TOPIC_SCHEMA_NAMES = { - "/camera/video": "foxglove.CompressedVideo", - "/camera/depth": "cvmmap_streamer.DepthMap", - "/camera/calibration": "foxglove.CameraCalibration", - "/camera/depth_calibration": "foxglove.CameraCalibration", - "/camera/pose": "foxglove.PoseInFrame", -} - - -@dataclass(slots=True) -class CameraSummary: - video_messages: int = 0 - depth_messages: int = 0 - pose_messages: int = 0 - calibration_messages: int = 0 - depth_calibration_messages: int = 0 - body_messages: int = 0 - present_members: int = 0 - corrupted_gap_members: int = 0 - unknown_members: int = 0 - - -@dataclass(slots=True) -class McapSummary: - path: Path - layout: str = "unknown" - validation_status: str = "invalid" - validation_reason: str = "" - camera_labels: tuple[str, ...] = () - bundle_count: int = 0 - policy_counts: Counter[str] = field(default_factory=Counter) - camera_stats: dict[str, CameraSummary] = field(default_factory=dict) - schema_mismatches: list[str] = field(default_factory=list) - - -def iter_mcap_paths(inputs: tuple[Path, ...], recursive: bool) -> list[Path]: - discovered: list[Path] = [] - for input_path in inputs: - resolved = input_path.expanduser().resolve() - if resolved.is_file(): - discovered.append(resolved) - continue - if resolved.is_dir(): - pattern = "*.mcap" if not recursive else "**/*.mcap" - discovered.extend(sorted(resolved.glob(pattern))) - continue - raise click.ClickException(f"path does not exist: {resolved}") - return sorted(dict.fromkeys(discovered)) - - -def policy_name_from_message(bundle_message: object) -> str: - descriptor = bundle_message.DESCRIPTOR.enum_types_by_name.get("BundlePolicy") - if descriptor is None: - return str(bundle_message.policy) - value = descriptor.values_by_number.get(bundle_message.policy) - return value.name if value is not None else str(bundle_message.policy) - - -def status_name_from_member(member: object, present_value: int | None) -> str: - if present_value is None: - return "PRESENT" if member.HasField("timestamp") else "UNKNOWN" - field_descriptor = member.DESCRIPTOR.fields_by_name.get("status") - descriptor = field_descriptor.enum_type if field_descriptor is not None else None - if descriptor is None: - return "PRESENT" if member.status == present_value else "UNKNOWN" - value = descriptor.values_by_number.get(member.status) - return value.name if value is not None else str(member.status) - - -def record_single_camera_topic( - summary: McapSummary, - topic: str, - schema_name: str | None, -) -> None: - stats = summary.camera_stats.setdefault("camera", CameraSummary()) - if topic == "/camera/video": - stats.video_messages += 1 - elif topic == "/camera/depth": - stats.depth_messages += 1 - elif topic == "/camera/pose": - stats.pose_messages += 1 - elif topic == "/camera/calibration": - stats.calibration_messages += 1 - elif topic == "/camera/depth_calibration": - stats.depth_calibration_messages += 1 - elif topic == "/camera/body": - 
stats.body_messages += 1 - - expected_schema = SINGLE_TOPIC_SCHEMA_NAMES.get(topic) - if expected_schema is not None and schema_name != expected_schema: - summary.schema_mismatches.append( - f"{topic}: expected schema '{expected_schema}', got '{schema_name or 'none'}'" - ) - - -def probe_single_camera_output(path: Path) -> batch.OutputProbeResult: - base_probe = batch.probe_output(path, ("camera",), layout="single-camera", bundle_topic=None) - if base_probe.status != "valid": - return base_probe - - reader_module = batch.load_mcap_reader() - stats = CameraSummary() - schema_mismatches: list[str] = [] - - try: - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, _message in reader.iter_messages(): - topic = channel.topic - schema_name = schema.name if schema is not None else None - if topic == "/camera/video": - stats.video_messages += 1 - elif topic == "/camera/depth": - stats.depth_messages += 1 - elif topic == "/camera/pose": - stats.pose_messages += 1 - elif topic == "/camera/calibration": - stats.calibration_messages += 1 - elif topic == "/camera/depth_calibration": - stats.depth_calibration_messages += 1 - elif topic == "/camera/body": - stats.body_messages += 1 - - expected_schema = SINGLE_TOPIC_SCHEMA_NAMES.get(topic) - if expected_schema is not None and schema_name != expected_schema: - schema_mismatches.append( - f"{topic}: expected schema '{expected_schema}', got '{schema_name or 'none'}'" - ) - except Exception as error: # noqa: BLE001 - return batch.OutputProbeResult(output_path=path, status="invalid", reason=str(error)) - - if schema_mismatches: - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason=schema_mismatches[0], - ) - if stats.video_messages == 0: - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason="single-camera MCAP has no /camera/video messages", - ) - if stats.depth_messages == 0: - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason="single-camera MCAP has no /camera/depth messages", - ) - if stats.video_messages != stats.depth_messages: - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason=( - "single-camera video/depth count mismatch: " - f"video_messages={stats.video_messages} depth_messages={stats.depth_messages}" - ), - ) - if stats.calibration_messages != 1: - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason=( - "single-camera calibration count mismatch: " - f"/camera/calibration={stats.calibration_messages}" - ), - ) - if stats.depth_calibration_messages not in (0, 1): - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason=( - "single-camera depth calibration count mismatch: " - f"/camera/depth_calibration={stats.depth_calibration_messages}" - ), - ) - if stats.pose_messages > stats.video_messages: - return batch.OutputProbeResult( - output_path=path, - status="invalid", - reason=( - "single-camera pose count exceeds video count: " - f"pose_messages={stats.pose_messages} video_messages={stats.video_messages}" - ), - ) - return batch.OutputProbeResult(output_path=path, status="valid") - - -def summarize_mcap(path: Path) -> McapSummary: - reader_module = batch.load_mcap_reader() - summary = McapSummary(path=path) - camera_labels: set[str] = set() - saw_single_camera_topic = False - saw_namespaced_camera_topic = False - saw_bundle_manifest = False - - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for 
schema, channel, message in reader.iter_messages(): - topic = channel.topic - schema_name = schema.name if schema is not None else None - if topic == BUNDLE_TOPIC: - summary.layout = "bundled" - saw_bundle_manifest = True - if schema is None or schema.name != "cvmmap_streamer.BundleManifest": - summary.validation_status = "invalid" - summary.validation_reason = f"bundle topic '{BUNDLE_TOPIC}' is missing the BundleManifest schema" - continue - - bundle_class, present_value = batch.load_bundle_manifest_type(schema.data) - bundle = bundle_class() - bundle.ParseFromString(message.data) - summary.bundle_count += 1 - summary.policy_counts[policy_name_from_message(bundle)] += 1 - - for member in bundle.members: - label = str(member.camera_label) - camera_labels.add(label) - stats = summary.camera_stats.setdefault(label, CameraSummary()) - status_name = status_name_from_member(member, present_value) - if status_name == "BUNDLE_MEMBER_STATUS_PRESENT" or status_name == "PRESENT": - stats.present_members += 1 - elif status_name == "BUNDLE_MEMBER_STATUS_CORRUPTED_GAP": - stats.corrupted_gap_members += 1 - else: - stats.unknown_members += 1 - continue - - if topic.startswith(CAMERA_PREFIX): - saw_single_camera_topic = True - if summary.layout == "unknown": - summary.layout = "single-camera" - record_single_camera_topic(summary, topic, schema_name) - continue - - match = NAMESPACED_TOPIC_PATTERN.match(topic) - if not match: - continue - label, stream_kind = match.groups() - if label == "camera": - continue - saw_namespaced_camera_topic = True - if summary.layout == "unknown": - summary.layout = "copy" - camera_labels.add(label) - stats = summary.camera_stats.setdefault(label, CameraSummary()) - if stream_kind == "video": - stats.video_messages += 1 - elif stream_kind == "depth": - stats.depth_messages += 1 - elif stream_kind == "pose": - stats.pose_messages += 1 - elif stream_kind == "calibration": - stats.calibration_messages += 1 - elif stream_kind == "depth_calibration": - stats.depth_calibration_messages += 1 - elif stream_kind == "body": - stats.body_messages += 1 - - if saw_single_camera_topic and saw_namespaced_camera_topic: - summary.layout = "mixed" - summary.validation_status = "invalid" - summary.validation_reason = "MCAP mixes single-camera and multi-camera topic layouts" - return summary - - if saw_namespaced_camera_topic and not saw_bundle_manifest and summary.layout == "bundled": - summary.layout = "copy" - - if summary.layout == "single-camera": - summary.camera_labels = ("camera",) - probe = probe_single_camera_output(path) - summary.validation_status = probe.status - summary.validation_reason = probe.reason - if summary.schema_mismatches and summary.validation_status == "valid": - summary.validation_status = "invalid" - summary.validation_reason = summary.schema_mismatches[0] - return summary - - summary.camera_labels = tuple(sorted(camera_labels)) - if summary.camera_labels: - probe = batch.probe_output( - path, - summary.camera_labels, - layout=summary.layout, - bundle_topic=BUNDLE_TOPIC if summary.layout == "bundled" else None, - ) - summary.validation_status = probe.status - summary.validation_reason = probe.reason - else: - summary.validation_status = "invalid" - summary.validation_reason = "could not infer a supported MCAP layout from topics" - return summary - - -def print_summary(summary: McapSummary) -> None: - status_text = summary.validation_status - layout_text = summary.layout - cameras_text = ",".join(summary.camera_labels) if summary.camera_labels else "-" - policy_text 
= ",".join( - f"{policy}={count}" - for policy, count in sorted(summary.policy_counts.items()) - ) or "-" - click.echo( - f"{status_text}: {summary.path} [{layout_text}] bundles={summary.bundle_count} " - f"cameras={cameras_text} policies={policy_text}" - ) - for label in summary.camera_labels: - stats = summary.camera_stats[label] - click.echo( - " " - f"{label}: video={stats.video_messages} depth={stats.depth_messages} pose={stats.pose_messages} " - f"calibration={stats.calibration_messages} depth_calibration={stats.depth_calibration_messages} " - f"body={stats.body_messages} present={stats.present_members} " - f"corrupted_gap={stats.corrupted_gap_members} unknown={stats.unknown_members}" - ) - if summary.validation_reason: - click.echo(f" reason: {summary.validation_reason}") - - -@click.command() -@click.argument("paths", nargs=-1, type=click.Path(path_type=Path)) -@click.option("--recursive", is_flag=True, help="Recursively discover *.mcap files under directory inputs.") -def main(paths: tuple[Path, ...], recursive: bool) -> None: - """Summarize and validate legacy single-camera, bundled, or copy-layout MCAP files.""" - if not paths: - raise click.ClickException("provide at least one MCAP file or directory") - - mcap_paths = iter_mcap_paths(paths, recursive=recursive) - if not mcap_paths: - raise click.ClickException("no .mcap files matched the provided inputs") - - invalid_count = 0 - for path in mcap_paths: - summary = summarize_mcap(path) - print_summary(summary) - if summary.validation_status != "valid": - invalid_count += 1 - - if invalid_count: - raise SystemExit(1) - - -if __name__ == "__main__": - main() diff --git a/scripts/mcap_depth_alignment.py b/scripts/mcap_depth_alignment.py deleted file mode 100644 index 271b4b0..0000000 --- a/scripts/mcap_depth_alignment.py +++ /dev/null @@ -1,400 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import Path -from typing import BinaryIO - -import click -import cv2 -import numpy as np - -import mcap_rgbd_viewer as viewer - - -@dataclass(slots=True, frozen=True) -class CameraCalibration: - width: int - height: int - distortion_model: str - distortion: tuple[float, float, float, float, float] - intrinsic_matrix: tuple[float, float, float, float, float, float, float, float, float] - rectification_matrix: tuple[float, float, float, float, float, float, float, float, float] - projection_matrix: tuple[float, float, float, float, float, float, float, float, float, float, float, float] - - @property - def fx(self) -> float: - return self.intrinsic_matrix[0] - - @property - def fy(self) -> float: - return self.intrinsic_matrix[4] - - @property - def cx(self) -> float: - return self.intrinsic_matrix[2] - - @property - def cy(self) -> float: - return self.intrinsic_matrix[5] - - -@dataclass(slots=True, frozen=True) -class CalibrationPair: - video: CameraCalibration - depth: CameraCalibration - - -@dataclass(slots=True, frozen=True) -class AffineMapping: - scale_x: float - scale_y: float - offset_x: float - offset_y: float - - def matrix(self) -> np.ndarray: - return np.array( - [ - [self.scale_x, 0.0, self.offset_x], - [0.0, self.scale_y, self.offset_y], - ], - dtype=np.float32, - ) - - -def select_camera_label(layout_info: viewer.McapLayoutInfo, camera_label: str | None) -> str: - if camera_label is None: - return layout_info.camera_labels[0] - if camera_label not in layout_info.camera_labels: - available = ", ".join(layout_info.camera_labels) - raise 
click.ClickException(f"camera label '{camera_label}' not found; available: {available}") - return camera_label - - -def load_calibration(path: Path, topic: str) -> CameraCalibration: - reader_module = viewer.load_mcap_reader() - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, message in reader.iter_messages(): - if channel.topic != topic: - continue - if schema is None or schema.name != "foxglove.CameraCalibration": - raise click.ClickException(f"unexpected schema on {topic}: {schema.name if schema else 'none'}") - message_class = viewer.load_message_class(schema.data, "foxglove.CameraCalibration") - payload = message_class() - payload.ParseFromString(message.data) - return CameraCalibration( - width=int(payload.width), - height=int(payload.height), - distortion_model=str(payload.distortion_model), - distortion=tuple(float(value) for value in payload.D[:5]), - intrinsic_matrix=tuple(float(value) for value in payload.K[:9]), - rectification_matrix=tuple(float(value) for value in payload.R[:9]), - projection_matrix=tuple(float(value) for value in payload.P[:12]), - ) - raise click.ClickException(f"missing calibration topic {topic} in {path}") - - -def load_calibration_pair(path: Path, layout_info: viewer.McapLayoutInfo, camera_label: str) -> CalibrationPair: - video_topic = viewer.topic_for(layout_info.layout, camera_label, "calibration") - depth_topic = viewer.topic_for(layout_info.layout, camera_label, "depth_calibration") - return CalibrationPair( - video=load_calibration(path, video_topic), - depth=load_calibration(path, depth_topic), - ) - - -def mapping_from_depth_to_rgb(pair: CalibrationPair) -> AffineMapping: - scale_x = pair.video.fx / pair.depth.fx - scale_y = pair.video.fy / pair.depth.fy - offset_x = pair.video.cx - (scale_x * pair.depth.cx) - offset_y = pair.video.cy - (scale_y * pair.depth.cy) - return AffineMapping(scale_x=scale_x, scale_y=scale_y, offset_x=offset_x, offset_y=offset_y) - - -def mapping_from_rgb_to_depth(pair: CalibrationPair) -> AffineMapping: - scale_x = pair.depth.fx / pair.video.fx - scale_y = pair.depth.fy / pair.video.fy - offset_x = pair.depth.cx - (scale_x * pair.video.cx) - offset_y = pair.depth.cy - (scale_y * pair.video.cy) - return AffineMapping(scale_x=scale_x, scale_y=scale_y, offset_x=offset_x, offset_y=offset_y) - - -def describe_mapping(pair: CalibrationPair) -> str: - depth_to_rgb = mapping_from_depth_to_rgb(pair) - rgb_to_depth = mapping_from_rgb_to_depth(pair) - anisotropic = abs(depth_to_rgb.scale_x - depth_to_rgb.scale_y) > 1e-6 - has_offset = abs(depth_to_rgb.offset_x) > 1e-3 or abs(depth_to_rgb.offset_y) > 1e-3 - shape = "anisotropic stretch" if anisotropic else "uniform scale" - if has_offset: - shape += " with offset" - else: - shape += " with zero offset" - return ( - f"mapping type: {shape}\n" - f"depth->rgb: u_rgb = {depth_to_rgb.scale_x:.9f} * u_depth + {depth_to_rgb.offset_x:.9f}\n" - f"depth->rgb: v_rgb = {depth_to_rgb.scale_y:.9f} * v_depth + {depth_to_rgb.offset_y:.9f}\n" - f"rgb->depth: u_depth = {rgb_to_depth.scale_x:.9f} * u_rgb + {rgb_to_depth.offset_x:.9f}\n" - f"rgb->depth: v_depth = {rgb_to_depth.scale_y:.9f} * v_rgb + {rgb_to_depth.offset_y:.9f}" - ) - - -def is_identity_rectification(calibration: CameraCalibration) -> bool: - expected = (1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0) - return max(abs(value - target) for value, target in zip(calibration.rectification_matrix, expected, strict=True)) < 1e-6 - - -def has_zero_distortion(calibration: CameraCalibration) -> 
bool: - return max(abs(value) for value in calibration.distortion) < 1e-9 - - -def depth_pixel_to_rgb(depth_u: float, depth_v: float, pair: CalibrationPair) -> tuple[float, float]: - mapping = mapping_from_depth_to_rgb(pair) - return ( - (mapping.scale_x * depth_u) + mapping.offset_x, - (mapping.scale_y * depth_v) + mapping.offset_y, - ) - - -def rgb_pixel_to_depth(rgb_u: float, rgb_v: float, pair: CalibrationPair) -> tuple[float, float]: - mapping = mapping_from_rgb_to_depth(pair) - return ( - (mapping.scale_x * rgb_u) + mapping.offset_x, - (mapping.scale_y * rgb_v) + mapping.offset_y, - ) - - -def align_depth_to_rgb( - depth_image: np.ndarray, - pair: CalibrationPair, - *, - interpolation: int = cv2.INTER_NEAREST, -) -> np.ndarray: - mapping = mapping_from_depth_to_rgb(pair) - return cv2.warpAffine( - depth_image, - mapping.matrix(), - (pair.video.width, pair.video.height), - flags=interpolation, - borderMode=cv2.BORDER_CONSTANT, - borderValue=0, - ) - - -def align_rgb_to_depth( - rgb_image: np.ndarray, - pair: CalibrationPair, - *, - interpolation: int = cv2.INTER_LINEAR, -) -> np.ndarray: - mapping = mapping_from_rgb_to_depth(pair) - return cv2.warpAffine( - rgb_image, - mapping.matrix(), - (pair.depth.width, pair.depth.height), - flags=interpolation, - borderMode=cv2.BORDER_CONSTANT, - borderValue=0, - ) - - -def load_depth_array(state: viewer.CameraViewState, depth_index: int, depth_cache_stream: BinaryIO | None = None) -> np.ndarray: - try: - import rvl - except ModuleNotFoundError as error: - raise click.ClickException( - "image export needs the optional rvl-impl binding; run `uv sync --extra viewer`" - ) from error - - ref = state.depth_frames[depth_index] - if depth_cache_stream is None: - with state.depth_cache_path.open("rb") as stream: - stream.seek(ref.offset) - payload = stream.read(ref.length) - else: - depth_cache_stream.seek(ref.offset) - payload = depth_cache_stream.read(ref.length) - - if ref.encoding_name == "RVL_U16_LOSSLESS": - depth = rvl.decompress_u16(payload).reshape(ref.height, ref.width) - return depth.astype(np.float32) - if ref.encoding_name == "RVL_F32": - return rvl.decompress_f32(payload).reshape(ref.height, ref.width).astype(np.float32) - raise click.ClickException(f"unsupported depth encoding '{ref.encoding_name}'") - - -def resolve_present_slot(state: viewer.CameraViewState, frame_index: int) -> tuple[int, viewer.BundleSlot]: - if not state.slots: - raise click.ClickException("MCAP does not contain any viewable RGB+depth pairs") - clamped = max(0, min(frame_index, len(state.slots) - 1)) - slot = state.slots[clamped] - if slot.video_index is not None and slot.depth_index is not None: - return clamped, slot - for delta in range(1, len(state.slots)): - left = clamped - delta - if left >= 0: - candidate = state.slots[left] - if candidate.video_index is not None and candidate.depth_index is not None: - return left, candidate - right = clamped + delta - if right < len(state.slots): - candidate = state.slots[right] - if candidate.video_index is not None and candidate.depth_index is not None: - return right, candidate - raise click.ClickException("could not find a present RGB+depth slot") - - -def colorize_depth(depth_m: np.ndarray, palette_name: str) -> np.ndarray: - valid = np.isfinite(depth_m) & (depth_m > 0.0) - normalized = np.zeros(depth_m.shape, dtype=np.uint8) - if valid.any(): - lo = float(np.percentile(depth_m[valid], 5.0)) - hi = float(np.percentile(depth_m[valid], 95.0)) - span = max(hi - lo, 1e-6) - scaled = np.clip((depth_m - lo) / span, 0.0, 1.0) 
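# Note (annotation, derived from the code below): `scaled` is 0.0 at the 5th
# percentile and 1.0 at the 95th, and the ramp is inverted on purpose: near
# pixels render bright (255), far pixels dark; invalid pixels stay 0 and are
# blacked out again after the palette lookup.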
- normalized[valid] = np.round((1.0 - scaled[valid]) * 255.0).astype(np.uint8) - colormap = viewer.DEPTH_PALETTE_TO_OPENCV[palette_name] - if colormap is None: - colored = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR) - else: - colored = cv2.applyColorMap(normalized, colormap) - colored[~valid] = 0 - return colored - - -def export_example_images( - path: Path, - *, - layout_info: viewer.McapLayoutInfo, - camera_label: str, - pair: CalibrationPair, - frame_index: int, - ffmpeg_bin: str, - output_dir: Path, - palette_name: str, -) -> None: - state = viewer.read_camera_state( - path, - layout_info=layout_info, - camera_label=camera_label, - ffmpeg_bin=ffmpeg_bin, - preview_width=pair.video.width, - ) - try: - resolved_index, slot = resolve_present_slot(state, frame_index) - capture = cv2.VideoCapture(str(state.preview_video_path)) - capture.set(cv2.CAP_PROP_POS_FRAMES, float(slot.video_index)) - ok, rgb_bgr = capture.read() - capture.release() - if not ok or rgb_bgr is None: - raise click.ClickException(f"could not decode RGB frame {slot.video_index}") - - depth_native = load_depth_array(state, slot.depth_index) / 1000.0 - depth_aligned = align_depth_to_rgb(depth_native, pair, interpolation=cv2.INTER_NEAREST) - rgb_aligned = align_rgb_to_depth(rgb_bgr, pair, interpolation=cv2.INTER_LINEAR) - - output_dir.mkdir(parents=True, exist_ok=True) - rgb_path = output_dir / "rgb_frame.png" - depth_native_path = output_dir / "depth_native_colorized.png" - depth_aligned_path = output_dir / "depth_aligned_to_rgb_colorized.png" - overlay_path = output_dir / "depth_overlay_on_rgb.png" - rgb_to_depth_path = output_dir / "rgb_aligned_to_depth.png" - - depth_native_color = colorize_depth(depth_native, palette_name) - depth_aligned_color = colorize_depth(depth_aligned, palette_name) - overlay = cv2.addWeighted(rgb_bgr, 0.72, depth_aligned_color, 0.28, 0.0) - - cv2.imwrite(str(rgb_path), rgb_bgr) - cv2.imwrite(str(depth_native_path), depth_native_color) - cv2.imwrite(str(depth_aligned_path), depth_aligned_color) - cv2.imwrite(str(overlay_path), overlay) - cv2.imwrite(str(rgb_to_depth_path), rgb_aligned) - - click.echo(f"exported slot index: {resolved_index}") - click.echo(f"rgb frame: {rgb_path}") - click.echo(f"native depth: {depth_native_path}") - click.echo(f"depth aligned to rgb: {depth_aligned_path}") - click.echo(f"depth overlay on rgb: {overlay_path}") - click.echo(f"rgb aligned to depth: {rgb_to_depth_path}") - finally: - state.close() - - -@click.command() -@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True, dir_okay=False)) -@click.option("--camera-label", type=str, help="Camera label to inspect; defaults to the first camera in the MCAP.") -@click.option("--frame-index", type=int, default=0, show_default=True, help="Frame or bundle index used for example image export.") -@click.option("--output-dir", type=click.Path(path_type=Path, file_okay=False), help="When set, export an aligned depth example and overlay PNGs here.") -@click.option("--ffmpeg-bin", default="ffmpeg", show_default=True, help="ffmpeg binary used to decode MCAP video for the example export.") -@click.option( - "--depth-palette", - type=click.Choice(tuple(viewer.DEPTH_PALETTE_TO_OPENCV.keys()), case_sensitive=False), - default="Turbo", - show_default=True, - help="Depth palette used for exported example PNGs.", -) -def main( - mcap_path: Path, - camera_label: str | None, - frame_index: int, - output_dir: Path | None, - ffmpeg_bin: str, - depth_palette: str, -) -> None: - """Explain and demonstrate how depth/rgb 
alignment works for an exported MCAP.""" - layout_info = viewer.infer_layout(mcap_path) - selected_camera = select_camera_label(layout_info, camera_label) - pair = load_calibration_pair(mcap_path, layout_info, selected_camera) - - click.echo(f"path: {mcap_path}") - click.echo(f"layout: {layout_info.layout}") - click.echo(f"camera: {selected_camera}") - click.echo(f"video calibration: {pair.video.width}x{pair.video.height}") - click.echo(f"depth calibration: {pair.depth.width}x{pair.depth.height}") - click.echo( - "video intrinsics: " - f"fx={pair.video.fx:.6f} fy={pair.video.fy:.6f} cx={pair.video.cx:.6f} cy={pair.video.cy:.6f}" - ) - click.echo( - "depth intrinsics: " - f"fx={pair.depth.fx:.6f} fy={pair.depth.fy:.6f} cx={pair.depth.cx:.6f} cy={pair.depth.cy:.6f}" - ) - click.echo( - "zero distortion / identity rectification: " - f"video={has_zero_distortion(pair.video) and is_identity_rectification(pair.video)} " - f"depth={has_zero_distortion(pair.depth) and is_identity_rectification(pair.depth)}" - ) - click.echo(describe_mapping(pair)) - - sample_depth_u = pair.depth.width * 0.5 - sample_depth_v = pair.depth.height * 0.5 - mapped_rgb_u, mapped_rgb_v = depth_pixel_to_rgb(sample_depth_u, sample_depth_v, pair) - click.echo( - "sample center mapping: " - f"depth({sample_depth_u:.3f}, {sample_depth_v:.3f}) -> rgb({mapped_rgb_u:.3f}, {mapped_rgb_v:.3f})" - ) - - sample_rgb_u = pair.video.width * 0.5 - sample_rgb_v = pair.video.height * 0.5 - mapped_depth_u, mapped_depth_v = rgb_pixel_to_depth(sample_rgb_u, sample_rgb_v, pair) - click.echo( - "sample inverse mapping: " - f"rgb({sample_rgb_u:.3f}, {sample_rgb_v:.3f}) -> depth({mapped_depth_u:.3f}, {mapped_depth_v:.3f})" - ) - - if output_dir is not None: - export_example_images( - mcap_path, - layout_info=layout_info, - camera_label=selected_camera, - pair=pair, - frame_index=frame_index, - ffmpeg_bin=ffmpeg_bin, - output_dir=output_dir, - palette_name=depth_palette, - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/mcap_rgbd_example.py b/scripts/mcap_rgbd_example.py deleted file mode 100644 index a02f342..0000000 --- a/scripts/mcap_rgbd_example.py +++ /dev/null @@ -1,630 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -from dataclasses import asdict, dataclass, field -import json -from pathlib import Path -import subprocess -import tempfile - -import click -import cv2 -import numpy as np -from google.protobuf import descriptor_pb2, descriptor_pool, message_factory, timestamp_pb2 - -import mcap_bundle_validator as bundle_validator -import zed_batch_svo_to_mcap as batch - - -BUNDLE_TOPIC = "/bundle" -DEPTH_PALETTE_TO_OPENCV = { - "Gray": None, - "Turbo": cv2.COLORMAP_TURBO, - "Inferno": cv2.COLORMAP_INFERNO, - "Plasma": cv2.COLORMAP_PLASMA, - "Viridis": cv2.COLORMAP_VIRIDIS, - "Cividis": cv2.COLORMAP_CIVIDIS, - "Magma": cv2.COLORMAP_MAGMA, - "Parula": cv2.COLORMAP_PARULA, -} -VIDEO_INPUT_FORMATS = {"h264": "h264", "h265": "hevc"} - -_MESSAGE_CLASS_CACHE: dict[tuple[bytes, str], object] = {} - - -@dataclass(slots=True) -class TimestampRange: - start_ns: int | None = None - end_ns: int | None = None - - def update(self, timestamp_ns: int) -> None: - if self.start_ns is None or timestamp_ns < self.start_ns: - self.start_ns = timestamp_ns - if self.end_ns is None or timestamp_ns > self.end_ns: - self.end_ns = timestamp_ns - - -@dataclass(slots=True) -class CameraRanges: - video: TimestampRange = field(default_factory=TimestampRange) - depth: TimestampRange = field(default_factory=TimestampRange) - - 
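# A minimal usage sketch of the range helpers above (not from the original
# module; the nanosecond values and the _example_camera_ranges name are
# invented for illustration): TimestampRange.update() folds each observed
# timestamp into a running [start_ns, end_ns] envelope.
def _example_camera_ranges() -> CameraRanges:
    ranges = CameraRanges()
    for ts_ns in (120, 80, 95):
        ranges.video.update(ts_ns)
    assert (ranges.video.start_ns, ranges.video.end_ns) == (80, 120)
    return ranges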
-@dataclass(slots=True) -class RecipeSummary: - base: bundle_validator.McapSummary - bundle_timestamps: TimestampRange = field(default_factory=TimestampRange) - camera_ranges: dict[str, CameraRanges] = field(default_factory=dict) - - -@dataclass(slots=True) -class VideoSample: - timestamp_ns: int - format_name: str - stream_index: int - - -@dataclass(slots=True) -class DepthSample: - timestamp_ns: int - payload: bytes - stream_index: int - width: int - height: int - encoding_name: str - source_unit_name: str - storage_unit_name: str - - -@dataclass(slots=True) -class BundleMemberSample: - bundle_index: int - bundle_timestamp_ns: int - member_timestamp_ns: int | None - status_name: str - corrupted_frames_skipped: int - member_stream_index: int - - -def load_message_class(schema_data: bytes, message_type_name: str): - cache_key = (schema_data, message_type_name) - cached = _MESSAGE_CLASS_CACHE.get(cache_key) - if cached is not None: - return cached - - descriptor_set = descriptor_pb2.FileDescriptorSet() - descriptor_set.ParseFromString(schema_data) - pool = descriptor_pool.DescriptorPool() - has_embedded_timestamp = any( - file_descriptor.name == "google/protobuf/timestamp.proto" - for file_descriptor in descriptor_set.file - ) - if has_embedded_timestamp: - for file_descriptor in descriptor_set.file: - if file_descriptor.name == "google/protobuf/timestamp.proto": - pool.Add(file_descriptor) - break - else: - pool.AddSerializedFile(timestamp_pb2.DESCRIPTOR.serialized_pb) - for file_descriptor in descriptor_set.file: - if file_descriptor.name == "google/protobuf/timestamp.proto": - continue - pool.Add(file_descriptor) - - message_descriptor = pool.FindMessageTypeByName(message_type_name) - message_class = message_factory.GetMessageClass(message_descriptor) - _MESSAGE_CLASS_CACHE[cache_key] = message_class - return message_class - - -def parse_timestamp_ns(timestamp_message: object, fallback_log_time_ns: int) -> int: - seconds = int(getattr(timestamp_message, "seconds", 0)) - nanos = int(getattr(timestamp_message, "nanos", 0)) - if seconds == 0 and nanos == 0: - return fallback_log_time_ns - return seconds * 1_000_000_000 + nanos - - -def format_timestamp_ns(timestamp_ns: int | None) -> str: - if timestamp_ns is None: - return "-" - seconds, nanos = divmod(timestamp_ns, 1_000_000_000) - return f"{seconds}.{nanos:09d}" - - -def format_range(timestamp_range: TimestampRange) -> str: - return f"{format_timestamp_ns(timestamp_range.start_ns)} .. 
{format_timestamp_ns(timestamp_range.end_ns)}" - - -def enum_name(message: object, field_name: str) -> str: - field_descriptor = message.DESCRIPTOR.fields_by_name[field_name] - value = int(getattr(message, field_name)) - resolved = field_descriptor.enum_type.values_by_number.get(value) - return resolved.name if resolved is not None else str(value) - - -def is_present_status(status_name: str) -> bool: - return status_name in {"PRESENT", "BUNDLE_MEMBER_STATUS_PRESENT"} - - -def topic_for(layout: str, camera_label: str, kind: str) -> str: - if layout == "single-camera": - return f"/camera/{kind}" - if layout not in {"copy", "bundled"}: - raise click.ClickException(f"unsupported layout '{layout}'") - return f"/{camera_label}/{kind}" - - -def selected_camera_label(base_summary: bundle_validator.McapSummary, camera_label: str | None) -> str: - if camera_label is None: - return base_summary.camera_labels[0] - if camera_label not in base_summary.camera_labels: - available = ", ".join(base_summary.camera_labels) - raise click.ClickException(f"camera label '{camera_label}' not found. available: {available}") - return camera_label - - -def ensure_supported_layout(base_summary: bundle_validator.McapSummary) -> None: - if base_summary.layout not in {"single-camera", "copy", "bundled"}: - reason = base_summary.validation_reason or "unsupported MCAP layout" - raise click.ClickException(reason) - - -def summarize_mcap(path: Path) -> RecipeSummary: - base_summary = bundle_validator.summarize_mcap(path) - camera_ranges = { - label: CameraRanges() - for label in (base_summary.camera_labels or ("camera",)) - } - bundle_timestamps = TimestampRange() - reader_module = batch.load_mcap_reader() - - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, message in reader.iter_messages(): - topic = channel.topic - if topic == BUNDLE_TOPIC and schema is not None and schema.name == "cvmmap_streamer.BundleManifest": - bundle_class, _present_value = batch.load_bundle_manifest_type(schema.data) - bundle_message = bundle_class() - bundle_message.ParseFromString(message.data) - bundle_timestamps.update(parse_timestamp_ns(bundle_message.timestamp, int(message.log_time))) - continue - - if topic.endswith("/video"): - if topic == "/camera/video": - label = "camera" - else: - label = topic.removeprefix("/").removesuffix("/video") - if schema is None or schema.name != "foxglove.CompressedVideo" or label not in camera_ranges: - continue - message_class = load_message_class(schema.data, "foxglove.CompressedVideo") - payload = message_class() - payload.ParseFromString(message.data) - camera_ranges[label].video.update(parse_timestamp_ns(payload.timestamp, int(message.log_time))) - continue - - if topic.endswith("/depth"): - if topic == "/camera/depth": - label = "camera" - else: - label = topic.removeprefix("/").removesuffix("/depth") - if schema is None or schema.name != "cvmmap_streamer.DepthMap" or label not in camera_ranges: - continue - message_class = load_message_class(schema.data, "cvmmap_streamer.DepthMap") - payload = message_class() - payload.ParseFromString(message.data) - camera_ranges[label].depth.update(parse_timestamp_ns(payload.timestamp, int(message.log_time))) - - return RecipeSummary( - base=base_summary, - bundle_timestamps=bundle_timestamps, - camera_ranges=camera_ranges, - ) - - -def print_summary(summary: RecipeSummary) -> None: - base = summary.base - click.echo(f"path: {base.path}") - click.echo(f"validation: {base.validation_status}") - if 
base.validation_reason: - click.echo(f"validation reason: {base.validation_reason}") - click.echo(f"layout: {base.layout}") - click.echo(f"camera labels: {', '.join(base.camera_labels) if base.camera_labels else '-'}") - if base.layout == "bundled": - click.echo(f"bundle count: {base.bundle_count}") - click.echo(f"bundle timestamp range: {format_range(summary.bundle_timestamps)}") - policy_text = ", ".join( - f"{policy}={count}" - for policy, count in sorted(base.policy_counts.items()) - ) or "-" - click.echo(f"bundle policies: {policy_text}") - - for label in base.camera_labels: - stats = base.camera_stats[label] - ranges = summary.camera_ranges[label] - click.echo(f"camera: {label}") - click.echo(f" video messages: {stats.video_messages}") - click.echo(f" video timestamp range: {format_range(ranges.video)}") - click.echo(f" depth messages: {stats.depth_messages}") - click.echo(f" depth timestamp range: {format_range(ranges.depth)}") - click.echo(f" pose messages: {stats.pose_messages}") - click.echo(f" calibration messages: {stats.calibration_messages}") - click.echo(f" depth calibration messages: {stats.depth_calibration_messages}") - click.echo(f" body messages: {stats.body_messages}") - if base.layout == "bundled": - click.echo(f" present bundle members: {stats.present_members}") - click.echo(f" corrupted gap members: {stats.corrupted_gap_members}") - click.echo(f" unknown bundle members: {stats.unknown_members}") - - -def decode_depth_array(depth_sample: DepthSample) -> np.ndarray: - try: - import rvl - except ModuleNotFoundError as error: - raise click.ClickException( - "depth export needs the optional rvl-impl binding; run `uv sync --extra viewer`" - ) from error - - if depth_sample.encoding_name == "RVL_U16_LOSSLESS": - depth = rvl.decompress_u16(depth_sample.payload).astype(np.float32) - if ( - depth_sample.storage_unit_name == "STORAGE_UNIT_MILLIMETER" - or depth_sample.source_unit_name == "DEPTH_UNIT_MILLIMETER" - ): - return depth / 1000.0 - return depth - if depth_sample.encoding_name == "RVL_F32": - return rvl.decompress_f32(depth_sample.payload).astype(np.float32) - raise click.ClickException(f"unsupported depth encoding '{depth_sample.encoding_name}'") - - -def colorize_depth( - depth_m: np.ndarray, - *, - depth_min_m: float, - depth_max_m: float, - depth_palette_name: str, -) -> np.ndarray: - valid = np.isfinite(depth_m) & (depth_m > 0.0) - span = max(depth_max_m - depth_min_m, 1e-6) - clipped = np.clip((depth_m - depth_min_m) / span, 0.0, 1.0) - normalized = np.zeros(depth_m.shape, dtype=np.uint8) - normalized[valid] = np.round((1.0 - clipped[valid]) * 255.0).astype(np.uint8) - colormap = DEPTH_PALETTE_TO_OPENCV[depth_palette_name] - if colormap is None: - colored = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR) - else: - colored = cv2.applyColorMap(normalized, colormap) - colored[~valid] = 0 - return colored - - -def export_rgb_frame( - *, - ffmpeg_bin: str, - raw_video_path: Path, - video_format: str, - frame_index: int, - output_path: Path, -) -> None: - input_format = VIDEO_INPUT_FORMATS.get(video_format) - if input_format is None: - raise click.ClickException(f"unsupported video format '{video_format}'") - command = [ - ffmpeg_bin, - "-hide_banner", - "-loglevel", - "error", - "-y", - "-fflags", - "+genpts", - "-f", - input_format, - "-i", - str(raw_video_path), - "-vf", - f"select=eq(n\\,{frame_index})", - "-frames:v", - "1", - str(output_path), - ] - try: - completed = subprocess.run(command, check=False, capture_output=True, text=True) - except FileNotFoundError as 
error: - raise click.ClickException(f"ffmpeg binary not found: {ffmpeg_bin}") from error - if completed.returncode != 0: - reason = completed.stderr.strip() or completed.stdout.strip() or "ffmpeg failed to export the RGB frame" - raise click.ClickException(reason) - if not output_path.is_file(): - raise click.ClickException(f"ffmpeg did not write {output_path}") - - -def collect_sample_data( - path: Path, - *, - layout: str, - camera_label: str, - sample_index: int, -) -> tuple[VideoSample, DepthSample, BundleMemberSample | None, bytes]: - reader_module = batch.load_mcap_reader() - video_topic = topic_for(layout, camera_label, "video") - depth_topic = topic_for(layout, camera_label, "depth") - - video_sample: VideoSample | None = None - depth_sample: DepthSample | None = None - bundle_sample: BundleMemberSample | None = None - video_index = 0 - depth_index = 0 - bundle_member_index = 0 - video_format: str | None = None - - with tempfile.TemporaryDirectory(prefix="mcap_rgbd_example_") as temp_dir_name: - raw_video_path = Path(temp_dir_name) / "stream.bin" - with raw_video_path.open("wb") as raw_video_stream: - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, message in reader.iter_messages(): - topic = channel.topic - - if layout == "bundled" and topic == BUNDLE_TOPIC and bundle_sample is None: - if schema is None or schema.name != "cvmmap_streamer.BundleManifest": - continue - bundle_class, present_value = batch.load_bundle_manifest_type(schema.data) - bundle_message = bundle_class() - bundle_message.ParseFromString(message.data) - for member in bundle_message.members: - if str(member.camera_label) != camera_label: - continue - status_name = bundle_validator.status_name_from_member(member, present_value) - member_timestamp_ns = None - if member.HasField("timestamp"): - member_timestamp_ns = parse_timestamp_ns(member.timestamp, int(message.log_time)) - if is_present_status(status_name): - if bundle_member_index == sample_index: - bundle_sample = BundleMemberSample( - bundle_index=int(bundle_message.bundle_index), - bundle_timestamp_ns=parse_timestamp_ns(bundle_message.timestamp, int(message.log_time)), - member_timestamp_ns=member_timestamp_ns, - status_name=status_name, - corrupted_frames_skipped=int(getattr(member, "corrupted_frames_skipped", 0)), - member_stream_index=bundle_member_index, - ) - bundle_member_index += 1 - break - continue - - if topic == video_topic: - if schema is None or schema.name != "foxglove.CompressedVideo": - raise click.ClickException(f"unexpected schema on {video_topic}: {schema.name if schema else 'none'}") - message_class = load_message_class(schema.data, "foxglove.CompressedVideo") - payload = message_class() - payload.ParseFromString(message.data) - frame_format = str(payload.format) - if frame_format not in VIDEO_INPUT_FORMATS: - raise click.ClickException(f"unsupported video format '{frame_format}' on {video_topic}") - if video_format is None: - video_format = frame_format - elif video_format != frame_format: - raise click.ClickException( - f"inconsistent video format on {video_topic}: {video_format} then {frame_format}" - ) - if video_index <= sample_index: - raw_video_stream.write(bytes(payload.data)) - if video_index == sample_index: - video_sample = VideoSample( - timestamp_ns=parse_timestamp_ns(payload.timestamp, int(message.log_time)), - format_name=frame_format, - stream_index=video_index, - ) - video_index += 1 - continue - - if topic == depth_topic: - if schema is None or schema.name != 
"cvmmap_streamer.DepthMap": - raise click.ClickException(f"unexpected schema on {depth_topic}: {schema.name if schema else 'none'}") - message_class = load_message_class(schema.data, "cvmmap_streamer.DepthMap") - payload = message_class() - payload.ParseFromString(message.data) - if depth_index == sample_index: - depth_sample = DepthSample( - timestamp_ns=parse_timestamp_ns(payload.timestamp, int(message.log_time)), - payload=bytes(payload.data), - stream_index=depth_index, - width=int(payload.width), - height=int(payload.height), - encoding_name=enum_name(payload, "encoding"), - source_unit_name=enum_name(payload, "source_unit"), - storage_unit_name=enum_name(payload, "storage_unit"), - ) - depth_index += 1 - continue - - if ( - video_sample is not None - and depth_sample is not None - and (layout != "bundled" or bundle_sample is not None) - ): - break - - raw_video_bytes = raw_video_path.read_bytes() - - if video_sample is None: - raise click.ClickException(f"sample index {sample_index} exceeded available video samples") - if depth_sample is None: - raise click.ClickException(f"sample index {sample_index} exceeded available depth samples") - if layout == "bundled" and bundle_sample is None: - raise click.ClickException( - f"could not map per-camera sample index {sample_index} to a bundle member for {camera_label}" - ) - return video_sample, depth_sample, bundle_sample, raw_video_bytes - - -def write_sample_outputs( - *, - path: Path, - layout: str, - output_dir: Path, - camera_label: str, - sample_index: int, - video_sample: VideoSample, - depth_sample: DepthSample, - bundle_sample: BundleMemberSample | None, - raw_video_bytes: bytes, - ffmpeg_bin: str, - depth_min_m: float, - depth_max_m: float, - depth_palette_name: str, -) -> None: - output_dir.mkdir(parents=True, exist_ok=True) - rgb_output_path = output_dir / "rgb.png" - depth_output_path = output_dir / "depth.npy" - depth_preview_path = output_dir / "depth_preview.png" - metadata_path = output_dir / "sample_metadata.json" - - with tempfile.TemporaryDirectory(prefix="mcap_rgbd_example_export_") as temp_dir_name: - raw_video_path = Path(temp_dir_name) / f"sample.{video_sample.format_name}" - raw_video_path.write_bytes(raw_video_bytes) - export_rgb_frame( - ffmpeg_bin=ffmpeg_bin, - raw_video_path=raw_video_path, - video_format=video_sample.format_name, - frame_index=sample_index, - output_path=rgb_output_path, - ) - - depth_m = decode_depth_array(depth_sample) - np.save(depth_output_path, depth_m) - depth_preview = colorize_depth( - depth_m, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - depth_palette_name=depth_palette_name, - ) - if not cv2.imwrite(str(depth_preview_path), depth_preview): - raise click.ClickException(f"failed to write depth preview to {depth_preview_path}") - - metadata = { - "mcap_path": str(path), - "layout": layout, - } - metadata.update( - { - "camera_label": camera_label, - "sample_index": sample_index, - "video_stream_index": video_sample.stream_index, - "video_timestamp_ns": video_sample.timestamp_ns, - "video_timestamp": format_timestamp_ns(video_sample.timestamp_ns), - "video_format": video_sample.format_name, - "depth_stream_index": depth_sample.stream_index, - "depth_timestamp_ns": depth_sample.timestamp_ns, - "depth_timestamp": format_timestamp_ns(depth_sample.timestamp_ns), - "depth_width": depth_sample.width, - "depth_height": depth_sample.height, - "depth_encoding": depth_sample.encoding_name, - "depth_source_unit": depth_sample.source_unit_name, - "depth_storage_unit": 
depth_sample.storage_unit_name, - "depth_palette": depth_palette_name, - "depth_min_m": depth_min_m, - "depth_max_m": depth_max_m, - "rgb_output_path": str(rgb_output_path), - "depth_output_path": str(depth_output_path), - "depth_preview_path": str(depth_preview_path), - } - ) - if bundle_sample is not None: - metadata["bundle"] = asdict(bundle_sample) - metadata["bundle"]["bundle_timestamp"] = format_timestamp_ns(bundle_sample.bundle_timestamp_ns) - metadata["bundle"]["member_timestamp"] = format_timestamp_ns(bundle_sample.member_timestamp_ns) - - metadata_path.write_text(json.dumps(metadata, indent=2, sort_keys=True) + "\n", encoding="utf-8") - - -@click.group() -def main() -> None: - """Small MCAP RGBD example helper for bundled, copy, and legacy single-camera MCAP files.""" - - -@main.command("summary") -@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True)) -def summary_command(mcap_path: Path) -> None: - """Print a compact metadata summary for a single MCAP file.""" - summary = summarize_mcap(mcap_path.resolve()) - ensure_supported_layout(summary.base) - print_summary(summary) - - -@main.command("export-sample") -@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True)) -@click.option("--camera-label", help="Camera label to export. Defaults to `camera` for legacy files or the first sorted namespaced label.") -@click.option("--sample-index", default=0, show_default=True, type=click.IntRange(min=0), help="Zero-based per-camera RGB+depth sample index.") -@click.option("--output-dir", required=True, type=click.Path(path_type=Path), help="Directory to write rgb.png, depth.npy, depth_preview.png, and sample_metadata.json.") -@click.option("--ffmpeg-bin", default="ffmpeg", show_default=True, help="ffmpeg binary used to decode the selected RGB frame.") -@click.option("--depth-min-m", default=0.2, show_default=True, type=float, help="Minimum displayed depth in meters for depth_preview.png.") -@click.option("--depth-max-m", default=5.0, show_default=True, type=float, help="Maximum displayed depth in meters for depth_preview.png.") -@click.option( - "--depth-palette", - default="Turbo", - show_default=True, - type=click.Choice(tuple(DEPTH_PALETTE_TO_OPENCV.keys()), case_sensitive=False), - help="Depth color palette for depth_preview.png.", -) -def export_sample_command( - mcap_path: Path, - camera_label: str | None, - sample_index: int, - output_dir: Path, - ffmpeg_bin: str, - depth_min_m: float, - depth_max_m: float, - depth_palette: str, -) -> None: - """Export one per-camera RGB/depth sample from a bundled, copy, or legacy single-camera MCAP file.""" - summary = summarize_mcap(mcap_path.resolve()) - ensure_supported_layout(summary.base) - if summary.base.validation_status != "valid": - raise click.ClickException( - f"refusing to export from invalid MCAP: {summary.base.validation_reason or summary.base.validation_status}" - ) - - label = selected_camera_label(summary.base, camera_label) - stats = summary.base.camera_stats[label] - pair_count = min(stats.video_messages, stats.depth_messages) - if pair_count <= 0: - raise click.ClickException(f"camera '{label}' has no paired RGB+depth samples") - if sample_index >= pair_count: - raise click.ClickException( - f"--sample-index {sample_index} is outside 0..{pair_count - 1} for camera '{label}'" - ) - - selected_palette = next( - palette_name - for palette_name in DEPTH_PALETTE_TO_OPENCV - if palette_name.lower() == depth_palette.lower() - ) - video_sample, depth_sample, bundle_sample, raw_video_bytes = 
collect_sample_data( - mcap_path.resolve(), - layout=summary.base.layout, - camera_label=label, - sample_index=sample_index, - ) - write_sample_outputs( - path=mcap_path.resolve(), - layout=summary.base.layout, - output_dir=output_dir.expanduser().resolve(), - camera_label=label, - sample_index=sample_index, - video_sample=video_sample, - depth_sample=depth_sample, - bundle_sample=bundle_sample, - raw_video_bytes=raw_video_bytes, - ffmpeg_bin=ffmpeg_bin, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - depth_palette_name=selected_palette, - ) - click.echo(f"wrote sample export: {output_dir.expanduser().resolve()}") - - -if __name__ == "__main__": - main() diff --git a/scripts/mcap_rgbd_viewer.py b/scripts/mcap_rgbd_viewer.py deleted file mode 100644 index 164fc77..0000000 --- a/scripts/mcap_rgbd_viewer.py +++ /dev/null @@ -1,1168 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import contextlib -import importlib -import subprocess -import sys -import tempfile -import time -from collections import deque -from dataclasses import dataclass -from pathlib import Path -from typing import BinaryIO - -import click -import cv2 -import numpy as np -from google.protobuf import descriptor_pb2, descriptor_pool, message_factory, timestamp_pb2 - -import zed_batch_svo_to_mcap as batch - - -SCRIPT_PATH = Path(__file__).resolve() -REPO_ROOT = SCRIPT_PATH.parents[1] -WORKSPACE_ROOT = REPO_ROOT.parent -MCAP_PYTHON_ROOT = WORKSPACE_ROOT / "mcap" / "python" / "mcap" -if str(SCRIPT_PATH.parent) not in sys.path: - sys.path.insert(0, str(SCRIPT_PATH.parent)) -if str(MCAP_PYTHON_ROOT) not in sys.path: - sys.path.insert(0, str(MCAP_PYTHON_ROOT)) - - -BUNDLE_TOPIC = "/bundle" -SINGLE_CAMERA_LABEL = "camera" -DISPLAY_WIDTH = 720 -DISPLAY_HEIGHT = 420 -TEXT_LINE_HEIGHT = 28 -WINDOW_PADDING = 24 -METADATA_PANEL_WIDTH = 380 -VIEWPORT_WIDTH = DISPLAY_WIDTH * 2 + METADATA_PANEL_WIDTH + WINDOW_PADDING * 4 -VIEWPORT_HEIGHT = DISPLAY_HEIGHT + 260 -FRAME_SLIDER_DEBOUNCE_SECONDS = 0.08 -PLAYBACK_FPS_WARNING_RATIO = 0.9 -PLAYBACK_FPS_WARNING_ABS_TOLERANCE = 1.0 -PLAYBACK_FPS_WINDOW_SECONDS = 2.0 - -_MCAP_READER_MODULE = None -_MESSAGE_CLASS_CACHE: dict[tuple[bytes, str], object] = {} -DEPTH_PALETTE_TO_OPENCV = { - "Gray": None, - "Turbo": cv2.COLORMAP_TURBO, - "Inferno": cv2.COLORMAP_INFERNO, - "Plasma": cv2.COLORMAP_PLASMA, - "Viridis": cv2.COLORMAP_VIRIDIS, - "Cividis": cv2.COLORMAP_CIVIDIS, - "Magma": cv2.COLORMAP_MAGMA, - "Parula": cv2.COLORMAP_PARULA, -} - - -@dataclass(slots=True, frozen=True) -class CalibrationInfo: - width: int - height: int - - -@dataclass(slots=True, frozen=True) -class VideoFrameInfo: - timestamp_ns: int - - -@dataclass(slots=True, frozen=True) -class DepthFrameInfo: - timestamp_ns: int - offset: int - length: int - width: int - height: int - encoding_name: str - source_unit_name: str - storage_unit_name: str - - -@dataclass(slots=True, frozen=True) -class BundleSlot: - bundle_index: int - bundle_timestamp_ns: int - status_name: str - corrupted_frames_skipped: int - sample_timestamp_ns: int | None - video_index: int | None - depth_index: int | None - - -@dataclass(slots=True) -class McapLayoutInfo: - path: Path - layout: str - camera_labels: tuple[str, ...] 
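# Note (annotation): populated only when a /bundle manifest stream is present,
# mapping each BundleManifest.policy enum name to its message count; it stays
# empty for single-camera and copy layouts.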
- bundled_policy_counts: dict[str, int] - - -@dataclass(slots=True) -class CameraViewState: - mcap_path: Path - layout: str - camera_label: str - video_format: str | None - video_frames: list[VideoFrameInfo] - depth_frames: list[DepthFrameInfo] - slots: list[BundleSlot] - video_calibration: CalibrationInfo | None - depth_calibration: CalibrationInfo | None - preview_video_path: Path | None - depth_cache_path: Path - temp_dir: tempfile.TemporaryDirectory[str] - - def close(self) -> None: - self.temp_dir.cleanup() - - -@dataclass(slots=True) -class RenderedSlot: - rgb_bgr: np.ndarray - depth_bgr: np.ndarray - title: str - metadata_lines: list[str] - - -def load_mcap_reader(): - global _MCAP_READER_MODULE - if _MCAP_READER_MODULE is not None: - return _MCAP_READER_MODULE - if str(MCAP_PYTHON_ROOT) not in sys.path: - sys.path.insert(0, str(MCAP_PYTHON_ROOT)) - _MCAP_READER_MODULE = importlib.import_module("mcap.reader") - return _MCAP_READER_MODULE - - -def load_message_class(schema_data: bytes, message_type_name: str): - cache_key = (schema_data, message_type_name) - cached = _MESSAGE_CLASS_CACHE.get(cache_key) - if cached is not None: - return cached - - descriptor_set = descriptor_pb2.FileDescriptorSet() - descriptor_set.ParseFromString(schema_data) - pool = descriptor_pool.DescriptorPool() - has_embedded_timestamp = any( - file_descriptor.name == "google/protobuf/timestamp.proto" - for file_descriptor in descriptor_set.file - ) - if has_embedded_timestamp: - for file_descriptor in descriptor_set.file: - if file_descriptor.name == "google/protobuf/timestamp.proto": - pool.Add(file_descriptor) - break - else: - pool.AddSerializedFile(timestamp_pb2.DESCRIPTOR.serialized_pb) - for file_descriptor in descriptor_set.file: - if file_descriptor.name == "google/protobuf/timestamp.proto": - continue - pool.Add(file_descriptor) - - message_descriptor = pool.FindMessageTypeByName(message_type_name) - message_class = message_factory.GetMessageClass(message_descriptor) - _MESSAGE_CLASS_CACHE[cache_key] = message_class - return message_class - - -def parse_timestamp_ns(timestamp_message: object, fallback_log_time_ns: int) -> int: - seconds = int(getattr(timestamp_message, "seconds", 0)) - nanos = int(getattr(timestamp_message, "nanos", 0)) - if seconds == 0 and nanos == 0: - return fallback_log_time_ns - return seconds * 1_000_000_000 + nanos - - -def format_timestamp_ns(timestamp_ns: int | None) -> str: - if timestamp_ns is None: - return "-" - seconds, nanos = divmod(timestamp_ns, 1_000_000_000) - return f"{seconds}.{nanos:09d}" - - -def enum_name(message: object, field_name: str) -> str: - field_descriptor = message.DESCRIPTOR.fields_by_name[field_name] - value = int(getattr(message, field_name)) - resolved = field_descriptor.enum_type.values_by_number.get(value) - return resolved.name if resolved is not None else str(value) - - -def is_present_status(status_name: str) -> bool: - return status_name in {"PRESENT", "BUNDLE_MEMBER_STATUS_PRESENT"} - - -def is_corrupted_gap_status(status_name: str) -> bool: - return status_name in {"CORRUPTED_GAP", "BUNDLE_MEMBER_STATUS_CORRUPTED_GAP"} - - -def infer_layout(path: Path) -> McapLayoutInfo: - reader_module = load_mcap_reader() - camera_labels: set[str] = set() - bundled_policy_counts: dict[str, int] = {} - saw_single = False - saw_bundled = False - saw_bundle_manifest = False - - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, message in reader.iter_messages(): - topic = channel.topic - if 
topic.startswith("/camera/"): - saw_single = True - continue - if topic == BUNDLE_TOPIC: - saw_bundled = True - saw_bundle_manifest = True - if schema is None or schema.name != "cvmmap_streamer.BundleManifest": - continue - bundle_class, _ = batch.load_bundle_manifest_type(schema.data) - bundle = bundle_class() - bundle.ParseFromString(message.data) - for member in bundle.members: - camera_labels.add(str(member.camera_label)) - policy_name = enum_name(bundle, "policy") - bundled_policy_counts[policy_name] = bundled_policy_counts.get(policy_name, 0) + 1 - continue - if topic.count("/") == 2 and topic.startswith("/"): - label = topic.split("/")[1] - if label and label != "camera": - saw_bundled = True - camera_labels.add(label) - - if saw_single and saw_bundled: - raise click.ClickException("MCAP mixes single-camera and multi-camera topics") - if saw_single: - return McapLayoutInfo( - path=path, - layout="single-camera", - camera_labels=(SINGLE_CAMERA_LABEL,), - bundled_policy_counts=bundled_policy_counts, - ) - if saw_bundled and camera_labels: - return McapLayoutInfo( - path=path, - layout="bundled" if saw_bundle_manifest else "copy", - camera_labels=tuple(sorted(camera_labels)), - bundled_policy_counts=bundled_policy_counts, - ) - raise click.ClickException(f"could not infer a supported MCAP layout from {path}") - - -def topic_for(layout: str, camera_label: str, kind: str) -> str: - if layout == "single-camera": - return f"/camera/{kind}" - return f"/{camera_label}/{kind}" - - -def build_preview_video( - ffmpeg_bin: str, - raw_video_path: Path, - video_format: str, - preview_width: int, - temp_dir: Path, -) -> Path: - input_format = {"h264": "h264", "h265": "hevc"}[video_format] - # This script uses one simple RGB preview path for both sequential playback and - # random-access scrubbing, so it builds an intra-frame MJPEG cache first. 
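# Note (annotation): MJPEG is intra-only, so every cached frame decodes
# independently and cv2.CAP_PROP_POS_FRAMES seeks can land on exact frame
# indices; the scale filter below caps the width at preview_width and rounds
# the height to an even value (-2), which the encoder requires.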
- preview_path = temp_dir / "preview.avi" - command = [ - ffmpeg_bin, - "-hide_banner", - "-loglevel", - "error", - "-y", - "-fflags", - "+genpts", - "-f", - input_format, - "-i", - str(raw_video_path), - "-vf", - f"scale=min(iw\\,{preview_width}):-2:flags=lanczos", - "-c:v", - "mjpeg", - "-q:v", - "3", - str(preview_path), - ] - completed = subprocess.run(command, check=False, capture_output=True, text=True) - if completed.returncode != 0: - reason = completed.stderr.strip() or completed.stdout.strip() or "ffmpeg failed to build preview cache" - raise click.ClickException(reason) - return preview_path - - -def read_camera_state( - path: Path, - *, - layout_info: McapLayoutInfo, - camera_label: str, - ffmpeg_bin: str, - preview_width: int, -) -> CameraViewState: - reader_module = load_mcap_reader() - temp_dir = tempfile.TemporaryDirectory(prefix="mcap_rgbd_viewer_") - temp_root = Path(temp_dir.name) - raw_video_path = temp_root / "stream.bin" - depth_cache_path = temp_root / "depth.cache" - - video_topic = topic_for(layout_info.layout, camera_label, "video") - depth_topic = topic_for(layout_info.layout, camera_label, "depth") - calibration_topic = topic_for(layout_info.layout, camera_label, "calibration") - depth_calibration_topic = topic_for(layout_info.layout, camera_label, "depth_calibration") - - video_frames: list[VideoFrameInfo] = [] - depth_frames: list[DepthFrameInfo] = [] - bundle_members: list[tuple[int, int, str, int, int | None]] = [] - video_format: str | None = None - video_calibration: CalibrationInfo | None = None - depth_calibration: CalibrationInfo | None = None - - with raw_video_path.open("wb") as video_stream, depth_cache_path.open("wb") as depth_stream: - with path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, message in reader.iter_messages(): - topic = channel.topic - if topic == video_topic: - if schema is None or schema.name != "foxglove.CompressedVideo": - raise click.ClickException(f"unexpected schema on {video_topic}: {schema.name if schema else 'none'}") - message_class = load_message_class(schema.data, "foxglove.CompressedVideo") - payload = message_class() - payload.ParseFromString(message.data) - frame_format = str(payload.format) - if frame_format not in {"h264", "h265"}: - raise click.ClickException(f"unsupported video format '{frame_format}' on {video_topic}") - if video_format is None: - video_format = frame_format - elif frame_format != video_format: - raise click.ClickException( - f"inconsistent video format on {video_topic}: {video_format} then {frame_format}" - ) - video_stream.write(payload.data) - video_frames.append( - VideoFrameInfo( - timestamp_ns=parse_timestamp_ns(payload.timestamp, int(message.log_time)), - ) - ) - continue - - if topic == depth_topic: - if schema is None or schema.name != "cvmmap_streamer.DepthMap": - raise click.ClickException(f"unexpected schema on {depth_topic}: {schema.name if schema else 'none'}") - message_class = load_message_class(schema.data, "cvmmap_streamer.DepthMap") - payload = message_class() - payload.ParseFromString(message.data) - offset = depth_stream.tell() - depth_bytes = bytes(payload.data) - depth_stream.write(depth_bytes) - depth_frames.append( - DepthFrameInfo( - timestamp_ns=parse_timestamp_ns(payload.timestamp, int(message.log_time)), - offset=offset, - length=len(depth_bytes), - width=int(payload.width), - height=int(payload.height), - encoding_name=enum_name(payload, "encoding"), - source_unit_name=enum_name(payload, "source_unit"), - 
storage_unit_name=enum_name(payload, "storage_unit"), - ) - ) - continue - - if topic == calibration_topic: - if schema is None or schema.name != "foxglove.CameraCalibration": - continue - message_class = load_message_class(schema.data, "foxglove.CameraCalibration") - payload = message_class() - payload.ParseFromString(message.data) - if video_calibration is None: - video_calibration = CalibrationInfo(width=int(payload.width), height=int(payload.height)) - continue - - if topic == depth_calibration_topic: - if schema is None or schema.name != "foxglove.CameraCalibration": - continue - message_class = load_message_class(schema.data, "foxglove.CameraCalibration") - payload = message_class() - payload.ParseFromString(message.data) - if depth_calibration is None: - depth_calibration = CalibrationInfo(width=int(payload.width), height=int(payload.height)) - continue - - if layout_info.layout == "bundled" and topic == BUNDLE_TOPIC: - if schema is None or schema.name != "cvmmap_streamer.BundleManifest": - continue - bundle_class, present_value = batch.load_bundle_manifest_type(schema.data) - bundle = bundle_class() - bundle.ParseFromString(message.data) - for member in bundle.members: - if str(member.camera_label) != camera_label: - continue - status_name = "PRESENT" if member.HasField("timestamp") else "UNKNOWN" - if present_value is not None: - field_descriptor = member.DESCRIPTOR.fields_by_name["status"] - enum_value = field_descriptor.enum_type.values_by_number.get(int(member.status)) - status_name = enum_value.name if enum_value is not None else str(member.status) - sample_timestamp_ns = None - if member.HasField("timestamp"): - sample_timestamp_ns = parse_timestamp_ns(member.timestamp, int(message.log_time)) - bundle_members.append( - ( - int(bundle.bundle_index), - parse_timestamp_ns(bundle.timestamp, int(message.log_time)), - status_name, - int(getattr(member, "corrupted_frames_skipped", 0)), - sample_timestamp_ns, - ) - ) - break - - preview_video_path: Path | None = None - if video_format is not None and video_frames: - suffix = ".h265" if video_format == "h265" else ".h264" - renamed_raw_video_path = raw_video_path.with_suffix(suffix) - raw_video_path.replace(renamed_raw_video_path) - preview_video_path = build_preview_video( - ffmpeg_bin=ffmpeg_bin, - raw_video_path=renamed_raw_video_path, - video_format=video_format, - preview_width=preview_width, - temp_dir=temp_root, - ) - - slots: list[BundleSlot] = [] - if layout_info.layout == "bundled" and bundle_members: - present_cursor = 0 - usable_present_frames = min(len(video_frames), len(depth_frames)) - for bundle_index, bundle_timestamp_ns, status_name, corrupted_frames_skipped, sample_timestamp_ns in bundle_members: - video_index: int | None = None - depth_index: int | None = None - if is_present_status(status_name) and present_cursor < usable_present_frames: - video_index = present_cursor - depth_index = present_cursor - present_cursor += 1 - slots.append( - BundleSlot( - bundle_index=bundle_index, - bundle_timestamp_ns=bundle_timestamp_ns, - status_name=status_name, - corrupted_frames_skipped=corrupted_frames_skipped, - sample_timestamp_ns=sample_timestamp_ns, - video_index=video_index, - depth_index=depth_index, - ) - ) - else: - paired_frames = min(len(video_frames), len(depth_frames)) - slots = [ - BundleSlot( - bundle_index=index, - bundle_timestamp_ns=video_frames[index].timestamp_ns, - status_name="PRESENT", - corrupted_frames_skipped=0, - sample_timestamp_ns=video_frames[index].timestamp_ns, - video_index=index, - 
depth_index=index, - ) - for index in range(paired_frames) - ] - - if not slots: - raise click.ClickException(f"no viewable RGB+depth pairs found for camera '{camera_label}'") - - return CameraViewState( - mcap_path=path, - layout=layout_info.layout, - camera_label=camera_label, - video_format=video_format, - video_frames=video_frames, - depth_frames=depth_frames, - slots=slots, - video_calibration=video_calibration, - depth_calibration=depth_calibration, - preview_video_path=preview_video_path, - depth_cache_path=depth_cache_path, - temp_dir=temp_dir, - ) - - -def render_placeholder(text: str) -> np.ndarray: - canvas = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8) - cv2.putText( - canvas, - text, - (40, DISPLAY_HEIGHT // 2), - cv2.FONT_HERSHEY_SIMPLEX, - 1.0, - (255, 255, 255), - 2, - cv2.LINE_AA, - ) - return canvas - - -def fit_to_panel(frame_bgr: np.ndarray) -> np.ndarray: - src_height, src_width = frame_bgr.shape[:2] - scale = min(DISPLAY_WIDTH / src_width, DISPLAY_HEIGHT / src_height) - resized_width = max(1, int(round(src_width * scale))) - resized_height = max(1, int(round(src_height * scale))) - resized = cv2.resize(frame_bgr, (resized_width, resized_height), interpolation=cv2.INTER_AREA) - canvas = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8) - x_offset = (DISPLAY_WIDTH - resized_width) // 2 - y_offset = (DISPLAY_HEIGHT - resized_height) // 2 - canvas[y_offset : y_offset + resized_height, x_offset : x_offset + resized_width] = resized - return canvas - - -def read_video_frame( - preview_video_path: Path | None, - video_index: int | None, - *, - capture: cv2.VideoCapture | None = None, -) -> np.ndarray: - if preview_video_path is None or video_index is None: - return render_placeholder("NO VIDEO") - created_capture = False - if capture is None: - capture = cv2.VideoCapture(str(preview_video_path)) - created_capture = True - if not capture.isOpened(): - raise click.ClickException(f"OpenCV could not open preview cache {preview_video_path}") - try: - capture.set(cv2.CAP_PROP_POS_FRAMES, float(video_index)) - ok, frame = capture.read() - finally: - if created_capture: - capture.release() - if not ok or frame is None: - return render_placeholder("VIDEO SEEK FAILED") - return fit_to_panel(frame) - - -def decode_depth_frame( - state: CameraViewState, - depth_index: int | None, - *, - depth_min_m: float, - depth_max_m: float, - depth_palette_name: str, - depth_cache_stream: BinaryIO | None = None, -) -> np.ndarray: - if depth_index is None: - return render_placeholder("NO DEPTH") - try: - import rvl - except ModuleNotFoundError as error: - raise click.ClickException( - "the viewer needs the optional rvl-impl binding; run `uv sync --extra viewer`" - ) from error - - ref = state.depth_frames[depth_index] - if depth_cache_stream is None: - with state.depth_cache_path.open("rb") as stream: - stream.seek(ref.offset) - payload = stream.read(ref.length) - else: - depth_cache_stream.seek(ref.offset) - payload = depth_cache_stream.read(ref.length) - - if ref.encoding_name == "RVL_U16_LOSSLESS": - depth = rvl.decompress_u16(payload).astype(np.float32) - if ref.storage_unit_name == "STORAGE_UNIT_MILLIMETER" or ref.source_unit_name == "DEPTH_UNIT_MILLIMETER": - depth_m = depth / 1000.0 - else: - depth_m = depth - elif ref.encoding_name == "RVL_F32": - depth_m = rvl.decompress_f32(payload).astype(np.float32) - else: - return render_placeholder(ref.encoding_name) - - valid = np.isfinite(depth_m) & (depth_m > 0.0) - span = max(depth_max_m - depth_min_m, 1e-6) - clipped = 
np.clip((depth_m - depth_min_m) / span, 0.0, 1.0) - normalized = np.zeros(depth_m.shape, dtype=np.uint8) - normalized[valid] = np.round((1.0 - clipped[valid]) * 255.0).astype(np.uint8) - colormap = DEPTH_PALETTE_TO_OPENCV[depth_palette_name] - if colormap is None: - colored = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR) - else: - colored = cv2.applyColorMap(normalized, colormap) - colored[~valid] = 0 - return fit_to_panel(colored) - - -def build_metadata_lines( - state: CameraViewState, - slot: BundleSlot, - *, - depth_min_m: float, - depth_max_m: float, - depth_palette_name: str, -) -> list[str]: - lines = [ - f"file: {state.mcap_path.name}", - f"layout: {state.layout}", - f"camera: {state.camera_label}", - f"slots: {len(state.slots)}", - f"video frames: {len(state.video_frames)}", - f"depth frames: {len(state.depth_frames)}", - f"bundle/frame index: {slot.bundle_index}", - f"bundle ts: {format_timestamp_ns(slot.bundle_timestamp_ns)}", - f"sample ts: {format_timestamp_ns(slot.sample_timestamp_ns)}", - f"status: {slot.status_name}", - ] - if slot.corrupted_frames_skipped: - lines.append(f"corrupted frames skipped: {slot.corrupted_frames_skipped}") - if state.video_format is not None: - lines.append(f"video format: {state.video_format}") - if state.video_calibration is not None: - lines.append(f"video calib: {state.video_calibration.width}x{state.video_calibration.height}") - if state.depth_calibration is not None: - lines.append(f"depth calib: {state.depth_calibration.width}x{state.depth_calibration.height}") - elif slot.depth_index is not None: - ref = state.depth_frames[slot.depth_index] - lines.append(f"depth frame: {ref.width}x{ref.height} {ref.encoding_name}") - lines.append(f"depth range: {depth_min_m:.2f}m .. {depth_max_m:.2f}m") - lines.append(f"depth palette: {depth_palette_name}") - return lines - - -def render_slot( - state: CameraViewState, - slot_index: int, - *, - depth_min_m: float, - depth_max_m: float, - depth_palette_name: str, - preview_capture: cv2.VideoCapture | None = None, - depth_cache_stream: BinaryIO | None = None, -) -> RenderedSlot: - slot = state.slots[slot_index] - if is_corrupted_gap_status(slot.status_name): - rgb_frame = render_placeholder("CORRUPTED GAP") - depth_frame = render_placeholder("CORRUPTED GAP") - else: - rgb_frame = read_video_frame(state.preview_video_path, slot.video_index, capture=preview_capture) - depth_frame = decode_depth_frame( - state, - slot.depth_index, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - depth_palette_name=depth_palette_name, - depth_cache_stream=depth_cache_stream, - ) - - title = ( - f"{state.camera_label} " - f"{'bundle' if state.layout == 'bundled' else 'frame'}={slot.bundle_index} " - f"status={slot.status_name}" - ) - return RenderedSlot( - rgb_bgr=rgb_frame, - depth_bgr=depth_frame, - title=title, - metadata_lines=build_metadata_lines( - state, - slot, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - depth_palette_name=depth_palette_name, - ), - ) - - -def compose_preview_image(rendered: RenderedSlot) -> np.ndarray: - banner_height = 64 - banner = np.zeros((banner_height, DISPLAY_WIDTH * 2, 3), dtype=np.uint8) - cv2.putText( - banner, - rendered.title, - (16, 40), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), - 2, - cv2.LINE_AA, - ) - pair = np.concatenate([rendered.rgb_bgr, rendered.depth_bgr], axis=1) - return np.concatenate([banner, pair], axis=0) - - -def print_summary(layout_info: McapLayoutInfo, state: CameraViewState) -> None: - click.echo(f"path: {state.mcap_path}") - 
click.echo(f"layout: {layout_info.layout}") - click.echo(f"camera labels: {', '.join(layout_info.camera_labels)}") - if layout_info.bundled_policy_counts: - policy_text = ", ".join( - f"{policy}={count}" - for policy, count in sorted(layout_info.bundled_policy_counts.items()) - ) - click.echo(f"bundle policies: {policy_text}") - click.echo(f"selected camera: {state.camera_label}") - click.echo(f"viewable slots: {len(state.slots)}") - click.echo(f"video frames: {len(state.video_frames)}") - click.echo(f"depth frames: {len(state.depth_frames)}") - if state.video_format is not None: - click.echo(f"video format: {state.video_format}") - if state.video_calibration is not None: - click.echo(f"video calibration: {state.video_calibration.width}x{state.video_calibration.height}") - if state.depth_calibration is not None: - click.echo(f"depth calibration: {state.depth_calibration.width}x{state.depth_calibration.height}") - if state.slots: - click.echo(f"first slot timestamp: {format_timestamp_ns(state.slots[0].bundle_timestamp_ns)}") - click.echo(f"last slot timestamp: {format_timestamp_ns(state.slots[-1].bundle_timestamp_ns)}") - corrupted_gap_count = sum(1 for slot in state.slots if is_corrupted_gap_status(slot.status_name)) - if corrupted_gap_count: - click.echo(f"corrupted gap slots: {corrupted_gap_count}") - - -def write_preview_image( - state: CameraViewState, - output_path: Path, - frame_index: int, - depth_min_m: float, - depth_max_m: float, - depth_palette_name: str, -) -> None: - if frame_index < 0 or frame_index >= len(state.slots): - raise click.ClickException(f"--frame-index {frame_index} is outside 0..{len(state.slots) - 1}") - rendered = render_slot( - state, - frame_index, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - depth_palette_name=depth_palette_name, - ) - preview = compose_preview_image(rendered) - output_path.parent.mkdir(parents=True, exist_ok=True) - if not cv2.imwrite(str(output_path), preview): - raise click.ClickException(f"failed to write preview image to {output_path}") - click.echo(f"wrote preview: {output_path}") - - -def slot_target_timestamp_ns(slot: BundleSlot) -> int: - return slot.sample_timestamp_ns if slot.sample_timestamp_ns is not None else slot.bundle_timestamp_ns - - -def nearest_slot_index(slots: list[BundleSlot], target_timestamp_ns: int) -> int: - if not slots: - return 0 - return min( - range(len(slots)), - key=lambda index: abs(slot_target_timestamp_ns(slots[index]) - target_timestamp_ns), - ) - - -def bgr_to_texture(frame_bgr: np.ndarray) -> np.ndarray: - rgba = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGBA) - return np.ascontiguousarray(rgba, dtype=np.float32) / 255.0 - - -class ViewerApp: - def __init__( - self, - *, - mcap_path: Path, - layout_info: McapLayoutInfo, - initial_state: CameraViewState, - ffmpeg_bin: str, - preview_width: int, - depth_min_m: float, - depth_max_m: float, - ) -> None: - try: - import dearpygui.dearpygui as dpg - except ModuleNotFoundError as error: - raise click.ClickException( - "the GUI viewer needs DearPyGui; run `uv sync --extra viewer` first" - ) from error - - self.dpg = dpg - self.mcap_path = mcap_path - self.layout_info = layout_info - self.state = initial_state - self.ffmpeg_bin = ffmpeg_bin - self.preview_width = preview_width - self.depth_min_m = depth_min_m - self.depth_max_m = depth_max_m - self.depth_palette_name = "Turbo" - self.current_index = 0 - self.pending_index: int | None = None - self.pending_index_requested_at = 0.0 - self.playing = False - self.playback_fps = 15.0 - 
self.next_playback_deadline = time.monotonic() - self.playback_progress_samples: deque[tuple[float, int]] = deque() - self.playback_advanced_frames = 0 - self.actual_playback_fps = 0.0 - self.preview_capture: cv2.VideoCapture | None = None - self.depth_cache_stream: BinaryIO | None = None - self.open_state_resources() - - def open_state_resources(self) -> None: - if self.state.preview_video_path is not None: - capture = cv2.VideoCapture(str(self.state.preview_video_path)) - if not capture.isOpened(): - capture.release() - raise click.ClickException(f"OpenCV could not open preview cache {self.state.preview_video_path}") - self.preview_capture = capture - else: - self.preview_capture = None - self.depth_cache_stream = self.state.depth_cache_path.open("rb") - - def close_state_resources(self) -> None: - if self.preview_capture is not None: - self.preview_capture.release() - self.preview_capture = None - if self.depth_cache_stream is not None: - self.depth_cache_stream.close() - self.depth_cache_stream = None - - def replace_state(self, camera_label: str) -> None: - source_index = self.pending_index if self.pending_index is not None else self.current_index - source_index = max(0, min(source_index, len(self.state.slots) - 1)) - target_timestamp_ns = slot_target_timestamp_ns(self.state.slots[source_index]) - new_state = read_camera_state( - self.mcap_path, - layout_info=self.layout_info, - camera_label=camera_label, - ffmpeg_bin=self.ffmpeg_bin, - preview_width=self.preview_width, - ) - self.close_state_resources() - self.state.close() - self.state = new_state - self.open_state_resources() - self.current_index = nearest_slot_index(self.state.slots, target_timestamp_ns) - self.pending_index = None - self.next_playback_deadline = time.monotonic() + 1.0 / max(self.playback_fps, 1.0) - self.reset_playback_metrics() - self.dpg.configure_item("frame_slider", max_value=max(0, len(self.state.slots) - 1)) - self.refresh() - - def refresh(self) -> None: - rendered = render_slot( - self.state, - self.current_index, - depth_min_m=self.depth_min_m, - depth_max_m=self.depth_max_m, - depth_palette_name=self.depth_palette_name, - preview_capture=self.preview_capture, - depth_cache_stream=self.depth_cache_stream, - ) - self.dpg.set_value("rgb_texture", bgr_to_texture(rendered.rgb_bgr).ravel()) - self.dpg.set_value("depth_texture", bgr_to_texture(rendered.depth_bgr).ravel()) - self.dpg.set_value("viewer_title", rendered.title) - self.dpg.set_value("frame_slider", self.current_index) - self.dpg.set_value("frame_text", f"{self.current_index + 1}/{len(self.state.slots)}") - self.dpg.set_value("metadata_text", "\n".join(rendered.metadata_lines)) - - def set_frame_index(self, index: int, *, debounce: bool = False) -> None: - clamped = max(0, min(index, len(self.state.slots) - 1)) - if debounce: - self.pending_index = clamped - self.pending_index_requested_at = time.monotonic() - self.dpg.set_value("frame_text", f"{clamped + 1}/{len(self.state.slots)}") - return - self.pending_index = None - self.current_index = clamped - self.refresh() - - def set_playing(self, playing: bool) -> None: - self.playing = playing - self.next_playback_deadline = time.monotonic() + 1.0 / max(self.playback_fps, 1.0) - self.reset_playback_metrics() - - def toggle_playing(self) -> None: - self.set_playing(not self.playing) - - def set_playback_fps(self, playback_fps: float) -> None: - self.playback_fps = playback_fps - self.next_playback_deadline = time.monotonic() + 1.0 / max(self.playback_fps, 1.0) - self.update_playback_status() - - def 
reset_playback_metrics(self) -> None: - self.playback_progress_samples.clear() - self.playback_advanced_frames = 0 - self.actual_playback_fps = 0.0 - self.update_playback_status() - - def record_playback_step(self, timestamp_s: float, frames_advanced: int) -> None: - self.playback_advanced_frames += frames_advanced - self.playback_progress_samples.append((timestamp_s, self.playback_advanced_frames)) - cutoff = timestamp_s - PLAYBACK_FPS_WINDOW_SECONDS - while self.playback_progress_samples and self.playback_progress_samples[0][0] < cutoff: - self.playback_progress_samples.popleft() - - if len(self.playback_progress_samples) >= 2: - elapsed_s = self.playback_progress_samples[-1][0] - self.playback_progress_samples[0][0] - if elapsed_s > 0.0: - advanced_frames = self.playback_progress_samples[-1][1] - self.playback_progress_samples[0][1] - self.actual_playback_fps = advanced_frames / elapsed_s - else: - self.actual_playback_fps = 0.0 - else: - self.actual_playback_fps = 0.0 - self.update_playback_status() - - def update_playback_status(self) -> None: - if not self.dpg.does_item_exist("playback_status_text"): - return - if ( - self.playing - and self.actual_playback_fps > 0.0 - and ( - self.actual_playback_fps < self.playback_fps * PLAYBACK_FPS_WARNING_RATIO - and self.actual_playback_fps < self.playback_fps - PLAYBACK_FPS_WARNING_ABS_TOLERANCE - ) - ): - self.dpg.set_value( - "playback_status_text", - f"Warning: actual playback {self.actual_playback_fps:.1f} FPS is below target {self.playback_fps:.1f} FPS", - ) - return - self.dpg.set_value("playback_status_text", "") - - def flush_pending_index(self) -> None: - if self.pending_index is None: - return - if time.monotonic() - self.pending_index_requested_at < FRAME_SLIDER_DEBOUNCE_SECONDS: - return - target_index = self.pending_index - self.pending_index = None - self.current_index = target_index - self.refresh() - - def maybe_advance(self) -> None: - self.flush_pending_index() - if not self.playing: - return - interval_s = 1.0 / max(self.playback_fps, 1.0) - now = time.monotonic() - if now < self.next_playback_deadline: - return - frames_due = max(1, int((now - self.next_playback_deadline) / interval_s) + 1) - self.next_playback_deadline += frames_due * interval_s - next_index = self.current_index + frames_due - if next_index >= len(self.state.slots): - frames_due = len(self.state.slots) - 1 - self.current_index - if frames_due <= 0: - self.set_playing(False) - return - next_index = len(self.state.slots) - 1 - self.set_frame_index(next_index) - self.record_playback_step(time.monotonic(), frames_due) - if next_index >= len(self.state.slots) - 1: - self.set_playing(False) - - def process_ui_callbacks(self) -> None: - jobs = self.dpg.get_callback_queue() - if jobs: - self.dpg.run_callbacks(jobs) - - def run(self) -> None: - dpg = self.dpg - dpg.create_context() - dpg.configure_app(manual_callback_management=True) - with dpg.texture_registry(show=False): - dpg.add_dynamic_texture(DISPLAY_WIDTH, DISPLAY_HEIGHT, [0.0] * (DISPLAY_WIDTH * DISPLAY_HEIGHT * 4), tag="rgb_texture") - dpg.add_dynamic_texture(DISPLAY_WIDTH, DISPLAY_HEIGHT, [0.0] * (DISPLAY_WIDTH * DISPLAY_HEIGHT * 4), tag="depth_texture") - - with dpg.window(tag="main_window", label="MCAP RGBD Viewer", no_close=True): - dpg.add_text(default_value="", tag="viewer_title") - with dpg.group(horizontal=True): - if len(self.layout_info.camera_labels) > 1: - dpg.add_combo( - items=list(self.layout_info.camera_labels), - default_value=self.state.camera_label, - label="Camera", - width=160, - 
callback=lambda _s, app_data: self.replace_state(str(app_data)), - ) - dpg.add_button( - label="Play/Pause", - callback=lambda *_args: self.toggle_playing(), - ) - dpg.add_button(label="Prev", callback=lambda *_args: self.set_frame_index(self.current_index - 1)) - dpg.add_button(label="Next", callback=lambda *_args: self.set_frame_index(self.current_index + 1)) - dpg.add_slider_float( - label="Playback FPS", - min_value=1.0, - max_value=60.0, - default_value=self.playback_fps, - width=240, - callback=lambda _s, app_data: self.set_playback_fps(float(app_data)), - ) - dpg.add_text(default_value="", tag="playback_status_text", color=(255, 96, 96)) - with dpg.group(horizontal=True): - dpg.add_slider_float( - label="Depth Min (m)", - min_value=0.0, - max_value=10.0, - default_value=self.depth_min_m, - width=260, - callback=lambda _s, app_data: self._set_depth_min(float(app_data)), - ) - dpg.add_slider_float( - label="Depth Max (m)", - min_value=0.1, - max_value=20.0, - default_value=self.depth_max_m, - width=260, - callback=lambda _s, app_data: self._set_depth_max(float(app_data)), - ) - dpg.add_combo( - items=list(DEPTH_PALETTE_TO_OPENCV.keys()), - default_value=self.depth_palette_name, - label="Palette", - width=160, - callback=lambda _s, app_data: self._set_depth_palette(str(app_data)), - ) - with dpg.group(horizontal=True): - with dpg.child_window(width=DISPLAY_WIDTH + 16, height=DISPLAY_HEIGHT + 20): - dpg.add_text("RGB") - dpg.add_image("rgb_texture") - with dpg.child_window(width=DISPLAY_WIDTH + 16, height=DISPLAY_HEIGHT + 20): - dpg.add_text("Depth") - dpg.add_image("depth_texture") - with dpg.child_window(width=METADATA_PANEL_WIDTH, height=DISPLAY_HEIGHT + 20): - dpg.add_text("Metadata") - dpg.add_text(default_value="", wrap=METADATA_PANEL_WIDTH - 24, tag="metadata_text") - with dpg.group(horizontal=True): - dpg.add_slider_int( - label="Frame/Bundle", - min_value=0, - max_value=max(0, len(self.state.slots) - 1), - default_value=0, - tag="frame_slider", - width=DISPLAY_WIDTH * 2 + 80, - callback=lambda _s, app_data: self.set_frame_index(int(app_data), debounce=True), - ) - dpg.add_text(default_value="", tag="frame_text") - - with dpg.handler_registry(): - dpg.add_key_press_handler(dpg.mvKey_Left, callback=lambda *_args: self.set_frame_index(self.current_index - 1)) - dpg.add_key_press_handler(dpg.mvKey_Right, callback=lambda *_args: self.set_frame_index(self.current_index + 1)) - dpg.add_key_press_handler(dpg.mvKey_Home, callback=lambda *_args: self.set_frame_index(0)) - dpg.add_key_press_handler(dpg.mvKey_End, callback=lambda *_args: self.set_frame_index(len(self.state.slots) - 1)) - dpg.add_key_press_handler(dpg.mvKey_Spacebar, callback=lambda *_args: self.toggle_playing()) - - dpg.create_viewport( - title=f"MCAP RGBD Viewer - {self.mcap_path.name}", - width=VIEWPORT_WIDTH, - height=VIEWPORT_HEIGHT, - ) - dpg.setup_dearpygui() - dpg.show_viewport() - self.refresh() - try: - while dpg.is_dearpygui_running(): - dpg.render_dearpygui_frame() - self.process_ui_callbacks() - self.maybe_advance() - finally: - self.close_state_resources() - dpg.destroy_context() - self.state.close() - - def _set_depth_min(self, value: float) -> None: - self.depth_min_m = min(value, self.depth_max_m - 0.01) - self.refresh() - - def _set_depth_max(self, value: float) -> None: - self.depth_max_m = max(value, self.depth_min_m + 0.01) - self.refresh() - - def _set_depth_palette(self, value: str) -> None: - if value in DEPTH_PALETTE_TO_OPENCV: - self.depth_palette_name = value - self.refresh() - - -@click.command() 
-@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True)) -@click.option("--camera-label", help="Camera label to view. Defaults to 'camera' or the first sorted multi-camera label.") -@click.option("--ffmpeg-bin", default="ffmpeg", show_default=True, help="ffmpeg binary used to build the preview cache.") -@click.option("--preview-width", default=1280, show_default=True, type=click.IntRange(min=64), help="Maximum width of the temporary decoded preview cache.") -@click.option("--frame-index", default=0, show_default=True, type=click.IntRange(min=0), help="Frame or bundle index used for --export-preview.") -@click.option("--depth-min-m", default=0.2, show_default=True, type=float, help="Minimum displayed depth in meters.") -@click.option("--depth-max-m", default=5.0, show_default=True, type=float, help="Maximum displayed depth in meters.") -@click.option( - "--depth-palette", - default="Turbo", - show_default=True, - type=click.Choice(tuple(DEPTH_PALETTE_TO_OPENCV.keys()), case_sensitive=False), - help="Depth color palette.", -) -@click.option("--summary-only", is_flag=True, help="Print layout/camera/frame metadata and exit without opening a GUI.") -@click.option("--export-preview", type=click.Path(path_type=Path), help="Write a side-by-side RGB/depth preview PNG and exit.") -def main( - mcap_path: Path, - camera_label: str | None, - ffmpeg_bin: str, - preview_width: int, - frame_index: int, - depth_min_m: float, - depth_max_m: float, - depth_palette: str, - summary_only: bool, - export_preview: Path | None, -) -> None: - layout_info = infer_layout(mcap_path.resolve()) - if camera_label is None: - selected_camera = layout_info.camera_labels[0] - else: - selected_camera = camera_label - if selected_camera not in layout_info.camera_labels: - raise click.ClickException( - f"camera label '{selected_camera}' not found. 
available: {', '.join(layout_info.camera_labels)}" - ) - - selected_depth_palette = next( - palette_name for palette_name in DEPTH_PALETTE_TO_OPENCV if palette_name.lower() == depth_palette.lower() - ) - - state = read_camera_state( - mcap_path.resolve(), - layout_info=layout_info, - camera_label=selected_camera, - ffmpeg_bin=ffmpeg_bin, - preview_width=preview_width, - ) - try: - if summary_only: - print_summary(layout_info, state) - return - if export_preview is not None: - export_preview_path = export_preview.expanduser().resolve() - write_preview_image( - state, - export_preview_path, - frame_index=frame_index, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - depth_palette_name=selected_depth_palette, - ) - return - viewer = ViewerApp( - mcap_path=mcap_path.resolve(), - layout_info=layout_info, - initial_state=state, - ffmpeg_bin=ffmpeg_bin, - preview_width=preview_width, - depth_min_m=depth_min_m, - depth_max_m=depth_max_m, - ) - viewer.depth_palette_name = selected_depth_palette - viewer.run() - finally: - with contextlib.suppress(Exception): - state.close() - - -if __name__ == "__main__": - main() diff --git a/scripts/zed_batch_segment_sources.py b/scripts/zed_batch_segment_sources.py deleted file mode 100644 index 24a7651..0000000 --- a/scripts/zed_batch_segment_sources.py +++ /dev/null @@ -1,255 +0,0 @@ -from __future__ import annotations - -import csv -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Generic, Protocol, TypeVar - -import click -from click.core import ParameterSource - - -class SegmentScanLike(Protocol): - segment_dir: Path - matched_files: int - is_valid: bool - - -ScanT = TypeVar("ScanT", bound=SegmentScanLike) - - -@dataclass(slots=True, frozen=True) -class SourceResolution(Generic[ScanT]): - mode: str - segment_dirs: tuple[Path, ...] - ignored_partial_dirs: tuple[ScanT, ...] 
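For orientation on the deleted module above: its helpers are generic over the scan type. Each batch wrapper supplies a dataclass that structurally satisfies `SegmentScanLike` plus a `scan_segment_dir` callback, and gets back a `SourceResolution` describing which directories to process. A minimal sketch of a conforming caller follows; `DemoScan` and its validity rule are hypothetical, and the real tools define richer `SegmentScan` types later in this diff:

```python
from dataclasses import dataclass
from pathlib import Path

@dataclass(slots=True, frozen=True)
class DemoScan:
    # Structural conformance to SegmentScanLike: no inheritance required,
    # the Protocol only demands these three attributes.
    segment_dir: Path
    matched_files: int
    is_valid: bool

def demo_scan(segment_dir: Path) -> DemoScan:
    # Hypothetical rule for illustration: any *.svo2 file marks a segment.
    # The real tools require complete camera sets (see their scan_segment_dir).
    matches = list(segment_dir.glob("*.svo2")) if segment_dir.is_dir() else []
    return DemoScan(segment_dir=segment_dir, matched_files=len(matches), is_valid=bool(matches))

# sources = discover_segment_dirs(Path("/data/run42"), True,
#     scan_segment_dir=demo_scan,
#     no_matches_message=lambda root: f"no segments under {root}")
# sources.segment_dirs -> tuple of valid directories, sorted
```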
- - -def dedupe_paths(paths: list[Path]) -> list[Path]: - ordered: list[Path] = [] - seen: set[Path] = set() - for path in paths: - resolved = path.expanduser().resolve() - if resolved in seen: - continue - seen.add(resolved) - ordered.append(resolved) - return ordered - - -def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]: - csv_path = csv_path.expanduser().resolve() - if not csv_path.is_file(): - raise click.ClickException(f"CSV not found: {csv_path}") - - if csv_root is not None: - base_dir = csv_root.expanduser().resolve() - if not base_dir.is_dir(): - raise click.ClickException(f"CSV root is not a directory: {base_dir}") - else: - base_dir = csv_path.parent - - segment_dirs: list[Path] = [] - seen: set[Path] = set() - with csv_path.open(newline="") as stream: - reader = csv.DictReader(stream) - if reader.fieldnames is None or "segment_dir" not in reader.fieldnames: - raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header") - - for row_number, row in enumerate(reader, start=2): - raw_segment_dir = (row.get("segment_dir") or "").strip() - if not raw_segment_dir: - raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value") - segment_dir = Path(raw_segment_dir) - resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir - resolved = resolved.expanduser().resolve() - if resolved in seen: - continue - seen.add(resolved) - segment_dirs.append(resolved) - - if not segment_dirs: - raise click.ClickException(f"{csv_path} did not contain any segment_dir rows") - return tuple(segment_dirs) - - -def discover_segment_dirs( - root: Path, - recursive: bool, - *, - scan_segment_dir: Callable[[Path], ScanT], - no_matches_message: Callable[[Path], str], -) -> SourceResolution[ScanT]: - resolved_root = root.expanduser().resolve() - if not resolved_root.is_dir(): - raise click.ClickException(f"dataset root does not exist: {resolved_root}") - - candidate_dirs = {resolved_root} - iterator = resolved_root.rglob("*") if recursive else resolved_root.iterdir() - for path in iterator: - if path.is_dir(): - candidate_dirs.add(path.resolve()) - - valid_dirs: list[Path] = [] - ignored_partial_dirs: list[ScanT] = [] - for segment_dir in sorted(candidate_dirs): - scan = scan_segment_dir(segment_dir) - if scan.is_valid: - valid_dirs.append(segment_dir) - elif scan.matched_files > 0: - ignored_partial_dirs.append(scan) - - if not valid_dirs: - raise click.ClickException(no_matches_message(resolved_root)) - - return SourceResolution( - mode="dataset-root", - segment_dirs=tuple(valid_dirs), - ignored_partial_dirs=tuple(ignored_partial_dirs), - ) - - -def raise_if_recursive_flag_is_incompatible( - ctx: click.Context, - dataset_root: Path | None, - *, - dataset_root_flag: str = "--dataset-root", -) -> None: - if ctx.get_parameter_source("recursive") is ParameterSource.DEFAULT: - return - if dataset_root is None: - raise click.ClickException(f"--recursive/--no-recursive can only be used with {dataset_root_flag}") - - -def raise_for_legacy_source_args( - legacy_input_dir: Path | None, - legacy_segment_dirs: tuple[Path, ...], - *, - dataset_root_flag: str = "--dataset-root", - segment_flag: str = "--segment", -) -> None: - if legacy_input_dir is not None: - resolved = legacy_input_dir.expanduser().resolve() - raise click.ClickException( - f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}" - ) - - if legacy_segment_dirs: - resolved = legacy_segment_dirs[0].expanduser().resolve() - raise 
click.ClickException( - f"--segment-dir is no longer supported in this batch wrapper; use {segment_flag} {resolved} " - f"for an explicit segment directory, or {dataset_root_flag} --recursive for discovery" - ) - - -def raise_for_legacy_extra_args( - extra_args: list[str], - *, - dataset_root_flag: str = "--dataset-root", -) -> None: - if not extra_args: - return - - first = extra_args[0] - if first.startswith("-"): - extras_text = " ".join(extra_args) - raise click.ClickException(f"unexpected extra arguments: {extras_text}") - - resolved = Path(first).expanduser().resolve() - raise click.ClickException( - f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}" - ) - - -def raise_if_segment_path_looks_like_dataset_root( - segment_dir: Path, - *, - scan_segment_dir: Callable[[Path], ScanT], - dataset_root_flag: str = "--dataset-root", - segment_flag: str = "--segment", -) -> None: - resolved = segment_dir.expanduser().resolve() - if not resolved.is_dir(): - return - - scan = scan_segment_dir(resolved) - if scan.is_valid or scan.matched_files > 0: - return - - nested_segments = _find_nested_valid_segment_dirs(resolved, scan_segment_dir=scan_segment_dir) - if not nested_segments: - return - - example = nested_segments[0] - raise click.ClickException( - f"{resolved} looks like a dataset root, not a segment directory. " - f"{segment_flag} expects a directory that directly contains *_zedN.svo or *_zedN.svo2 files. " - f"Use {dataset_root_flag} {resolved} to discover nested segments such as {example}" - ) - - -def resolve_sources( - dataset_root: Path | None, - segment_dirs: tuple[Path, ...], - segments_csv: Path | None, - csv_root: Path | None, - recursive: bool, - *, - scan_segment_dir: Callable[[Path], ScanT], - no_matches_message: Callable[[Path], str], -) -> SourceResolution[ScanT]: - source_count = sum( - ( - 1 if dataset_root is not None else 0, - 1 if segment_dirs else 0, - 1 if segments_csv is not None else 0, - ) - ) - if source_count != 1: - raise click.ClickException( - "provide exactly one source mode: --dataset-root, --segment, or --segments-csv" - ) - - if dataset_root is not None: - return discover_segment_dirs( - dataset_root, - recursive, - scan_segment_dir=scan_segment_dir, - no_matches_message=no_matches_message, - ) - - if segment_dirs: - ordered_dirs = dedupe_paths(list(segment_dirs)) - for segment_dir in ordered_dirs: - raise_if_segment_path_looks_like_dataset_root( - segment_dir, - scan_segment_dir=scan_segment_dir, - ) - return SourceResolution(mode="segments", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=()) - - return SourceResolution( - mode="segments-csv", - segment_dirs=parse_segments_csv(segments_csv, csv_root), - ignored_partial_dirs=(), - ) - - -def _find_nested_valid_segment_dirs( - root: Path, - *, - scan_segment_dir: Callable[[Path], ScanT], - limit: int = 3, -) -> tuple[Path, ...]: - matches: list[Path] = [] - for path in sorted(root.rglob("*")): - if not path.is_dir(): - continue - resolved = path.resolve() - if resolved == root: - continue - scan = scan_segment_dir(resolved) - if scan.is_valid: - matches.append(resolved) - if len(matches) >= limit: - break - return tuple(matches) diff --git a/scripts/zed_batch_svo_grid_to_mp4.py b/scripts/zed_batch_svo_grid_to_mp4.py deleted file mode 100644 index a6d51ff..0000000 --- a/scripts/zed_batch_svo_grid_to_mp4.py +++ /dev/null @@ -1,747 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import concurrent.futures -import json -import math -import os 
-import re -import shutil -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path - -import click -from tqdm import tqdm - -try: - from scripts import zed_batch_segment_sources as segment_sources -except ModuleNotFoundError: - import zed_batch_segment_sources as segment_sources - - -SCRIPT_PATH = Path(__file__).resolve() -REPO_ROOT = SCRIPT_PATH.parents[1] -SEGMENT_FILE_PATTERN = re.compile(r".*_zed([1-4])\.svo2?$", re.IGNORECASE) -EXPECTED_CAMERAS = ("zed1", "zed2", "zed3", "zed4") - - -@dataclass(slots=True, frozen=True) -class BatchConfig: - zed_bin: Path | None - ffprobe_bin: Path | None - probe_existing: bool - cuda_visible_devices: str | None - overwrite: bool - fail_fast: bool - codec: str - encoder_device: str - preset: str - tune: str - quality: int - gop: int - b_frames: int - start_offset_seconds: float - duration_seconds: float | None - output_fps: float | None - tile_scale: float - - -@dataclass(slots=True, frozen=True) -class ConversionJob: - segment_dir: Path - output_path: Path - - -@dataclass(slots=True, frozen=True) -class JobResult: - status: str - segment_dir: Path - output_path: Path - command: tuple[str, ...] - return_code: int = 0 - stdout: str = "" - stderr: str = "" - - -@dataclass(slots=True, frozen=True) -class SegmentScan: - segment_dir: Path - matched_files: int - is_valid: bool - reason: str | None = None - - -@dataclass(slots=True, frozen=True) -class OutputProbeResult: - output_path: Path - status: str - reason: str = "" - duration_seconds: float | None = None - - -def locate_binary(override: Path | None) -> Path: - if override is not None: - candidate = override.expanduser().resolve() - if not candidate.is_file(): - raise click.ClickException(f"binary not found: {candidate}") - return candidate - - candidates = ( - REPO_ROOT / "build" / "bin" / "zed_svo_grid_to_mp4", - REPO_ROOT / "build" / "zed_svo_grid_to_mp4", - ) - for candidate in candidates: - if candidate.is_file(): - return candidate - raise click.ClickException(f"could not find zed_svo_grid_to_mp4 under {REPO_ROOT / 'build'}") - - -def locate_ffprobe(override: Path | None) -> Path: - if override is not None: - candidate = override.expanduser().resolve() - if not candidate.is_file(): - raise click.ClickException(f"ffprobe binary not found: {candidate}") - return candidate - - resolved = shutil.which("ffprobe") - if resolved is None: - raise click.ClickException("could not find ffprobe on PATH") - return Path(resolved).resolve() - - -def scan_segment_dir(segment_dir: Path) -> SegmentScan: - if not segment_dir.is_dir(): - return SegmentScan( - segment_dir=segment_dir, - matched_files=0, - is_valid=False, - reason=f"segment directory does not exist: {segment_dir}", - ) - - matched_by_camera: dict[str, list[Path]] = {camera: [] for camera in EXPECTED_CAMERAS} - for child in segment_dir.iterdir(): - if not child.is_file(): - continue - match = SEGMENT_FILE_PATTERN.fullmatch(child.name) - if match is None: - continue - matched_by_camera[f"zed{match.group(1)}"].append(child) - - matched_files = sum(len(paths) for paths in matched_by_camera.values()) - duplicate_cameras = [camera for camera, paths in matched_by_camera.items() if len(paths) > 1] - missing_cameras = [camera for camera, paths in matched_by_camera.items() if len(paths) == 0] - - if duplicate_cameras: - duplicate_text = ", ".join(duplicate_cameras) - return SegmentScan( - segment_dir=segment_dir, - matched_files=matched_files, - is_valid=False, - reason=f"duplicate camera inputs under {segment_dir}: 
{duplicate_text}", - ) - if missing_cameras: - missing_text = ", ".join(missing_cameras) - return SegmentScan( - segment_dir=segment_dir, - matched_files=matched_files, - is_valid=False, - reason=f"missing camera inputs under {segment_dir}: {missing_text}", - ) - - return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True) - - -def output_path_for(segment_dir: Path) -> Path: - return segment_dir / f"{segment_dir.name}_grid.mp4" - - -def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]: - if config.zed_bin is None: - raise RuntimeError("zed_svo_grid_to_mp4 binary is not configured") - - command = [ - str(config.zed_bin), - "--segment-dir", - str(job.segment_dir), - "--codec", - config.codec, - "--encoder-device", - config.encoder_device, - "--preset", - config.preset, - "--tune", - config.tune, - "--quality", - str(config.quality), - "--gop", - str(config.gop), - "--b-frames", - str(config.b_frames), - "--start-offset-seconds", - str(config.start_offset_seconds), - "--tile-scale", - str(config.tile_scale), - ] - if config.duration_seconds is not None: - command.extend(["--duration-seconds", str(config.duration_seconds)]) - if config.output_fps is not None: - command.extend(["--output-fps", str(config.output_fps)]) - return command - - -def env_for_job(config: BatchConfig) -> dict[str, str]: - env = dict(os.environ) - if config.cuda_visible_devices is not None: - env["CUDA_VISIBLE_DEVICES"] = config.cuda_visible_devices - return env - - -def probe_output(output_path: Path, ffprobe_bin: Path | None) -> OutputProbeResult: - if not output_path.is_file(): - return OutputProbeResult(output_path=output_path, status="missing") - if ffprobe_bin is None: - raise RuntimeError("ffprobe binary is not configured") - - completed = subprocess.run( - [ - str(ffprobe_bin), - "-v", - "error", - "-print_format", - "json", - "-show_entries", - "format=duration,size:stream=codec_type,codec_name,width,height,nb_frames", - str(output_path), - ], - check=False, - capture_output=True, - text=True, - ) - if completed.returncode != 0: - reason = completed.stderr.strip() or completed.stdout.strip() or "ffprobe failed" - return OutputProbeResult(output_path=output_path, status="invalid", reason=reason) - - try: - payload = json.loads(completed.stdout) - except json.JSONDecodeError as error: - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"ffprobe returned invalid JSON: {error}", - ) - - streams = payload.get("streams", []) - has_video_stream = any(stream.get("codec_type") == "video" for stream in streams) - if not has_video_stream: - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason="ffprobe found no video stream", - ) - - format_payload = payload.get("format", {}) - duration_text = format_payload.get("duration") - if duration_text in (None, ""): - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason="ffprobe did not report a duration", - ) - - try: - duration_seconds = float(duration_text) - except (TypeError, ValueError): - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"ffprobe reported a non-numeric duration: {duration_text!r}", - ) - if not math.isfinite(duration_seconds) or duration_seconds <= 0.0: - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"ffprobe reported a non-positive duration: {duration_seconds}", - ) - - return OutputProbeResult( - output_path=output_path, - status="valid", - 
duration_seconds=duration_seconds, - ) - - -def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult: - command = command_for_job(job, config) - completed = subprocess.run( - command, - check=False, - capture_output=True, - text=True, - env=env_for_job(config), - ) - status = "converted" if completed.returncode == 0 else "failed" - return JobResult( - status=status, - segment_dir=job.segment_dir, - output_path=job.output_path, - command=tuple(command), - return_code=completed.returncode, - stdout=completed.stdout, - stderr=completed.stderr, - ) - - -def summarize_failures(results: list[JobResult]) -> None: - failed_results = [result for result in results if result.status == "failed"] - if not failed_results: - return - - click.echo("\nFailed conversions:", err=True) - for result in failed_results: - click.echo(f"- {result.segment_dir} (exit {result.return_code})", err=True) - if result.stderr.strip(): - click.echo(result.stderr.rstrip(), err=True) - elif result.stdout.strip(): - click.echo(result.stdout.rstrip(), err=True) - - -def report_invalid_existing_outputs( - invalid_existing: list[tuple[ConversionJob, OutputProbeResult]], -) -> None: - if not invalid_existing: - return - - click.echo("\nInvalid existing outputs:", err=True) - for job, probe in invalid_existing: - click.echo(f"- {job.segment_dir}", err=True) - click.echo(f" output: {probe.output_path}", err=True) - reason_lines = probe.reason.splitlines() or [probe.reason] - click.echo(f" reason: {reason_lines[0]}", err=True) - for line in reason_lines[1:]: - click.echo(f" {line}", err=True) - - -def report_dry_run_plan( - pending_jobs: list[ConversionJob], - pending_reasons: dict[Path, str], - pending_details: dict[Path, str], -) -> None: - if not pending_jobs: - click.echo("dry-run: no conversions would be launched", err=True) - return - - click.echo("\nDry-run plan:", err=True) - for job in pending_jobs: - reason = pending_reasons[job.segment_dir] - detail = pending_details.get(job.segment_dir) - line = f"- {job.segment_dir} [{reason}]" - if detail: - line = f"{line}: {detail.replace(chr(10), ' | ')}" - click.echo(line, err=True) - - -def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -> tuple[list[JobResult], int]: - results: list[JobResult] = [] - aborted_count = 0 - if not jobs: - return results, aborted_count - - future_to_job: dict[concurrent.futures.Future[JobResult], ConversionJob] = {} - job_iter = iter(jobs) - stop_submitting = False - - with concurrent.futures.ThreadPoolExecutor(max_workers=jobs_limit) as executor: - with tqdm(total=len(jobs), unit="segment", dynamic_ncols=True) as progress: - - def submit_next() -> bool: - if stop_submitting: - return False - try: - job = next(job_iter) - except StopIteration: - return False - future = executor.submit(run_conversion, job, config) - future_to_job[future] = job - return True - - for _ in range(min(jobs_limit, len(jobs))): - submit_next() - - while future_to_job: - done, _ = concurrent.futures.wait( - future_to_job, - return_when=concurrent.futures.FIRST_COMPLETED, - ) - for future in done: - job = future_to_job.pop(future) - result = future.result() - results.append(result) - progress.update(1) - - if result.status == "failed": - tqdm.write( - f"failed: {job.segment_dir} (exit {result.return_code})", - file=sys.stderr, - ) - if config.fail_fast: - stop_submitting = True - - if not stop_submitting: - submit_next() - - if stop_submitting: - remaining = sum(1 for _ in job_iter) - aborted_count = remaining - progress.total = 
progress.n + len(future_to_job) - progress.refresh() - - return results, aborted_count - - -@click.command(context_settings={"allow_extra_args": True}) -@click.option( - "--dataset-root", - type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), - help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.", -) -@click.option( - "--segment", - "segment_dirs", - multiple=True, - type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True), - help=( - "Explicit segment directory. Repeatable. The directory must directly contain " - "*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv." - ), -) -@click.option( - "--segment-dir", - "legacy_segment_dirs", - multiple=True, - type=click.Path(path_type=Path, file_okay=False, dir_okay=True), - hidden=True, -) -@click.option( - "--segments-csv", - type=click.Path(path_type=Path, dir_okay=False), - help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.", -) -@click.option( - "--csv-root", - type=click.Path(path_type=Path, file_okay=False, dir_okay=True), - help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", -) -@click.option( - "--recursive/--no-recursive", - default=True, - show_default=True, - help="Recurse when discovering segment directories from --dataset-root.", -) -@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") -@click.option( - "--zed-bin", - type=click.Path(path_type=Path, dir_okay=False), - help="Explicit path to the zed_svo_grid_to_mp4 binary.", -) -@click.option( - "--ffprobe-bin", - type=click.Path(path_type=Path, dir_okay=False), - help="Explicit path to ffprobe. Required when probing existing outputs and ffprobe is not on PATH.", -) -@click.option( - "--cuda-visible-devices", - help="Optional CUDA_VISIBLE_DEVICES value exported for each conversion subprocess.", -) -@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing grid MP4 files.") -@click.option( - "--probe-existing/--trust-existing", - default=False, - show_default=True, - help="Validate existing grid MP4 files with ffprobe before skipping them. 
Invalid outputs are treated as missing.", -) -@click.option( - "--report-existing", - is_flag=True, - help="Probe existing grid MP4 files with ffprobe, report invalid ones, and do not launch conversions.", -) -@click.option( - "--dry-run", - is_flag=True, - help="Show which segments would be converted after applying skip/probe logic, without launching conversions.", -) -@click.option( - "--fail-fast/--continue-on-error", - default=False, - show_default=True, - help="Stop submitting new work after the first failed conversion.", -) -@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True) -@click.option( - "--encoder-device", - type=click.Choice(("auto", "nvidia", "software")), - default="auto", - show_default=True, -) -@click.option("--preset", type=click.Choice(("fast", "balanced", "quality")), default="fast", show_default=True) -@click.option( - "--tune", - type=click.Choice(("low-latency", "balanced")), - default="low-latency", - show_default=True, -) -@click.option( - "--quality", - type=click.IntRange(min=0, max=51), - default=23, - show_default=True, - help="Lower values mean higher quality.", -) -@click.option("--gop", type=click.IntRange(min=1), default=30, show_default=True) -@click.option("--b-frames", "b_frames", type=click.IntRange(min=0), default=0, show_default=True) -@click.option( - "--start-offset-seconds", - type=click.FloatRange(min=0.0), - default=0.0, - show_default=True, - help="Offset applied after the synced common start time.", -) -@click.option( - "--duration-seconds", - type=click.FloatRange(min=0.0, min_open=True), - default=None, - help="Limit export duration in seconds after sync.", -) -@click.option( - "--output-fps", - type=click.FloatRange(min=0.0, min_open=True), - default=None, - help="Composite output frame rate. 
Defaults to the grid tool's native behavior.", -) -@click.option( - "--tile-scale", - type=click.FloatRange(min=0.1, max=1.0), - default=0.5, - show_default=True, - help="Scale each tile relative to the source resolution.", -) -@click.pass_context -def main( - ctx: click.Context, - dataset_root: Path | None, - segment_dirs: tuple[Path, ...], - legacy_segment_dirs: tuple[Path, ...], - segments_csv: Path | None, - csv_root: Path | None, - recursive: bool, - jobs: int, - zed_bin: Path | None, - ffprobe_bin: Path | None, - cuda_visible_devices: str | None, - overwrite: bool, - probe_existing: bool, - report_existing: bool, - dry_run: bool, - fail_fast: bool, - codec: str, - encoder_device: str, - preset: str, - tune: str, - quality: int, - gop: int, - b_frames: int, - start_offset_seconds: float, - duration_seconds: float | None, - output_fps: float | None, - tile_scale: float, -) -> None: - """Batch-convert synced four-camera ZED segments into grid MP4 files.""" - segment_sources.raise_for_legacy_extra_args(ctx.args) - segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs) - segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root) - - if b_frames > gop: - raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames") - if report_existing and dry_run: - raise click.ClickException("--report-existing and --dry-run are mutually exclusive") - - sources = segment_sources.resolve_sources( - dataset_root, - segment_dirs, - segments_csv, - csv_root, - recursive, - scan_segment_dir=scan_segment_dir, - no_matches_message=lambda root: f"no complete four-camera segments found under {root}", - ) - ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None - binary_path = None if report_existing else locate_binary(zed_bin) - config = BatchConfig( - zed_bin=binary_path, - ffprobe_bin=ffprobe_path, - probe_existing=probe_existing or report_existing, - cuda_visible_devices=cuda_visible_devices, - overwrite=overwrite, - fail_fast=fail_fast, - codec=codec, - encoder_device=encoder_device, - preset=preset, - tune=tune, - quality=quality, - gop=gop, - b_frames=b_frames, - start_offset_seconds=start_offset_seconds, - duration_seconds=duration_seconds, - output_fps=output_fps, - tile_scale=tile_scale, - ) - - skipped_results: list[JobResult] = [] - failed_results: list[JobResult] = [] - pending_jobs: list[ConversionJob] = [] - pending_reasons: dict[Path, str] = {} - pending_details: dict[Path, str] = {} - valid_existing: list[OutputProbeResult] = [] - invalid_existing: list[tuple[ConversionJob, OutputProbeResult]] = [] - missing_outputs: list[ConversionJob] = [] - - for segment_dir in sources.segment_dirs: - output_path = output_path_for(segment_dir) - job = ConversionJob(segment_dir=segment_dir, output_path=output_path) - command = tuple(command_for_job(job, config)) if config.zed_bin is not None else () - scan = scan_segment_dir(segment_dir) - if not scan.is_valid: - failed_results.append( - JobResult( - status="failed", - segment_dir=segment_dir, - output_path=output_path, - command=command, - return_code=2, - stderr=scan.reason or "", - ) - ) - continue - - if report_existing: - probe_result = probe_output(output_path, config.ffprobe_bin) - if probe_result.status == "valid": - valid_existing.append(probe_result) - elif probe_result.status == "invalid": - invalid_existing.append((job, probe_result)) - else: - missing_outputs.append(job) - continue - - if overwrite: - pending_jobs.append(job) - 
pending_reasons[segment_dir] = "overwrite" - continue - - if config.probe_existing: - probe_result = probe_output(output_path, config.ffprobe_bin) - if probe_result.status == "valid": - valid_existing.append(probe_result) - skipped_results.append( - JobResult( - status="skipped", - segment_dir=segment_dir, - output_path=output_path, - command=command, - ) - ) - continue - if probe_result.status == "invalid": - invalid_existing.append((job, probe_result)) - pending_jobs.append(job) - pending_reasons[segment_dir] = "invalid-existing-output" - pending_details[segment_dir] = probe_result.reason - continue - missing_outputs.append(job) - pending_jobs.append(job) - pending_reasons[segment_dir] = "missing-output" - continue - - if output_path.exists(): - skipped_results.append( - JobResult( - status="skipped", - segment_dir=segment_dir, - output_path=output_path, - command=command, - ) - ) - continue - - pending_jobs.append(job) - pending_reasons[segment_dir] = "missing-output" - - if report_existing: - click.echo( - ( - f"source={sources.mode} matched={len(sources.segment_dirs)} valid={len(valid_existing)} " - f"invalid={len(invalid_existing)} missing={len(missing_outputs)} " - f"invalid-segments={len(failed_results)}" - ), - err=True, - ) - if sources.ignored_partial_dirs: - click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True) - report_invalid_existing_outputs(invalid_existing) - summarize_failures(failed_results) - if failed_results or invalid_existing: - raise SystemExit(1) - return - - click.echo( - ( - f"source={sources.mode} matched={len(sources.segment_dirs)} pending={len(pending_jobs)} " - f"skipped={len(skipped_results)} invalid={len(failed_results)} jobs={jobs} " - f"dry_run={'yes' if dry_run else 'no'}" - ), - err=True, - ) - if sources.ignored_partial_dirs: - click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True) - if config.probe_existing: - click.echo( - ( - f"probed-existing: valid={len(valid_existing)} invalid={len(invalid_existing)} " - f"missing={len(missing_outputs)}" - ), - err=True, - ) - - if dry_run: - report_dry_run_plan(pending_jobs, pending_reasons, pending_details) - summarize_failures(failed_results) - if failed_results: - raise SystemExit(1) - return - - results = list(skipped_results) - results.extend(failed_results) - conversion_results, aborted_count = run_batch(pending_jobs, config, jobs) - results.extend(conversion_results) - - converted_count = sum(1 for result in results if result.status == "converted") - skipped_count = sum(1 for result in results if result.status == "skipped") - failed_count = sum(1 for result in results if result.status == "failed") - - click.echo( - ( - f"summary: matched={len(sources.segment_dirs)} converted={converted_count} " - f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}" - ), - err=True, - ) - summarize_failures(results) - - if failed_count > 0 or aborted_count > 0: - raise SystemExit(1) - - -if __name__ == "__main__": - main() diff --git a/scripts/zed_batch_svo_to_mcap.py b/scripts/zed_batch_svo_to_mcap.py deleted file mode 100644 index 8a85216..0000000 --- a/scripts/zed_batch_svo_to_mcap.py +++ /dev/null @@ -1,1402 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import concurrent.futures -import importlib -import os -import re -import subprocess -import sys -import time -from collections import Counter -from dataclasses import dataclass -from pathlib import Path - -import click -from progress_table import ProgressTable - -try: - 
from scripts import zed_batch_segment_sources as segment_sources -except ModuleNotFoundError: - import zed_batch_segment_sources as segment_sources - - -SCRIPT_PATH = Path(__file__).resolve() -REPO_ROOT = SCRIPT_PATH.parents[1] -WORKSPACE_ROOT = REPO_ROOT.parent -MCAP_PYTHON_ROOT = WORKSPACE_ROOT / "mcap" / "python" / "mcap" -SEGMENT_FILE_PATTERN = re.compile(r".*_zed([0-9]+)\.svo2?$", re.IGNORECASE) - - -@dataclass(slots=True, frozen=True) -class BatchConfig: - zed_bin: Path | None - probe_existing: bool - overwrite: bool - fail_fast: bool - codec: str - mcap_compression: str - depth_mode: str - depth_size: str - bundle_policy: str - copy_range: str - bundle_topic: str | None - with_pose: bool - pose_config: Path | None - world_frame_id: str | None - start_frame: int | None - end_frame: int | None - sync_tolerance_ms: float | None - progress_ui: str - - -@dataclass(slots=True, frozen=True) -class ConversionJob: - segment_dir: Path - output_path: Path - camera_labels: tuple[str, ...] - display_name: str - - -@dataclass(slots=True, frozen=True) -class WorkerSlot: - label: str - encoder_device: str - cuda_visible_devices: str | None - - -@dataclass(slots=True, frozen=True) -class JobResult: - status: str - segment_dir: Path - output_path: Path - command: tuple[str, ...] - return_code: int = 0 - stdout: str = "" - stderr: str = "" - - -@dataclass(slots=True, frozen=True) -class SegmentScan: - segment_dir: Path - matched_files: int - camera_labels: tuple[str, ...] - is_valid: bool - reason: str | None = None - - -@dataclass(slots=True, frozen=True) -class OutputProbeResult: - output_path: Path - status: str - reason: str = "" - - -@dataclass(slots=True) -class ActiveJobState: - submission_index: int - job: ConversionJob - slot: WorkerSlot - started_at_monotonic: float - row_index: int | None = None - - -_MCAP_READER_MODULE = None -_BUNDLE_MANIFEST_CLASS_CACHE: dict[bytes, tuple[object, int | None]] = {} -TABLE_REFRESH_SECONDS = 1.0 -TEXT_HEARTBEAT_SECONDS = 30.0 - - -def format_elapsed(seconds: float) -> str: - rounded = max(0, int(round(seconds))) - minutes, secs = divmod(rounded, 60) - hours, minutes = divmod(minutes, 60) - if hours > 0: - return f"{hours:d}:{minutes:02d}:{secs:02d}" - return f"{minutes:02d}:{secs:02d}" - - -class ProgressReporter: - heartbeat_interval_seconds: float - - def __init__(self, total_jobs: int) -> None: - self.total_jobs = total_jobs - self.heartbeat_interval_seconds = TEXT_HEARTBEAT_SECONDS - - def job_started(self, state: ActiveJobState) -> None: - return - - def job_finished(self, state: ActiveJobState, result: JobResult) -> None: - return - - def heartbeat( - self, - *, - completed_count: int, - failed_count: int, - active_states: list[ActiveJobState], - ) -> None: - return - - def close(self) -> None: - return - - -class TextProgressReporter(ProgressReporter): - def __init__(self, total_jobs: int) -> None: - super().__init__(total_jobs) - self.heartbeat_interval_seconds = TEXT_HEARTBEAT_SECONDS - - def job_started(self, state: ActiveJobState) -> None: - cuda_label = state.slot.cuda_visible_devices or "-" - click.echo( - ( - f"started: [{state.submission_index}/{self.total_jobs}] " - f"{state.slot.label} encoder={state.slot.encoder_device} cuda={cuda_label} " - f"segment={state.job.display_name}" - ), - err=True, - ) - - def job_finished(self, state: ActiveJobState, result: JobResult) -> None: - elapsed = format_elapsed(time.monotonic() - state.started_at_monotonic) - prefix = "completed" if result.status == "converted" else "failed" - exit_text = "" if 
result.status == "converted" else f" exit={result.return_code}" - click.echo( - ( - f"{prefix}: [{state.submission_index}/{self.total_jobs}] " - f"{state.slot.label} elapsed={elapsed}{exit_text} segment={state.job.display_name}" - ), - err=True, - ) - if result.status == "failed": - for line in failure_excerpt(result): - click.echo(f" {line}", err=True) - - def heartbeat( - self, - *, - completed_count: int, - failed_count: int, - active_states: list[ActiveJobState], - ) -> None: - active_count = len(active_states) - remaining_count = self.total_jobs - completed_count - failed_count - active_count - click.echo( - ( - f"progress: completed={completed_count} failed={failed_count} " - f"active={active_count} remaining={remaining_count}" - ), - err=True, - ) - - -class TableProgressReporter(ProgressReporter): - def __init__(self, total_jobs: int) -> None: - super().__init__(total_jobs) - self.heartbeat_interval_seconds = TABLE_REFRESH_SECONDS - self.table = ProgressTable( - "#", - "segment", - "worker", - "encoder", - "cuda", - "status", - "elapsed_s", - interactive=2, - refresh_rate=10, - default_column_alignment="left", - default_column_width=12, - pbar_show_throughput=False, - pbar_show_progress=False, - pbar_show_percents=False, - pbar_show_eta=False, - print_header_every_n_rows=30, - file=sys.stderr, - ) - self.table.add_column("#", width=4, alignment="right") - self.table.add_column("segment", width=44, alignment="left") - self.table.add_column("worker", width=8, alignment="left") - self.table.add_column("encoder", width=10, alignment="left") - self.table.add_column("cuda", width=6, alignment="left") - self.table.add_column("status", width=12, alignment="left") - self.table.add_column("elapsed_s", width=10, alignment="right") - - def job_started(self, state: ActiveJobState) -> None: - self.table.add_row( - state.submission_index, - state.job.display_name, - state.slot.label, - state.slot.encoder_device, - state.slot.cuda_visible_devices or "-", - "running", - format_elapsed(0.0), - ) - state.row_index = self.table.num_rows() - 1 - - def job_finished(self, state: ActiveJobState, result: JobResult) -> None: - if state.row_index is None: - return - self.table.update("status", "converted" if result.status == "converted" else f"failed({result.return_code})", row=state.row_index) - self.table.update( - "elapsed_s", - format_elapsed(time.monotonic() - state.started_at_monotonic), - row=state.row_index, - ) - - def heartbeat( - self, - *, - completed_count: int, - failed_count: int, - active_states: list[ActiveJobState], - ) -> None: - for state in active_states: - if state.row_index is None: - continue - self.table.update( - "elapsed_s", - format_elapsed(time.monotonic() - state.started_at_monotonic), - row=state.row_index, - ) - - def close(self) -> None: - self.table.close() - - -def locate_binary(override: Path | None) -> Path: - if override is not None: - candidate = override.expanduser().resolve() - if not candidate.is_file(): - raise click.ClickException(f"binary not found: {candidate}") - return candidate - - candidates = ( - REPO_ROOT / "build" / "bin" / "zed_svo_to_mcap", - REPO_ROOT / "build" / "zed_svo_to_mcap", - ) - for candidate in candidates: - if candidate.is_file(): - return candidate - raise click.ClickException(f"could not find zed_svo_to_mcap under {REPO_ROOT / 'build'}") - - -def sorted_camera_labels(labels: set[str]) -> tuple[str, ...]: - return tuple(sorted(labels, key=lambda label: int(label[3:]))) - - -def scan_segment_dir(segment_dir: Path) -> SegmentScan: - if not 
segment_dir.is_dir(): - return SegmentScan( - segment_dir=segment_dir, - matched_files=0, - camera_labels=(), - is_valid=False, - reason=f"segment directory does not exist: {segment_dir}", - ) - - matched_by_camera: dict[str, list[Path]] = {} - for child in segment_dir.iterdir(): - if not child.is_file(): - continue - match = SEGMENT_FILE_PATTERN.fullmatch(child.name) - if match is None: - continue - label = f"zed{int(match.group(1))}" - matched_by_camera.setdefault(label, []).append(child) - - matched_files = sum(len(paths) for paths in matched_by_camera.values()) - camera_labels = sorted_camera_labels(set(matched_by_camera)) - duplicate_cameras = [label for label, paths in sorted(matched_by_camera.items()) if len(paths) > 1] - - if duplicate_cameras: - duplicate_text = ", ".join(duplicate_cameras) - return SegmentScan( - segment_dir=segment_dir, - matched_files=matched_files, - camera_labels=camera_labels, - is_valid=False, - reason=f"duplicate camera inputs under {segment_dir}: {duplicate_text}", - ) - if len(camera_labels) < 2: - return SegmentScan( - segment_dir=segment_dir, - matched_files=matched_files, - camera_labels=camera_labels, - is_valid=False, - reason=f"expected at least 2 camera inputs under {segment_dir}, found {len(camera_labels)}", - ) - - return SegmentScan( - segment_dir=segment_dir, - matched_files=matched_files, - camera_labels=camera_labels, - is_valid=True, - ) - - -def output_path_for(segment_dir: Path) -> Path: - return segment_dir / f"{segment_dir.name}.mcap" - - -def common_segment_parent(segment_dirs: tuple[Path, ...]) -> Path | None: - if len(segment_dirs) <= 1: - return None - try: - return Path(os.path.commonpath([str(path) for path in segment_dirs])) - except ValueError: - return None - - -def display_name_for_segment( - segment_dir: Path, - *, - source_mode: str, - input_root: Path | None, - common_parent: Path | None, -) -> str: - if source_mode == "dataset-root" and input_root is not None: - try: - return str(segment_dir.relative_to(input_root)) - except ValueError: - pass - if common_parent is not None: - try: - relative = segment_dir.relative_to(common_parent) - if str(relative) != ".": - return str(relative) - except ValueError: - pass - parent_name = segment_dir.parent.name - if parent_name: - return str(Path(parent_name) / segment_dir.name) - return segment_dir.name - - -def command_for_job(job: ConversionJob, config: BatchConfig, encoder_device: str) -> list[str]: - if config.zed_bin is None: - raise RuntimeError("zed_svo_to_mcap binary is not configured") - - command = [ - str(config.zed_bin), - "--segment-dir", - str(job.segment_dir), - "--codec", - config.codec, - "--encoder-device", - encoder_device, - "--mcap-compression", - config.mcap_compression, - "--depth-mode", - config.depth_mode, - "--depth-size", - config.depth_size, - "--bundle-policy", - config.bundle_policy, - "--copy-range", - config.copy_range, - ] - if config.bundle_topic and config.bundle_policy != "copy": - command.extend(["--bundle-topic", config.bundle_topic]) - if config.with_pose: - command.append("--with-pose") - if config.pose_config is not None: - command.extend(["--pose-config", str(config.pose_config)]) - if config.world_frame_id is not None: - command.extend(["--world-frame-id", config.world_frame_id]) - if config.start_frame is not None: - command.extend(["--start-frame", str(config.start_frame)]) - if config.end_frame is not None: - command.extend(["--end-frame", str(config.end_frame)]) - if config.sync_tolerance_ms is not None: - 
command.extend(["--sync-tolerance-ms", str(config.sync_tolerance_ms)]) - return command - - -def env_for_job(config: BatchConfig) -> dict[str, str]: - return env_for_job_with_cuda(None) - - -def env_for_job_with_cuda(assigned_cuda_visible_devices: str | None) -> dict[str, str]: - env = dict(os.environ) - if assigned_cuda_visible_devices is not None: - env["CUDA_VISIBLE_DEVICES"] = assigned_cuda_visible_devices - return env - - -def parse_cuda_device_pool(raw_value: str | None) -> tuple[str, ...]: - if raw_value is None: - return () - devices = tuple(device.strip() for device in raw_value.split(",") if device.strip()) - return devices - - -def choose_progress_reporter(progress_ui: str, total_jobs: int) -> ProgressReporter: - if progress_ui == "table": - return TableProgressReporter(total_jobs) - if progress_ui == "text": - return TextProgressReporter(total_jobs) - if sys.stderr.isatty(): - return TableProgressReporter(total_jobs) - return TextProgressReporter(total_jobs) - - -def load_mcap_reader(): - global _MCAP_READER_MODULE - if _MCAP_READER_MODULE is not None: - return _MCAP_READER_MODULE - - if str(MCAP_PYTHON_ROOT) not in sys.path: - sys.path.insert(0, str(MCAP_PYTHON_ROOT)) - try: - _MCAP_READER_MODULE = importlib.import_module("mcap.reader") - except ModuleNotFoundError as error: - raise click.ClickException( - f"could not import mcap.reader from {MCAP_PYTHON_ROOT}" - ) from error - return _MCAP_READER_MODULE - - -def required_topics_for(camera_labels: tuple[str, ...]) -> set[str]: - topics: set[str] = set() - for label in camera_labels: - topics.add(f"/{label}/video") - topics.add(f"/{label}/depth") - topics.add(f"/{label}/calibration") - return topics - - -def load_bundle_manifest_type(schema_data: bytes) -> tuple[object, int | None]: - cached = _BUNDLE_MANIFEST_CLASS_CACHE.get(schema_data) - if cached is not None: - return cached - - from google.protobuf import descriptor_pb2, descriptor_pool, message_factory, timestamp_pb2 - - descriptor_set = descriptor_pb2.FileDescriptorSet() - descriptor_set.ParseFromString(schema_data) - pool = descriptor_pool.DescriptorPool() - has_embedded_timestamp = any( - file_descriptor.name == "google/protobuf/timestamp.proto" - for file_descriptor in descriptor_set.file - ) - if has_embedded_timestamp: - for file_descriptor in descriptor_set.file: - if file_descriptor.name == "google/protobuf/timestamp.proto": - pool.Add(file_descriptor) - break - else: - pool.AddSerializedFile(timestamp_pb2.DESCRIPTOR.serialized_pb) - for file_descriptor in descriptor_set.file: - if file_descriptor.name == "google/protobuf/timestamp.proto": - continue - pool.Add(file_descriptor) - message_descriptor = pool.FindMessageTypeByName("cvmmap_streamer.BundleManifest") - message_class = message_factory.GetMessageClass(message_descriptor) - present_value = None - if "BundleMemberStatus" in message_descriptor.enum_types_by_name: - status_enum = message_descriptor.enum_types_by_name["BundleMemberStatus"] - present_value = status_enum.values_by_name["BUNDLE_MEMBER_STATUS_PRESENT"].number - _BUNDLE_MANIFEST_CLASS_CACHE[schema_data] = (message_class, present_value) - return message_class, present_value - - -def probe_output( - output_path: Path, - camera_labels: tuple[str, ...], - *, - layout: str, - bundle_topic: str | None, -) -> OutputProbeResult: - if not output_path.is_file(): - return OutputProbeResult(output_path=output_path, status="missing") - - reader_module = load_mcap_reader() - expected_topics = required_topics_for(camera_labels) - require_bundle = layout == 
"bundled" and len(camera_labels) > 1 and bool(bundle_topic) - if require_bundle: - expected_topics.add(bundle_topic or "/bundle") - found_topics: set[str] = set() - video_counts: Counter[str] = Counter() - depth_counts: Counter[str] = Counter() - bundle_present_counts: Counter[str] = Counter() - expected_camera_labels = set(camera_labels) - - try: - with output_path.open("rb") as stream: - reader = reader_module.make_reader(stream) - for schema, channel, message in reader.iter_messages(): - if layout == "copy" and channel.topic == "/bundle": - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason="copy-layout MCAP must not contain /bundle", - ) - if channel.topic in expected_topics: - found_topics.add(channel.topic) - if channel.topic.endswith("/video"): - video_counts[channel.topic.removeprefix("/").removesuffix("/video")] += 1 - continue - if channel.topic.endswith("/depth"): - depth_counts[channel.topic.removeprefix("/").removesuffix("/depth")] += 1 - continue - if require_bundle and channel.topic == bundle_topic: - if schema is None or schema.name != "cvmmap_streamer.BundleManifest": - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"bundle topic '{bundle_topic}' is missing the BundleManifest schema", - ) - try: - bundle_class, present_value = load_bundle_manifest_type(schema.data) - bundle = bundle_class() - bundle.ParseFromString(message.data) - except Exception as error: # noqa: BLE001 - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"failed to parse bundle manifest: {error}", - ) - - bundle_labels: set[str] = set() - for member in bundle.members: - label = str(member.camera_label) - if label not in expected_camera_labels: - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"bundle manifest referenced unknown camera label '{label}'", - ) - if label in bundle_labels: - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"bundle manifest duplicated camera label '{label}'", - ) - bundle_labels.add(label) - is_present = member.HasField("timestamp") - if present_value is not None: - is_present = member.status == present_value - if is_present and not member.HasField("timestamp"): - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason=f"bundle member '{label}' is present but missing a timestamp", - ) - if is_present: - bundle_present_counts[label] += 1 - if bundle_labels != expected_camera_labels: - missing_labels = sorted(expected_camera_labels - bundle_labels) - extra_labels = sorted(bundle_labels - expected_camera_labels) - details = [] - if missing_labels: - details.append("missing=" + ",".join(missing_labels)) - if extra_labels: - details.append("extra=" + ",".join(extra_labels)) - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason="bundle manifest camera coverage mismatch: " + " ".join(details), - ) - except Exception as error: # noqa: BLE001 - return OutputProbeResult(output_path=output_path, status="invalid", reason=str(error)) - - missing_topics = sorted(expected_topics - found_topics) - if missing_topics: - return OutputProbeResult( - output_path=output_path, - status="invalid", - reason="missing expected topics: " + ", ".join(missing_topics), - ) - if require_bundle: - for label in camera_labels: - present_count = bundle_present_counts[label] - if video_counts[label] != present_count: - return OutputProbeResult( - output_path=output_path, - status="invalid", - 
-                    reason=(
-                        f"video count mismatch for {label}: "
-                        f"bundle_present={present_count} video_messages={video_counts[label]}"
-                    ),
-                )
-            if depth_counts[label] != present_count:
-                return OutputProbeResult(
-                    output_path=output_path,
-                    status="invalid",
-                    reason=(
-                        f"depth count mismatch for {label}: "
-                        f"bundle_present={present_count} depth_messages={depth_counts[label]}"
-                    ),
-                )
-    else:
-        for label in camera_labels:
-            if video_counts[label] != depth_counts[label]:
-                return OutputProbeResult(
-                    output_path=output_path,
-                    status="invalid",
-                    reason=(
-                        f"video/depth count mismatch for {label}: "
-                        f"video_messages={video_counts[label]} depth_messages={depth_counts[label]}"
-                    ),
-                )
-    return OutputProbeResult(output_path=output_path, status="valid")
-
-
-def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult:
-    return run_conversion_on_slot(
-        job,
-        config,
-        WorkerSlot(label="job-1", encoder_device="auto", cuda_visible_devices=None),
-    )
-
-
-def run_conversion_on_slot(
-    job: ConversionJob,
-    config: BatchConfig,
-    slot: WorkerSlot,
-) -> JobResult:
-    command = command_for_job(job, config, slot.encoder_device)
-    completed = subprocess.run(
-        command,
-        check=False,
-        capture_output=True,
-        text=True,
-        env=env_for_job_with_cuda(slot.cuda_visible_devices),
-    )
-    status = "converted" if completed.returncode == 0 else "failed"
-    return JobResult(
-        status=status,
-        segment_dir=job.segment_dir,
-        output_path=job.output_path,
-        command=tuple(command),
-        return_code=completed.returncode,
-        stdout=completed.stdout,
-        stderr=completed.stderr,
-    )
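
Worth spelling out while reviewing run_conversion_on_slot: the encoder choice rides on the child's command line while the GPU pin rides on its environment, so two slots can share one binary yet land on different devices. A minimal sketch of the environment half (assumed behavior, mirroring env_for_job_with_cuda above):

    import os

    def env_with_cuda(assigned: str | None) -> dict[str, str]:
        # Copy the parent environment; pin CUDA_VISIBLE_DEVICES only when the
        # slot carries an explicit assignment, otherwise inherit it untouched.
        env = dict(os.environ)
        if assigned is not None:
            env["CUDA_VISIBLE_DEVICES"] = assigned
        return env

    print(env_with_cuda("1")["CUDA_VISIBLE_DEVICES"])  # "1" regardless of the parent env
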
-
-
-def split_lines_for_excerpt(text: str, max_lines: int = 8) -> list[str]:
-    lines = [line.rstrip() for line in text.splitlines() if line.strip()]
-    if len(lines) <= max_lines:
-        return lines
-    head_count = max(1, max_lines // 2)
-    tail_count = max_lines - head_count
-    excerpt = lines[:head_count]
-    omitted = len(lines) - head_count - tail_count
-    if omitted > 0:
-        excerpt.append(f"... ({omitted} omitted line(s))")
-    excerpt.extend(lines[-tail_count:])
-    return excerpt
-
-
-def failure_excerpt(result: JobResult, max_lines: int = 8) -> list[str]:
-    if result.stderr.strip():
-        return split_lines_for_excerpt(result.stderr, max_lines=max_lines)
-    if result.stdout.strip():
-        return split_lines_for_excerpt(result.stdout, max_lines=max_lines)
-    return []
-
-
-def summarize_failures(results: list[JobResult]) -> None:
-    failed_results = [result for result in results if result.status == "failed"]
-    if not failed_results:
-        return
-
-    click.echo("\nFailed conversions:", err=True)
-    for result in failed_results:
-        click.echo(f"- {result.segment_dir} (exit {result.return_code})", err=True)
-        if result.stderr.strip():
-            click.echo(result.stderr.rstrip(), err=True)
-        elif result.stdout.strip():
-            click.echo(result.stdout.rstrip(), err=True)
-
-
-def report_invalid_existing_outputs(
-    invalid_existing: list[tuple[ConversionJob, OutputProbeResult]],
-) -> None:
-    if not invalid_existing:
-        return
-
-    click.echo("\nInvalid existing outputs:", err=True)
-    for job, probe in invalid_existing:
-        click.echo(f"- {job.segment_dir}", err=True)
-        click.echo(f"  output: {probe.output_path}", err=True)
-        reason_lines = probe.reason.splitlines() or [probe.reason]
-        click.echo(f"  reason: {reason_lines[0]}", err=True)
-        for line in reason_lines[1:]:
-            click.echo(f"          {line}", err=True)
-
-
-def report_dry_run_plan(
-    pending_jobs: list[ConversionJob],
-    pending_reasons: dict[Path, str],
-    pending_details: dict[Path, str],
-) -> None:
-    if not pending_jobs:
-        click.echo("dry-run: no conversions would be launched", err=True)
-        return
-
-    click.echo("\nDry-run plan:", err=True)
-    for job in pending_jobs:
-        reason = pending_reasons[job.segment_dir]
-        detail = pending_details.get(job.segment_dir)
-        line = f"- {job.segment_dir} [{reason}]"
-        if detail:
-            line = f"{line}: {detail.replace(chr(10), ' | ')}"
-        click.echo(line, err=True)
-
-
-def run_batch(
-    jobs: list[ConversionJob],
-    config: BatchConfig,
-    worker_slots: list[WorkerSlot],
-) -> tuple[list[JobResult], int]:
-    results: list[JobResult] = []
-    aborted_count = 0
-    if not jobs:
-        return results, aborted_count
-    if not worker_slots:
-        raise click.ClickException("no worker slots configured")
-
-    available_slots = list(worker_slots)
-    max_parallel_jobs = len(worker_slots)
-    future_to_job: dict[concurrent.futures.Future[JobResult], ActiveJobState] = {}
-    job_iter = iter(jobs)
-    stop_submitting = False
-    completed_count = 0
-    failed_count = 0
-    submission_index = 0
-    reporter = choose_progress_reporter(config.progress_ui, len(jobs))
-    last_heartbeat_at = time.monotonic()
-
-    with concurrent.futures.ThreadPoolExecutor(max_workers=max_parallel_jobs) as executor:
-
-        def submit_next() -> bool:
-            nonlocal submission_index
-            if stop_submitting or not available_slots:
-                return False
-            slot = available_slots.pop(0)
-            try:
-                job = next(job_iter)
-            except StopIteration:
-                available_slots.insert(0, slot)
-                return False
-
-            submission_index += 1
-            state = ActiveJobState(
-                submission_index=submission_index,
-                job=job,
-                slot=slot,
-                started_at_monotonic=time.monotonic(),
-            )
-            reporter.job_started(state)
-            future = executor.submit(run_conversion_on_slot, job, config, slot)
-            future_to_job[future] = state
-            return True
-
-        for _ in range(min(max_parallel_jobs, len(jobs))):
-            submit_next()
-
-        while future_to_job:
-            done, _ = concurrent.futures.wait(
-                future_to_job,
-                timeout=reporter.heartbeat_interval_seconds,
-                return_when=concurrent.futures.FIRST_COMPLETED,
-            )
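
    # Editor's aside on the wait() call above (sketch; setup below is mine):
    # with a timeout, wait() can return an empty `done` set, which the loop
    # that follows treats as a heartbeat tick rather than as progress.
    import concurrent.futures
    import time

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        slow = pool.submit(time.sleep, 0.2)
        done, pending = concurrent.futures.wait({slow}, timeout=0.05)
        print(len(done), len(pending))  # 0 1 -> heartbeat path
        done, pending = concurrent.futures.wait({slow})
        print(len(done), len(pending))  # 1 0 -> reap path
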
if not done: - reporter.heartbeat( - completed_count=completed_count, - failed_count=failed_count, - active_states=list(future_to_job.values()), - ) - last_heartbeat_at = time.monotonic() - continue - - for future in done: - state = future_to_job.pop(future) - available_slots.append(state.slot) - result = future.result() - results.append(result) - reporter.job_finished(state, result) - - if result.status == "failed": - failed_count += 1 - if config.fail_fast: - stop_submitting = True - else: - completed_count += 1 - - if not stop_submitting: - submit_next() - - now = time.monotonic() - if now - last_heartbeat_at >= reporter.heartbeat_interval_seconds: - reporter.heartbeat( - completed_count=completed_count, - failed_count=failed_count, - active_states=list(future_to_job.values()), - ) - last_heartbeat_at = now - - if stop_submitting: - aborted_count = sum(1 for _ in job_iter) - - reporter.close() - - return results, aborted_count - - -def build_uniform_worker_slots( - jobs: int, - encoder_device: str, - cuda_visible_devices: str | None, -) -> list[WorkerSlot]: - if jobs < 1: - raise click.ClickException("--jobs must be at least 1") - - if cuda_visible_devices is None: - return [ - WorkerSlot( - label=f"job-{index + 1}", - encoder_device=encoder_device, - cuda_visible_devices=None, - ) - for index in range(jobs) - ] - - device_pool = parse_cuda_device_pool(cuda_visible_devices) - if len(device_pool) < jobs: - raise click.ClickException( - f"--cuda-visible-devices must provide at least {jobs} entries when --jobs={jobs}" - ) - return [ - WorkerSlot( - label=f"job-{index + 1}", - encoder_device=encoder_device, - cuda_visible_devices=device_pool[index], - ) - for index in range(jobs) - ] - - -def parse_required_device_pool(raw_value: str | None, expected_count: int, flag_name: str) -> tuple[str, ...]: - if expected_count == 0: - if raw_value is None: - return () - raise click.ClickException(f"{flag_name} cannot be used when the matching job count is 0") - - device_pool = parse_cuda_device_pool(raw_value) - if len(device_pool) != expected_count: - raise click.ClickException( - f"{flag_name} must provide exactly {expected_count} entries when the matching job count is {expected_count}" - ) - return device_pool - - -def build_worker_slots( - *, - jobs: int, - encoder_device: str, - cuda_visible_devices: str | None, - hardware_jobs: int, - hardware_cuda_visible_devices: str | None, - software_jobs: int, - software_cuda_visible_devices: str | None, -) -> list[WorkerSlot]: - mixed_mode_requested = any( - ( - hardware_jobs > 0, - software_jobs > 0, - hardware_cuda_visible_devices is not None, - software_cuda_visible_devices is not None, - ) - ) - if not mixed_mode_requested: - return build_uniform_worker_slots(jobs, encoder_device, cuda_visible_devices) - - if jobs != 1: - raise click.ClickException("--jobs cannot be combined with mixed worker pool flags") - if cuda_visible_devices is not None: - raise click.ClickException("--cuda-visible-devices cannot be combined with mixed worker pool flags") - if encoder_device != "auto": - raise click.ClickException("--encoder-device cannot be combined with mixed worker pool flags") - - total_jobs = hardware_jobs + software_jobs - if total_jobs < 1: - raise click.ClickException("mixed worker pool flags require at least one hardware or software job") - - hardware_device_pool = parse_required_device_pool( - hardware_cuda_visible_devices, - hardware_jobs, - "--hardware-cuda-visible-devices", - ) - software_device_pool = parse_required_device_pool( - 
software_cuda_visible_devices, - software_jobs, - "--software-cuda-visible-devices", - ) - - worker_slots: list[WorkerSlot] = [] - worker_slots.extend( - WorkerSlot( - label=f"hw-{index + 1}", - encoder_device="nvidia", - cuda_visible_devices=device, - ) - for index, device in enumerate(hardware_device_pool) - ) - worker_slots.extend( - WorkerSlot( - label=f"sw-{index + 1}", - encoder_device="software", - cuda_visible_devices=device, - ) - for index, device in enumerate(software_device_pool) - ) - return worker_slots - - -@click.command(context_settings={"allow_extra_args": True}) -@click.option( - "--dataset-root", - type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), - help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.", -) -@click.option( - "--segment", - "segment_dirs", - multiple=True, - type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True), - help=( - "Explicit segment directory. Repeatable. The directory must directly contain " - "*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv." - ), -) -@click.option( - "--segment-dir", - "legacy_segment_dirs", - multiple=True, - type=click.Path(path_type=Path, file_okay=False, dir_okay=True), - hidden=True, -) -@click.option( - "--segments-csv", - type=click.Path(path_type=Path, dir_okay=False), - help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.", -) -@click.option( - "--csv-root", - type=click.Path(path_type=Path, file_okay=False, dir_okay=True), - help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.", -) -@click.option( - "--recursive/--no-recursive", - default=True, - show_default=True, - help="Recurse when discovering segment directories from --dataset-root.", -) -@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") -@click.option( - "--hardware-jobs", - default=0, - show_default=True, - type=click.IntRange(min=0), - help="Mixed mode: number of hardware-encoded workers.", -) -@click.option( - "--hardware-cuda-visible-devices", - help="Mixed mode: comma-separated CUDA_VISIBLE_DEVICES assignments for hardware workers, one entry per worker.", -) -@click.option( - "--software-jobs", - default=0, - show_default=True, - type=click.IntRange(min=0), - help="Mixed mode: number of software-encoded workers.", -) -@click.option( - "--software-cuda-visible-devices", - help="Mixed mode: comma-separated CUDA_VISIBLE_DEVICES assignments for software workers, one entry per worker.", -) -@click.option( - "--zed-bin", - type=click.Path(path_type=Path, dir_okay=False), - help="Explicit path to the zed_svo_to_mcap binary.", -) -@click.option( - "--cuda-visible-devices", - help="Optional CUDA_VISIBLE_DEVICES value. A comma-separated list is distributed across concurrent jobs one GPU per subprocess.", -) -@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing MCAP files.") -@click.option( - "--probe-existing/--trust-existing", - default=False, - show_default=True, - help="Validate existing MCAP files before skipping them. 
Invalid outputs are treated as missing.", -) -@click.option( - "--report-existing", - is_flag=True, - help="Probe existing MCAP files, report invalid ones, and do not launch conversions.", -) -@click.option( - "--dry-run", - is_flag=True, - help="Show which segments would be converted after applying skip/probe logic, without launching conversions.", -) -@click.option( - "--fail-fast/--continue-on-error", - default=False, - show_default=True, - help="Stop submitting new work after the first failed conversion.", -) -@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True) -@click.option( - "--encoder-device", - type=click.Choice(("auto", "nvidia", "software")), - default="auto", - show_default=True, -) -@click.option( - "--mcap-compression", - type=click.Choice(("none", "lz4", "zstd")), - default="zstd", - show_default=True, -) -@click.option( - "--depth-mode", - type=click.Choice(("neural_light", "neural", "neural_plus")), - default="neural_plus", - show_default=True, -) -@click.option( - "--depth-size", - type=str, - default="optimal", - show_default=True, -) -@click.option( - "--bundle-policy", - type=click.Choice(("nearest", "strict", "copy")), - default="nearest", - show_default=True, - help="Bundling policy for multi-camera MCAP export.", -) -@click.option( - "--copy-range", - type=click.Choice(("common", "full")), - default="common", - show_default=True, - help="Timestamp range used when --bundle-policy copy is selected.", -) -@click.option( - "--bundle-topic", - default="/bundle", - show_default=True, - help="Topic used for bundled multi-camera manifest messages.", -) -@click.option("--with-pose", is_flag=True, help="Enable per-camera positional tracking export when available.") -@click.option( - "--pose-config", - type=click.Path(path_type=Path, dir_okay=False), - help="TOML config passed to zed_svo_to_mcap for pose tracking settings.", -) -@click.option( - "--world-frame-id", - default=None, - help="Optional pose reference frame id passed through to zed_svo_to_mcap.", -) -@click.option( - "--start-frame", - type=click.IntRange(min=0), - default=None, - help="First bundle index to export (inclusive) in bundled multi-camera mode.", -) -@click.option( - "--end-frame", - type=click.IntRange(min=0), - default=None, - help="Last bundle index to export (inclusive) in bundled multi-camera mode.", -) -@click.option( - "--sync-tolerance-ms", - type=click.FloatRange(min=0.0, min_open=True), - default=None, - help="Override the maximum timestamp delta used by strict bundled sync.", -) -@click.option( - "--progress-ui", - type=click.Choice(("auto", "table", "text")), - default="auto", - show_default=True, - help="Progress output mode. 
Auto uses a table on TTY and text logging otherwise.", -) -@click.pass_context -def main( - ctx: click.Context, - dataset_root: Path | None, - segment_dirs: tuple[Path, ...], - legacy_segment_dirs: tuple[Path, ...], - segments_csv: Path | None, - csv_root: Path | None, - recursive: bool, - jobs: int, - hardware_jobs: int, - hardware_cuda_visible_devices: str | None, - software_jobs: int, - software_cuda_visible_devices: str | None, - zed_bin: Path | None, - cuda_visible_devices: str | None, - overwrite: bool, - probe_existing: bool, - report_existing: bool, - dry_run: bool, - fail_fast: bool, - codec: str, - encoder_device: str, - mcap_compression: str, - depth_mode: str, - depth_size: str, - bundle_policy: str, - copy_range: str, - bundle_topic: str, - with_pose: bool, - pose_config: Path | None, - world_frame_id: str | None, - start_frame: int | None, - end_frame: int | None, - sync_tolerance_ms: float | None, - progress_ui: str, -) -> None: - """Batch-convert multi-camera ZED segments into grouped MCAP files.""" - segment_sources.raise_for_legacy_extra_args(ctx.args) - segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs) - segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root) - - if report_existing and dry_run: - raise click.ClickException("--report-existing and --dry-run are mutually exclusive") - if bundle_policy == "copy": - if start_frame is not None or end_frame is not None: - raise click.ClickException("--start-frame/--end-frame cannot be used with --bundle-policy copy") - if sync_tolerance_ms is not None: - raise click.ClickException("--sync-tolerance-ms cannot be used with --bundle-policy copy") - if bundle_topic != "/bundle": - raise click.ClickException("--bundle-topic cannot be customized with --bundle-policy copy") - - sources = segment_sources.resolve_sources( - dataset_root, - segment_dirs, - segments_csv, - csv_root, - recursive, - scan_segment_dir=scan_segment_dir, - no_matches_message=lambda root: f"no multi-camera segments found under {root}", - ) - binary_path = None if report_existing else locate_binary(zed_bin) - worker_slots = build_worker_slots( - jobs=jobs, - encoder_device=encoder_device, - cuda_visible_devices=cuda_visible_devices, - hardware_jobs=hardware_jobs, - hardware_cuda_visible_devices=hardware_cuda_visible_devices, - software_jobs=software_jobs, - software_cuda_visible_devices=software_cuda_visible_devices, - ) - config = BatchConfig( - zed_bin=binary_path, - probe_existing=probe_existing or report_existing, - overwrite=overwrite, - fail_fast=fail_fast, - codec=codec, - mcap_compression=mcap_compression, - depth_mode=depth_mode, - depth_size=depth_size, - bundle_policy=bundle_policy, - copy_range=copy_range, - bundle_topic=None if bundle_policy == "copy" else bundle_topic, - with_pose=with_pose, - pose_config=pose_config.expanduser().resolve() if pose_config is not None else None, - world_frame_id=world_frame_id, - start_frame=start_frame, - end_frame=end_frame, - sync_tolerance_ms=sync_tolerance_ms, - progress_ui=progress_ui, - ) - input_root = dataset_root.expanduser().resolve() if dataset_root is not None else None - display_parent = common_segment_parent(sources.segment_dirs) - - skipped_results: list[JobResult] = [] - failed_results: list[JobResult] = [] - pending_jobs: list[ConversionJob] = [] - pending_reasons: dict[Path, str] = {} - pending_details: dict[Path, str] = {} - valid_existing: list[OutputProbeResult] = [] - invalid_existing: list[tuple[ConversionJob, OutputProbeResult]] = [] - missing_outputs: 
list[ConversionJob] = [] - - for segment_dir in sources.segment_dirs: - scan = scan_segment_dir(segment_dir) - output_path = output_path_for(segment_dir) - job = ConversionJob( - segment_dir=segment_dir, - output_path=output_path, - camera_labels=scan.camera_labels, - display_name=display_name_for_segment( - segment_dir, - source_mode=sources.mode, - input_root=input_root, - common_parent=display_parent, - ), - ) - default_encoder_device = worker_slots[0].encoder_device if worker_slots else encoder_device - command = ( - tuple(command_for_job(job, config, default_encoder_device)) - if config.zed_bin is not None - else () - ) - if not scan.is_valid: - failed_results.append( - JobResult( - status="failed", - segment_dir=segment_dir, - output_path=output_path, - command=command, - return_code=2, - stderr=scan.reason or "", - ) - ) - continue - - if report_existing: - probe_result = probe_output( - output_path, - job.camera_labels, - layout="copy" if config.bundle_policy == "copy" else "bundled", - bundle_topic=config.bundle_topic, - ) - if probe_result.status == "valid": - valid_existing.append(probe_result) - elif probe_result.status == "invalid": - invalid_existing.append((job, probe_result)) - else: - missing_outputs.append(job) - continue - - if overwrite: - pending_jobs.append(job) - pending_reasons[segment_dir] = "overwrite" - continue - - if config.probe_existing: - probe_result = probe_output( - output_path, - job.camera_labels, - layout="copy" if config.bundle_policy == "copy" else "bundled", - bundle_topic=config.bundle_topic, - ) - if probe_result.status == "valid": - valid_existing.append(probe_result) - skipped_results.append( - JobResult( - status="skipped", - segment_dir=segment_dir, - output_path=output_path, - command=command, - ) - ) - continue - if probe_result.status == "invalid": - invalid_existing.append((job, probe_result)) - pending_jobs.append(job) - pending_reasons[segment_dir] = "invalid-existing-output" - pending_details[segment_dir] = probe_result.reason - continue - missing_outputs.append(job) - pending_jobs.append(job) - pending_reasons[segment_dir] = "missing-output" - continue - - if output_path.exists(): - skipped_results.append( - JobResult( - status="skipped", - segment_dir=segment_dir, - output_path=output_path, - command=command, - ) - ) - continue - - missing_outputs.append(job) - pending_jobs.append(job) - pending_reasons[segment_dir] = "missing-output" - - if report_existing: - click.echo( - ( - f"source={sources.mode} matched={len(sources.segment_dirs)} valid={len(valid_existing)} " - f"invalid={len(invalid_existing)} missing={len(missing_outputs)} " - f"invalid-segments={len(failed_results)}" - ), - err=True, - ) - if sources.ignored_partial_dirs: - click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True) - report_invalid_existing_outputs(invalid_existing) - summarize_failures(failed_results) - if failed_results or invalid_existing: - raise SystemExit(1) - return - - click.echo( - ( - f"source={sources.mode} matched={len(sources.segment_dirs)} pending={len(pending_jobs)} " - f"skipped={len(skipped_results)} invalid={len(failed_results)} jobs={len(worker_slots)} " - f"dry_run={'yes' if dry_run else 'no'}" - ), - err=True, - ) - if sources.ignored_partial_dirs: - click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True) - if config.probe_existing: - click.echo( - ( - f"probed-existing: valid={len(valid_existing)} invalid={len(invalid_existing)} " - f"missing={len(missing_outputs)}" - ), - err=True, - ) - - 
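
    # Editor's aside before the dry-run branch: once --report-existing has
    # been handled, the planning loop above reduces to a small decision
    # table. Condensed restatement (mine, not part of the script):
    def plan(output_exists: bool, overwrite: bool, probe: bool, probe_valid: bool | None) -> str:
        # probe_valid: None = no output on disk, True/False = probe verdict.
        if overwrite:
            return "pending (overwrite)"
        if probe:
            if probe_valid is None:
                return "pending (missing-output)"
            return "skipped" if probe_valid else "pending (invalid-existing-output)"
        return "skipped" if output_exists else "pending (missing-output)"

    print(plan(True, False, True, False))  # pending (invalid-existing-output)
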
if dry_run: - report_dry_run_plan(pending_jobs, pending_reasons, pending_details) - summarize_failures(failed_results) - if failed_results: - raise SystemExit(1) - return - - results = list(skipped_results) - results.extend(failed_results) - conversion_results, aborted_count = run_batch(pending_jobs, config, worker_slots) - results.extend(conversion_results) - - converted_count = sum(1 for result in results if result.status == "converted") - skipped_count = sum(1 for result in results if result.status == "skipped") - failed_count = sum(1 for result in results if result.status == "failed") - - click.echo( - ( - f"summary: matched={len(sources.segment_dirs)} converted={converted_count} " - f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}" - ), - err=True, - ) - summarize_failures(results) - - if failed_count > 0 or aborted_count > 0: - raise SystemExit(1) - - -if __name__ == "__main__": - main() diff --git a/scripts/zed_batch_svo_to_mp4.py b/scripts/zed_batch_svo_to_mp4.py deleted file mode 100755 index 49676a9..0000000 --- a/scripts/zed_batch_svo_to_mp4.py +++ /dev/null @@ -1,361 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import concurrent.futures -import os -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable - -import click -from tqdm import tqdm - - -SCRIPT_PATH = Path(__file__).resolve() -REPO_ROOT = SCRIPT_PATH.parents[1] -DEFAULT_PATTERNS = ("*.svo2",) -SUPPORTED_SUFFIXES = {".svo", ".svo2"} - - -@dataclass(slots=True, frozen=True) -class BatchConfig: - zed_bin: Path - cuda_visible_devices: str | None - overwrite: bool - fail_fast: bool - codec: str - encoder_device: str - preset: str - tune: str - quality: int - gop: int - b_frames: int - start_frame: int - end_frame: int | None - - -@dataclass(slots=True, frozen=True) -class ConversionJob: - input_path: Path - output_path: Path - - -@dataclass(slots=True, frozen=True) -class JobResult: - status: str - input_path: Path - output_path: Path - command: tuple[str, ...] 
- return_code: int = 0 - stdout: str = "" - stderr: str = "" - - -def locate_binary(override: Path | None) -> Path: - if override is not None: - candidate = override.expanduser().resolve() - if not candidate.is_file(): - raise click.ClickException(f"binary not found: {candidate}") - return candidate - - candidates = ( - REPO_ROOT / "build" / "bin" / "zed_svo_to_mp4", - REPO_ROOT / "build" / "zed_svo_to_mp4", - ) - for candidate in candidates: - if candidate.is_file(): - return candidate - raise click.ClickException(f"could not find zed_svo_to_mp4 under {REPO_ROOT / 'build'}") - - -def discover_inputs(root: Path, patterns: Iterable[str], recursive: bool) -> list[Path]: - discovered: set[Path] = set() - for pattern in patterns: - iterator = root.rglob(pattern) if recursive else root.glob(pattern) - for path in iterator: - if path.is_file() and path.suffix.lower() in SUPPORTED_SUFFIXES: - discovered.add(path.absolute()) - return sorted(discovered) - - -def output_path_for(input_path: Path) -> Path: - if input_path.suffix: - return input_path.with_suffix(".mp4") - return input_path.with_name(f"{input_path.name}.mp4") - - -def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]: - command = [ - str(config.zed_bin), - "--input", - str(job.input_path), - "--codec", - config.codec, - "--encoder-device", - config.encoder_device, - "--preset", - config.preset, - "--tune", - config.tune, - "--quality", - str(config.quality), - "--gop", - str(config.gop), - "--b-frames", - str(config.b_frames), - "--start-frame", - str(config.start_frame), - ] - if config.end_frame is not None: - command.extend(["--end-frame", str(config.end_frame)]) - return command - - -def env_for_job(config: BatchConfig) -> dict[str, str]: - env = dict(os.environ) - if config.cuda_visible_devices is not None: - env["CUDA_VISIBLE_DEVICES"] = config.cuda_visible_devices - return env - - -def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult: - command = command_for_job(job, config) - completed = subprocess.run( - command, - check=False, - capture_output=True, - text=True, - env=env_for_job(config), - ) - status = "converted" if completed.returncode == 0 else "failed" - return JobResult( - status=status, - input_path=job.input_path, - output_path=job.output_path, - command=tuple(command), - return_code=completed.returncode, - stdout=completed.stdout, - stderr=completed.stderr, - ) - - -def summarize_failures(results: list[JobResult]) -> None: - failed_results = [result for result in results if result.status == "failed"] - if not failed_results: - return - - click.echo("\nFailed conversions:", err=True) - for result in failed_results: - click.echo(f"- {result.input_path} (exit {result.return_code})", err=True) - if result.stderr.strip(): - click.echo(result.stderr.rstrip(), err=True) - elif result.stdout.strip(): - click.echo(result.stdout.rstrip(), err=True) - - -def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -> tuple[list[JobResult], int]: - results: list[JobResult] = [] - aborted_count = 0 - if not jobs: - return results, aborted_count - - future_to_job: dict[concurrent.futures.Future[JobResult], ConversionJob] = {} - job_iter = iter(jobs) - stop_submitting = False - - with concurrent.futures.ThreadPoolExecutor(max_workers=jobs_limit) as executor: - with tqdm(total=len(jobs), unit="file", dynamic_ncols=True) as progress: - def submit_next() -> bool: - if stop_submitting: - return False - try: - job = next(job_iter) - except StopIteration: - return False - future = 
executor.submit(run_conversion, job, config) - future_to_job[future] = job - return True - - for _ in range(min(jobs_limit, len(jobs))): - submit_next() - - while future_to_job: - done, _ = concurrent.futures.wait( - future_to_job, - return_when=concurrent.futures.FIRST_COMPLETED, - ) - for future in done: - job = future_to_job.pop(future) - result = future.result() - results.append(result) - progress.update(1) - - if result.status == "failed": - tqdm.write(f"failed: {job.input_path} (exit {result.return_code})", file=sys.stderr) - if config.fail_fast: - stop_submitting = True - - if not stop_submitting: - submit_next() - - if stop_submitting: - remaining = sum(1 for _ in job_iter) - aborted_count = remaining - progress.total = progress.n + len(future_to_job) - progress.refresh() - - return results, aborted_count - - -@click.command() -@click.argument("input_dir", type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path)) -@click.option( - "--pattern", - "patterns", - multiple=True, - default=DEFAULT_PATTERNS, - show_default=True, - help="Glob pattern to match under the input directory. Repeatable.", -) -@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Use rglob instead of glob.") -@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.") -@click.option( - "--zed-bin", - type=click.Path(path_type=Path, dir_okay=False), - help="Explicit path to the zed_svo_to_mp4 binary.", -) -@click.option( - "--cuda-visible-devices", - help="Optional CUDA_VISIBLE_DEVICES value exported for each conversion subprocess.", -) -@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing MP4 files.") -@click.option( - "--fail-fast/--continue-on-error", - default=False, - show_default=True, - help="Stop submitting new work after the first failed conversion.", -) -@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True) -@click.option( - "--encoder-device", - type=click.Choice(("auto", "nvidia", "software")), - default="auto", - show_default=True, -) -@click.option("--preset", type=click.Choice(("fast", "balanced", "quality")), default="fast", show_default=True) -@click.option( - "--tune", - type=click.Choice(("low-latency", "balanced")), - default="low-latency", - show_default=True, -) -@click.option( - "--quality", - type=click.IntRange(min=0, max=51), - default=23, - show_default=True, - help="Lower values mean higher quality.", -) -@click.option("--gop", type=click.IntRange(min=1), default=30, show_default=True) -@click.option("--b-frames", "b_frames", type=click.IntRange(min=0), default=0, show_default=True) -@click.option("--start-frame", type=click.IntRange(min=0), default=0, show_default=True) -@click.option("--end-frame", type=click.IntRange(min=0), default=None) -def main( - input_dir: Path, - patterns: tuple[str, ...], - recursive: bool, - jobs: int, - zed_bin: Path | None, - cuda_visible_devices: str | None, - overwrite: bool, - fail_fast: bool, - codec: str, - encoder_device: str, - preset: str, - tune: str, - quality: int, - gop: int, - b_frames: int, - start_frame: int, - end_frame: int | None, -) -> None: - """Batch-convert ZED SVO/SVO2 recordings in a folder to MP4.""" - if b_frames > gop: - raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames") - if end_frame is not None and end_frame < start_frame: - raise click.BadParameter( - f"end-frame {end_frame} must be >= 
start-frame {start_frame}", - param_hint="--end-frame", - ) - - binary_path = locate_binary(zed_bin) - inputs = discover_inputs(input_dir.absolute(), patterns, recursive) - if not inputs: - raise click.ClickException(f"no .svo/.svo2 files matched under {input_dir}") - - config = BatchConfig( - zed_bin=binary_path, - cuda_visible_devices=cuda_visible_devices, - overwrite=overwrite, - fail_fast=fail_fast, - codec=codec, - encoder_device=encoder_device, - preset=preset, - tune=tune, - quality=quality, - gop=gop, - b_frames=b_frames, - start_frame=start_frame, - end_frame=end_frame, - ) - - skipped_results: list[JobResult] = [] - pending_jobs: list[ConversionJob] = [] - for input_path in inputs: - output_path = output_path_for(input_path) - command = tuple(command_for_job(ConversionJob(input_path, output_path), config)) - if output_path.exists() and not overwrite: - skipped_results.append( - JobResult( - status="skipped", - input_path=input_path, - output_path=output_path, - command=command, - ) - ) - continue - pending_jobs.append(ConversionJob(input_path=input_path, output_path=output_path)) - - click.echo( - f"matched={len(inputs)} pending={len(pending_jobs)} skipped={len(skipped_results)} jobs={jobs}", - err=True, - ) - - results = list(skipped_results) - conversion_results, aborted_count = run_batch(pending_jobs, config, jobs) - results.extend(conversion_results) - - converted_count = sum(1 for result in results if result.status == "converted") - skipped_count = sum(1 for result in results if result.status == "skipped") - failed_count = sum(1 for result in results if result.status == "failed") - - click.echo( - ( - f"summary: matched={len(inputs)} converted={converted_count} " - f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}" - ), - err=True, - ) - summarize_failures(results) - - if failed_count > 0: - raise SystemExit(1) - if aborted_count > 0: - raise SystemExit(1) - - -if __name__ == "__main__": - main() diff --git a/scripts/zed_recording_mcap_tool.py b/scripts/zed_recording_mcap_tool.py deleted file mode 100755 index b52655c..0000000 --- a/scripts/zed_recording_mcap_tool.py +++ /dev/null @@ -1,374 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import math -import os -import shlex -import subprocess -import sys -import tempfile -from collections import Counter -from pathlib import Path -from typing import Iterable - -import cv2 -import numpy as np - - -SCRIPT_PATH = Path(__file__).resolve() -REPO_ROOT = SCRIPT_PATH.parents[1] -WORKSPACE_ROOT = REPO_ROOT.parent -MCAP_PYTHON_ROOT = WORKSPACE_ROOT / "mcap" / "python" / "mcap" -if str(MCAP_PYTHON_ROOT) not in sys.path: - sys.path.insert(0, str(MCAP_PYTHON_ROOT)) - -from mcap.reader import make_reader # noqa: E402 - - -VIDEO_FORMATS = ("h264", "h265") - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description=( - "Convert ZED SVO/SVO2 recordings to MCAP and generate a lightweight preview. " - "If the input is already an MCAP file, conversion is skipped." 
- ) - ) - parser.add_argument("input", help="Input .svo/.svo2 file, .mcap file, or a directory containing SVO files") - parser.add_argument("--output-dir", help="Directory for generated MCAP files and previews") - parser.add_argument( - "--preview-all", - action="store_true", - help="When the input is a directory, generate a preview for every converted MCAP instead of just the first one", - ) - parser.add_argument("--no-preview", action="store_true", help="Convert only, do not generate preview images") - parser.add_argument( - "--format", - choices=("auto", "h264", "h265"), - default="auto", - help="CompressedVideo format to extract from MCAP during preview", - ) - parser.add_argument("--codec", choices=VIDEO_FORMATS, default="h264", help="Video codec for SVO to MCAP conversion") - parser.add_argument( - "--encoder-device", - choices=("auto", "nvidia", "software"), - default="software", - help="Encoder device passed to zed_svo_to_mcap", - ) - parser.add_argument( - "--mcap-compression", - choices=("none", "lz4", "zstd"), - default="none", - help="MCAP chunk compression passed to zed_svo_to_mcap", - ) - parser.add_argument( - "--depth-mode", - choices=("neural_light", "neural", "neural_plus"), - default="neural_plus", - help="Depth mode passed to zed_svo_to_mcap", - ) - parser.add_argument( - "--depth-size", - default="optimal", - help="Depth size passed to zed_svo_to_mcap (optimal|native|x)", - ) - parser.add_argument("--start-frame", type=int, default=0, help="First SVO frame to convert") - parser.add_argument("--end-frame", type=int, help="Last SVO frame to convert") - parser.add_argument( - "--sample-count", - type=int, - default=9, - help="Number of decoded frames to place in the preview contact sheet", - ) - parser.add_argument( - "--frame-step", - type=int, - default=15, - help="Decode every Nth frame for the contact sheet", - ) - parser.add_argument( - "--contact-sheet-width", - type=int, - default=480, - help="Width of each preview tile in pixels", - ) - parser.add_argument( - "--cuda-visible-devices", - help=( - "Optional CUDA_VISIBLE_DEVICES value to export while running zed_svo_to_mcap. " - "Useful when the ZED SDK must be pinned to a specific GPU UUID." 
- ), - ) - parser.add_argument("--zed-bin", help="Explicit path to zed_svo_to_mcap") - parser.add_argument("--reader-bin", help="Explicit path to mcap_reader_tester") - return parser.parse_args() - - -def locate_binary(name: str, override: str | None) -> Path: - if override: - path = Path(override).expanduser().resolve() - if not path.is_file(): - raise FileNotFoundError(f"binary not found: {path}") - return path - - candidates = ( - REPO_ROOT / "build" / "bin" / name, - REPO_ROOT / "build" / name, - ) - for candidate in candidates: - if candidate.is_file(): - return candidate - raise FileNotFoundError(f"could not find {name} under {REPO_ROOT / 'build'}") - - -def quote_command(args: Iterable[str]) -> str: - return " ".join(shlex.quote(arg) for arg in args) - - -def run(args: list[str], env: dict[str, str] | None = None) -> None: - print(f"$ {quote_command(args)}", flush=True) - subprocess.run(args, check=True, env=env) - - -def summarize_mcap(mcap_path: Path) -> list[tuple[str, str, str, int]]: - counts: Counter[tuple[str, str, str]] = Counter() - with mcap_path.open("rb") as stream: - reader = make_reader(stream) - for schema, channel, _message in reader.iter_messages(): - schema_name = schema.name if schema is not None else "" - counts[(channel.topic, channel.message_encoding, schema_name)] += 1 - summary_rows = [ - (topic, encoding, schema_name, count) - for (topic, encoding, schema_name), count in sorted(counts.items()) - ] - print(f"MCAP summary: {mcap_path}") - for topic, encoding, schema_name, count in summary_rows: - print(f" {count:6d} topic={topic} encoding={encoding} schema={schema_name}") - return summary_rows - - -def infer_video_format(reader_bin: Path, mcap_path: Path, requested: str) -> str: - if requested != "auto": - return requested - - for candidate in VIDEO_FORMATS: - result = subprocess.run( - [str(reader_bin), str(mcap_path), "--expect-format", candidate, "--min-messages", "1"], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - check=False, - ) - if result.returncode == 0: - return candidate - raise RuntimeError(f"could not infer video format from {mcap_path}") - - -def dump_annexb(reader_bin: Path, mcap_path: Path, video_format: str, output_path: Path) -> None: - run( - [ - str(reader_bin), - str(mcap_path), - "--expect-format", - video_format, - "--min-messages", - "1", - "--dump-annexb-output", - str(output_path), - ] - ) - - -def make_contact_sheet(stream_path: Path, image_path: Path, sample_count: int, frame_step: int, tile_width: int) -> int: - capture = cv2.VideoCapture(str(stream_path)) - if not capture.isOpened(): - raise RuntimeError(f"OpenCV could not open decoded stream {stream_path}") - - frames: list[np.ndarray] = [] - frame_index = 0 - while len(frames) < sample_count: - ok, frame = capture.read() - if not ok: - break - if frame_index % frame_step == 0: - annotated = frame.copy() - cv2.putText( - annotated, - f"frame {frame_index}", - (20, 40), - cv2.FONT_HERSHEY_SIMPLEX, - 1.0, - (0, 255, 0), - 2, - cv2.LINE_AA, - ) - frames.append(annotated) - frame_index += 1 - capture.release() - - if not frames: - raise RuntimeError(f"no frames decoded from {stream_path}") - - tile_width = max(64, tile_width) - resized: list[np.ndarray] = [] - for frame in frames: - scale = tile_width / frame.shape[1] - tile_height = max(1, int(round(frame.shape[0] * scale))) - resized.append(cv2.resize(frame, (tile_width, tile_height), interpolation=cv2.INTER_AREA)) - - max_height = max(frame.shape[0] for frame in resized) - padded: list[np.ndarray] = [] - for 
frame in resized: - if frame.shape[0] == max_height: - padded.append(frame) - continue - canvas = np.zeros((max_height, frame.shape[1], 3), dtype=np.uint8) - canvas[: frame.shape[0], :, :] = frame - padded.append(canvas) - - columns = max(1, math.ceil(math.sqrt(len(padded)))) - rows = math.ceil(len(padded) / columns) - blank = np.zeros_like(padded[0]) - - row_images: list[np.ndarray] = [] - for row_index in range(rows): - row_frames = padded[row_index * columns : (row_index + 1) * columns] - while len(row_frames) < columns: - row_frames.append(blank) - row_images.append(np.concatenate(row_frames, axis=1)) - - sheet = np.concatenate(row_images, axis=0) - image_path.parent.mkdir(parents=True, exist_ok=True) - if not cv2.imwrite(str(image_path), sheet): - raise RuntimeError(f"failed to write preview image {image_path}") - print(f"Preview contact sheet: {image_path}") - return len(frames) - - -def collect_svo_inputs(input_path: Path) -> list[Path]: - if input_path.is_file(): - if input_path.suffix.lower() in {".svo", ".svo2"}: - return [input_path] - if input_path.suffix.lower() == ".mcap": - return [] - raise ValueError(f"unsupported input file: {input_path}") - - if input_path.is_dir(): - return sorted( - path for path in input_path.rglob("*") if path.suffix.lower() in {".svo", ".svo2"} - ) - - raise FileNotFoundError(f"input not found: {input_path}") - - -def default_output_dir(input_path: Path) -> Path: - if input_path.is_dir(): - return input_path / "mcap_preview" - return input_path.parent / "mcap_preview" - - -def convert_svo( - zed_bin: Path, - svo_path: Path, - mcap_path: Path, - args: argparse.Namespace, -) -> None: - env = os.environ.copy() - if args.cuda_visible_devices: - env["CUDA_VISIBLE_DEVICES"] = args.cuda_visible_devices - - command = [ - str(zed_bin), - "--input", - str(svo_path), - "--output", - str(mcap_path), - "--codec", - args.codec, - "--encoder-device", - args.encoder_device, - "--mcap-compression", - args.mcap_compression, - "--depth-mode", - args.depth_mode, - "--depth-size", - args.depth_size, - "--start-frame", - str(args.start_frame), - ] - if args.end_frame is not None: - command.extend(["--end-frame", str(args.end_frame)]) - - mcap_path.parent.mkdir(parents=True, exist_ok=True) - run(command, env=env) - - -def preview_mcap(reader_bin: Path, mcap_path: Path, args: argparse.Namespace) -> None: - summarize_mcap(mcap_path) - video_format = infer_video_format(reader_bin, mcap_path, args.format) - print(f"Detected video format: {video_format}") - - stream_extension = ".h265" if video_format == "h265" else ".h264" - with tempfile.TemporaryDirectory(prefix="zed_mcap_preview_") as temp_dir: - temp_root = Path(temp_dir) - stream_path = temp_root / f"preview{stream_extension}" - dump_annexb(reader_bin, mcap_path, video_format, stream_path) - - preview_path = mcap_path.with_suffix(".preview.png") - decoded = make_contact_sheet( - stream_path, - preview_path, - sample_count=args.sample_count, - frame_step=args.frame_step, - tile_width=args.contact_sheet_width, - ) - print(f"Decoded {decoded} preview frame(s)") - - -def main() -> int: - args = parse_args() - - input_path = Path(args.input).expanduser().resolve() - output_dir = Path(args.output_dir).expanduser().resolve() if args.output_dir else default_output_dir(input_path) - output_dir.mkdir(parents=True, exist_ok=True) - - reader_bin = locate_binary("mcap_reader_tester", args.reader_bin) - zed_bin = locate_binary("zed_svo_to_mcap", args.zed_bin) if input_path.suffix.lower() != ".mcap" or input_path.is_dir() else None - - 
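
    # Editor's aside on make_contact_sheet above: tiles land on a near-square
    # grid, columns = ceil(sqrt(n)) and rows = ceil(n / columns), with blank
    # tiles padding out the last row. Worked through for a few sizes:
    import math

    for n in (1, 5, 9, 10):
        columns = max(1, math.ceil(math.sqrt(n)))
        rows = math.ceil(n / columns)
        print(f"{n} tiles -> {rows}x{columns} grid, {rows * columns - n} blank pad(s)")
    # 9 frames (the --sample-count default) tile into an exact 3x3 sheet.
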
if input_path.is_file() and input_path.suffix.lower() == ".mcap": - if not args.no_preview: - preview_mcap(reader_bin, input_path, args) - return 0 - - svo_inputs = collect_svo_inputs(input_path) - if not svo_inputs: - raise RuntimeError(f"no .svo/.svo2 files found under {input_path}") - - converted_paths: list[Path] = [] - for svo_path in svo_inputs: - output_name = f"{svo_path.stem}.mcap" - mcap_path = output_dir / output_name - convert_svo(zed_bin, svo_path, mcap_path, args) - converted_paths.append(mcap_path) - - if args.no_preview: - return 0 - - preview_targets = converted_paths if args.preview_all else converted_paths[:1] - for mcap_path in preview_targets: - preview_mcap(reader_bin, mcap_path, args) - - print("Generated MCAP files:") - for mcap_path in converted_paths: - print(f" {mcap_path}") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except KeyboardInterrupt: - raise SystemExit(130) diff --git a/scripts/zed_segment_time_index.py b/scripts/zed_segment_time_index.py deleted file mode 100644 index 3e1ab32..0000000 --- a/scripts/zed_segment_time_index.py +++ /dev/null @@ -1,658 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import concurrent.futures -import datetime as dt -import json -import os -import re -import subprocess -import tempfile -from dataclasses import dataclass -from pathlib import Path -from typing import Any -from zoneinfo import ZoneInfo - -import click -import duckdb - - -SCRIPT_PATH = Path(__file__).resolve() -REPO_ROOT = SCRIPT_PATH.parents[1] -DEFAULT_INDEX_NAME = "segment_time_index.duckdb" -INDEX_SCHEMA_VERSION = "1" -SEGMENT_FILE_PATTERN = re.compile(r".*_zed([0-9]+)\.svo2?$", re.IGNORECASE) -FOLDER_TIMESTAMP_PATTERN = re.compile( - r"^(?P\d{4}-\d{2}-\d{2})[T ](?P\d{2})-(?P\d{2})-(?P\d{2})(?P\.\d+)?(?PZ|[+-]\d{2}:\d{2})?$" -) - - -@dataclass(slots=True, frozen=True) -class SegmentScan: - segment_dir: Path - matched_files: int - camera_labels: tuple[str, ...] 
- is_valid: bool
-    reason: str | None = None
-
-
-@dataclass(slots=True, frozen=True)
-class BoundsRow:
-    segment_dir: Path
-    relative_segment_dir: str
-    group_path: str
-    activity: str
-    segment_name: str
-    mcap_path: Path
-    start_ns: int
-    end_ns: int
-    duration_ns: int
-    start_iso_utc: str
-    end_iso_utc: str
-    camera_count: int
-    camera_labels: str
-    video_message_count: int
-    index_source: str
-
-
-def sorted_camera_labels(labels: set[str]) -> tuple[str, ...]:
-    return tuple(sorted(labels, key=lambda label: int(label[3:])))
-
-
-def scan_segment_dir(segment_dir: Path) -> SegmentScan:
-    if not segment_dir.is_dir():
-        return SegmentScan(
-            segment_dir=segment_dir,
-            matched_files=0,
-            camera_labels=(),
-            is_valid=False,
-            reason=f"segment directory does not exist: {segment_dir}",
-        )
-
-    matched_by_camera: dict[str, list[Path]] = {}
-    for child in segment_dir.iterdir():
-        if not child.is_file():
-            continue
-        match = SEGMENT_FILE_PATTERN.fullmatch(child.name)
-        if match is None:
-            continue
-        label = f"zed{int(match.group(1))}"
-        matched_by_camera.setdefault(label, []).append(child)
-
-    matched_files = sum(len(paths) for paths in matched_by_camera.values())
-    camera_labels = sorted_camera_labels(set(matched_by_camera))
-    duplicate_cameras = [label for label, paths in sorted(matched_by_camera.items()) if len(paths) > 1]
-
-    if duplicate_cameras:
-        return SegmentScan(
-            segment_dir=segment_dir,
-            matched_files=matched_files,
-            camera_labels=camera_labels,
-            is_valid=False,
-            reason=f"duplicate camera inputs under {segment_dir}: {', '.join(duplicate_cameras)}",
-        )
-    if len(camera_labels) < 2:
-        return SegmentScan(
-            segment_dir=segment_dir,
-            matched_files=matched_files,
-            camera_labels=camera_labels,
-            is_valid=False,
-            reason=f"expected at least 2 camera inputs under {segment_dir}, found {len(camera_labels)}",
-        )
-
-    return SegmentScan(
-        segment_dir=segment_dir,
-        matched_files=matched_files,
-        camera_labels=camera_labels,
-        is_valid=True,
-    )
-
-
-def discover_segment_dirs(root: Path, recursive: bool) -> tuple[list[SegmentScan], list[SegmentScan]]:
-    if not root.is_dir():
-        raise click.ClickException(f"input directory does not exist: {root}")
-
-    candidate_dirs = {root.resolve()}
-    iterator = root.rglob("*") if recursive else root.iterdir()
-    for path in iterator:
-        if path.is_dir():
-            candidate_dirs.add(path.resolve())
-
-    valid_scans: list[SegmentScan] = []
-    ignored_partial_scans: list[SegmentScan] = []
-    for segment_dir in sorted(candidate_dirs):
-        scan = scan_segment_dir(segment_dir)
-        if scan.is_valid:
-            valid_scans.append(scan)
-        elif scan.matched_files > 0:
-            ignored_partial_scans.append(scan)
-
-    if not valid_scans:
-        raise click.ClickException(f"no multi-camera segments found under {root}")
-
-    return valid_scans, ignored_partial_scans
-
-
-def locate_binary(name: str, override: Path | None) -> Path:
-    if override is not None:
-        candidate = override.expanduser().resolve()
-        if not candidate.is_file():
-            raise click.ClickException(f"binary not found: {candidate}")
-        return candidate
-
-    candidates = (
-        REPO_ROOT / "build" / "bin" / name,
-        REPO_ROOT / "build" / name,
-    )
-    for candidate in candidates:
-        if candidate.is_file():
-            return candidate
-    raise click.ClickException(f"could not find {name} under {REPO_ROOT / 'build'}")
-
-
-def default_index_path(dataset_root: Path) -> Path:
-    return dataset_root / DEFAULT_INDEX_NAME
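
The key detail in sorted_camera_labels is the numeric sort key: a plain lexicographic sort would file zed10 between zed1 and zed2. A quick standalone check:

    labels = {"zed10", "zed2", "zed1"}
    print(sorted(labels))                                    # ['zed1', 'zed10', 'zed2']
    print(sorted(labels, key=lambda label: int(label[3:])))  # ['zed1', 'zed2', 'zed10']
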
-
-
-def find_unique_mcap(segment_dir: Path) -> Path | None:
-    matches = sorted(path for path in segment_dir.iterdir() if path.is_file() and path.suffix.lower() == ".mcap")
-    if len(matches) == 1:
-        return matches[0]
-    return None
-
-
-def format_ns_iso(ns: int, tzinfo: dt.tzinfo) -> str:
-    seconds, nanos = divmod(ns, 1_000_000_000)
-    stamp = dt.datetime.fromtimestamp(seconds, tz=dt.timezone.utc).astimezone(tzinfo)
-    offset = stamp.strftime("%z")
-    offset = f"{offset[:3]}:{offset[3:]}" if offset else ""
-    return f"{stamp.strftime('%Y-%m-%dT%H:%M:%S')}.{nanos:09d}{offset}"
-
-
-def format_ns_utc(ns: int) -> str:
-    return format_ns_iso(ns, dt.timezone.utc).replace("+00:00", "Z")
-
-
-def resolve_timezone(name: str) -> dt.tzinfo:
-    if name == "local":
-        local = dt.datetime.now().astimezone().tzinfo
-        if local is None:
-            raise click.ClickException("could not resolve local timezone")
-        return local
-    if name == "UTC":
-        return dt.timezone.utc
-    if name.startswith("UTC") and len(name) == len("UTC+00:00"):
-        try:
-            sign = 1 if name[3] == "+" else -1
-            hours = int(name[4:6])
-            minutes = int(name[7:9])
-        except ValueError as exc:
-            raise click.ClickException(f"invalid fixed UTC offset '{name}'") from exc
-        return dt.timezone(sign * dt.timedelta(hours=hours, minutes=minutes))
-    try:
-        return ZoneInfo(name)
-    except Exception as exc:  # pragma: no cover - defensive wrapper around system tzdb
-        raise click.ClickException(f"unknown timezone '{name}': {exc}") from exc
-
-
-def normalize_timestamp_text(value: str) -> str:
-    match = FOLDER_TIMESTAMP_PATTERN.fullmatch(value)
-    if match is None:
-        return value
-    parts = match.groupdict()
-    fraction = parts["fraction"] or ""
-    timezone_text = parts["timezone"] or ""
-    return f"{parts['date']}T{parts['hour']}:{parts['minute']}:{parts['second']}{fraction}{timezone_text}"
-
-
-def parse_folder_name_naive(value: str) -> dt.datetime | None:
-    normalized = normalize_timestamp_text(value)
-    try:
-        parsed = dt.datetime.fromisoformat(normalized)
-    except ValueError:
-        return None
-    if parsed.tzinfo is not None:
-        return None
-    return parsed
-
-
-def datetime_to_ns(value: dt.datetime) -> int:
-    utc_value = value.astimezone(dt.timezone.utc)
-    return int(utc_value.timestamp()) * 1_000_000_000 + utc_value.microsecond * 1_000
-
-
-def parse_timestamp_to_ns(value: str, timezone_name: str) -> int:
-    stripped = value.strip()
-    if not stripped:
-        raise click.ClickException("timestamp value is empty")
-
-    digit_text = stripped.lstrip("+-")
-    if digit_text.isdigit():
-        raw_value = int(stripped)
-        digits = len(digit_text)
-        if digits <= 10:
-            return raw_value * 1_000_000_000
-        if digits <= 13:
-            return raw_value * 1_000_000
-        if digits <= 16:
-            return raw_value * 1_000
-        return raw_value
-
-    normalized = normalize_timestamp_text(stripped)
-    if normalized.endswith("Z"):
-        normalized = normalized[:-1] + "+00:00"
-    try:
-        parsed = dt.datetime.fromisoformat(normalized)
-    except ValueError as exc:
-        raise click.ClickException(f"invalid timestamp '{value}': {exc}") from exc
-    if parsed.tzinfo is None:
-        parsed = parsed.replace(tzinfo=resolve_timezone(timezone_name))
-    return datetime_to_ns(parsed)
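
parse_timestamp_to_ns infers the unit of a bare integer from its digit count: up to 10 digits reads as epoch seconds, up to 13 as milliseconds, up to 16 as microseconds, and anything longer is taken as nanoseconds already. A worked restatement of just that heuristic:

    def ns_per_unit(digits: int) -> int:
        # Unit implied by the integer's length (10-digit epoch seconds last until 2286).
        if digits <= 10:
            return 1_000_000_000   # seconds
        if digits <= 13:
            return 1_000_000       # milliseconds
        if digits <= 16:
            return 1_000           # microseconds
        return 1                   # nanoseconds

    for raw in ("1714567890", "1714567890123", "1714567890123456"):
        print(raw, "->", int(raw) * ns_per_unit(len(raw)), "ns")
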
-
-
-def parse_timestamp_window(value: str, timezone_name: str) -> tuple[int, int]:
-    stripped = value.strip()
-    if not stripped:
-        raise click.ClickException("timestamp value is empty")
-
-    digit_text = stripped.lstrip("+-")
-    if digit_text.isdigit():
-        base_ns = parse_timestamp_to_ns(stripped, timezone_name)
-        digits = len(digit_text)
-        if digits <= 10:
-            precision_ns = 1_000_000_000
-        elif digits <= 13:
-            precision_ns = 1_000_000
-        elif digits <= 16:
-            precision_ns = 1_000
-        else:
-            precision_ns = 1
-        return base_ns, base_ns + precision_ns - 1
-
-    normalized = normalize_timestamp_text(stripped)
-    base_ns = parse_timestamp_to_ns(stripped, timezone_name)
-    fraction_match = re.search(r"\.(\d+)", normalized)
-    if fraction_match is None:
-        precision_ns = 1_000_000_000
-    else:
-        digits = min(len(fraction_match.group(1)), 9)
-        precision_ns = 10 ** (9 - digits)
-    return base_ns, base_ns + precision_ns - 1
-
-
-def probe_mcap_bounds(bounds_bin: Path, mcap_path: Path) -> dict[str, Any]:
-    result = subprocess.run(
-        [str(bounds_bin), str(mcap_path), "--json"],
-        check=False,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    if result.returncode != 0:
-        stderr = result.stderr.strip() or result.stdout.strip() or f"exit {result.returncode}"
-        raise RuntimeError(f"{mcap_path}: {stderr}")
-    try:
-        return json.loads(result.stdout)
-    except json.JSONDecodeError as exc:
-        raise RuntimeError(f"{mcap_path}: failed to parse helper JSON: {exc}") from exc
-
-
-def build_row(dataset_root: Path, scan: SegmentScan, bounds_bin: Path) -> BoundsRow | None:
-    mcap_path = find_unique_mcap(scan.segment_dir)
-    if mcap_path is None:
-        return None
-
-    bounds = probe_mcap_bounds(bounds_bin, mcap_path)
-    relative_segment_dir = scan.segment_dir.relative_to(dataset_root).as_posix()
-    parent = Path(relative_segment_dir).parent
-    group_path = "" if str(parent) == "." else parent.as_posix()
-    parts = Path(relative_segment_dir).parts
-    activity = parts[0] if parts else scan.segment_dir.name
-
-    start_ns = int(bounds["start_ns"])
-    end_ns = int(bounds["end_ns"])
-    return BoundsRow(
-        segment_dir=scan.segment_dir,
-        relative_segment_dir=relative_segment_dir,
-        group_path=group_path,
-        activity=activity,
-        segment_name=scan.segment_dir.name,
-        mcap_path=mcap_path,
-        start_ns=start_ns,
-        end_ns=end_ns,
-        duration_ns=max(0, end_ns - start_ns),
-        start_iso_utc=str(bounds["start_iso_utc"]),
-        end_iso_utc=str(bounds["end_iso_utc"]),
-        camera_count=len(scan.camera_labels),
-        camera_labels=",".join(scan.camera_labels),
-        video_message_count=int(bounds["video_message_count"]),
-        index_source="mcap_video_bounds",
-    )
-
-
-def init_db(conn: duckdb.DuckDBPyConnection) -> None:
-    conn.execute(
-        """
-        CREATE TABLE meta (
-            key VARCHAR PRIMARY KEY,
-            value VARCHAR NOT NULL
-        );
-        """
-    )
-    conn.execute(
-        """
-        CREATE TABLE segments (
-            segment_dir VARCHAR PRIMARY KEY,
-            relative_segment_dir VARCHAR NOT NULL,
-            group_path VARCHAR NOT NULL,
-            activity VARCHAR NOT NULL,
-            segment_name VARCHAR NOT NULL,
-            mcap_path VARCHAR NOT NULL,
-            start_ns BIGINT NOT NULL,
-            end_ns BIGINT NOT NULL,
-            duration_ns BIGINT NOT NULL,
-            start_iso_utc VARCHAR NOT NULL,
-            end_iso_utc VARCHAR NOT NULL,
-            camera_count INTEGER NOT NULL,
-            camera_labels VARCHAR NOT NULL,
-            video_message_count BIGINT NOT NULL,
-            index_source VARCHAR NOT NULL
-        );
-        """
-    )
-    conn.execute("CREATE INDEX segments_start_ns_idx ON segments(start_ns);")
-    conn.execute("CREATE INDEX segments_end_ns_idx ON segments(end_ns);")
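
The start_ns/end_ns indexes set up the interval lookups the query command issues later. The exact predicate is not visible in this hunk, so the following is only the usual overlap test over those columns (in-memory DuckDB and simplified schema, both assumptions mine):

    import duckdb

    conn = duckdb.connect()  # in-memory stand-in for the index file
    conn.execute("CREATE TABLE segments (segment_dir VARCHAR, start_ns BIGINT, end_ns BIGINT)")
    conn.execute("INSERT INTO segments VALUES ('a', 100, 200), ('b', 250, 300)")
    # A query window [150, 180] matches every segment whose span overlaps it:
    rows = conn.execute(
        "SELECT segment_dir FROM segments WHERE start_ns <= ? AND end_ns >= ?",
        [180, 150],
    ).fetchall()
    print(rows)  # [('a',)]
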
("dataset_root", str(dataset_root)), - ("built_at_utc", dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")), - ("default_timezone", inferred_timezone), - ], - ) - conn.executemany( - """ - INSERT INTO segments ( - segment_dir, - relative_segment_dir, - group_path, - activity, - segment_name, - mcap_path, - start_ns, - end_ns, - duration_ns, - start_iso_utc, - end_iso_utc, - camera_count, - camera_labels, - video_message_count, - index_source - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - [ - ( - str(row.segment_dir), - row.relative_segment_dir, - row.group_path, - row.activity, - row.segment_name, - str(row.mcap_path), - row.start_ns, - row.end_ns, - row.duration_ns, - row.start_iso_utc, - row.end_iso_utc, - row.camera_count, - row.camera_labels, - row.video_message_count, - row.index_source, - ) - for row in rows - ], - ) - finally: - conn.close() - temp_path.replace(index_path) - except Exception: - temp_path.unlink(missing_ok=True) - raise - - -def infer_dataset_timezone(rows: list[BoundsRow]) -> str: - offset_counts: dict[int, int] = {} - for row in rows: - folder_time = parse_folder_name_naive(row.segment_name) - if folder_time is None: - continue - actual_utc = dt.datetime.fromtimestamp(row.start_ns / 1_000_000_000, tz=dt.timezone.utc).replace(tzinfo=None) - offset_minutes = round((folder_time - actual_utc).total_seconds() / 60.0) - offset_counts[offset_minutes] = offset_counts.get(offset_minutes, 0) + 1 - - if not offset_counts: - return "local" - - minutes = max(offset_counts.items(), key=lambda item: item[1])[0] - if minutes == 0: - return "UTC" - - sign = "+" if minutes >= 0 else "-" - absolute_minutes = abs(minutes) - hours, mins = divmod(absolute_minutes, 60) - return f"UTC{sign}{hours:02d}:{mins:02d}" - - -def require_query_window(at: str | None, start: str | None, end: str | None, timezone_name: str) -> tuple[int, int]: - if at is not None and (start is not None or end is not None): - raise click.ClickException("use either --at or --start/--end, not both") - if at is not None: - return parse_timestamp_window(at, timezone_name) - if start is None or end is None: - raise click.ClickException("provide --at or both --start and --end") - start_ns = parse_timestamp_to_ns(start, timezone_name) - end_ns = parse_timestamp_to_ns(end, timezone_name) - if start_ns > end_ns: - raise click.ClickException("query start must be before or equal to query end") - return start_ns, end_ns - - -def load_meta(conn: duckdb.DuckDBPyConnection) -> dict[str, str]: - rows = conn.execute("SELECT key, value FROM meta").fetchall() - return {str(key): str(value) for key, value in rows} - - -def format_duration(duration_ns: int) -> str: - return f"{duration_ns / 1_000_000_000:.3f}s" - - -@click.group() -def cli() -> None: - """Build and query a DuckDB index of bundled ZED segment timestamps.""" - - -@cli.command() -@click.argument("dataset_root", type=click.Path(path_type=Path, file_okay=False)) -@click.option("--index", "index_path", type=click.Path(path_type=Path, dir_okay=False)) -@click.option("--recursive/--no-recursive", default=True, show_default=True) -@click.option("--jobs", type=click.IntRange(min=1), default=min(8, os.cpu_count() or 1), show_default=True) -@click.option("--bounds-bin", type=click.Path(path_type=Path, dir_okay=False)) -def build(dataset_root: Path, index_path: Path | None, recursive: bool, jobs: int, bounds_bin: Path | None) -> None: - """Build or replace the embedded DuckDB time index for DATASET_ROOT.""" - - dataset_root = 
dataset_root.expanduser().resolve() - index_path = (index_path or default_index_path(dataset_root)).expanduser().resolve() - bounds_binary = locate_binary("mcap_video_bounds", bounds_bin) - - valid_scans, ignored_partial_scans = discover_segment_dirs(dataset_root, recursive) - click.echo( - f"discovered {len(valid_scans)} valid segment directories under {dataset_root}", - err=True, - ) - if ignored_partial_scans: - click.echo(f"ignored {len(ignored_partial_scans)} partial segment directories", err=True) - - rows: list[BoundsRow] = [] - skipped_missing_mcap: list[Path] = [] - errors: list[str] = [] - - with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor: - future_to_scan: dict[concurrent.futures.Future[BoundsRow | None], SegmentScan] = { - executor.submit(build_row, dataset_root, scan, bounds_binary): scan for scan in valid_scans - } - for future in concurrent.futures.as_completed(future_to_scan): - scan = future_to_scan[future] - try: - row = future.result() - except Exception as exc: - errors.append(f"{scan.segment_dir}: {exc}") - continue - if row is None: - skipped_missing_mcap.append(scan.segment_dir) - continue - rows.append(row) - - rows.sort(key=lambda row: (row.start_ns, row.segment_dir.as_posix())) - - if skipped_missing_mcap: - click.echo(f"skipped {len(skipped_missing_mcap)} segments with missing or ambiguous MCAP files", err=True) - if errors: - for error in errors: - click.echo(f"error: {error}", err=True) - raise click.ClickException(f"failed to probe {len(errors)} segment(s)") - if not rows: - raise click.ClickException("no indexable MCAP segments were found") - - write_index(index_path, dataset_root, rows) - click.echo( - f"wrote {len(rows)} segments to {index_path} (skipped_missing_mcap={len(skipped_missing_mcap)})", - err=True, - ) - - -@cli.command() -@click.argument("dataset_root", type=click.Path(path_type=Path, file_okay=False)) -@click.option("--index", "index_path", type=click.Path(path_type=Path, dir_okay=False)) -@click.option("--at") -@click.option("--start") -@click.option("--end") -@click.option("--json", "as_json", is_flag=True) -@click.option("--timezone", "timezone_name", default="dataset", show_default=True) -def query( - dataset_root: Path, - index_path: Path | None, - at: str | None, - start: str | None, - end: str | None, - as_json: bool, - timezone_name: str, -) -> None: - """Query the embedded time index for matching segment folders.""" - - dataset_root = dataset_root.expanduser().resolve() - index_path = (index_path or default_index_path(dataset_root)).expanduser().resolve() - if not index_path.is_file(): - raise click.ClickException(f"index not found: {index_path}") - - conn = duckdb.connect(str(index_path), read_only=True) - try: - meta = load_meta(conn) - indexed_root = Path(meta.get("dataset_root", "")).expanduser().resolve() - if indexed_root != dataset_root: - raise click.ClickException( - f"index root mismatch: index was built for {indexed_root}, not {dataset_root}" - ) - effective_timezone_name = meta.get("default_timezone", "local") if timezone_name == "dataset" else timezone_name - query_start_ns, query_end_ns = require_query_window(at, start, end, effective_timezone_name) - display_timezone = resolve_timezone(effective_timezone_name) - - result_rows = conn.execute( - """ - SELECT - segment_dir, - relative_segment_dir, - group_path, - activity, - segment_name, - mcap_path, - start_ns, - end_ns, - duration_ns, - start_iso_utc, - end_iso_utc, - camera_count, - camera_labels, - video_message_count, - index_source - FROM 
segments - WHERE start_ns <= ? AND end_ns >= ? - ORDER BY start_ns, segment_dir - """, - [query_end_ns, query_start_ns], - ).fetchall() - finally: - conn.close() - - payload = [ - { - "segment_dir": row[0], - "relative_segment_dir": row[1], - "group_path": row[2], - "activity": row[3], - "segment_name": row[4], - "mcap_path": row[5], - "start_ns": row[6], - "end_ns": row[7], - "duration_ns": row[8], - "start_iso_utc": row[9], - "end_iso_utc": row[10], - "camera_count": row[11], - "camera_labels": row[12].split(",") if row[12] else [], - "video_message_count": row[13], - "index_source": row[14], - "start_display": format_ns_iso(row[6], display_timezone), - "end_display": format_ns_iso(row[7], display_timezone), - } - for row in result_rows - ] - - if as_json: - click.echo(json.dumps(payload, indent=2, ensure_ascii=False)) - return - - if not payload: - click.echo("no matching segments") - return - - click.echo(f"matched {len(payload)} segment(s)") - for row in payload: - click.echo( - " | ".join( - ( - row["start_display"], - row["end_display"], - format_duration(int(row["duration_ns"])), - row["segment_dir"], - row["mcap_path"], - ) - ) - ) - - -if __name__ == "__main__": - cli() diff --git a/src/tools/mcap_video_bounds.cpp b/src/tools/mcap_video_bounds.cpp deleted file mode 100644 index 516a174..0000000 --- a/src/tools/mcap_video_bounds.cpp +++ /dev/null @@ -1,219 +0,0 @@ -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { - -enum class ToolExitCode : int { - Success = 0, - UsageError = 2, - OpenError = 3, - SchemaError = 4, - ParseError = 5, - EmptyError = 6, -}; - -struct Config { - std::string input_path{}; - bool json{false}; -}; - -struct BoundsSummary { - std::uint64_t start_ns{std::numeric_limits::max()}; - std::uint64_t end_ns{0}; - std::uint64_t message_count{0}; -}; - -[[nodiscard]] -constexpr int exit_code(const ToolExitCode code) { - return static_cast(code); -} - -[[nodiscard]] -std::uint64_t proto_timestamp_ns(const google::protobuf::Timestamp ×tamp) { - return static_cast(timestamp.seconds()) * 1000000000ull + static_cast(timestamp.nanos()); -} - -[[nodiscard]] -std::string json_escape(const std::string &input) { - std::ostringstream output; - for (const unsigned char ch : input) { - switch (ch) { - case '\\': - output << "\\\\"; - break; - case '"': - output << "\\\""; - break; - case '\b': - output << "\\b"; - break; - case '\f': - output << "\\f"; - break; - case '\n': - output << "\\n"; - break; - case '\r': - output << "\\r"; - break; - case '\t': - output << "\\t"; - break; - default: - if (ch < 0x20) { - output << "\\u" << std::hex << std::setw(4) << std::setfill('0') << static_cast(ch) << std::dec; - } else { - output << static_cast(ch); - } - break; - } - } - return output.str(); -} - -[[nodiscard]] -std::string format_iso_utc(const std::uint64_t timestamp_ns) { - const auto seconds = static_cast(timestamp_ns / 1000000000ull); - const auto nanos = timestamp_ns % 1000000000ull; - std::tm tm{}; -#if defined(_WIN32) - gmtime_s(&tm, &seconds); -#else - gmtime_r(&seconds, &tm); -#endif - std::ostringstream output; - output << std::put_time(&tm, "%Y-%m-%dT%H:%M:%S") << '.' 
<< std::setw(9) << std::setfill('0') << nanos << 'Z'; - return output.str(); -} - -[[nodiscard]] -bool is_video_message(const auto &view) { - if (view.channel == nullptr || view.schema == nullptr) { - return false; - } - return view.schema->encoding == "protobuf" && - view.schema->name == "foxglove.CompressedVideo" && - view.channel->messageEncoding == "protobuf"; -} - -[[nodiscard]] -BoundsSummary collect_bounds(const Config &config, ToolExitCode &error_code) { - mcap::McapReader reader{}; - const auto open_status = reader.open(config.input_path); - if (!open_status.ok()) { - spdlog::error("failed to open MCAP file '{}': {}", config.input_path, open_status.message); - error_code = ToolExitCode::OpenError; - return {}; - } - - BoundsSummary summary{}; - auto messages = reader.readMessages(); - for (auto it = messages.begin(); it != messages.end(); ++it) { - if (it->channel == nullptr) { - spdlog::error("MCAP message missing channel metadata"); - reader.close(); - error_code = ToolExitCode::SchemaError; - return {}; - } - if (it->schema == nullptr) { - continue; - } - if (!is_video_message(*it)) { - continue; - } - - foxglove::CompressedVideo message{}; - if (!message.ParseFromArray(it->message.data, static_cast(it->message.dataSize))) { - spdlog::error("failed to parse foxglove.CompressedVideo payload from '{}'", config.input_path); - reader.close(); - error_code = ToolExitCode::ParseError; - return {}; - } - - auto timestamp_ns = proto_timestamp_ns(message.timestamp()); - if (timestamp_ns == 0) { - timestamp_ns = it->message.logTime; - } - - summary.start_ns = std::min(summary.start_ns, timestamp_ns); - summary.end_ns = std::max(summary.end_ns, timestamp_ns); - summary.message_count += 1; - } - - reader.close(); - - if (summary.message_count == 0) { - spdlog::error("no foxglove.CompressedVideo messages found in '{}'", config.input_path); - error_code = ToolExitCode::EmptyError; - return {}; - } - - error_code = ToolExitCode::Success; - return summary; -} - -void print_json(const Config &config, const BoundsSummary &summary) { - std::cout - << '{' - << "\"input_path\":\"" << json_escape(config.input_path) << "\"," - << "\"start_ns\":" << summary.start_ns << ',' - << "\"end_ns\":" << summary.end_ns << ',' - << "\"duration_ns\":" << (summary.end_ns - summary.start_ns) << ',' - << "\"video_message_count\":" << summary.message_count << ',' - << "\"start_iso_utc\":\"" << format_iso_utc(summary.start_ns) << "\"," - << "\"end_iso_utc\":\"" << format_iso_utc(summary.end_ns) << "\"" - << "}\n"; -} - -void print_text(const Config &config, const BoundsSummary &summary) { - std::cout - << config.input_path << '\t' - << summary.start_ns << '\t' - << summary.end_ns << '\t' - << summary.message_count << '\t' - << format_iso_utc(summary.start_ns) << '\t' - << format_iso_utc(summary.end_ns) - << '\n'; -} - -} // namespace - -int main(int argc, char **argv) { - Config config{}; - CLI::App app{"mcap_video_bounds - emit bundled video timestamp bounds from an MCAP"}; - app.add_option("input", config.input_path, "Input MCAP path")->required(); - app.add_flag("--json", config.json, "Emit a JSON object instead of tab-separated text"); - - try { - app.parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app.exit(e); - } - - auto error_code = ToolExitCode::Success; - const auto summary = collect_bounds(config, error_code); - if (error_code != ToolExitCode::Success) { - return exit_code(error_code); - } - - if (config.json) { - print_json(config, summary); - } else { - print_text(config, summary); - } 
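// Editor's note (standalone sketch, not part of the deleted file): the
// Python indexer's probe_mcap_bounds() above consumes this tool's --json
// output via json.loads(). A minimal, dependency-free illustration of that
// contract; json_uint_field() is a hypothetical helper written only for this
// example, and it assumes the numeric fields are emitted unquoted, in the
// order print_json() writes them. All values below are invented.
#include <cassert>
#include <string>

// Returns the integer value following "key": in a flat JSON object.
unsigned long long json_uint_field(const std::string &json, const std::string &key) {
  const auto pos = json.find("\"" + key + "\":");
  assert(pos != std::string::npos);
  return std::stoull(json.substr(pos + key.size() + 3));
}

int main() {
  // One line of --json output for a fictitious input file:
  const std::string line =
      "{\"input_path\":\"a.mcap\",\"start_ns\":1700000000123456789,"
      "\"end_ns\":1700000001123456789,\"duration_ns\":1000000000,"
      "\"video_message_count\":30,"
      "\"start_iso_utc\":\"2023-11-14T22:13:20.123456789Z\","
      "\"end_iso_utc\":\"2023-11-14T22:13:21.123456789Z\"}";
  assert(json_uint_field(line, "start_ns") == 1'700'000'000'123'456'789ull);
  assert(json_uint_field(line, "duration_ns") == 1'000'000'000ull);
  return 0;
}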
- - return exit_code(ToolExitCode::Success); -} diff --git a/src/tools/zed_progress_bar.cpp b/src/tools/zed_progress_bar.cpp deleted file mode 100644 index 96b98b8..0000000 --- a/src/tools/zed_progress_bar.cpp +++ /dev/null @@ -1,178 +0,0 @@ -#include "cvmmap_streamer/tools/zed_progress_bar.hpp" - -#include -#include -#include -#include -#include - -#include - -namespace cvmmap_streamer::zed_tools { -namespace { - -[[nodiscard]] -std::string format_duration(const double seconds_raw) { - const auto seconds = seconds_raw > 0.0 ? static_cast(std::llround(seconds_raw)) : 0ll; - const auto hours = seconds / 3600; - const auto minutes = (seconds % 3600) / 60; - const auto secs = seconds % 60; - - char buffer[32]{}; - if (hours > 0) { - std::snprintf(buffer, sizeof(buffer), "%02lld:%02lld:%02lld", hours, minutes, secs); - } else { - std::snprintf(buffer, sizeof(buffer), "%02lld:%02lld", minutes, secs); - } - return std::string(buffer); -} - -} // namespace - -bool stderr_supports_progress_bar() { - return ::isatty(STDERR_FILENO) == 1; -} - -struct ProgressBar::Impl { - using Clock = std::chrono::steady_clock; - - explicit Impl(const std::uint64_t total_frames_arg) - : total_frames(total_frames_arg), - enabled(stderr_supports_progress_bar()), - started_at(Clock::now()), - last_render_at(started_at) {} - - void render_prefix(const double ratio, const Clock::time_point now, char *line, const std::size_t line_size) { - const auto filled = static_cast(std::llround(ratio * 24.0)); - std::string bar{}; - bar.reserve(24); - for (std::size_t index = 0; index < 24; ++index) { - bar.push_back(index < filled ? '#' : '-'); - } - - const auto elapsed_seconds = std::chrono::duration(now - started_at).count(); - const auto eta_seconds = ratio > 0.0 ? elapsed_seconds * (1.0 - ratio) / ratio : 0.0; - std::snprintf( - line, - line_size, - "\r[%s] %6.2f%% | %s elapsed | %s ETA", - bar.c_str(), - ratio * 100.0, - format_duration(elapsed_seconds).c_str(), - format_duration(eta_seconds).c_str()); - } - - void render(const std::uint64_t completed_frames, const bool force) { - if (!enabled || total_frames == 0) { - return; - } - - const auto now = Clock::now(); - if (!force && rendered && now - last_render_at < std::chrono::milliseconds(125)) { - return; - } - last_render_at = now; - rendered = true; - - const auto bounded_completed = completed_frames > total_frames ? total_frames : completed_frames; - const double ratio = static_cast(bounded_completed) / static_cast(total_frames); - const auto elapsed_seconds = std::chrono::duration(now - started_at).count(); - const auto fps = elapsed_seconds > 0.0 ? 
static_cast<double>(bounded_completed) / elapsed_seconds : 0.0;
-
-    char line[256]{};
-    render_prefix(ratio, now, line, sizeof(line));
-    const auto written = std::char_traits<char>::length(line);
-    std::snprintf(
-        line + written,
-        sizeof(line) - written,
-        " | %llu/%llu | %5.1f fps\x1b[K",
-        static_cast<unsigned long long>(bounded_completed),
-        static_cast<unsigned long long>(total_frames),
-        fps);
-    std::fprintf(stderr, "%s", line);
-    std::fflush(stderr);
-  }
-
-  void render_fraction(const double fraction, const std::string_view detail, const bool force) {
-    if (!enabled) {
-      return;
-    }
-
-    const auto now = Clock::now();
-    if (!force && rendered && now - last_render_at < std::chrono::milliseconds(125)) {
-      return;
-    }
-    last_render_at = now;
-    rendered = true;
-
-    const double bounded_fraction = std::clamp(fraction, 0.0, 1.0);
-    char line[256]{};
-    render_prefix(bounded_fraction, now, line, sizeof(line));
-    if (!detail.empty()) {
-      const auto written = std::char_traits<char>::length(line);
-      std::snprintf(line + written, sizeof(line) - written, " | %.*s\x1b[K", static_cast<int>(detail.size()), detail.data());
-    } else {
-      const auto written = std::char_traits<char>::length(line);
-      std::snprintf(line + written, sizeof(line) - written, "\x1b[K");
-    }
-    std::fprintf(stderr, "%s", line);
-    std::fflush(stderr);
-  }
-
-  std::uint64_t total_frames{0};
-  bool enabled{false};
-  bool rendered{false};
-  Clock::time_point started_at{};
-  Clock::time_point last_render_at{};
-};
-
-ProgressBar::ProgressBar(const std::uint64_t total_frames)
-    : impl_(std::make_unique<Impl>(total_frames)) {}
-
-ProgressBar::~ProgressBar() = default;
-
-bool ProgressBar::enabled() const {
-  return impl_ != nullptr && impl_->enabled;
-}
-
-void ProgressBar::update(const std::uint64_t completed_frames) {
-  impl_->render(completed_frames, false);
-}
-
-void ProgressBar::update_fraction(const double fraction, const std::string_view detail) {
-  impl_->render_fraction(fraction, detail, false);
-}
-
-void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) {
-  if (impl_ == nullptr || !impl_->enabled) {
-    return;
-  }
-
-  if (!(success && impl_->rendered && completed_frames >= impl_->total_frames)) {
-    impl_->render(completed_frames, true);
-    if (!impl_->rendered) {
-      return;
-    }
-  }
-
-  std::fprintf(stderr, "%s", success ? "\n" : " [failed]\n");
-  std::fflush(stderr);
-}
-
-void ProgressBar::finish_fraction(const double fraction, const bool success, const std::string_view detail) {
-  if (impl_ == nullptr || !impl_->enabled) {
-    return;
-  }
-
-  if (!(success && impl_->rendered && fraction >= 1.0)) {
-    impl_->render_fraction(fraction, detail, true);
-    if (!impl_->rendered) {
-      return;
-    }
-  }
-
-  std::fprintf(stderr, "%s", success ?
"\n" : " [failed]\n"); - std::fflush(stderr); -} - -} // namespace cvmmap_streamer::zed_tools diff --git a/src/tools/zed_svo_grid_to_mp4.cpp b/src/tools/zed_svo_grid_to_mp4.cpp deleted file mode 100644 index 4855fab..0000000 --- a/src/tools/zed_svo_grid_to_mp4.cpp +++ /dev/null @@ -1,728 +0,0 @@ -#include -#include - -#include - -#include -#include - -#include "cvmmap_streamer/tools/zed_progress_bar.hpp" -#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { - -using cvmmap_streamer::zed_tools::EncodeTuning; -using cvmmap_streamer::zed_tools::Mp4Writer; -using cvmmap_streamer::zed_tools::ProgressBar; -using cvmmap_streamer::zed_tools::frame_period_ns; -using cvmmap_streamer::zed_tools::parse_codec; -using cvmmap_streamer::zed_tools::parse_encoder_device; -using cvmmap_streamer::zed_tools::parse_preset; -using cvmmap_streamer::zed_tools::parse_tune; - -constexpr std::size_t kExpectedInputCount = 4; - -enum class ToolExitCode : int { - Success = 0, - UsageError = 2, - RuntimeError = 3, -}; - -struct CliOptions { - std::vector input_paths{}; - std::string segment_dir{}; - std::string output_path{}; - std::string codec{"h265"}; - std::string encoder_device{"auto"}; - std::string preset{"fast"}; - std::string tune{"low-latency"}; - int quality{cvmmap_streamer::zed_tools::kDefaultQuality}; - std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize}; - std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames}; - double start_offset_seconds{0.0}; - double duration_seconds{0.0}; - bool has_duration{false}; - double output_fps{0.0}; - bool has_output_fps{false}; - double tile_scale{0.5}; -}; - -struct SourceSpec { - std::filesystem::path path{}; - std::string label{}; -}; - -struct CameraStream { - SourceSpec source{}; - std::unique_ptr camera{}; - sl::RuntimeParameters runtime{}; - sl::Mat current_frame{}; - sl::Mat next_frame{}; - std::uint64_t current_timestamp_ns{0}; - std::uint64_t next_timestamp_ns{0}; - std::uint64_t first_timestamp_ns{0}; - std::uint64_t last_timestamp_ns{0}; - std::uint64_t total_frames{0}; - std::uint64_t nominal_frame_period_ns{0}; - float fps{0.0f}; - std::uint32_t width{0}; - std::uint32_t height{0}; - int sync_position{-1}; - bool has_next{false}; -}; - -[[nodiscard]] -constexpr int exit_code(const ToolExitCode code) { - return static_cast(code); -} - -[[nodiscard]] -std::string zed_string(const sl::String &value) { - return std::string(value.c_str() == nullptr ? 
"" : value.c_str()); -} - -[[nodiscard]] -std::string zed_status_string(const sl::ERROR_CODE code) { - return zed_string(sl::toString(code)); -} - -[[nodiscard]] -std::expected validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) { - if (mat.getDataType() != sl::MAT_TYPE::U8_C3) { - return std::unexpected(std::string(label) + " must be U8_C3"); - } - if (mat.getWidth() == 0 || mat.getHeight() == 0) { - return std::unexpected(std::string(label) + " dimensions must be non-zero"); - } - if (mat.getPtr(sl::MEM::CPU) == nullptr) { - return std::unexpected(std::string(label) + " CPU buffer is null"); - } - return {}; -} - -[[nodiscard]] -std::expected, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) { - if (!std::filesystem::is_directory(segment_dir)) { - return std::unexpected("segment directory does not exist: " + segment_dir.string()); - } - - const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase}; - std::vector> ordered_paths{}; - for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) { - if (!entry.is_regular_file()) { - continue; - } - - std::smatch match{}; - const auto filename = entry.path().filename().string(); - if (!std::regex_match(filename, match, pattern)) { - continue; - } - ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path()); - } - - std::sort( - ordered_paths.begin(), - ordered_paths.end(), - [](const auto &left, const auto &right) { - return left.first < right.first; - }); - - if (ordered_paths.size() != kExpectedInputCount) { - return std::unexpected( - "expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size())); - } - - std::vector sources{}; - sources.reserve(ordered_paths.size()); - for (const auto &[camera_index, path] : ordered_paths) { - sources.push_back(SourceSpec{ - .path = path, - .label = "zed" + std::to_string(camera_index), - }); - } - return sources; -} - -[[nodiscard]] -std::expected, std::string> resolve_sources(const CliOptions &options) { - if (!options.segment_dir.empty()) { - return discover_segment_inputs(std::filesystem::path{options.segment_dir}); - } - - if (options.input_paths.size() != kExpectedInputCount) { - return std::unexpected("repeat --input exactly 4 times"); - } - - std::vector sources{}; - sources.reserve(options.input_paths.size()); - for (std::size_t index = 0; index < options.input_paths.size(); ++index) { - const auto path = std::filesystem::path{options.input_paths[index]}; - if (!std::filesystem::is_regular_file(path)) { - return std::unexpected("input file does not exist: " + path.string()); - } - sources.push_back(SourceSpec{ - .path = path, - .label = "view" + std::to_string(index + 1), - }); - } - return sources; -} - -[[nodiscard]] -std::filesystem::path derive_grid_output_path(const CliOptions &options, const std::vector &sources) { - if (!options.output_path.empty()) { - return std::filesystem::path{options.output_path}; - } - - if (!options.segment_dir.empty()) { - const auto segment_dir = std::filesystem::path{options.segment_dir}; - return segment_dir / (segment_dir.filename().string() + "_grid.mp4"); - } - - auto output_path = sources.front().path; - output_path.replace_extension(""); - output_path += "_grid.mp4"; - return output_path; -} - -[[nodiscard]] -std::string format_unix_timestamp(const std::uint64_t timestamp_ns) { - const auto seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond; - const auto milliseconds = (timestamp_ns % 
cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull; - return std::to_string(seconds) + "." + (milliseconds < 100 ? (milliseconds < 10 ? "00" : "0") : "") + std::to_string(milliseconds); -} - -void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns) { - const auto text = format_unix_timestamp(timestamp_ns); - int baseline = 0; - const auto font_face = cv::FONT_HERSHEY_SIMPLEX; - const double font_scale = 0.8; - const int thickness = 2; - const auto text_size = cv::getTextSize(text, font_face, font_scale, thickness, &baseline); - const cv::Point origin{16, 16 + text_size.height}; - const cv::Rect background{ - 8, - 8, - text_size.width + 16, - text_size.height + baseline + 16, - }; - cv::rectangle(canvas, background, cv::Scalar(0, 0, 0), cv::FILLED); - cv::putText( - canvas, - text, - origin, - font_face, - font_scale, - cv::Scalar(255, 255, 255), - thickness, - cv::LINE_AA); -} - -[[nodiscard]] -std::expected read_image_timestamp_ns( - sl::Camera &camera, - const std::optional fallback_timestamp_ns, - const std::uint64_t nominal_frame_period_ns) { - auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds(); - if (timestamp_ns == 0) { - if (!fallback_timestamp_ns) { - return std::unexpected("ZED SDK returned a zero image timestamp for the first frame"); - } - timestamp_ns = *fallback_timestamp_ns + nominal_frame_period_ns; - } - return timestamp_ns; -} - -[[nodiscard]] -std::expected read_into_mat( - sl::Camera &camera, - sl::RuntimeParameters &runtime, - sl::Mat &target, - std::optional fallback_timestamp_ns, - std::uint64_t nominal_frame_period_ns, - std::uint64_t ×tamp_ns_out, - const std::string_view label) { - const auto grab_status = camera.grab(runtime); - if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) { - return std::unexpected("end-of-svo"); - } - if (grab_status != sl::ERROR_CODE::SUCCESS) { - return std::unexpected("failed to grab frame for " + std::string(label) + ": " + zed_status_string(grab_status)); - } - - const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU); - if (image_status != sl::ERROR_CODE::SUCCESS) { - return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " + zed_status_string(image_status)); - } - if (auto valid = validate_u8c3_mat(target, label); !valid) { - return std::unexpected(valid.error()); - } - - auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns); - if (!timestamp_ns) { - return std::unexpected(timestamp_ns.error()); - } - timestamp_ns_out = *timestamp_ns; - return {}; -} - -[[nodiscard]] -std::expected fill_next_frame(CameraStream &stream) { - std::uint64_t timestamp_ns = 0; - auto next = read_into_mat( - *stream.camera, - stream.runtime, - stream.next_frame, - stream.current_timestamp_ns, - stream.nominal_frame_period_ns, - timestamp_ns, - stream.source.label); - if (!next) { - if (next.error() == "end-of-svo") { - stream.has_next = false; - return {}; - } - return std::unexpected(next.error()); - } - - stream.next_timestamp_ns = timestamp_ns; - stream.has_next = true; - return {}; -} - -[[nodiscard]] -std::expected promote_next_frame(CameraStream &stream) { - if (!stream.has_next) { - return std::unexpected("no buffered next frame is available for " + stream.source.label); - } - - std::swap(stream.current_frame, stream.next_frame); - std::swap(stream.current_timestamp_ns, stream.next_timestamp_ns); - stream.has_next = false; - return fill_next_frame(stream); -} - 
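// Editor's note (illustrative sketch, not part of the deleted file): the
// current/next double-buffering above implements a "latest frame at or
// before the target time" policy; the emit loop later calls
// promote_next_frame() while next_timestamp_ns <= target. The same selection
// logic over a plain timestamp array, with invented sample values:
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Advances `index` until ts[index] is the last entry <= target, assuming a
// non-decreasing timestamp sequence.
std::size_t advance_to_target(const std::vector<std::uint64_t> &ts,
                              std::size_t index, std::uint64_t target) {
  while (index + 1 < ts.size() && ts[index + 1] <= target) {
    ++index;  // mirrors promote_next_frame(): consume while next <= target
  }
  return index;
}

int main() {
  const std::vector<std::uint64_t> ts{100, 133, 166, 200};
  assert(advance_to_target(ts, 0, 150) == 1);  // 133 is the last frame <= 150
  assert(advance_to_target(ts, 0, 999) == 3);  // clamps at the final frame
  return 0;
}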
-[[nodiscard]] -std::expected read_last_readable_timestamp(CameraStream &stream) { - const auto last_candidate = static_cast(stream.total_frames - 1); - std::string last_error{}; - - for (int position = last_candidate; position >= 0; --position) { - stream.camera->setSVOPosition(position); - std::uint64_t timestamp_ns = 0; - auto frame = read_into_mat( - *stream.camera, - stream.runtime, - stream.current_frame, - std::nullopt, - stream.nominal_frame_period_ns, - timestamp_ns, - stream.source.label); - if (frame) { - const auto skipped_tail_frames = static_cast(last_candidate - position); - if (skipped_tail_frames > 0) { - spdlog::warn( - "skipping {} unreadable tail frame(s) for {} last_error={}", - skipped_tail_frames, - stream.source.path.string(), - last_error); - } - return timestamp_ns; - } - last_error = frame.error(); - } - - return std::unexpected( - "failed to read any trailing frame for " + stream.source.path.string() + ": " + last_error); -} - -[[nodiscard]] -std::expected open_camera_stream(const SourceSpec &source) { - CameraStream stream{}; - stream.source = source; - stream.camera = std::make_unique(); - - sl::InitParameters init{}; - init.input.setFromSVOFile(source.path.c_str()); - init.svo_real_time_mode = false; - init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE; - init.coordinate_units = sl::UNIT::METER; - init.depth_mode = sl::DEPTH_MODE::NONE; - init.sdk_verbose = false; - - const auto open_status = stream.camera->open(init); - if (open_status != sl::ERROR_CODE::SUCCESS) { - return std::unexpected("failed to open SVO '" + source.path.string() + "': " + zed_status_string(open_status)); - } - - const auto total_frames = stream.camera->getSVONumberOfFrames(); - if (total_frames <= 0) { - return std::unexpected("input SVO has no frames: " + source.path.string()); - } - stream.total_frames = static_cast(total_frames); - - const auto camera_info = stream.camera->getCameraInformation().camera_configuration; - stream.width = static_cast(camera_info.resolution.width); - stream.height = static_cast(camera_info.resolution.height); - stream.fps = camera_info.fps; - stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps); - if (stream.width == 0 || stream.height == 0) { - return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string()); - } - - std::uint64_t first_timestamp_ns = 0; - auto first_frame = read_into_mat( - *stream.camera, - stream.runtime, - stream.current_frame, - std::nullopt, - stream.nominal_frame_period_ns, - first_timestamp_ns, - source.label); - if (!first_frame) { - return std::unexpected(first_frame.error()); - } - stream.first_timestamp_ns = first_timestamp_ns; - - auto last_timestamp_ns = read_last_readable_timestamp(stream); - if (!last_timestamp_ns) { - return std::unexpected(last_timestamp_ns.error()); - } - stream.last_timestamp_ns = *last_timestamp_ns; - - return stream; -} - -void close_camera_streams(std::vector &streams) { - for (auto &stream : streams) { - if (stream.camera != nullptr && stream.camera->isOpened()) { - stream.camera->close(); - } - } -} - -} // namespace - -int main(int argc, char **argv) { - CliOptions options{}; - - CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"}; - auto *input_option = app.add_option("--input", options.input_paths, "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)"); - auto *segment_dir_option = app.add_option("--segment-dir", options.segment_dir, "Segment directory containing 
*_zed[1-4].svo or *_zed[1-4].svo2 files"); - input_option->excludes(segment_dir_option); - segment_dir_option->excludes(input_option); - app.add_option("--output", options.output_path, "Output MP4 file"); - app.add_option("--codec", options.codec, "Video codec (h264|h265)") - ->check(CLI::IsMember({"h264", "h265"})); - app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)") - ->check(CLI::IsMember({"auto", "nvidia", "software"})); - app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)") - ->check(CLI::IsMember({"fast", "balanced", "quality"})); - app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)") - ->check(CLI::IsMember({"low-latency", "balanced"})); - app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)") - ->check(CLI::Range(0, 51)); - app.add_option("--gop", options.gop, "Encoder GOP length in frames") - ->check(CLI::PositiveNumber); - app.add_option("--b-frames", options.b_frames, "Encoder B-frame count") - ->check(CLI::NonNegativeNumber); - app.add_option("--start-offset-seconds", options.start_offset_seconds, "Offset to apply after the synced common start time in seconds") - ->check(CLI::NonNegativeNumber); - auto *duration_option = app.add_option("--duration-seconds", options.duration_seconds, "Limit export duration in seconds after sync") - ->check(CLI::PositiveNumber); - auto *output_fps_option = app.add_option("--output-fps", options.output_fps, "Composite output frame rate (default: max input fps)") - ->check(CLI::PositiveNumber); - app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution") - ->check(CLI::Range(0.1, 1.0)); - - try { - app.parse(argc, argv); - } catch (const CLI::ParseError &error) { - return app.exit(error); - } - options.has_duration = duration_option->count() > 0; - options.has_output_fps = output_fps_option->count() > 0; - - if (options.input_paths.empty() && options.segment_dir.empty()) { - spdlog::error("provide either --segment-dir or repeat --input exactly 4 times"); - return exit_code(ToolExitCode::UsageError); - } - if (options.b_frames > options.gop) { - spdlog::error( - "invalid encoder config: b-frames {} must be <= gop {}", - options.b_frames, - options.gop); - return exit_code(ToolExitCode::UsageError); - } - - auto codec = parse_codec(options.codec); - if (!codec) { - spdlog::error("{}", codec.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto encoder_device = parse_encoder_device(options.encoder_device); - if (!encoder_device) { - spdlog::error("{}", encoder_device.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto preset = parse_preset(options.preset); - if (!preset) { - spdlog::error("{}", preset.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto tune = parse_tune(options.tune); - if (!tune) { - spdlog::error("{}", tune.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto sources = resolve_sources(options); - if (!sources) { - spdlog::error("{}", sources.error()); - return exit_code(ToolExitCode::UsageError); - } - - const auto output_path = derive_grid_output_path(options, *sources); - if (output_path.has_parent_path()) { - std::filesystem::create_directories(output_path.parent_path()); - } - - const EncodeTuning tuning{ - .preset = *preset, - .tune = *tune, - .quality = options.quality, - .gop = options.gop, - .b_frames = options.b_frames, - }; - - std::vector streams{}; - 
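// Editor's note (worked sketch, not part of the deleted file): the sync
// window computed below takes the latest per-camera first timestamp as the
// common start and the earliest per-camera last timestamp as the common end,
// then derives the output frame count by ceiling division over the output
// frame period. With invented nanosecond values:
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const std::vector<std::uint64_t> first{100, 130, 110, 120};
  const std::vector<std::uint64_t> last{900, 910, 905, 950};
  const auto start_ts = *std::max_element(first.begin(), first.end());           // 130
  const auto end_exclusive_ts = *std::min_element(last.begin(), last.end()) + 1; // 901
  const std::uint64_t period_ns = 100;  // stand-in for frame_period_ns(fps)
  const auto frames = (end_exclusive_ts - start_ts + period_ns - 1) / period_ns;
  assert(frames == 8);  // ceil(771 / 100) = 8 composite frames to emit
  return 0;
}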
streams.reserve(sources->size()); - for (const auto &source : *sources) { - auto stream = open_camera_stream(source); - if (!stream) { - close_camera_streams(streams); - spdlog::error("{}", stream.error()); - return exit_code(ToolExitCode::RuntimeError); - } - streams.push_back(std::move(*stream)); - } - - const auto sync_start_ts = std::max_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.first_timestamp_ns < right.first_timestamp_ns; - })->first_timestamp_ns; - const auto start_offset_ns = static_cast(std::llround(options.start_offset_seconds * 1'000'000'000.0)); - const auto effective_start_ts = sync_start_ts + start_offset_ns; - - const auto common_end_ts = std::min_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.last_timestamp_ns < right.last_timestamp_ns; - })->last_timestamp_ns; - const auto requested_end_exclusive_ts = options.has_duration - ? effective_start_ts + static_cast(std::llround(options.duration_seconds * 1'000'000'000.0)) - : common_end_ts + 1; - const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1); - if (effective_start_ts >= output_end_exclusive_ts) { - close_camera_streams(streams); - spdlog::error( - "synced time window is empty: start_ts={} end_ts={}", - effective_start_ts, - output_end_exclusive_ts); - return exit_code(ToolExitCode::UsageError); - } - - std::uint32_t source_width = streams.front().width; - std::uint32_t source_height = streams.front().height; - float max_input_fps = streams.front().fps; - for (const auto &stream : streams) { - if (stream.width != source_width || stream.height != source_height) { - close_camera_streams(streams); - spdlog::error( - "all inputs must share the same resolution: expected {}x{}, got {}x{} for {}", - source_width, - source_height, - stream.width, - stream.height, - stream.source.path.string()); - return exit_code(ToolExitCode::UsageError); - } - max_input_fps = std::max(max_input_fps, stream.fps); - } - - const auto output_fps = options.has_output_fps ? 
static_cast(options.output_fps) : max_input_fps; - const auto output_period_ns = frame_period_ns(output_fps); - const auto total_frames_to_emit = - static_cast((output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns); - - for (auto &stream : streams) { - stream.sync_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts}); - if (stream.sync_position < 0) { - close_camera_streams(streams); - spdlog::error( - "failed to compute synced start frame for {} at timestamp {}", - stream.source.path.string(), - effective_start_ts); - return exit_code(ToolExitCode::RuntimeError); - } - - stream.camera->setSVOPosition(stream.sync_position); - std::uint64_t current_timestamp_ns = 0; - auto current = read_into_mat( - *stream.camera, - stream.runtime, - stream.current_frame, - std::nullopt, - stream.nominal_frame_period_ns, - current_timestamp_ns, - stream.source.label); - if (!current) { - close_camera_streams(streams); - spdlog::error("{}", current.error()); - return exit_code(ToolExitCode::RuntimeError); - } - stream.current_timestamp_ns = current_timestamp_ns; - - auto next = fill_next_frame(stream); - if (!next) { - close_camera_streams(streams); - spdlog::error("{}", next.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) { - auto promote = promote_next_frame(stream); - if (!promote) { - close_camera_streams(streams); - spdlog::error("{}", promote.error()); - return exit_code(ToolExitCode::RuntimeError); - } - } - - spdlog::info( - "ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}", - stream.source.path.string(), - stream.source.label, - stream.sync_position, - stream.first_timestamp_ns, - stream.current_timestamp_ns, - stream.has_next ? 
stream.next_timestamp_ns : 0); - } - - const auto tile_width = static_cast(std::llround(static_cast(source_width) * options.tile_scale)); - const auto tile_height = static_cast(std::llround(static_cast(source_height) * options.tile_scale)); - if (tile_width <= 0 || tile_height <= 0) { - close_camera_streams(streams); - spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale); - return exit_code(ToolExitCode::UsageError); - } - - const auto composite_width = tile_width * 2; - const auto composite_height = tile_height * 2; - - Mp4Writer writer{}; - if (auto open_writer = writer.open( - output_path, - *codec, - *encoder_device, - static_cast(composite_width), - static_cast(composite_height), - output_fps, - tuning); - !open_writer) { - close_camera_streams(streams); - spdlog::error("failed to initialize MP4 writer: {}", open_writer.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - cv::Mat composite(composite_height, composite_width, CV_8UC3); - std::vector resized_tiles(streams.size()); - ProgressBar progress{total_frames_to_emit}; - - for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) { - const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns; - if (target_timestamp_ns >= output_end_exclusive_ts) { - break; - } - - for (auto &stream : streams) { - while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) { - auto promote = promote_next_frame(stream); - if (!promote) { - progress.finish(emitted_frames, false); - close_camera_streams(streams); - spdlog::error("{}", promote.error()); - return exit_code(ToolExitCode::RuntimeError); - } - } - } - - composite.setTo(cv::Scalar(0, 0, 0)); - for (std::size_t index = 0; index < streams.size(); ++index) { - auto &stream = streams[index]; - cv::Mat source_view( - static_cast(stream.current_frame.getHeight()), - static_cast(stream.current_frame.getWidth()), - CV_8UC3, - stream.current_frame.getPtr(sl::MEM::CPU), - stream.current_frame.getStepBytes(sl::MEM::CPU)); - cv::resize(source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0, cv::INTER_AREA); - - const int row = static_cast(index / 2); - const int col = static_cast(index % 2); - const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height}; - resized_tiles[index].copyTo(composite(roi)); - } - - draw_timestamp_overlay(composite, target_timestamp_ns); - if (auto write = writer.write_bgr_frame( - composite.data, - static_cast(composite.step), - target_timestamp_ns - effective_start_ts); - !write) { - progress.finish(emitted_frames, false); - close_camera_streams(streams); - spdlog::error("failed to encode or mux frame: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - progress.update(emitted_frames + 1); - } - - if (auto flush = writer.flush(); !flush) { - progress.finish(total_frames_to_emit, false); - close_camera_streams(streams); - spdlog::error("failed to finalize MP4 output: {}", flush.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - progress.finish(total_frames_to_emit, true); - close_camera_streams(streams); - spdlog::info( - "converted {} synced frames to '{}' using codec={} hardware={}", - total_frames_to_emit, - output_path.string(), - cvmmap_streamer::zed_tools::codec_name(*codec), - writer.using_hardware()); - return exit_code(ToolExitCode::Success); -} diff --git a/src/tools/zed_svo_mp4_support.cpp b/src/tools/zed_svo_mp4_support.cpp deleted file mode 100644 index 
1e43f5d..0000000 --- a/src/tools/zed_svo_mp4_support.cpp +++ /dev/null @@ -1,707 +0,0 @@ -#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp" - -#include - -extern "C" { -#include -#include -#include -#include -#include -#include -} - -#include -#include -#include -#include -#include -#include - -namespace cvmmap_streamer::zed_tools { -namespace { - -struct EncoderCandidate { - std::string name{}; - bool using_hardware{false}; - AVPixelFormat pixel_format{AV_PIX_FMT_NONE}; -}; - -struct ResolvedEncoderSettings { - std::string requested_preset{}; - std::string requested_tune{}; - std::string mapped_preset{}; - std::optional mapped_tune{}; - std::optional rate_control_mode{}; - std::string quality_key{}; - int quality_value{kDefaultQuality}; - std::uint32_t gop{kDefaultGopSize}; - std::uint32_t b_frames{kDefaultBFrames}; -}; - -[[nodiscard]] -std::string av_error_string(const int error_code) { - char buffer[AV_ERROR_MAX_STRING_SIZE]{}; - av_strerror(error_code, buffer, sizeof(buffer)); - return std::string(buffer); -} - -[[nodiscard]] -AVCodecID codec_id(const CodecType codec) { - return codec == CodecType::H265 ? AV_CODEC_ID_HEVC : AV_CODEC_ID_H264; -} - -[[nodiscard]] -AVRational frame_rate_rational(const float fps) { - if (!(fps > 0.0f)) { - return AVRational{30, 1}; - } - - const auto scaled = static_cast(std::llround(static_cast(fps) * 1000.0)); - if (scaled <= 0) { - return AVRational{30, 1}; - } - return AVRational{scaled, 1000}; -} - -[[nodiscard]] -std::uint64_t frame_period_ns(const AVRational frame_rate) { - if (frame_rate.num <= 0 || frame_rate.den <= 0) { - return 33'333'333ull; - } - - const auto numerator = - static_cast(frame_rate.den) * 1'000'000'000ull; - const auto denominator = static_cast(frame_rate.num); - if (denominator == 0) { - return 33'333'333ull; - } - - const auto interval = numerator / denominator; - return interval == 0 ? 1ull : interval; -} - -[[nodiscard]] -std::vector encoder_candidates(const CodecType codec, const EncoderDeviceType device) { - const std::string hardware_name = codec == CodecType::H265 ? "hevc_nvenc" : "h264_nvenc"; - const std::string software_name = codec == CodecType::H265 ? "libx265" : "libx264"; - - switch (device) { - case EncoderDeviceType::Auto: - return { - EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12}, - EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P}, - }; - case EncoderDeviceType::Nvidia: - return { - EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12}, - }; - case EncoderDeviceType::Software: - return { - EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P}, - }; - } - - return {}; -} - -[[nodiscard]] -std::string mapped_preset_value(const EncoderCandidate &candidate, const PresetKind preset) { - if (candidate.using_hardware) { - switch (preset) { - case PresetKind::Fast: - return "p1"; - case PresetKind::Balanced: - return "p4"; - case PresetKind::Quality: - return "p7"; - } - } - - switch (preset) { - case PresetKind::Fast: - return "veryfast"; - case PresetKind::Balanced: - return "medium"; - case PresetKind::Quality: - return "slow"; - } - - return "veryfast"; -} - -[[nodiscard]] -std::optional mapped_tune_value(const EncoderCandidate &candidate, const TuneKind tune) { - if (candidate.using_hardware) { - return tune == TuneKind::LowLatency ? 
std::optional{"ull"} : std::optional{"hq"}; - } - - if (candidate.name == "libx264" && tune == TuneKind::LowLatency) { - return std::optional{"zerolatency"}; - } - return std::nullopt; -} - -[[nodiscard]] -std::optional x265_params_value(const EncoderCandidate &candidate, const TuneKind tune) { - if (candidate.name != "libx265") { - return std::nullopt; - } - if (tune == TuneKind::LowLatency) { - return std::optional{"repeat-headers=1:scenecut=0"}; - } - return std::optional{"repeat-headers=1"}; -} - -[[nodiscard]] -std::expected set_string_option(AVCodecContext *context, const char *key, const std::string &value) { - const auto result = av_opt_set(context->priv_data, key, value.c_str(), 0); - if (result < 0) { - return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + value + "': " + av_error_string(result)); - } - return {}; -} - -[[nodiscard]] -std::expected set_int_option(AVCodecContext *context, const char *key, const std::int64_t value) { - const auto result = av_opt_set_int(context->priv_data, key, value, 0); - if (result < 0) { - return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + std::to_string(value) + "': " + av_error_string(result)); - } - return {}; -} - -[[nodiscard]] -std::expected configure_codec_context( - AVCodecContext *context, - const EncoderCandidate &candidate, - const CodecType codec, - const std::uint32_t width, - const std::uint32_t height, - const AVRational framerate, - const EncodeTuning &tuning) { - context->codec_type = AVMEDIA_TYPE_VIDEO; - context->codec_id = codec_id(codec); - context->width = static_cast(width); - context->height = static_cast(height); - context->pix_fmt = candidate.pixel_format; - context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; - context->time_base = AVRational{1, static_cast(kNanosPerSecond)}; - context->framerate = framerate; - context->gop_size = static_cast(tuning.gop); - context->max_b_frames = static_cast(tuning.b_frames); - context->thread_count = 1; - - ResolvedEncoderSettings resolved{ - .requested_preset = std::string(preset_name(tuning.preset)), - .requested_tune = std::string(tune_name(tuning.tune)), - .mapped_preset = mapped_preset_value(candidate, tuning.preset), - .mapped_tune = mapped_tune_value(candidate, tuning.tune), - .quality_value = tuning.quality, - .gop = tuning.gop, - .b_frames = tuning.b_frames, - }; - - if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) { - return std::unexpected(set.error()); - } - if (resolved.mapped_tune) { - if (auto set = set_string_option(context, "tune", *resolved.mapped_tune); !set) { - return std::unexpected(set.error()); - } - } - - if (candidate.using_hardware) { - resolved.rate_control_mode = "vbr"; - resolved.quality_key = "cq"; - if (auto set = set_string_option(context, "rc", *resolved.rate_control_mode); !set) { - return std::unexpected(set.error()); - } - if (auto set = set_int_option(context, "cq", resolved.quality_value); !set) { - return std::unexpected(set.error()); - } - if (tuning.tune == TuneKind::LowLatency) { - if (auto set = set_string_option(context, "zerolatency", "1"); !set) { - return std::unexpected(set.error()); - } - if (auto set = set_string_option(context, "rc-lookahead", "0"); !set) { - return std::unexpected(set.error()); - } - } - } else { - resolved.quality_key = "crf"; - if (auto set = set_int_option(context, "crf", resolved.quality_value); !set) { - return std::unexpected(set.error()); - } - if (const auto x265_params = x265_params_value(candidate, tuning.tune); 
x265_params) { - if (auto set = set_string_option(context, "x265-params", *x265_params); !set) { - return std::unexpected(set.error()); - } - } - } - - if (auto set = set_int_option(context, "forced-idr", 1); !set) { - return std::unexpected(set.error()); - } - - return resolved; -} - -struct OpenedEncoder { - AVCodecContext *context{nullptr}; - EncoderCandidate candidate{}; - ResolvedEncoderSettings resolved{}; -}; - -[[nodiscard]] -std::expected open_encoder( - const CodecType codec, - const EncoderDeviceType device, - const std::uint32_t width, - const std::uint32_t height, - const AVRational framerate, - const EncodeTuning &tuning) { - std::string last_error{}; - - for (const auto &candidate : encoder_candidates(codec, device)) { - const auto *encoder = avcodec_find_encoder_by_name(candidate.name.c_str()); - if (encoder == nullptr) { - last_error = "FFmpeg encoder '" + candidate.name + "' is unavailable"; - if (device == EncoderDeviceType::Auto) { - spdlog::warn( - "encoder '{}' unavailable for codec={} in auto mode, trying next candidate", - candidate.name, - codec_name(codec)); - continue; - } - return std::unexpected(last_error); - } - - auto *context = avcodec_alloc_context3(encoder); - if (context == nullptr) { - return std::unexpected("failed to allocate FFmpeg encoder context"); - } - - auto resolved = configure_codec_context(context, candidate, codec, width, height, framerate, tuning); - if (!resolved) { - avcodec_free_context(&context); - return std::unexpected(resolved.error()); - } - - const auto open_result = avcodec_open2(context, encoder, nullptr); - if (open_result < 0) { - last_error = "failed to open FFmpeg encoder '" + candidate.name + "': " + av_error_string(open_result); - avcodec_free_context(&context); - if (device == EncoderDeviceType::Auto) { - spdlog::warn( - "encoder '{}' failed to open in auto mode: {}. 
trying software fallback", - candidate.name, - av_error_string(open_result)); - continue; - } - return std::unexpected(last_error); - } - - return OpenedEncoder{ - .context = context, - .candidate = candidate, - .resolved = std::move(*resolved), - }; - } - - if (last_error.empty()) { - last_error = "no usable FFmpeg encoder candidates were configured"; - } - return std::unexpected(last_error); -} - -} // namespace - -struct Mp4Writer::Impl { - [[nodiscard]] - std::expected open( - const std::filesystem::path &output_path, - const CodecType codec_arg, - const EncoderDeviceType encoder_device, - const std::uint32_t width, - const std::uint32_t height, - const float fps, - const EncodeTuning &tuning) { - close(); - - codec = codec_arg; - frame_rate = frame_rate_rational(fps); - frame_period = frame_period_ns(frame_rate); - last_frame_pts_ns.reset(); - auto encoder = open_encoder(codec, encoder_device, width, height, frame_rate, tuning); - if (!encoder) { - return std::unexpected(encoder.error()); - } - - encoder_context = encoder->context; - encoder_name = encoder->candidate.name; - using_hardware = encoder->candidate.using_hardware; - encoder_pixel_format = encoder->candidate.pixel_format; - resolved_settings = std::move(encoder->resolved); - - scaler = sws_getCachedContext( - nullptr, - static_cast(width), - static_cast(height), - AV_PIX_FMT_BGR24, - static_cast(width), - static_cast(height), - encoder_pixel_format, - SWS_BILINEAR, - nullptr, - nullptr, - nullptr); - if (scaler == nullptr) { - return std::unexpected("failed to create swscale conversion context"); - } - - frame = av_frame_alloc(); - if (frame == nullptr) { - return std::unexpected("failed to allocate FFmpeg frame"); - } - frame->format = encoder_pixel_format; - frame->width = encoder_context->width; - frame->height = encoder_context->height; - const auto frame_buffer_result = av_frame_get_buffer(frame, 32); - if (frame_buffer_result < 0) { - return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer_result)); - } - - packet = av_packet_alloc(); - if (packet == nullptr) { - return std::unexpected("failed to allocate FFmpeg packet"); - } - - const auto alloc_result = avformat_alloc_output_context2( - &format_context, - nullptr, - "mp4", - output_path.string().c_str()); - if (alloc_result < 0 || format_context == nullptr) { - return std::unexpected("failed to allocate MP4 output context: " + av_error_string(alloc_result)); - } - - video_stream = avformat_new_stream(format_context, nullptr); - if (video_stream == nullptr) { - return std::unexpected("failed to allocate MP4 video stream"); - } - - video_stream->time_base = encoder_context->time_base; - video_stream->avg_frame_rate = frame_rate; - - const auto params_result = avcodec_parameters_from_context(video_stream->codecpar, encoder_context); - if (params_result < 0) { - return std::unexpected("failed to copy encoder parameters into MP4 stream: " + av_error_string(params_result)); - } - - if ((format_context->oformat->flags & AVFMT_NOFILE) == 0) { - const auto open_result = avio_open2( - &format_context->pb, - output_path.string().c_str(), - AVIO_FLAG_WRITE, - nullptr, - nullptr); - if (open_result < 0) { - return std::unexpected("failed to open output MP4 '" + output_path.string() + "': " + av_error_string(open_result)); - } - } - - AVDictionary *muxer_options = nullptr; - av_dict_set(&muxer_options, "movflags", "+faststart", 0); - const auto header_result = avformat_write_header(format_context, &muxer_options); - 
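// Editor's note (dependency-free sketch, not part of the deleted file): the
// encoder context runs on a 1/1'000'000'000 time base, so frame PTS values
// are plain nanoseconds; drain_packets() below then converts packet
// timestamps into the muxer's stream time base via av_packet_rescale_ts().
// A simplified model of that conversion (FFmpeg's real helper uses rational
// arithmetic with configurable rounding; the values here are invented):
#include <cassert>
#include <cstdint>

// Re-expresses `ticks` counted in src_num/src_den seconds per tick as ticks
// of dst_num/dst_den seconds; truncating division is enough for this sketch.
std::int64_t rescale_ticks(std::int64_t ticks,
                           std::int64_t src_num, std::int64_t src_den,
                           std::int64_t dst_num, std::int64_t dst_den) {
  return ticks * src_num * dst_den / (src_den * dst_num);
}

int main() {
  // Half a second of nanosecond PTS re-expressed in a 1/90000 time base:
  assert(rescale_ticks(500'000'000, 1, 1'000'000'000, 1, 90'000) == 45'000);
  return 0;
}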
av_dict_free(&muxer_options); - if (header_result < 0) { - return std::unexpected("failed to write MP4 header: " + av_error_string(header_result)); - } - - spdlog::info( - "ZED_SVO_MP4_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} requested_preset={} requested_tune={} mapped_preset={} mapped_tune={} rc={} {}={} gop={} b_frames={} output={}", - codec_name(codec), - encoder_name, - using_hardware, - width, - height, - frame_rate.num, - frame_rate.den, - resolved_settings.requested_preset, - resolved_settings.requested_tune, - resolved_settings.mapped_preset, - resolved_settings.mapped_tune.value_or("none"), - resolved_settings.rate_control_mode.value_or("auto"), - resolved_settings.quality_key, - resolved_settings.quality_value, - resolved_settings.gop, - resolved_settings.b_frames, - output_path.string()); - return {}; - } - - [[nodiscard]] - std::expected write_bgr_frame( - const std::uint8_t *data, - const std::size_t row_stride_bytes, - const std::uint64_t relative_timestamp_ns) { - if (encoder_context == nullptr || frame == nullptr || scaler == nullptr || packet == nullptr || video_stream == nullptr) { - return std::unexpected("MP4 writer is not initialized"); - } - - const auto writable_result = av_frame_make_writable(frame); - if (writable_result < 0) { - return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(writable_result)); - } - - const std::uint8_t *source_planes[4]{data, nullptr, nullptr, nullptr}; - const int source_strides[4]{static_cast(row_stride_bytes), 0, 0, 0}; - sws_scale( - scaler, - source_planes, - source_strides, - 0, - encoder_context->height, - frame->data, - frame->linesize); - - auto normalized_timestamp_ns = relative_timestamp_ns; - if (last_frame_pts_ns && normalized_timestamp_ns <= *last_frame_pts_ns) { - normalized_timestamp_ns = *last_frame_pts_ns + frame_period; - } - - frame->pts = static_cast(normalized_timestamp_ns); - last_frame_pts_ns = normalized_timestamp_ns; - - const auto send_result = avcodec_send_frame(encoder_context, frame); - if (send_result < 0) { - return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result)); - } - - return drain_packets(); - } - - [[nodiscard]] - std::expected flush() { - if (encoder_context == nullptr) { - return {}; - } - - const auto flush_result = avcodec_send_frame(encoder_context, nullptr); - if (flush_result < 0 && flush_result != AVERROR_EOF) { - return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result)); - } - - auto drained = drain_packets(); - if (!drained) { - return drained; - } - - return close_output(); - } - - [[nodiscard]] - std::expected drain_packets() { - while (true) { - const auto receive_result = avcodec_receive_packet(encoder_context, packet); - if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) { - break; - } - if (receive_result < 0) { - return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result)); - } - - packet->stream_index = video_stream->index; - av_packet_rescale_ts(packet, encoder_context->time_base, video_stream->time_base); - - const auto write_result = av_interleaved_write_frame(format_context, packet); - av_packet_unref(packet); - if (write_result < 0) { - return std::unexpected("failed to write MP4 packet: " + av_error_string(write_result)); - } - } - - return {}; - } - - [[nodiscard]] - std::expected close_output() { - if (format_context == nullptr || trailer_written) { - return {}; - } - - const auto trailer_result 
-        av_write_trailer(format_context);
-    if (trailer_result < 0) {
-      return std::unexpected("failed to write MP4 trailer: " + av_error_string(trailer_result));
-    }
-    trailer_written = true;
-    return {};
-  }
-
-  void close() {
-    (void)close_output();
-
-    if (packet != nullptr) {
-      av_packet_free(&packet);
-    }
-    if (frame != nullptr) {
-      av_frame_free(&frame);
-    }
-    if (encoder_context != nullptr) {
-      avcodec_free_context(&encoder_context);
-    }
-    if (scaler != nullptr) {
-      sws_freeContext(scaler);
-      scaler = nullptr;
-    }
-    if (format_context != nullptr) {
-      if ((format_context->oformat->flags & AVFMT_NOFILE) == 0 && format_context->pb != nullptr) {
-        avio_closep(&format_context->pb);
-      }
-      avformat_free_context(format_context);
-      format_context = nullptr;
-    }
-
-    video_stream = nullptr;
-    encoder_name.clear();
-    using_hardware = false;
-    trailer_written = false;
-    frame_period = 33'333'333ull;
-    last_frame_pts_ns.reset();
-    resolved_settings = ResolvedEncoderSettings{};
-  }
-
-  ~Impl() {
-    close();
-  }
-
-  CodecType codec{CodecType::H265};
-  AVCodecContext *encoder_context{nullptr};
-  AVFormatContext *format_context{nullptr};
-  AVStream *video_stream{nullptr};
-  AVFrame *frame{nullptr};
-  AVPacket *packet{nullptr};
-  SwsContext *scaler{nullptr};
-  AVPixelFormat encoder_pixel_format{AV_PIX_FMT_NONE};
-  AVRational frame_rate{30, 1};
-  std::uint64_t frame_period{33'333'333ull};
-  std::optional<std::uint64_t> last_frame_pts_ns{};
-  std::string encoder_name{};
-  ResolvedEncoderSettings resolved_settings{};
-  bool using_hardware{false};
-  bool trailer_written{false};
-};
-
-std::expected<CodecType, std::string> parse_codec(const std::string_view raw) {
-  if (raw == "h264") {
-    return CodecType::H264;
-  }
-  if (raw == "h265") {
-    return CodecType::H265;
-  }
-  return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
-}
-
-std::expected<EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
-  if (raw == "auto") {
-    return EncoderDeviceType::Auto;
-  }
-  if (raw == "nvidia") {
-    return EncoderDeviceType::Nvidia;
-  }
-  if (raw == "software") {
-    return EncoderDeviceType::Software;
-  }
-  return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
-}
-
-std::expected<PresetKind, std::string> parse_preset(const std::string_view raw) {
-  if (raw == "fast") {
-    return PresetKind::Fast;
-  }
-  if (raw == "balanced") {
-    return PresetKind::Balanced;
-  }
-  if (raw == "quality") {
-    return PresetKind::Quality;
-  }
-  return std::unexpected("invalid preset: '" + std::string(raw) + "' (expected: fast|balanced|quality)");
-}
-
-std::expected<TuneKind, std::string> parse_tune(const std::string_view raw) {
-  if (raw == "low-latency") {
-    return TuneKind::LowLatency;
-  }
-  if (raw == "balanced") {
-    return TuneKind::Balanced;
-  }
-  return std::unexpected("invalid tune: '" + std::string(raw) + "' (expected: low-latency|balanced)");
-}
-
-std::string_view codec_name(const CodecType codec) {
-  return codec == CodecType::H265 ?
"h265" : "h264"; -} - -std::string_view preset_name(const PresetKind preset) { - switch (preset) { - case PresetKind::Fast: - return "fast"; - case PresetKind::Balanced: - return "balanced"; - case PresetKind::Quality: - return "quality"; - } - return "fast"; -} - -std::string_view tune_name(const TuneKind tune) { - switch (tune) { - case TuneKind::LowLatency: - return "low-latency"; - case TuneKind::Balanced: - return "balanced"; - } - return "low-latency"; -} - -std::filesystem::path derive_output_path(const std::filesystem::path &input_path) { - auto output_path = input_path; - output_path.replace_extension(".mp4"); - return output_path; -} - -Mp4Writer::Mp4Writer() - : impl_(std::make_unique()) {} - -Mp4Writer::Mp4Writer(Mp4Writer &&) noexcept = default; -Mp4Writer &Mp4Writer::operator=(Mp4Writer &&) noexcept = default; -Mp4Writer::~Mp4Writer() = default; - -std::expected Mp4Writer::open( - const std::filesystem::path &output_path, - const CodecType codec, - const EncoderDeviceType encoder_device, - const std::uint32_t width, - const std::uint32_t height, - const float fps, - const EncodeTuning &tuning) { - return impl_->open(output_path, codec, encoder_device, width, height, fps, tuning); -} - -std::expected Mp4Writer::write_bgr_frame( - const std::uint8_t *data, - const std::size_t row_stride_bytes, - const std::uint64_t relative_timestamp_ns) { - return impl_->write_bgr_frame(data, row_stride_bytes, relative_timestamp_ns); -} - -std::expected Mp4Writer::flush() { - return impl_->flush(); -} - -bool Mp4Writer::using_hardware() const { - return impl_ != nullptr && impl_->using_hardware; -} - -} // namespace cvmmap_streamer::zed_tools diff --git a/src/tools/zed_svo_to_mcap.cpp b/src/tools/zed_svo_to_mcap.cpp deleted file mode 100644 index 94f6272..0000000 --- a/src/tools/zed_svo_to_mcap.cpp +++ /dev/null @@ -1,2956 +0,0 @@ -#include -#include -#include - -#include - -#include "cvmmap_streamer/config/runtime_config.hpp" -#include "cvmmap_streamer/core/status.hpp" -#include "cvmmap_streamer/encode/encoder_backend.hpp" -#include "cvmmap_streamer/ipc/contracts.hpp" -#include "cvmmap_streamer/record/mcap_record_sink.hpp" -#include "cvmmap_streamer/tools/zed_progress_bar.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { - -using cvmmap_streamer::zed_tools::ProgressBar; -using cvmmap_streamer::zed_tools::stderr_supports_progress_bar; - -volatile std::sig_atomic_t g_signal_count = 0; -volatile std::sig_atomic_t g_last_signal = 0; - -enum class ToolExitCode : int { - Success = 0, - UsageError = 2, - RuntimeError = 3, -}; - -struct CliOptions { - std::vector input_paths{}; - std::string segment_dir{}; - std::string output_path{}; - std::string codec{"h265"}; - std::string encoder_device{"auto"}; - std::string mcap_compression{"zstd"}; - std::string depth_mode{"neural_plus"}; - std::string depth_size{"optimal"}; - std::string bundle_policy{"nearest"}; - std::string copy_range{"common"}; - std::string bundle_topic{"/bundle"}; - bool with_pose{false}; - std::uint32_t start_frame{0}; - bool has_start_frame{false}; - std::uint32_t end_frame{0}; - bool has_end_frame{false}; - bool has_bundle_topic{false}; - std::string frame_id{"camera"}; - std::string video_topic{"/camera/video"}; - std::string depth_topic{"/camera/depth"}; - std::string calibration_topic{"/camera/calibration"}; - std::string 
-      depth_calibration_topic{"/camera/depth_calibration"};
-  std::string pose_topic{"/camera/pose"};
-  std::string world_frame_id{"world"};
-  std::string pose_config_path{};
-  double sync_tolerance_ms{0.0};
-  bool has_sync_tolerance{false};
-};
-
-struct PoseTrackingOptions {
-  sl::COORDINATE_SYSTEM coordinate_system{sl::COORDINATE_SYSTEM::IMAGE};
-  sl::REFERENCE_FRAME reference_frame{sl::REFERENCE_FRAME::WORLD};
-  bool set_floor_as_origin{false};
-  std::string reference_frame_name{"world"};
-};
-
-struct SourceSpec {
-  std::filesystem::path path{};
-  std::string label{};
-};
-
-struct TrackingSample {
-  sl::POSITIONAL_TRACKING_STATE state{sl::POSITIONAL_TRACKING_STATE::OFF};
-  bool has_pose{false};
-  std::array<double, 3> position{};
-  std::array<double, 4> orientation{};
-};
-
-struct CalibrationData {
-  std::uint32_t width{0};
-  std::uint32_t height{0};
-  std::array<double, 5> distortion{
-      0.0, 0.0, 0.0, 0.0, 0.0,
-  };
-  std::array<double, 9> intrinsic_matrix{
-      0.0, 0.0, 0.0,
-      0.0, 0.0, 0.0,
-      0.0, 0.0, 1.0,
-  };
-  std::array<double, 9> rectification_matrix{
-      1.0, 0.0, 0.0,
-      0.0, 1.0, 0.0,
-      0.0, 0.0, 1.0,
-  };
-  std::array<double, 12> projection_matrix{
-      0.0, 0.0, 0.0, 0.0,
-      0.0, 0.0, 0.0, 0.0,
-      0.0, 0.0, 1.0, 0.0,
-  };
-};
-
-struct CameraStream {
-  SourceSpec source{};
-  std::unique_ptr<sl::Camera> camera{};
-  sl::RuntimeParameters runtime{};
-  sl::Resolution depth_image_size{};
-  sl::Mat current_left_frame{};
-  sl::Mat current_depth_frame{};
-  sl::Mat next_left_frame{};
-  sl::Mat next_depth_frame{};
-  TrackingSample current_tracking{};
-  TrackingSample next_tracking{};
-  std::uint64_t current_timestamp_ns{0};
-  std::uint64_t next_timestamp_ns{0};
-  std::uint32_t next_corrupted_frames_skipped{0};
-  std::uint64_t first_timestamp_ns{0};
-  std::uint64_t last_timestamp_ns{0};
-  std::uint64_t total_frames{0};
-  std::uint64_t nominal_frame_period_ns{0};
-  std::uint64_t dropped_frames{0};
-  float fps{0.0f};
-  std::uint32_t width{0};
-  std::uint32_t height{0};
-  std::uint32_t depth_width{0};
-  std::uint32_t depth_height{0};
-  int sync_position{-1};
-  bool has_next{false};
-  bool tracking_enabled{false};
-  bool calibration_written{false};
-  bool depth_calibration_written{false};
-  std::optional<sl::POSITIONAL_TRACKING_STATE> last_tracking_state{};
-  int serial_number{0};
-  PoseTrackingOptions pose_tracking{};
-  cvmmap_streamer::ipc::FrameInfo frame_info{};
-  std::optional<cvmmap_streamer::encode::EncoderBackend> backend{};
-  cvmmap_streamer::encode::EncodedStreamInfo stream_info{};
-  cvmmap_streamer::record::MultiMcapRecordSink::StreamId mcap_stream_id{0};
-  CalibrationData video_calibration{};
-  CalibrationData depth_calibration{};
-};
-
-struct BundledFrameSelection {
-  std::size_t stream_index{0};
-  cvmmap_streamer::record::BundleMemberStatus status{
-      cvmmap_streamer::record::BundleMemberStatus::Present,
-  };
-  bool use_next{false};
-  std::optional<std::uint64_t> timestamp_ns{};
-  std::int64_t delta_ns{0};
-  std::uint32_t corrupted_frames_skipped{0};
-};
-
-struct GrabResult {
-  int svo_position{-1};
-  std::uint32_t corrupted_frames_skipped{0};
-  int first_corrupted_position{-1};
-  int last_corrupted_position{-1};
-};
-
-enum class MultiCameraExportPolicy {
-  Nearest,
-  Strict,
-  Copy,
-};
-
-enum class CopyRangeMode {
-  Common,
-  Full,
-};
-
-[[nodiscard]]
-constexpr int exit_code(const ToolExitCode code) {
-  return static_cast<int>(code);
-}
-
-void termination_signal_handler(const int signal_number) {
-  g_last_signal = signal_number;
-  if (g_signal_count > 0) {
-    std::_Exit(128 + signal_number);
-  }
-  g_signal_count = 1;
-}
-
-void install_signal_handlers() {
-  std::signal(SIGINT, termination_signal_handler);
-  std::signal(SIGTERM,
-      termination_signal_handler);
-}
-
-[[nodiscard]]
-bool shutdown_requested() {
-  return g_signal_count != 0;
-}
-
-[[nodiscard]]
-int interrupted_exit_code() {
-  if (g_last_signal == SIGTERM) {
-    return 128 + SIGTERM;
-  }
-  return 128 + SIGINT;
-}
-
-[[nodiscard]]
-std::string interrupted_signal_name() {
-  if (g_last_signal == SIGTERM) {
-    return "SIGTERM";
-  }
-  return "SIGINT";
-}
-
-[[nodiscard]]
-bool log_shutdown_request(bool &logged, const std::string_view context) {
-  if (!shutdown_requested()) {
-    return false;
-  }
-  if (!logged) {
-    spdlog::warn(
-        "{}: received {}; attempting graceful shutdown. send the signal again to force exit",
-        context,
-        interrupted_signal_name());
-    logged = true;
-  }
-  return true;
-}
-
-[[nodiscard]]
-std::string zed_string(const sl::String &value) {
-  return std::string(value.c_str() == nullptr ? "" : value.c_str());
-}
-
-[[nodiscard]]
-std::string zed_status_string(const sl::ERROR_CODE code) {
-  return zed_string(sl::toString(code));
-}
-
-[[nodiscard]]
-std::string zed_tracking_state_string(const sl::POSITIONAL_TRACKING_STATE state) {
-  return zed_string(sl::toString(state));
-}
-
-[[nodiscard]]
-std::string lowercase(std::string value);
-
-[[nodiscard]]
-std::expected<cvmmap_streamer::CodecType, std::string> parse_codec(const std::string_view raw) {
-  if (raw == "h264") {
-    return cvmmap_streamer::CodecType::H264;
-  }
-  if (raw == "h265") {
-    return cvmmap_streamer::CodecType::H265;
-  }
-  return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
-}
-
-[[nodiscard]]
-std::expected<cvmmap_streamer::EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
-  if (raw == "auto") {
-    return cvmmap_streamer::EncoderDeviceType::Auto;
-  }
-  if (raw == "nvidia") {
-    return cvmmap_streamer::EncoderDeviceType::Nvidia;
-  }
-  if (raw == "software") {
-    return cvmmap_streamer::EncoderDeviceType::Software;
-  }
-  return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
-}
-
-[[nodiscard]]
-std::expected<cvmmap_streamer::McapCompression, std::string> parse_mcap_compression(const std::string_view raw) {
-  if (raw == "none") {
-    return cvmmap_streamer::McapCompression::None;
-  }
-  if (raw == "lz4") {
-    return cvmmap_streamer::McapCompression::Lz4;
-  }
-  if (raw == "zstd") {
-    return cvmmap_streamer::McapCompression::Zstd;
-  }
-  return std::unexpected("invalid mcap compression: '" + std::string(raw) + "' (expected: none|lz4|zstd)");
-}
-
-[[nodiscard]]
-std::expected<sl::DEPTH_MODE, std::string> parse_depth_mode(const std::string_view raw) {
-  auto normalized = lowercase(std::string(raw));
-  std::replace(normalized.begin(), normalized.end(), '-', '_');
-  if (normalized == "neural_light") {
-    return sl::DEPTH_MODE::NEURAL_LIGHT;
-  }
-  if (normalized == "neural") {
-    return sl::DEPTH_MODE::NEURAL;
-  }
-  if (normalized == "neural_plus") {
-    return sl::DEPTH_MODE::NEURAL_PLUS;
-  }
-  return std::unexpected(
-      "invalid depth mode: '" + std::string(raw) + "' (expected: neural_light|neural|neural_plus)");
-}
-
-[[nodiscard]]
-std::expected<sl::Resolution, std::string> parse_depth_size(const std::string_view raw) {
-  auto normalized = lowercase(std::string(raw));
-  std::replace(normalized.begin(), normalized.end(), '-', '_');
-  if (normalized == "optimal") {
-    return sl::Resolution(-1, -1);
-  }
-  if (normalized == "native") {
-    return sl::Resolution(0, 0);
-  }
-
-  static const std::regex size_pattern{R"(^([1-9][0-9]*)[xX]([1-9][0-9]*)$)"};
-  std::smatch match{};
-  const auto size_string = std::string(raw);
-  if (!std::regex_match(size_string, match, size_pattern)) {
-    return std::unexpected(
-        "invalid depth size: '" + size_string +
-        "' (expected: optimal|native|<width>x<height>)");
-  }
-
-  return sl::Resolution(std::stoi(match[1].str()), std::stoi(match[2].str()));
-}
-
-[[nodiscard]]
-std::expected<MultiCameraExportPolicy, std::string> parse_bundle_policy(const std::string_view raw) {
-  const auto normalized = lowercase(std::string(raw));
-  if (normalized == "nearest") {
-    return MultiCameraExportPolicy::Nearest;
-  }
-  if (normalized == "strict") {
-    return MultiCameraExportPolicy::Strict;
-  }
-  if (normalized == "copy") {
-    return MultiCameraExportPolicy::Copy;
-  }
-  return std::unexpected("invalid bundle policy: '" + std::string(raw) + "' (expected: nearest|strict|copy)");
-}
-
-[[nodiscard]]
-std::expected<CopyRangeMode, std::string> parse_copy_range(const std::string_view raw) {
-  const auto normalized = lowercase(std::string(raw));
-  if (normalized == "common") {
-    return CopyRangeMode::Common;
-  }
-  if (normalized == "full") {
-    return CopyRangeMode::Full;
-  }
-  return std::unexpected("invalid copy range: '" + std::string(raw) + "' (expected: common|full)");
-}
-
-[[nodiscard]]
-std::string_view multi_camera_policy_name(const MultiCameraExportPolicy policy) {
-  switch (policy) {
-    case MultiCameraExportPolicy::Strict:
-      return "strict";
-    case MultiCameraExportPolicy::Copy:
-      return "copy";
-    case MultiCameraExportPolicy::Nearest:
-    default:
-      return "nearest";
-  }
-}
-
-[[nodiscard]]
-std::string_view copy_range_name(const CopyRangeMode range_mode) {
-  switch (range_mode) {
-    case CopyRangeMode::Full:
-      return "full";
-    case CopyRangeMode::Common:
-    default:
-      return "common";
-  }
-}
-
-[[nodiscard]]
-cvmmap_streamer::record::BundlePolicy manifest_bundle_policy(const MultiCameraExportPolicy policy) {
-  return policy == MultiCameraExportPolicy::Strict
-      ? cvmmap_streamer::record::BundlePolicy::Strict
-      : cvmmap_streamer::record::BundlePolicy::Nearest;
-}
-
-[[nodiscard]]
-std::uint64_t frame_period_ns(const float fps) {
-  if (!(fps > 0.0f)) {
-    return 33'333'333ull;
-  }
-  return static_cast<std::uint64_t>(std::llround(1'000'000'000.0 / static_cast<double>(fps)));
-}
-
-[[nodiscard]]
-std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
-  if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
-    return std::unexpected(std::string(label) + " must be U8_C3");
-  }
-  if (mat.getWidth() == 0 || mat.getHeight() == 0) {
-    return std::unexpected(std::string(label) + " dimensions must be non-zero");
-  }
-  if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
-    return std::unexpected(std::string(label) + " CPU buffer is null");
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> validate_u16c1_mat(const sl::Mat &mat, const std::string_view label) {
-  if (mat.getDataType() != sl::MAT_TYPE::U16_C1) {
-    return std::unexpected(std::string(label) + " must be U16_C1");
-  }
-  if (mat.getWidth() == 0 || mat.getHeight() == 0) {
-    return std::unexpected(std::string(label) + " dimensions must be non-zero");
-  }
-  if (mat.getPtr<sl::ushort1>(sl::MEM::CPU) == nullptr) {
-    return std::unexpected(std::string(label) + " CPU buffer is null");
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::vector<std::uint16_t> copy_compact_u16_plane(const sl::Mat &mat) {
-  const auto width = static_cast<std::size_t>(mat.getWidth());
-  const auto height = static_cast<std::size_t>(mat.getHeight());
-  const auto row_bytes = width * sizeof(std::uint16_t);
-  const auto step_bytes = mat.getStepBytes(sl::MEM::CPU);
-  auto *src = mat.getPtr<sl::uchar1>(sl::MEM::CPU);
-
-  std::vector<std::uint16_t> compact(width * height, 0);
-  auto *dst = reinterpret_cast<std::uint8_t *>(compact.data());
-  for (std::size_t row = 0; row < height; ++row) {
-    std::memcpy(
-        dst + row * row_bytes,
-        src + row * step_bytes,
-        row_bytes);
-  }
-  return compact;
-}
-
-[[nodiscard]]
-std::string lowercase(std::string value) {
-  std::transform(
-      value.begin(),
-      value.end(),
-      value.begin(),
-      [](unsigned char ch) {
-        return static_cast<char>(std::tolower(ch));
-      });
-  return value;
-}
-
-[[nodiscard]]
-std::string normalize_config_token(std::string value) {
-  value = lowercase(std::move(value));
-  std::replace(value.begin(), value.end(), '-', '_');
-  std::replace(value.begin(), value.end(), ' ', '_');
-  return value;
-}
-
-[[nodiscard]]
-CalibrationData make_calibration_data(const sl::CameraInformation &camera_info) {
-  CalibrationData calibration{};
-  const auto &camera_config = camera_info.camera_configuration;
-  const auto &left_camera = camera_config.calibration_parameters.left_cam;
-  const auto resolved_width = left_camera.image_size.width > 0
-      ? left_camera.image_size.width
-      : camera_config.resolution.width;
-  const auto resolved_height = left_camera.image_size.height > 0
-      ? left_camera.image_size.height
-      : camera_config.resolution.height;
-  calibration.width = static_cast<std::uint32_t>(resolved_width);
-  calibration.height = static_cast<std::uint32_t>(resolved_height);
-  calibration.intrinsic_matrix = {
-      static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx),
-      0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy),
-      0.0, 0.0, 1.0,
-  };
-  calibration.projection_matrix = {
-      static_cast<double>(left_camera.fx), 0.0, static_cast<double>(left_camera.cx), 0.0,
-      0.0, static_cast<double>(left_camera.fy), static_cast<double>(left_camera.cy), 0.0,
-      0.0, 0.0, 1.0, 0.0,
-  };
-  return calibration;
-}
-
-[[nodiscard]]
-std::string describe_depth_resolution_request(const sl::Resolution &resolution) {
-  if (resolution.width == -1 && resolution.height == -1) {
-    return "optimal";
-  }
-  if (resolution.width == 0 && resolution.height == 0) {
-    return "native";
-  }
-  return std::to_string(resolution.width) + "x" + std::to_string(resolution.height);
-}
-
-[[nodiscard]]
-std::expected<void, std::string> validate_depth_resolution_request(
-    const sl::Resolution &requested_resolution,
-    const std::uint32_t native_width,
-    const std::uint32_t native_height) {
-  if (requested_resolution.width <= 0 || requested_resolution.height <= 0) {
-    return {};
-  }
-  if (requested_resolution.width > static_cast<std::int64_t>(native_width) ||
-      requested_resolution.height > static_cast<std::int64_t>(native_height)) {
-    return std::unexpected(
-        "requested depth size " + describe_depth_resolution_request(requested_resolution) +
-        " exceeds native camera resolution " + std::to_string(native_width) + "x" + std::to_string(native_height));
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<sl::COORDINATE_SYSTEM, std::string> parse_coordinate_system(const std::string_view raw) {
-  const auto normalized = normalize_config_token(std::string(raw));
-  if (normalized == "image") {
-    return sl::COORDINATE_SYSTEM::IMAGE;
-  }
-  if (normalized == "right_handed_y_up" || normalized == "y_up") {
-    return sl::COORDINATE_SYSTEM::RIGHT_HANDED_Y_UP;
-  }
-  return std::unexpected(
-      "invalid zed.coordinate_system: '" + std::string(raw) + "' (allowed: IMAGE, RIGHT_HANDED_Y_UP)");
-}
-
-[[nodiscard]]
-std::expected<PoseTrackingOptions, std::string> load_pose_tracking_options(const CliOptions &options) {
-  PoseTrackingOptions config{};
-  config.reference_frame_name = options.world_frame_id;
-  if (options.pose_config_path.empty()) {
-    return config;
-  }
-
-  toml::table table{};
-  try {
-    table = toml::parse_file(options.pose_config_path);
-  } catch (const toml::parse_error &error) {
-    return std::unexpected("failed to parse pose config '" + options.pose_config_path + "': " + std::string(error.description()));
-  }
-
-  if (const auto value = table["zed"]["coordinate_system"].value<std::string>();
-      value) {
-    auto parsed = parse_coordinate_system(*value);
-    if (!parsed) {
-      return std::unexpected(parsed.error());
-    }
-    config.coordinate_system = *parsed;
-  }
-
-  if (const auto value = table["zed"]["body_tracking"]["reference_frame"].value<std::string>(); value) {
-    const auto normalized = normalize_config_token(*value);
-    if (normalized == "camera") {
-      config.reference_frame = sl::REFERENCE_FRAME::CAMERA;
-      config.reference_frame_name = "camera";
-    } else if (normalized == "world") {
-      config.reference_frame = sl::REFERENCE_FRAME::WORLD;
-      config.reference_frame_name = "world";
-    } else {
-      return std::unexpected(
-          "invalid zed.body_tracking.reference_frame: '" + *value + "' (allowed: CAMERA, WORLD)");
-    }
-  }
-
-  if (const auto value = table["zed"]["body_tracking"]["set_floor_as_origin"].value<bool>(); value) {
-    config.set_floor_as_origin = *value;
-  }
-
-  return config;
-}
-
-[[nodiscard]]
-std::string extract_camera_label(const std::filesystem::path &path, const std::size_t fallback_index = 0) {
-  static const std::regex pattern{R"(.*_(zed[0-9]+)\.svo2?$)", std::regex::icase};
-  std::smatch match{};
-  const auto filename = path.filename().string();
-  if (std::regex_match(filename, match, pattern)) {
-    return lowercase(match[1].str());
-  }
-  if (fallback_index > 0) {
-    return "cam" + std::to_string(fallback_index);
-  }
-  return lowercase(path.stem().string());
-}
-
-[[nodiscard]]
-std::string namespace_topic(const std::string_view label, const std::string_view topic) {
-  if (topic.empty()) {
-    return {};
-  }
-  std::string suffix{topic};
-  while (!suffix.empty() && suffix.front() == '/') {
-    suffix.erase(suffix.begin());
-  }
-  if (suffix.rfind("camera/", 0) == 0) {
-    suffix.erase(0, std::string_view{"camera/"}.size());
-  }
-  return "/" + std::string(label) + "/" + suffix;
-}
-
-[[nodiscard]]
-std::string multi_frame_id(const CliOptions &options, const std::string_view label) {
-  if (options.frame_id.empty() || options.frame_id == "camera") {
-    return std::string(label);
-  }
-  return options.frame_id + "/" + std::string(label);
-}
-
-[[nodiscard]]
-std::string pose_reference_frame_id(const PoseTrackingOptions &options, const std::string_view label) {
-  return std::string(label) + "/" + options.reference_frame_name;
-}
-
-[[nodiscard]]
-std::expected<std::vector<SourceSpec>, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) {
-  if (!std::filesystem::is_directory(segment_dir)) {
-    return std::unexpected("segment directory does not exist: " + segment_dir.string());
-  }
-
-  static const std::regex pattern{R"(.*_zed([0-9]+)\.svo2?$)", std::regex::icase};
-  std::vector<std::pair<int, std::filesystem::path>> ordered_paths{};
-  for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) {
-    if (!entry.is_regular_file()) {
-      continue;
-    }
-    std::smatch match{};
-    const auto filename = entry.path().filename().string();
-    if (!std::regex_match(filename, match, pattern)) {
-      continue;
-    }
-    ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path());
-  }
-
-  std::sort(
-      ordered_paths.begin(),
-      ordered_paths.end(),
-      [](const auto &left, const auto &right) {
-        return left.first < right.first;
-      });
-
-  if (ordered_paths.size() < 2) {
-    return std::unexpected(
-        "expected at least 2 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size()));
-  }
-
-  std::vector<SourceSpec> sources{};
-  sources.reserve(ordered_paths.size());
-  for (const auto &[camera_index, path] : ordered_paths) {
-    sources.push_back(SourceSpec{
-        .path = path,
-        .label = "zed" + std::to_string(camera_index),
-    });
-  }
-  return
-      sources;
-}
-
-[[nodiscard]]
-std::expected<std::vector<SourceSpec>, std::string> resolve_sources(const CliOptions &options) {
-  if (!options.segment_dir.empty()) {
-    if (!options.input_paths.empty()) {
-      return std::unexpected("provide either --segment-dir or --input, not both");
-    }
-    return discover_segment_inputs(std::filesystem::path{options.segment_dir});
-  }
-
-  if (options.input_paths.empty()) {
-    return std::unexpected("provide at least one --input or use --segment-dir");
-  }
-
-  std::vector<SourceSpec> sources{};
-  sources.reserve(options.input_paths.size());
-  for (std::size_t index = 0; index < options.input_paths.size(); ++index) {
-    const auto path = std::filesystem::path{options.input_paths[index]};
-    if (!std::filesystem::is_regular_file(path)) {
-      return std::unexpected("input file does not exist: " + path.string());
-    }
-    sources.push_back(SourceSpec{
-        .path = path,
-        .label = extract_camera_label(path, index + 1),
-    });
-  }
-
-  std::sort(
-      sources.begin(),
-      sources.end(),
-      [](const auto &left, const auto &right) {
-        return left.label < right.label;
-      });
-  for (std::size_t index = 1; index < sources.size(); ++index) {
-    if (sources[index - 1].label == sources[index].label) {
-      return std::unexpected("duplicate camera label resolved from inputs: " + sources[index].label);
-    }
-  }
-  return sources;
-}
-
-[[nodiscard]]
-std::filesystem::path derive_output_path(const CliOptions &options, const std::vector<SourceSpec> &sources) {
-  if (!options.output_path.empty()) {
-    return std::filesystem::path{options.output_path};
-  }
-  if (!options.segment_dir.empty()) {
-    const auto segment_dir = std::filesystem::path{options.segment_dir};
-    return segment_dir / (segment_dir.filename().string() + ".mcap");
-  }
-  auto output_path = sources.front().path;
-  output_path.replace_extension(".mcap");
-  return output_path;
-}
-
-[[nodiscard]]
-std::expected<std::uint64_t, std::string> read_image_timestamp_ns(
-    sl::Camera &camera,
-    const std::optional<std::uint64_t> fallback_timestamp_ns,
-    const std::uint64_t nominal_frame_period_ns) {
-  auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
-  if (timestamp_ns == 0) {
-    if (!fallback_timestamp_ns) {
-      return std::unexpected("ZED SDK returned a zero image timestamp for the first frame");
-    }
-    timestamp_ns = *fallback_timestamp_ns + nominal_frame_period_ns;
-  }
-  return timestamp_ns;
-}
-
-void maybe_log_tracking_state(CameraStream &stream, const sl::POSITIONAL_TRACKING_STATE tracking_state) {
-  if (!stream.tracking_enabled) {
-    return;
-  }
-  if (stream.last_tracking_state && *stream.last_tracking_state == tracking_state) {
-    return;
-  }
-  stream.last_tracking_state = tracking_state;
-  if (tracking_state != sl::POSITIONAL_TRACKING_STATE::OK) {
-    spdlog::warn(
-        "pose tracking state for {} changed to {}",
-        stream.source.label,
-        zed_tracking_state_string(tracking_state));
-  }
-}
-
-[[nodiscard]]
-std::expected<GrabResult, std::string> grab_next_readable_frame(
-    sl::Camera &camera,
-    const sl::RuntimeParameters &runtime,
-    const std::string_view source_label,
-    const std::string_view source_path,
-    const std::uint64_t total_frames,
-    const std::optional<std::uint64_t> current_timestamp_ns,
-    const std::optional<std::uint64_t> last_timestamp_ns) {
-  GrabResult result{};
-  while (true) {
-    const auto grab_status = camera.grab(runtime);
-    const auto svo_position = camera.getSVOPosition();
-    if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
-      return std::unexpected("end-of-svo");
-    }
-    if (grab_status == sl::ERROR_CODE::CORRUPTED_FRAME) {
-      if (result.first_corrupted_position < 0) {
-        result.first_corrupted_position = svo_position;
-      }
-      result.last_corrupted_position = svo_position;
-      result.corrupted_frames_skipped += 1;
-
-      const auto last_frame_position = total_frames > 0
-          ? static_cast<int>(total_frames - 1)
-          : 0;
-      const bool at_or_past_tail = svo_position >= last_frame_position;
-      const bool beyond_last_readable =
-          last_timestamp_ns.has_value() &&
-          current_timestamp_ns.has_value() &&
-          *current_timestamp_ns >= *last_timestamp_ns;
-      if (at_or_past_tail || beyond_last_readable) {
-        spdlog::warn(
-            "treating {} unreadable tail frame(s) as end-of-svo for {} last_corrupted_position={} current_timestamp_ns={} last_timestamp_ns={}",
-            result.corrupted_frames_skipped,
-            source_path,
-            svo_position,
-            current_timestamp_ns.value_or(0),
-            last_timestamp_ns.value_or(0));
-        return std::unexpected("end-of-svo");
-      }
-
-      if (svo_position < 0) {
-        return std::unexpected(
-            "failed to advance past corrupted frame for " + std::string(source_label) +
-            ": invalid svo_position=" + std::to_string(svo_position));
-      }
-
-      const auto next_position = svo_position + 1;
-      if (next_position >= static_cast<int>(total_frames)) {
-        spdlog::warn(
-            "treating {} unreadable tail frame(s) as end-of-svo for {} last_corrupted_position={} current_timestamp_ns={} last_timestamp_ns={}",
-            result.corrupted_frames_skipped,
-            source_path,
-            svo_position,
-            current_timestamp_ns.value_or(0),
-            last_timestamp_ns.value_or(0));
-        return std::unexpected("end-of-svo");
-      }
-      camera.setSVOPosition(next_position);
-      continue;
-    }
-    if (grab_status != sl::ERROR_CODE::SUCCESS) {
-      return std::unexpected(
-          "failed to grab frame for " + std::string(source_label) +
-          ": " + zed_status_string(grab_status) +
-          " svo_position=" + std::to_string(svo_position) +
-          " total_frames=" + std::to_string(total_frames) +
-          " current_timestamp_ns=" + std::to_string(current_timestamp_ns.value_or(0)) +
-          " last_timestamp_ns=" + std::to_string(last_timestamp_ns.value_or(0)) +
-          " corrupted_frames_skipped=" + std::to_string(result.corrupted_frames_skipped));
-    }
-
-    result.svo_position = svo_position;
-    return result;
-  }
-}
-
-void maybe_log_recovered_corruption_gap(
-    const std::string_view source_label,
-    const GrabResult &grab_result,
-    const std::uint64_t recovered_timestamp_ns) {
-  if (grab_result.corrupted_frames_skipped == 0) {
-    return;
-  }
-  spdlog::warn(
-      "recovered {} after skipping {} corrupted frame(s) positions={}..{} recovered_position={} recovered_timestamp_ns={}",
-      source_label,
-      grab_result.corrupted_frames_skipped,
-      grab_result.first_corrupted_position,
-      grab_result.last_corrupted_position,
-      grab_result.svo_position,
-      recovered_timestamp_ns);
-}
-
-[[nodiscard]]
-std::expected<GrabResult, std::string> read_frame_data(
-    CameraStream &stream,
-    sl::Mat &left_frame,
-    sl::Mat &depth_frame,
-    TrackingSample &tracking_sample,
-    std::optional<std::uint64_t> fallback_timestamp_ns,
-    std::uint64_t &timestamp_ns_out) {
-  auto grab = grab_next_readable_frame(
-      *stream.camera,
-      stream.runtime,
-      stream.source.label,
-      stream.source.path.string(),
-      stream.total_frames,
-      stream.current_timestamp_ns == 0 ? std::nullopt : std::optional{stream.current_timestamp_ns},
-      stream.last_timestamp_ns == 0 ?
-          std::nullopt : std::optional{stream.last_timestamp_ns});
-  if (!grab) {
-    return std::unexpected(grab.error());
-  }
-
-  const auto image_status = stream.camera->retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
-  if (image_status != sl::ERROR_CODE::SUCCESS) {
-    return std::unexpected(
-        "failed to retrieve left image for " + stream.source.label + ": " + zed_status_string(image_status));
-  }
-  if (auto valid = validate_u8c3_mat(left_frame, stream.source.label + " left image"); !valid) {
-    return std::unexpected(valid.error());
-  }
-
-  const auto depth_status = stream.camera->retrieveMeasure(
-      depth_frame,
-      sl::MEASURE::DEPTH_U16_MM,
-      sl::MEM::CPU,
-      stream.depth_image_size);
-  if (depth_status != sl::ERROR_CODE::SUCCESS) {
-    return std::unexpected(
-        "failed to retrieve depth map for " + stream.source.label + ": " + zed_status_string(depth_status));
-  }
-  if (auto valid = validate_u16c1_mat(depth_frame, stream.source.label + " depth map"); !valid) {
-    return std::unexpected(valid.error());
-  }
-
-  auto timestamp_ns = read_image_timestamp_ns(*stream.camera, fallback_timestamp_ns, stream.nominal_frame_period_ns);
-  if (!timestamp_ns) {
-    return std::unexpected(timestamp_ns.error());
-  }
-  timestamp_ns_out = *timestamp_ns;
-  maybe_log_recovered_corruption_gap(stream.source.label, *grab, timestamp_ns_out);
-
-  tracking_sample = {};
-  if (stream.tracking_enabled) {
-    sl::Pose pose{};
-    tracking_sample.state = stream.camera->getPosition(pose, stream.pose_tracking.reference_frame);
-    maybe_log_tracking_state(stream, tracking_sample.state);
-    if (tracking_sample.state == sl::POSITIONAL_TRACKING_STATE::OK) {
-      tracking_sample.has_pose = true;
-      const auto translation = pose.getTranslation();
-      const auto orientation = pose.getOrientation();
-      tracking_sample.position = {
-          static_cast<double>(translation.x),
-          static_cast<double>(translation.y),
-          static_cast<double>(translation.z),
-      };
-      tracking_sample.orientation = {
-          static_cast<double>(orientation.x),
-          static_cast<double>(orientation.y),
-          static_cast<double>(orientation.z),
-          static_cast<double>(orientation.w),
-      };
-    }
-  }
-  return *grab;
-}
-
-[[nodiscard]]
-std::expected<void, std::string> fill_next_frame(CameraStream &stream) {
-  auto next = read_frame_data(
-      stream,
-      stream.next_left_frame,
-      stream.next_depth_frame,
-      stream.next_tracking,
-      stream.current_timestamp_ns,
-      stream.next_timestamp_ns);
-  if (!next) {
-    if (next.error() == "end-of-svo") {
-      stream.has_next = false;
-      stream.next_corrupted_frames_skipped = 0;
-      return {};
-    }
-    return std::unexpected(next.error());
-  }
-  stream.has_next = true;
-  stream.next_corrupted_frames_skipped = next->corrupted_frames_skipped;
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> promote_next_frame(CameraStream &stream) {
-  if (!stream.has_next) {
-    return std::unexpected("no buffered next frame is available for " + stream.source.label);
-  }
-
-  std::swap(stream.current_left_frame, stream.next_left_frame);
-  std::swap(stream.current_depth_frame, stream.next_depth_frame);
-  std::swap(stream.current_tracking, stream.next_tracking);
-  std::swap(stream.current_timestamp_ns, stream.next_timestamp_ns);
-  stream.has_next = false;
-  stream.next_corrupted_frames_skipped = 0;
-  return fill_next_frame(stream);
-}
-
-[[nodiscard]]
-std::expected<std::uint64_t, std::string> read_last_readable_timestamp(CameraStream &stream) {
-  const auto last_candidate = static_cast<int>(stream.total_frames - 1);
-  std::string last_error{};
-
-  for (int position = last_candidate; position >= 0; --position) {
-    stream.camera->setSVOPosition(position);
-    std::uint64_t timestamp_ns = 0;
-    TrackingSample
-        ignored_tracking{};
-    auto frame = read_frame_data(
-        stream,
-        stream.current_left_frame,
-        stream.current_depth_frame,
-        ignored_tracking,
-        std::nullopt,
-        timestamp_ns);
-    if (frame) {
-      const auto skipped_tail_frames = static_cast<std::uint32_t>(last_candidate - position);
-      if (skipped_tail_frames > 0) {
-        spdlog::warn(
-            "skipping {} unreadable tail frame(s) for {} last_error={}",
-            skipped_tail_frames,
-            stream.source.path.string(),
-            last_error);
-      }
-      return timestamp_ns;
-    }
-    last_error = frame.error();
-  }
-
-  return std::unexpected(
-      "failed to read any trailing frame for " + stream.source.path.string() + ": " + last_error);
-}
-
-[[nodiscard]]
-std::expected<cvmmap_streamer::encode::EncoderBackend, std::string> make_encoder_backend(
-    const CliOptions &options,
-    const cvmmap_streamer::CodecType codec,
-    const cvmmap_streamer::EncoderDeviceType encoder_device,
-    const cvmmap_streamer::ipc::FrameInfo &frame_info) {
-  cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults();
-  config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg;
-  config.encoder.device = encoder_device;
-  config.encoder.codec = codec;
-  config.encoder.gop = 30;
-  config.encoder.b_frames = 0;
-
-  auto backend_result = cvmmap_streamer::encode::make_encoder_backend(config);
-  if (!backend_result) {
-    return std::unexpected(
-        "failed to create encoder backend: " + cvmmap_streamer::format_error(backend_result.error()));
-  }
-  auto backend = std::move(*backend_result);
-  if (auto init_status = backend->init(config, frame_info); !init_status) {
-    return std::unexpected(
-        "failed to initialize encoder backend: " + cvmmap_streamer::format_error(init_status.error()));
-  }
-  return std::move(backend);
-}
-
-[[nodiscard]]
-std::expected<CameraStream, std::string> open_camera_stream(
-    const SourceSpec &source,
-    const CliOptions &options,
-    const cvmmap_streamer::CodecType codec,
-    const cvmmap_streamer::EncoderDeviceType encoder_device,
-    const sl::DEPTH_MODE depth_mode,
-    const sl::Resolution depth_size,
-    const PoseTrackingOptions &pose_tracking) {
-  CameraStream stream{};
-  stream.source = source;
-  stream.camera = std::make_unique<sl::Camera>();
-  stream.pose_tracking = pose_tracking;
-  stream.depth_image_size = depth_size;
-
-  sl::InitParameters init{};
-  init.input.setFromSVOFile(source.path.c_str());
-  init.svo_real_time_mode = false;
-  init.coordinate_system = pose_tracking.coordinate_system;
-  init.coordinate_units = sl::UNIT::METER;
-  init.depth_mode = depth_mode;
-  init.sdk_verbose = false;
-
-  const auto open_status = stream.camera->open(init);
-  if (open_status != sl::ERROR_CODE::SUCCESS) {
-    return std::unexpected("failed to open SVO '" + source.path.string() + "': " + zed_status_string(open_status));
-  }
-
-  if (options.with_pose) {
-    sl::PositionalTrackingParameters tracking_parameters{};
-    tracking_parameters.set_floor_as_origin = pose_tracking.set_floor_as_origin;
-    const auto tracking_status = stream.camera->enablePositionalTracking(tracking_parameters);
-    if (tracking_status == sl::ERROR_CODE::SUCCESS) {
-      stream.tracking_enabled = true;
-    } else {
-      spdlog::warn(
-          "positional tracking unavailable for '{}': {}. "
-          "continuing without pose output",
-          source.path.string(),
-          zed_status_string(tracking_status));
-    }
-  }
-
-  const auto total_frames = stream.camera->getSVONumberOfFrames();
-  if (total_frames <= 0) {
-    return std::unexpected("input SVO has no frames: " + source.path.string());
-  }
-  stream.total_frames = static_cast<std::uint64_t>(total_frames);
-
-  const auto camera_info = stream.camera->getCameraInformation();
-  const auto &camera_config = camera_info.camera_configuration;
-  stream.serial_number = camera_info.serial_number;
-  stream.width = static_cast<std::uint32_t>(camera_config.resolution.width);
-  stream.height = static_cast<std::uint32_t>(camera_config.resolution.height);
-  stream.fps = camera_config.fps;
-  stream.nominal_frame_period_ns = frame_period_ns(camera_config.fps);
-  if (stream.width == 0 || stream.height == 0) {
-    return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
-  }
-  if (auto valid = validate_depth_resolution_request(depth_size, stream.width, stream.height); !valid) {
-    return std::unexpected(valid.error());
-  }
-  stream.video_calibration = make_calibration_data(camera_info);
-
-  stream.frame_info = cvmmap_streamer::ipc::FrameInfo{
-      .width = static_cast<std::uint32_t>(stream.width),
-      .height = static_cast<std::uint32_t>(stream.height),
-      .channels = 3,
-      .depth = cvmmap_streamer::ipc::Depth::U8,
-      .pixel_format = cvmmap_streamer::ipc::PixelFormat::BGR,
-      .buffer_size = stream.width * stream.height * 3u,
-  };
-
-  auto backend = make_encoder_backend(options, codec, encoder_device, stream.frame_info);
-  if (!backend) {
-    return std::unexpected(backend.error());
-  }
-  stream.backend.emplace(std::move(*backend));
-
-  auto stream_info = (*stream.backend)->stream_info();
-  if (!stream_info) {
-    return std::unexpected(
-        "encoder backend did not provide stream info: " + cvmmap_streamer::format_error(stream_info.error()));
-  }
-  stream.stream_info = *stream_info;
-
-  std::uint64_t first_timestamp_ns = 0;
-  auto first_frame = read_frame_data(
-      stream,
-      stream.current_left_frame,
-      stream.current_depth_frame,
-      stream.current_tracking,
-      std::nullopt,
-      first_timestamp_ns);
-  if (!first_frame) {
-    return std::unexpected(first_frame.error());
-  }
-  stream.depth_width = static_cast<std::uint32_t>(stream.current_depth_frame.getWidth());
-  stream.depth_height = static_cast<std::uint32_t>(stream.current_depth_frame.getHeight());
-  stream.depth_calibration = make_calibration_data(
-      stream.camera->getCameraInformation(sl::Resolution(
-          static_cast<std::size_t>(stream.depth_width),
-          static_cast<std::size_t>(stream.depth_height))));
-  stream.first_timestamp_ns = first_timestamp_ns;
-
-  auto last_timestamp_ns = read_last_readable_timestamp(stream);
-  if (!last_timestamp_ns) {
-    return std::unexpected(last_timestamp_ns.error());
-  }
-  stream.last_timestamp_ns = *last_timestamp_ns;
-
-  return stream;
-}
-
-void close_camera_streams(std::vector<CameraStream> &streams) {
-  for (auto &stream : streams) {
-    if (stream.backend) {
-      (*stream.backend)->shutdown();
-    }
-    if (stream.tracking_enabled && stream.camera != nullptr && stream.camera->isOpened()) {
-      stream.camera->disablePositionalTracking();
-      stream.tracking_enabled = false;
-    }
-    if (stream.camera != nullptr && stream.camera->isOpened()) {
-      stream.camera->close();
-    }
-  }
-}
-
-[[nodiscard]]
-std::expected<void, std::string> write_access_units(
-    cvmmap_streamer::record::McapRecordSink &sink,
-    const std::vector<cvmmap_streamer::encode::EncodedAccessUnit> &access_units) {
-  for (const auto &access_unit : access_units) {
-    if (auto write = sink.write_access_unit(access_unit); !write) {
-      return std::unexpected(write.error());
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> write_access_units(
-    cvmmap_streamer::record::MultiMcapRecordSink &sink,
-    const cvmmap_streamer::record::MultiMcapRecordSink::StreamId stream_id,
-    const std::vector<cvmmap_streamer::encode::EncodedAccessUnit> &access_units) {
-  for (const auto &access_unit : access_units) {
-    if (auto write = sink.write_access_unit(stream_id, access_unit); !write) {
-      return std::unexpected(write.error());
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> flush_and_write(
-    cvmmap_streamer::record::McapRecordSink &sink,
-    cvmmap_streamer::encode::EncoderBackend &backend) {
-  auto flushed = backend->flush();
-  if (!flushed) {
-    return std::unexpected(cvmmap_streamer::format_error(flushed.error()));
-  }
-  return write_access_units(sink, *flushed);
-}
-
-[[nodiscard]]
-std::expected<void, std::string> flush_and_write(
-    cvmmap_streamer::record::MultiMcapRecordSink &sink,
-    const cvmmap_streamer::record::MultiMcapRecordSink::StreamId stream_id,
-    cvmmap_streamer::encode::EncoderBackend &backend) {
-  auto flushed = backend->flush();
-  if (!flushed) {
-    return std::unexpected(cvmmap_streamer::format_error(flushed.error()));
-  }
-  return write_access_units(sink, stream_id, *flushed);
-}
-
-[[nodiscard]]
-std::expected<void, std::string> register_mcap_streams(
-    cvmmap_streamer::record::MultiMcapRecordSink &sink,
-    std::vector<CameraStream> &streams,
-    const CliOptions &options) {
-  for (auto &stream : streams) {
-    cvmmap_streamer::record::McapRecordStreamConfig config{};
-    config.topic = namespace_topic(stream.source.label, options.video_topic);
-    config.depth_topic = namespace_topic(stream.source.label, options.depth_topic);
-    config.calibration_topic = namespace_topic(stream.source.label, options.calibration_topic);
-    if (stream.depth_width != stream.width || stream.depth_height != stream.height) {
-      config.depth_calibration_topic = namespace_topic(stream.source.label, options.depth_calibration_topic);
-    }
-    config.pose_topic = options.with_pose ? namespace_topic(stream.source.label, options.pose_topic) : "";
-    config.body_topic = namespace_topic(stream.source.label, "/camera/body");
-    config.frame_id = multi_frame_id(options, stream.source.label);
-    auto stream_id = sink.add_stream(config, stream.stream_info);
-    if (!stream_id) {
-      return std::unexpected(stream_id.error());
-    }
-    stream.mcap_stream_id = *stream_id;
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> sync_streams_to_timestamp(
-    std::vector<CameraStream> &streams,
-    const std::uint64_t effective_start_ts,
-    const bool log_sync_info = true) {
-  bool shutdown_logged{false};
-  for (auto &stream : streams) {
-    if (log_shutdown_request(shutdown_logged, "multi-camera sync")) {
-      return std::unexpected("interrupted");
-    }
-    const auto sdk_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts});
-    if (sdk_position < 0) {
-      return std::unexpected(
-          "failed to compute synced start frame for " + stream.source.path.string() + " at timestamp " +
-          std::to_string(effective_start_ts));
-    }
-    stream.sync_position = std::clamp(
-        sdk_position,
-        0,
-        static_cast<int>(stream.total_frames > 0 ?
-            stream.total_frames - 1 : 0));
-
-    const auto read_at_position = [&](const int position) -> std::expected<void, std::string> {
-      stream.camera->setSVOPosition(position);
-      auto current = read_frame_data(
-          stream,
-          stream.current_left_frame,
-          stream.current_depth_frame,
-          stream.current_tracking,
-          std::nullopt,
-          stream.current_timestamp_ns);
-      if (!current) {
-        return std::unexpected(current.error());
-      }
-      auto next = fill_next_frame(stream);
-      if (!next) {
-        return std::unexpected(next.error());
-      }
-      return {};
-    };
-
-    if (auto loaded = read_at_position(stream.sync_position); !loaded) {
-      return std::unexpected(loaded.error());
-    }
-
-    while (stream.sync_position > 0 && stream.current_timestamp_ns > effective_start_ts) {
-      if (log_shutdown_request(shutdown_logged, "multi-camera sync")) {
-        return std::unexpected("interrupted");
-      }
-      stream.sync_position -= 1;
-      if (auto loaded = read_at_position(stream.sync_position); !loaded) {
-        return std::unexpected(loaded.error());
-      }
-    }
-
-    while (stream.has_next && stream.next_timestamp_ns <= effective_start_ts) {
-      if (log_shutdown_request(shutdown_logged, "multi-camera sync")) {
-        return std::unexpected("interrupted");
-      }
-      stream.dropped_frames += 1;
-      auto promote = promote_next_frame(stream);
-      if (!promote) {
-        return std::unexpected(promote.error());
-      }
-    }
-
-    if (log_sync_info) {
-      spdlog::info(
-          "ZED_SVO_MCAP_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}",
-          stream.source.path.string(),
-          stream.source.label,
-          stream.sync_position,
-          stream.first_timestamp_ns,
-          stream.current_timestamp_ns,
-          stream.has_next ? stream.next_timestamp_ns : 0);
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-bool have_sync_window(const std::vector<CameraStream> &streams, const std::uint64_t common_end_ts) {
-  for (const auto &stream : streams) {
-    if (stream.current_timestamp_ns > common_end_ts) {
-      return false;
-    }
-  }
-  return true;
-}
-
-[[nodiscard]]
-std::expected<void, std::string> advance_streams_to_timestamp(
-    std::vector<CameraStream> &streams,
-    const std::uint64_t target_timestamp_ns) {
-  for (auto &stream : streams) {
-    while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) {
-      stream.dropped_frames += 1;
-      auto promote = promote_next_frame(stream);
-      if (!promote) {
-        return std::unexpected(promote.error());
-      }
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::uint64_t timestamp_delta_abs(const std::uint64_t left, const std::uint64_t right) {
-  return left >= right ?
-      left - right : right - left;
-}
-
-[[nodiscard]]
-std::expected<std::vector<BundledFrameSelection>, std::string> select_nearest_bundle(
-    const std::vector<CameraStream> &streams,
-    const std::uint64_t bundle_timestamp_ns,
-    const std::uint64_t common_end_ts) {
-  std::vector<BundledFrameSelection> selections{};
-  selections.reserve(streams.size());
-
-  for (std::size_t stream_index = 0; stream_index < streams.size(); ++stream_index) {
-    const auto &stream = streams[stream_index];
-    if (stream.current_timestamp_ns > common_end_ts) {
-      return std::unexpected("no bundle frames remain within the common overlap window");
-    }
-
-    const bool blocked_by_corruption_gap =
-        stream.has_next &&
-        stream.next_corrupted_frames_skipped > 0 &&
-        bundle_timestamp_ns > stream.current_timestamp_ns &&
-        bundle_timestamp_ns < stream.next_timestamp_ns;
-    if (blocked_by_corruption_gap) {
-      selections.push_back(BundledFrameSelection{
-          .stream_index = stream_index,
-          .status = cvmmap_streamer::record::BundleMemberStatus::CorruptedGap,
-          .use_next = false,
-          .timestamp_ns = std::nullopt,
-          .delta_ns = 0,
-          .corrupted_frames_skipped = stream.next_corrupted_frames_skipped,
-      });
-      continue;
-    }
-
-    bool use_next = false;
-    std::uint64_t selected_timestamp_ns = stream.current_timestamp_ns;
-    if (stream.has_next && stream.next_timestamp_ns <= common_end_ts) {
-      const auto current_delta = timestamp_delta_abs(stream.current_timestamp_ns, bundle_timestamp_ns);
-      const auto next_delta = timestamp_delta_abs(stream.next_timestamp_ns, bundle_timestamp_ns);
-      if (next_delta <= current_delta) {
-        use_next = true;
-        selected_timestamp_ns = stream.next_timestamp_ns;
-      }
-    }
-
-    selections.push_back(BundledFrameSelection{
-        .stream_index = stream_index,
-        .status = cvmmap_streamer::record::BundleMemberStatus::Present,
-        .use_next = use_next,
-        .timestamp_ns = selected_timestamp_ns,
-        .delta_ns = static_cast<std::int64_t>(selected_timestamp_ns) -
-            static_cast<std::int64_t>(bundle_timestamp_ns),
-        .corrupted_frames_skipped = 0,
-    });
-  }
-
-  return selections;
-}
-
-[[nodiscard]]
-std::vector<BundledFrameSelection> make_strict_bundle(
-    const std::vector<CameraStream> &streams,
-    const std::uint64_t bundle_timestamp_ns) {
-  std::vector<BundledFrameSelection> selections{};
-  selections.reserve(streams.size());
-  for (std::size_t stream_index = 0; stream_index < streams.size(); ++stream_index) {
-    const auto &stream = streams[stream_index];
-    selections.push_back(BundledFrameSelection{
-        .stream_index = stream_index,
-        .status = cvmmap_streamer::record::BundleMemberStatus::Present,
-        .use_next = false,
-        .timestamp_ns = stream.current_timestamp_ns,
-        .delta_ns = static_cast<std::int64_t>(stream.current_timestamp_ns) -
-            static_cast<std::int64_t>(bundle_timestamp_ns),
-        .corrupted_frames_skipped = 0,
-    });
-  }
-  return selections;
-}
-
-[[nodiscard]]
-const sl::Mat &selected_left_frame(const CameraStream &stream, const BundledFrameSelection &selection) {
-  return selection.use_next ? stream.next_left_frame : stream.current_left_frame;
-}
-
-[[nodiscard]]
-const sl::Mat &selected_depth_frame(const CameraStream &stream, const BundledFrameSelection &selection) {
-  return selection.use_next ? stream.next_depth_frame : stream.current_depth_frame;
-}
-
-[[nodiscard]]
-const TrackingSample &selected_tracking(const CameraStream &stream, const BundledFrameSelection &selection) {
-  return selection.use_next ?
-      stream.next_tracking : stream.current_tracking;
-}
-
-[[nodiscard]]
-std::expected<std::optional<std::uint64_t>, std::string> next_synced_group_timestamp(
-    std::vector<CameraStream> &streams,
-    const std::uint64_t tolerance_ns,
-    const std::uint64_t common_end_ts) {
-  bool shutdown_logged{false};
-  while (have_sync_window(streams, common_end_ts)) {
-    if (log_shutdown_request(shutdown_logged, "multi-camera sync")) {
-      return std::unexpected("interrupted");
-    }
-    const auto candidate_it = std::max_element(
-        streams.begin(),
-        streams.end(),
-        [](const auto &left, const auto &right) {
-          return left.current_timestamp_ns < right.current_timestamp_ns;
-        });
-    const auto candidate_ts = candidate_it->current_timestamp_ns;
-
-    bool advanced = false;
-    for (auto &stream : streams) {
-      while (stream.current_timestamp_ns + tolerance_ns < candidate_ts) {
-        if (log_shutdown_request(shutdown_logged, "multi-camera sync")) {
-          return std::unexpected("interrupted");
-        }
-        if (!stream.has_next) {
-          return std::optional<std::uint64_t>{};
-        }
-        stream.dropped_frames += 1;
-        auto promote = promote_next_frame(stream);
-        if (!promote) {
-          return std::unexpected(promote.error());
-        }
-        advanced = true;
-        if (stream.current_timestamp_ns > common_end_ts) {
-          return std::optional<std::uint64_t>{};
-        }
-      }
-    }
-    if (advanced) {
-      continue;
-    }
-
-    const auto [min_it, max_it] = std::minmax_element(
-        streams.begin(),
-        streams.end(),
-        [](const auto &left, const auto &right) {
-          return left.current_timestamp_ns < right.current_timestamp_ns;
-        });
-    if (max_it->current_timestamp_ns - min_it->current_timestamp_ns <= tolerance_ns) {
-      return max_it->current_timestamp_ns;
-    }
-  }
-  return std::optional<std::uint64_t>{};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> encode_and_write_sample(
-    cvmmap_streamer::record::MultiMcapRecordSink &sink,
-    CameraStream &stream,
-    const CliOptions &options,
-    const std::uint64_t timestamp_ns,
-    const sl::Mat &left_frame,
-    const sl::Mat &depth_frame,
-    const TrackingSample &tracking) {
-  const auto video_step_bytes = left_frame.getStepBytes(sl::MEM::CPU);
-  const auto video_bytes = std::span<const std::uint8_t>(
-      left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
-      video_step_bytes * left_frame.getHeight());
-  cvmmap_streamer::encode::RawVideoFrame raw_video{
-      .info = stream.frame_info,
-      .source_timestamp_ns = timestamp_ns,
-      .row_stride_bytes = video_step_bytes,
-      .bytes = video_bytes,
-  };
-  if (auto push = (*stream.backend)->push_frame(raw_video); !push) {
-    return std::unexpected(
-        "failed to encode frame for " + stream.source.label + ": " + cvmmap_streamer::format_error(push.error()));
-  }
-
-  auto drained = (*stream.backend)->drain();
-  if (!drained) {
-    return std::unexpected(
-        "failed to drain encoded access units for " + stream.source.label + ": " +
-        cvmmap_streamer::format_error(drained.error()));
-  }
-  if (auto write = write_access_units(sink, stream.mcap_stream_id, *drained); !write) {
-    return std::unexpected("failed to write video access unit for " + stream.source.label + ": " + write.error());
-  }
-
-  if (!stream.calibration_written) {
-    cvmmap_streamer::record::RawCameraCalibrationView calibration{
-        .timestamp_ns = timestamp_ns,
-        .width = stream.video_calibration.width,
-        .height = stream.video_calibration.height,
-        .distortion_model = "plumb_bob",
-        .distortion = stream.video_calibration.distortion,
-        .intrinsic_matrix = stream.video_calibration.intrinsic_matrix,
-        .rectification_matrix = stream.video_calibration.rectification_matrix,
-        .projection_matrix = stream.video_calibration.projection_matrix,
-    };
-    if (auto write = sink.write_camera_calibration(stream.mcap_stream_id, calibration);
-        !write) {
-      return std::unexpected("failed to write calibration for " + stream.source.label + ": " + write.error());
-    }
-    stream.calibration_written = true;
-  }
-
-  if (!stream.depth_calibration_written &&
-      (stream.depth_width != stream.width || stream.depth_height != stream.height)) {
-    cvmmap_streamer::record::RawCameraCalibrationView depth_calibration{
-        .timestamp_ns = timestamp_ns,
-        .width = stream.depth_calibration.width,
-        .height = stream.depth_calibration.height,
-        .distortion_model = "plumb_bob",
-        .distortion = stream.depth_calibration.distortion,
-        .intrinsic_matrix = stream.depth_calibration.intrinsic_matrix,
-        .rectification_matrix = stream.depth_calibration.rectification_matrix,
-        .projection_matrix = stream.depth_calibration.projection_matrix,
-    };
-    if (auto write = sink.write_depth_camera_calibration(stream.mcap_stream_id, depth_calibration); !write) {
-      return std::unexpected("failed to write depth calibration for " + stream.source.label + ": " + write.error());
-    }
-    stream.depth_calibration_written = true;
-  }
-
-  const auto depth_width = static_cast<std::uint32_t>(depth_frame.getWidth());
-  const auto depth_height = static_cast<std::uint32_t>(depth_frame.getHeight());
-  if (depth_width != stream.depth_width || depth_height != stream.depth_height) {
-    return std::unexpected(
-        "depth resolution changed unexpectedly for " + stream.source.label + ": " +
-        std::to_string(depth_width) + "x" + std::to_string(depth_height) + " vs " +
-        std::to_string(stream.depth_width) + "x" + std::to_string(stream.depth_height));
-  }
-  const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU);
-  const auto packed_depth_bytes = static_cast<std::size_t>(depth_width) * sizeof(std::uint16_t);
-  if (depth_step_bytes < packed_depth_bytes) {
-    return std::unexpected(
-        "depth stride " + std::to_string(depth_step_bytes) + " is smaller than packed row size " +
-        std::to_string(packed_depth_bytes) + " for " + stream.source.label);
-  }
-
-  std::optional<std::vector<std::uint16_t>> compact_depth{};
-  std::span<const std::uint16_t> depth_pixels{};
-  if (depth_step_bytes == packed_depth_bytes) {
-    depth_pixels = std::span<const std::uint16_t>(
-        depth_frame.getPtr<sl::ushort1>(sl::MEM::CPU),
-        static_cast<std::size_t>(depth_width) * static_cast<std::size_t>(depth_height));
-  } else {
-    compact_depth = copy_compact_u16_plane(depth_frame);
-    depth_pixels = *compact_depth;
-  }
-
-  cvmmap_streamer::record::RawDepthMapU16View depth_map{
-      .timestamp_ns = timestamp_ns,
-      .width = depth_width,
-      .height = depth_height,
-      .pixels = depth_pixels,
-  };
-  if (auto write = sink.write_depth_map_u16(stream.mcap_stream_id, depth_map); !write) {
-    return std::unexpected("failed to write depth map for " + stream.source.label + ": " + write.error());
-  }
-
-  if (options.with_pose && tracking.has_pose) {
-    cvmmap_streamer::record::RawPoseView pose_view{
-        .timestamp_ns = timestamp_ns,
-        .reference_frame_id = pose_reference_frame_id(stream.pose_tracking, stream.source.label),
-        .position = tracking.position,
-        .orientation = tracking.orientation,
-    };
-    if (auto write = sink.write_pose(stream.mcap_stream_id, pose_view); !write) {
-      return std::unexpected("failed to write pose for " + stream.source.label + ": " + write.error());
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> encode_and_write_group(
-    cvmmap_streamer::record::MultiMcapRecordSink &sink,
-    std::vector<CameraStream> &streams,
-    const CliOptions &options,
-    const cvmmap_streamer::record::BundlePolicy bundle_policy,
-    const std::uint64_t bundle_index,
-    const std::uint64_t bundle_timestamp_ns,
-    const std::span<const BundledFrameSelection> selections) {
-  if (selections.size() != streams.size()) {
-    return std::unexpected(
-        "bundle selection size does not match stream count");
-  }
-
-  std::vector<cvmmap_streamer::record::RawBundleMemberView> bundle_members{};
-  bundle_members.reserve(selections.size());
-  for (const auto &selection : selections) {
-    const auto &stream = streams[selection.stream_index];
-    bundle_members.push_back(cvmmap_streamer::record::RawBundleMemberView{
-        .camera_label = stream.source.label,
-        .status = selection.status,
-        .timestamp_ns = selection.timestamp_ns,
-        .delta_ns = selection.delta_ns,
-        .corrupted_frames_skipped = selection.corrupted_frames_skipped,
-    });
-  }
-  if (auto write = sink.write_bundle_manifest(cvmmap_streamer::record::RawBundleManifestView{
-          .timestamp_ns = bundle_timestamp_ns,
-          .bundle_index = bundle_index,
-          .policy = bundle_policy,
-          .members = bundle_members,
-      }); !write) {
-    return std::unexpected("failed to write bundle manifest: " + write.error());
-  }
-
-  std::vector<BundledFrameSelection> ordered_selections{};
-  ordered_selections.reserve(selections.size());
-  for (const auto &selection : selections) {
-    if (selection.status == cvmmap_streamer::record::BundleMemberStatus::Present &&
-        selection.timestamp_ns.has_value()) {
-      ordered_selections.push_back(selection);
-    }
-  }
-  std::sort(
-      ordered_selections.begin(),
-      ordered_selections.end(),
-      [](const auto &left, const auto &right) {
-        if (left.timestamp_ns != right.timestamp_ns) {
-          return left.timestamp_ns < right.timestamp_ns;
-        }
-        return left.stream_index < right.stream_index;
-      });
-
-  for (const auto &selection : ordered_selections) {
-    auto &stream = streams[selection.stream_index];
-    const auto &left_frame = selected_left_frame(stream, selection);
-    const auto &depth_frame = selected_depth_frame(stream, selection);
-    const auto &tracking = selected_tracking(stream, selection);
-    if (auto write = encode_and_write_sample(
-            sink,
-            stream,
-            options,
-            *selection.timestamp_ns,
-            left_frame,
-            depth_frame,
-            tracking); !write) {
-      return write;
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> advance_after_nearest_emit(
-    std::vector<CameraStream> &streams,
-    const std::span<const BundledFrameSelection> selections) {
-  for (const auto &selection : selections) {
-    if (selection.status != cvmmap_streamer::record::BundleMemberStatus::Present || !selection.use_next) {
-      continue;
-    }
-    auto &stream = streams[selection.stream_index];
-    auto promote = promote_next_frame(stream);
-    if (!promote) {
-      return std::unexpected(promote.error());
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-std::expected<void, std::string> advance_after_emit(std::vector<CameraStream> &streams) {
-  for (auto &stream : streams) {
-    if (!stream.has_next) {
-      return std::unexpected("end-of-svo");
-    }
-    auto promote = promote_next_frame(stream);
-    if (!promote) {
-      return std::unexpected(promote.error());
-    }
-  }
-  return {};
-}
-
-[[nodiscard]]
-bool is_copy_stream_in_range(
-    const CameraStream &stream,
-    const std::optional<std::uint64_t> range_end_ts) {
-  if (stream.current_timestamp_ns == 0) {
-    return false;
-  }
-  if (range_end_ts.has_value() && stream.current_timestamp_ns > *range_end_ts) {
-    return false;
-  }
-  return true;
-}
-
-[[nodiscard]]
-std::optional<std::size_t> next_copy_stream_index(
-    const std::vector<CameraStream> &streams,
-    const std::optional<std::uint64_t> range_end_ts) {
-  std::optional<std::size_t> best_index{};
-  for (std::size_t index = 0; index < streams.size(); ++index) {
-    const auto &stream = streams[index];
-    if (!is_copy_stream_in_range(stream, range_end_ts)) {
-      continue;
-    }
-    if (!best_index.has_value()) {
-      best_index = index;
-      continue;
-    }
-    const auto &best_stream = streams[*best_index];
-    if (stream.current_timestamp_ns < best_stream.current_timestamp_ns) {
-      best_index = index;
-      continue;
-    }
-    if
-
-[[nodiscard]]
-bool is_copy_stream_in_range(
-    const CameraStream &stream,
-    const std::optional<std::uint64_t> range_end_ts) {
-  if (stream.current_timestamp_ns == 0) {
-    return false;
-  }
-  if (range_end_ts.has_value() && stream.current_timestamp_ns > *range_end_ts) {
-    return false;
-  }
-  return true;
-}
-
-[[nodiscard]]
-std::optional<std::size_t> next_copy_stream_index(
-    const std::vector<CameraStream> &streams,
-    const std::optional<std::uint64_t> range_end_ts) {
-  std::optional<std::size_t> best_index{};
-  for (std::size_t index = 0; index < streams.size(); ++index) {
-    const auto &stream = streams[index];
-    if (!is_copy_stream_in_range(stream, range_end_ts)) {
-      continue;
-    }
-    if (!best_index.has_value()) {
-      best_index = index;
-      continue;
-    }
-    const auto &best_stream = streams[*best_index];
-    if (stream.current_timestamp_ns < best_stream.current_timestamp_ns) {
-      best_index = index;
-      continue;
-    }
-    if (stream.current_timestamp_ns == best_stream.current_timestamp_ns &&
-        stream.source.label < best_stream.source.label) {
-      best_index = index;
-    }
-  }
-  return best_index;
-}
-
-[[nodiscard]]
-std::expected<void, std::string> advance_copy_stream(CameraStream &stream) {
-  if (!stream.has_next) {
-    stream.current_timestamp_ns = 0;
-    return {};
-  }
-  auto promote = promote_next_frame(stream);
-  if (!promote) {
-    return std::unexpected(promote.error());
-  }
-  return {};
-}
-
-[[nodiscard]]
-double time_window_progress_fraction(
-    const std::uint64_t window_start_ts,
-    const std::uint64_t window_end_ts,
-    const std::uint64_t current_timestamp_ns) {
-  if (window_end_ts <= window_start_ts) {
-    return 1.0;
-  }
-  const auto bounded_timestamp = std::clamp(current_timestamp_ns, window_start_ts, window_end_ts);
-  return static_cast<double>(bounded_timestamp - window_start_ts) /
-         static_cast<double>(window_end_ts - window_start_ts);
-}
-
-[[nodiscard]]
-std::string time_window_progress_detail(
-    const std::uint64_t window_start_ts,
-    const std::uint64_t window_end_ts,
-    const std::uint64_t current_timestamp_ns,
-    const std::string_view window_label) {
-  const auto bounded_timestamp = std::clamp(current_timestamp_ns, window_start_ts, window_end_ts);
-  const double elapsed_seconds = static_cast<double>(bounded_timestamp - window_start_ts) / 1'000'000'000.0;
-  const double total_seconds = window_end_ts > window_start_ts
-      ? static_cast<double>(window_end_ts - window_start_ts) / 1'000'000'000.0
-      : 0.0;
-  char buffer[96]{};
-  std::snprintf(buffer, sizeof(buffer), "%.1fs/%.1fs %s", elapsed_seconds, total_seconds, std::string(window_label).c_str());
-  return std::string(buffer);
-}
-
-[[nodiscard]]
-std::expected<std::uint64_t, std::string> skip_bundled_start_groups(
-    std::vector<CameraStream> &streams,
-    const std::uint32_t start_group_index,
-    const std::uint64_t tolerance_ns,
-    const std::uint64_t common_end_ts) {
-  for (std::uint32_t skipped_groups = 0; skipped_groups < start_group_index; ++skipped_groups) {
-    auto group_timestamp = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
-    if (!group_timestamp) {
-      return std::unexpected(group_timestamp.error());
-    }
-    if (!*group_timestamp) {
-      return std::unexpected(
-          "start-frame " + std::to_string(start_group_index) +
-          " is out of range for bundled multi-camera mode");
-    }
-
-    auto advance = advance_after_emit(streams);
-    if (!advance) {
-      if (advance.error() == "end-of-svo") {
-        return std::unexpected(
-            "start-frame " + std::to_string(start_group_index) +
-            " is out of range for bundled multi-camera mode");
-      }
-      return std::unexpected(advance.error());
-    }
-  }
-
-  auto first_selected_group = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts);
-  if (!first_selected_group) {
-    return std::unexpected(first_selected_group.error());
-  }
-  if (!*first_selected_group) {
-    return std::unexpected(
-        "start-frame " + std::to_string(start_group_index) +
-        " is out of range for bundled multi-camera mode");
-  }
-  return **first_selected_group;
-}
-
-[[nodiscard]]
-int run_single_source(
-    const CliOptions &options,
-    const std::filesystem::path &output_path,
-    const cvmmap_streamer::CodecType codec,
-    const cvmmap_streamer::EncoderDeviceType encoder_device,
-    const cvmmap_streamer::McapCompression compression,
-    const sl::DEPTH_MODE depth_mode,
-    const sl::Resolution depth_size,
-    const PoseTrackingOptions &pose_tracking) {
-  const auto input_path = std::filesystem::path{options.input_paths.front()};
-  if (!std::filesystem::is_regular_file(input_path)) {
-    spdlog::error("input file does not exist: {}", 
input_path.string()); - return exit_code(ToolExitCode::UsageError); - } - - if (options.has_end_frame && options.end_frame < options.start_frame) { - spdlog::error( - "invalid frame range: start-frame={} end-frame={}", - options.start_frame, - options.end_frame); - return exit_code(ToolExitCode::UsageError); - } - if (output_path.has_parent_path()) { - std::filesystem::create_directories(output_path.parent_path()); - } - - const auto source_label = extract_camera_label(input_path, 1); - - sl::Camera camera{}; - bool tracking_enabled{false}; - - auto close_camera = [&]() { - if (tracking_enabled) { - camera.disablePositionalTracking(); - tracking_enabled = false; - } - if (camera.isOpened()) { - camera.close(); - } - }; - - sl::InitParameters init{}; - init.input.setFromSVOFile(input_path.c_str()); - init.svo_real_time_mode = false; - init.coordinate_system = pose_tracking.coordinate_system; - init.coordinate_units = sl::UNIT::METER; - init.depth_mode = depth_mode; - init.sdk_verbose = false; - - const auto open_status = camera.open(init); - if (open_status != sl::ERROR_CODE::SUCCESS) { - spdlog::error( - "failed to open SVO '{}': {}", - input_path.string(), - zed_status_string(open_status)); - return exit_code(ToolExitCode::RuntimeError); - } - - const auto total_frames = camera.getSVONumberOfFrames(); - if (total_frames <= 0) { - close_camera(); - spdlog::error("input SVO has no frames"); - return exit_code(ToolExitCode::RuntimeError); - } - if (options.start_frame >= static_cast(total_frames)) { - close_camera(); - spdlog::error( - "start-frame {} is out of range for {} frames", - options.start_frame, - total_frames); - return exit_code(ToolExitCode::UsageError); - } - if (options.has_end_frame && options.end_frame >= static_cast(total_frames)) { - close_camera(); - spdlog::error( - "end-frame {} is out of range for {} frames", - options.end_frame, - total_frames); - return exit_code(ToolExitCode::UsageError); - } - - camera.setSVOPosition(static_cast(options.start_frame)); - - if (options.with_pose) { - sl::PositionalTrackingParameters tracking_parameters{}; - tracking_parameters.set_floor_as_origin = pose_tracking.set_floor_as_origin; - const auto tracking_status = camera.enablePositionalTracking(tracking_parameters); - if (tracking_status == sl::ERROR_CODE::SUCCESS) { - tracking_enabled = true; - } else { - spdlog::warn( - "positional tracking unavailable for '{}': {}. 
continuing without pose output", - input_path.string(), - zed_status_string(tracking_status)); - } - } - - const auto camera_info = camera.getCameraInformation(); - const auto &camera_config = camera_info.camera_configuration; - const auto width = static_cast(camera_config.resolution.width); - const auto height = static_cast(camera_config.resolution.height); - if (width == 0 || height == 0) { - close_camera(); - spdlog::error("camera resolution reported by the ZED SDK is invalid"); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto valid = validate_depth_resolution_request(depth_size, width, height); !valid) { - close_camera(); - spdlog::error("{}", valid.error()); - return exit_code(ToolExitCode::UsageError); - } - - cvmmap_streamer::RuntimeConfig config = cvmmap_streamer::RuntimeConfig::defaults(); - config.encoder.backend = cvmmap_streamer::EncoderBackendType::FFmpeg; - config.encoder.device = encoder_device; - config.encoder.codec = codec; - config.encoder.gop = 30; - config.encoder.b_frames = 0; - config.record.mcap.enabled = true; - config.record.mcap.path = output_path.string(); - config.record.mcap.topic = namespace_topic(source_label, options.video_topic); - config.record.mcap.depth_topic = namespace_topic(source_label, options.depth_topic); - config.record.mcap.calibration_topic = namespace_topic(source_label, options.calibration_topic); - config.record.mcap.depth_calibration_topic = namespace_topic(source_label, options.depth_calibration_topic); - config.record.mcap.pose_topic = options.with_pose ? namespace_topic(source_label, options.pose_topic) : ""; - config.record.mcap.frame_id = multi_frame_id(options, source_label); - config.record.mcap.compression = compression; - - cvmmap_streamer::ipc::FrameInfo frame_info{ - .width = static_cast(width), - .height = static_cast(height), - .channels = 3, - .depth = cvmmap_streamer::ipc::Depth::U8, - .pixel_format = cvmmap_streamer::ipc::PixelFormat::BGR, - .buffer_size = width * height * 3u, - }; - - auto backend_result = cvmmap_streamer::encode::make_encoder_backend(config); - if (!backend_result) { - close_camera(); - spdlog::error("failed to create encoder backend: {}", cvmmap_streamer::format_error(backend_result.error())); - return exit_code(ToolExitCode::RuntimeError); - } - auto backend = std::move(*backend_result); - - if (auto init_status = backend->init(config, frame_info); !init_status) { - close_camera(); - spdlog::error("failed to initialize encoder backend: {}", cvmmap_streamer::format_error(init_status.error())); - return exit_code(ToolExitCode::RuntimeError); - } - - auto stream_info = backend->stream_info(); - if (!stream_info) { - backend->shutdown(); - close_camera(); - spdlog::error("encoder backend did not provide stream info: {}", cvmmap_streamer::format_error(stream_info.error())); - return exit_code(ToolExitCode::RuntimeError); - } - - auto sink = cvmmap_streamer::record::McapRecordSink::create(config, *stream_info); - if (!sink) { - backend->shutdown(); - close_camera(); - spdlog::error("failed to create MCAP sink: {}", sink.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - const auto video_calibration = make_calibration_data(camera_info); - - sl::RuntimeParameters runtime_parameters{}; - sl::Mat left_frame{}; - sl::Mat depth_frame{}; - sl::Pose pose{}; - bool calibration_written{false}; - bool depth_calibration_written{false}; - std::uint32_t depth_width{0}; - std::uint32_t depth_height{0}; - std::optional depth_calibration{}; - std::uint64_t emitted_frames{0}; - std::optional 
last_timestamp_ns{}; - std::optional last_tracking_state{}; - bool interrupted{false}; - bool shutdown_logged{false}; - const auto last_frame = options.has_end_frame - ? options.end_frame - : static_cast(total_frames - 1); - const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps); - const auto total_frames_to_emit = static_cast(last_frame - options.start_frame + 1); - ProgressBar progress{total_frames_to_emit}; - - while (true) { - if (log_shutdown_request(shutdown_logged, "single-camera export")) { - interrupted = true; - break; - } - auto grab = grab_next_readable_frame( - camera, - runtime_parameters, - input_path.filename().string(), - input_path.string(), - static_cast(total_frames), - last_timestamp_ns, - std::nullopt); - if (!grab && grab.error() == "end-of-svo") { - break; - } - if (!grab) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("{}", grab.error()); - return exit_code(ToolExitCode::RuntimeError); - } - if (grab->svo_position > static_cast(last_frame)) { - break; - } - - const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU); - if (image_status != sl::ERROR_CODE::SUCCESS) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status)); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("{}", valid.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - const auto depth_status = camera.retrieveMeasure(depth_frame, sl::MEASURE::DEPTH_U16_MM, sl::MEM::CPU, depth_size); - if (depth_status != sl::ERROR_CODE::SUCCESS) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to retrieve depth map: {}", zed_status_string(depth_status)); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto valid = validate_u16c1_mat(depth_frame, "depth map"); !valid) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("{}", valid.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds(); - if (timestamp_ns == 0) { - timestamp_ns = emitted_frames * nominal_frame_period_ns; - } - if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) { - timestamp_ns = *last_timestamp_ns + 1; - } - maybe_log_recovered_corruption_gap(input_path.filename().string(), *grab, timestamp_ns); - last_timestamp_ns = timestamp_ns; - - const auto video_step_bytes = left_frame.getStepBytes(sl::MEM::CPU); - const auto video_bytes = std::span( - left_frame.getPtr(sl::MEM::CPU), - video_step_bytes * left_frame.getHeight()); - cvmmap_streamer::encode::RawVideoFrame raw_video{ - .info = frame_info, - .source_timestamp_ns = timestamp_ns, - .row_stride_bytes = video_step_bytes, - .bytes = video_bytes, - }; - - if (auto push = backend->push_frame(raw_video); !push) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to encode frame: {}", cvmmap_streamer::format_error(push.error())); - return exit_code(ToolExitCode::RuntimeError); - } - - auto drained = backend->drain(); - if (!drained) { 
- progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to drain encoded access units: {}", cvmmap_streamer::format_error(drained.error())); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto write = write_access_units(*sink, *drained); !write) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to write video access unit: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - if (!calibration_written) { - cvmmap_streamer::record::RawCameraCalibrationView calibration{ - .timestamp_ns = timestamp_ns, - .width = video_calibration.width, - .height = video_calibration.height, - .distortion_model = "plumb_bob", - .distortion = video_calibration.distortion, - .intrinsic_matrix = video_calibration.intrinsic_matrix, - .rectification_matrix = video_calibration.rectification_matrix, - .projection_matrix = video_calibration.projection_matrix, - }; - if (auto write = sink->write_camera_calibration(calibration); !write) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to write calibration: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - calibration_written = true; - } - - const auto actual_depth_width = static_cast(depth_frame.getWidth()); - const auto actual_depth_height = static_cast(depth_frame.getHeight()); - if (depth_width == 0 || depth_height == 0) { - depth_width = actual_depth_width; - depth_height = actual_depth_height; - depth_calibration = make_calibration_data(camera.getCameraInformation(sl::Resolution( - static_cast(depth_width), - static_cast(depth_height)))); - if (depth_width != width || depth_height != height) { - spdlog::info( - "exporting depth at {}x{} (requested {}) while video remains {}x{}", - depth_width, - depth_height, - describe_depth_resolution_request(depth_size), - width, - height); - } - } else if (actual_depth_width != depth_width || actual_depth_height != depth_height) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error( - "depth resolution changed unexpectedly from {}x{} to {}x{}", - depth_width, - depth_height, - actual_depth_width, - actual_depth_height); - return exit_code(ToolExitCode::RuntimeError); - } - - if (!depth_calibration_written && - (depth_width != width || depth_height != height) && - depth_calibration.has_value()) { - cvmmap_streamer::record::RawCameraCalibrationView calibration{ - .timestamp_ns = timestamp_ns, - .width = depth_calibration->width, - .height = depth_calibration->height, - .distortion_model = "plumb_bob", - .distortion = depth_calibration->distortion, - .intrinsic_matrix = depth_calibration->intrinsic_matrix, - .rectification_matrix = depth_calibration->rectification_matrix, - .projection_matrix = depth_calibration->projection_matrix, - }; - if (auto write = sink->write_depth_camera_calibration(calibration); !write) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to write depth calibration: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - depth_calibration_written = true; - } - - const auto depth_step_bytes = depth_frame.getStepBytes(sl::MEM::CPU); - const auto packed_depth_bytes = static_cast(depth_width) * sizeof(std::uint16_t); - if (depth_step_bytes < packed_depth_bytes) { - 
progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("depth stride {} is smaller than packed row size {}", depth_step_bytes, packed_depth_bytes); - return exit_code(ToolExitCode::RuntimeError); - } - - std::optional> compact_depth{}; - std::span depth_pixels{}; - if (depth_step_bytes == packed_depth_bytes) { - depth_pixels = std::span( - depth_frame.getPtr(sl::MEM::CPU), - static_cast(depth_width) * static_cast(depth_height)); - } else { - compact_depth = copy_compact_u16_plane(depth_frame); - depth_pixels = *compact_depth; - } - - cvmmap_streamer::record::RawDepthMapU16View depth_map{ - .timestamp_ns = timestamp_ns, - .width = depth_width, - .height = depth_height, - .pixels = depth_pixels, - }; - if (auto write = sink->write_depth_map_u16(depth_map); !write) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to write depth map: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - if (tracking_enabled) { - const auto tracking_state = camera.getPosition(pose, pose_tracking.reference_frame); - if (!last_tracking_state || *last_tracking_state != tracking_state) { - last_tracking_state = tracking_state; - if (tracking_state != sl::POSITIONAL_TRACKING_STATE::OK) { - spdlog::warn( - "pose tracking state changed to {} at frame {}", - zed_tracking_state_string(tracking_state), - grab->svo_position); - } - } - if (tracking_state == sl::POSITIONAL_TRACKING_STATE::OK) { - const auto translation = pose.getTranslation(); - const auto orientation = pose.getOrientation(); - cvmmap_streamer::record::RawPoseView pose_view{ - .timestamp_ns = timestamp_ns, - .reference_frame_id = pose_reference_frame_id(pose_tracking, source_label), - .position = { - static_cast(translation.x), - static_cast(translation.y), - static_cast(translation.z), - }, - .orientation = { - static_cast(orientation.x), - static_cast(orientation.y), - static_cast(orientation.z), - static_cast(orientation.w), - }, - }; - if (auto write = sink->write_pose(pose_view); !write) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to write pose: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - } - } - - emitted_frames += 1; - progress.update(emitted_frames); - } - - if (auto flushed = flush_and_write(*sink, backend); !flushed) { - progress.finish(emitted_frames, false); - sink->close(); - backend->shutdown(); - close_camera(); - spdlog::error("failed to finalize encoded video: {}", flushed.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - sink->close(); - backend->shutdown(); - close_camera(); - - if (interrupted) { - progress.finish(emitted_frames, false); - spdlog::warn( - "gracefully stopped after writing {} frame(s) from '{}' to '{}'", - emitted_frames, - input_path.string(), - output_path.string()); - return interrupted_exit_code(); - } - - progress.finish(emitted_frames, true); - spdlog::info( - "wrote {} frame(s) from '{}' to '{}'", - emitted_frames, - input_path.string(), - output_path.string()); - return exit_code(ToolExitCode::Success); -} - -[[nodiscard]] -int run_multi_source( - const CliOptions &options, - const std::vector &sources, - const std::filesystem::path &output_path, - const cvmmap_streamer::CodecType codec, - const cvmmap_streamer::EncoderDeviceType encoder_device, - const cvmmap_streamer::McapCompression compression, - const sl::DEPTH_MODE 
depth_mode, - const sl::Resolution depth_size, - const PoseTrackingOptions &pose_tracking, - const MultiCameraExportPolicy bundle_policy, - const CopyRangeMode copy_range_mode) { - if (bundle_policy == MultiCameraExportPolicy::Copy) { - if (options.has_start_frame || options.has_end_frame) { - spdlog::error("--start-frame/--end-frame are not supported with --bundle-policy copy"); - return exit_code(ToolExitCode::UsageError); - } - if (options.has_sync_tolerance) { - spdlog::error("--sync-tolerance-ms is not supported with --bundle-policy copy"); - return exit_code(ToolExitCode::UsageError); - } - if (options.has_bundle_topic) { - spdlog::error("--bundle-topic is not supported with --bundle-policy copy"); - return exit_code(ToolExitCode::UsageError); - } - } - if (options.has_end_frame && options.end_frame < options.start_frame) { - spdlog::error( - "invalid bundled range: start-frame={} end-frame={}", - options.start_frame, - options.end_frame); - return exit_code(ToolExitCode::UsageError); - } - bool interrupted{false}; - bool shutdown_logged{false}; - if (output_path.has_parent_path()) { - std::filesystem::create_directories(output_path.parent_path()); - } - - std::vector streams{}; - streams.reserve(sources.size()); - for (const auto &source : sources) { - if (log_shutdown_request(shutdown_logged, "multi-camera export")) { - close_camera_streams(streams); - return interrupted_exit_code(); - } - auto stream = open_camera_stream(source, options, codec, encoder_device, depth_mode, depth_size, pose_tracking); - if (!stream) { - close_camera_streams(streams); - spdlog::error("{}", stream.error()); - return exit_code(ToolExitCode::RuntimeError); - } - streams.push_back(std::move(*stream)); - } - - const auto common_start_ts = std::max_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.first_timestamp_ns < right.first_timestamp_ns; - })->first_timestamp_ns; - const auto common_end_ts = std::min_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.last_timestamp_ns < right.last_timestamp_ns; - })->last_timestamp_ns; - const auto full_start_ts = std::min_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.first_timestamp_ns < right.first_timestamp_ns; - })->first_timestamp_ns; - const auto full_end_ts = std::max_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.last_timestamp_ns < right.last_timestamp_ns; - })->last_timestamp_ns; - if ((bundle_policy != MultiCameraExportPolicy::Copy || copy_range_mode == CopyRangeMode::Common) && - common_start_ts > common_end_ts) { - close_camera_streams(streams); - spdlog::error("synced time window is empty: start_ts={} end_ts={}", common_start_ts, common_end_ts); - return exit_code(ToolExitCode::UsageError); - } - - const auto slowest_period_ns = std::max_element( - streams.begin(), - streams.end(), - [](const auto &left, const auto &right) { - return left.nominal_frame_period_ns < right.nominal_frame_period_ns; - })->nominal_frame_period_ns; - const auto bundle_policy_name = multi_camera_policy_name(bundle_policy); - const auto tolerance_ns = options.has_sync_tolerance - ? static_cast(std::llround(options.sync_tolerance_ms * 1'000'000.0)) - : std::max(1, slowest_period_ns); - if (bundle_policy == MultiCameraExportPolicy::Copy) { - const auto range_start_ts = copy_range_mode == CopyRangeMode::Common ? 
common_start_ts : full_start_ts; - const auto range_end_ts = copy_range_mode == CopyRangeMode::Common ? common_end_ts : full_end_ts; - spdlog::info( - "multi-camera copy window start_ts={} end_ts={} policy={} copy_range={}", - range_start_ts, - range_end_ts, - bundle_policy_name, - copy_range_name(copy_range_mode)); - } else { - spdlog::info( - "multi-camera bundle window start_ts={} end_ts={} policy={} bundle_period_ns={} tolerance_ns={}", - common_start_ts, - common_end_ts, - bundle_policy_name, - slowest_period_ns, - tolerance_ns); - } - const auto render_progress = stderr_supports_progress_bar(); - const auto total_timeline_bundles = common_end_ts >= common_start_ts - ? ((common_end_ts - common_start_ts) / slowest_period_ns) + 1 - : 0; - if (bundle_policy == MultiCameraExportPolicy::Nearest && - options.start_frame >= total_timeline_bundles) { - close_camera_streams(streams); - spdlog::error( - "start-frame {} is out of range for bundled multi-camera mode (available bundles: {})", - options.start_frame, - total_timeline_bundles); - return exit_code(ToolExitCode::UsageError); - } - if (bundle_policy == MultiCameraExportPolicy::Nearest && - options.has_end_frame && - options.end_frame >= total_timeline_bundles) { - close_camera_streams(streams); - spdlog::error( - "end-frame {} is out of range for bundled multi-camera mode (available bundles: {})", - options.end_frame, - total_timeline_bundles); - return exit_code(ToolExitCode::UsageError); - } - - const auto selected_end_bundle = options.has_end_frame - ? static_cast(options.end_frame) - : (bundle_policy == MultiCameraExportPolicy::Nearest - ? total_timeline_bundles - 1 - : 0); - const auto selected_total_groups = options.has_end_frame - ? static_cast(options.end_frame - options.start_frame) + 1 - : (bundle_policy == MultiCameraExportPolicy::Nearest - ? total_timeline_bundles - static_cast(options.start_frame) - : 0); - const bool exact_group_progress = - render_progress && - (bundle_policy == MultiCameraExportPolicy::Nearest || options.has_end_frame); - const bool approximate_time_progress = - render_progress && - (bundle_policy == MultiCameraExportPolicy::Strict || bundle_policy == MultiCameraExportPolicy::Copy) && - (bundle_policy == MultiCameraExportPolicy::Copy || !options.has_end_frame); - ProgressBar progress{exact_group_progress ? selected_total_groups : 0}; - double last_progress_fraction = 0.0; - std::string last_progress_detail{}; - - const auto initial_target_ts = bundle_policy == MultiCameraExportPolicy::Nearest - ? common_start_ts + static_cast(options.start_frame) * slowest_period_ns - : (bundle_policy == MultiCameraExportPolicy::Copy - ? (copy_range_mode == CopyRangeMode::Common ? 
common_start_ts : full_start_ts) - : common_start_ts); - if (auto synced = sync_streams_to_timestamp(streams, initial_target_ts); !synced) { - close_camera_streams(streams); - if (synced.error() == "interrupted") { - return interrupted_exit_code(); - } - spdlog::error("{}", synced.error()); - return exit_code(ToolExitCode::RuntimeError); - } - auto effective_progress_start_ts = initial_target_ts; - if (bundle_policy == MultiCameraExportPolicy::Strict && options.start_frame > 0) { - if (auto skipped_to = skip_bundled_start_groups(streams, options.start_frame, tolerance_ns, common_end_ts); !skipped_to) { - close_camera_streams(streams); - if (skipped_to.error() == "interrupted") { - return interrupted_exit_code(); - } - spdlog::error("{}", skipped_to.error()); - return exit_code(ToolExitCode::UsageError); - } else { - effective_progress_start_ts = *skipped_to; - } - } - - auto sink = cvmmap_streamer::record::MultiMcapRecordSink::create( - output_path.string(), - compression, - bundle_policy == MultiCameraExportPolicy::Copy ? "" : options.bundle_topic); - if (!sink) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(0, false); - } - close_camera_streams(streams); - spdlog::error("failed to create MCAP sink: {}", sink.error()); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto registered = register_mcap_streams(*sink, streams, options); !registered) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(0, false); - } - sink->close(); - close_camera_streams(streams); - spdlog::error("failed to register MCAP streams: {}", registered.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - std::uint64_t emitted_groups{0}; - std::uint64_t emitted_samples{0}; - if (bundle_policy == MultiCameraExportPolicy::Nearest) { - for (std::uint64_t bundle_index = options.start_frame; bundle_index <= selected_end_bundle; ++bundle_index) { - if (log_shutdown_request(shutdown_logged, "multi-camera export")) { - interrupted = true; - break; - } - const auto bundle_timestamp_ns = common_start_ts + bundle_index * slowest_period_ns; - if (auto advanced = advance_streams_to_timestamp(streams, bundle_timestamp_ns); !advanced) { - progress.finish(emitted_groups, false); - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", advanced.error()); - return exit_code(ToolExitCode::RuntimeError); - } - auto selections = select_nearest_bundle(streams, bundle_timestamp_ns, common_end_ts); - if (!selections) { - break; - } - - if (auto write = encode_and_write_group( - *sink, - streams, - options, - manifest_bundle_policy(bundle_policy), - bundle_index, - bundle_timestamp_ns, - *selections); !write) { - progress.finish(emitted_groups, false); - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto advance = advance_after_nearest_emit(streams, *selections); !advance) { - progress.finish(emitted_groups, false); - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", advance.error()); - return exit_code(ToolExitCode::RuntimeError); - } - emitted_groups += 1; - progress.update(emitted_groups); - } - } else if (bundle_policy == MultiCameraExportPolicy::Strict) { - while (true) { - if (log_shutdown_request(shutdown_logged, "multi-camera export")) { - interrupted = true; - break; - } - auto 
group_timestamp = next_synced_group_timestamp(streams, tolerance_ns, common_end_ts); - if (!group_timestamp) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(emitted_groups, false); - } - sink->close(); - close_camera_streams(streams); - if (group_timestamp.error() == "interrupted") { - return interrupted_exit_code(); - } - spdlog::error("sync failed: {}", group_timestamp.error()); - return exit_code(ToolExitCode::RuntimeError); - } - if (!*group_timestamp) { - break; - } - - const auto bundle_index = static_cast(options.start_frame) + emitted_groups; - const auto selections = make_strict_bundle(streams, **group_timestamp); - if (auto write = encode_and_write_group( - *sink, - streams, - options, - manifest_bundle_policy(bundle_policy), - bundle_index, - **group_timestamp, - selections); !write) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(emitted_groups, false); - } - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - emitted_groups += 1; - if (approximate_time_progress) { - last_progress_fraction = time_window_progress_fraction(effective_progress_start_ts, common_end_ts, **group_timestamp); - last_progress_detail = time_window_progress_detail( - effective_progress_start_ts, - common_end_ts, - **group_timestamp, - "overlap"); - progress.update_fraction(last_progress_fraction, last_progress_detail); - } else { - progress.update(emitted_groups); - } - if (options.has_end_frame && emitted_groups >= selected_total_groups) { - break; - } - - auto advance = advance_after_emit(streams); - if (!advance) { - if (advance.error() == "end-of-svo") { - break; - } - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(emitted_groups, false); - } - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", advance.error()); - return exit_code(ToolExitCode::RuntimeError); - } - } - } else { - const auto range_start_ts = copy_range_mode == CopyRangeMode::Common ? common_start_ts : full_start_ts; - const auto range_end_ts = copy_range_mode == CopyRangeMode::Common - ? 
std::optional{common_end_ts} - : std::optional{full_end_ts}; - while (true) { - if (log_shutdown_request(shutdown_logged, "multi-camera copy export")) { - interrupted = true; - break; - } - const auto next_stream_index = next_copy_stream_index(streams, range_end_ts); - if (!next_stream_index.has_value()) { - break; - } - auto &stream = streams[*next_stream_index]; - if (auto write = encode_and_write_sample( - *sink, - stream, - options, - stream.current_timestamp_ns, - stream.current_left_frame, - stream.current_depth_frame, - stream.current_tracking); !write) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(emitted_samples, false); - } - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - emitted_samples += 1; - if (approximate_time_progress) { - last_progress_fraction = time_window_progress_fraction(range_start_ts, *range_end_ts, stream.current_timestamp_ns); - last_progress_detail = time_window_progress_detail( - range_start_ts, - *range_end_ts, - stream.current_timestamp_ns, - copy_range_mode == CopyRangeMode::Common ? "copy overlap" : "copy range"); - progress.update_fraction(last_progress_fraction, last_progress_detail); - } - if (auto advance = advance_copy_stream(stream); !advance) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(emitted_samples, false); - } - sink->close(); - close_camera_streams(streams); - spdlog::error("{}", advance.error()); - return exit_code(ToolExitCode::RuntimeError); - } - } - } - - for (auto &stream : streams) { - if (auto flushed = flush_and_write(*sink, stream.mcap_stream_id, *stream.backend); !flushed) { - if (approximate_time_progress) { - progress.finish_fraction(last_progress_fraction, false, last_progress_detail); - } else { - progress.finish(emitted_groups, false); - } - sink->close(); - close_camera_streams(streams); - spdlog::error("failed to finalize encoded video for {}: {}", stream.source.label, flushed.error()); - return exit_code(ToolExitCode::RuntimeError); - } - } - - sink->close(); - if (approximate_time_progress) { - if (!interrupted) { - last_progress_fraction = 1.0; - const auto progress_end_ts = bundle_policy == MultiCameraExportPolicy::Copy - ? (copy_range_mode == CopyRangeMode::Common ? common_end_ts : full_end_ts) - : common_end_ts; - last_progress_detail = time_window_progress_detail( - effective_progress_start_ts, - progress_end_ts, - progress_end_ts, - bundle_policy == MultiCameraExportPolicy::Copy - ? (copy_range_mode == CopyRangeMode::Common ? "copy overlap" : "copy range") - : "overlap"); - } - progress.finish_fraction(last_progress_fraction, !interrupted, last_progress_detail); - } else { - progress.finish(bundle_policy == MultiCameraExportPolicy::Copy ? emitted_samples : emitted_groups, !interrupted); - } - for (const auto &stream : streams) { - spdlog::info( - "multi-camera export skipped {} frame(s) while aligning {}", - stream.dropped_frames, - stream.source.label); - } - close_camera_streams(streams); - const auto emitted_count = bundle_policy == MultiCameraExportPolicy::Copy ? 
emitted_samples : emitted_groups; - if (!interrupted && emitted_count == 0) { - std::error_code remove_error{}; - std::filesystem::remove(output_path, remove_error); - if (remove_error) { - spdlog::warn( - "failed to remove empty MCAP output '{}': {}", - output_path.string(), - remove_error.message()); - } - if (bundle_policy == MultiCameraExportPolicy::Copy) { - spdlog::error( - "no camera samples were found across {} camera(s) for '{}' using policy={} copy_range={}", - sources.size(), - output_path.string(), - bundle_policy_name, - copy_range_name(copy_range_mode)); - } else { - spdlog::error( - "no bundled frame groups were found across {} camera(s) for '{}' using policy={}", - sources.size(), - output_path.string(), - bundle_policy_name); - } - return exit_code(ToolExitCode::RuntimeError); - } - - if (interrupted) { - if (bundle_policy == MultiCameraExportPolicy::Copy) { - spdlog::warn( - "gracefully stopped after writing {} camera sample(s) across {} camera(s) to '{}' using policy={} copy_range={}", - emitted_count, - sources.size(), - output_path.string(), - bundle_policy_name, - copy_range_name(copy_range_mode)); - } else { - spdlog::warn( - "gracefully stopped after writing {} bundled frame group(s) across {} camera(s) to '{}'", - emitted_groups, - sources.size(), - output_path.string()); - } - return interrupted_exit_code(); - } - - if (bundle_policy == MultiCameraExportPolicy::Copy) { - spdlog::info( - "wrote {} camera sample(s) across {} camera(s) to '{}' using policy={} copy_range={}", - emitted_count, - sources.size(), - output_path.string(), - bundle_policy_name, - copy_range_name(copy_range_mode)); - } else { - spdlog::info( - "wrote {} bundled frame group(s) across {} camera(s) to '{}' using policy={}", - emitted_groups, - sources.size(), - output_path.string(), - bundle_policy_name); - } - return exit_code(ToolExitCode::Success); -} - -} // namespace - -int main(int argc, char **argv) { - CliOptions options{}; - install_signal_handlers(); - - CLI::App app{"zed_svo_to_mcap - convert ZED SVO/SVO2 playback to MCAP"}; - app.add_option("--input", options.input_paths, "Input SVO/SVO2 file (repeat for multi-camera mode)"); - app.add_option("--segment-dir", options.segment_dir, "Segment directory containing *_zed*.svo or *_zed*.svo2 files"); - app.add_option("--output", options.output_path, "Output MCAP file"); - app.add_option("--codec", options.codec, "Video codec (h264|h265)") - ->check(CLI::IsMember({"h264", "h265"})); - app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)") - ->check(CLI::IsMember({"auto", "nvidia", "software"})); - app.add_flag("--with-pose", options.with_pose, "Emit foxglove.PoseInFrame when tracking is available"); - app.add_option("--mcap-compression", options.mcap_compression, "MCAP chunk compression (none|lz4|zstd)") - ->check(CLI::IsMember({"none", "lz4", "zstd"})); - app.add_option("--depth-mode", options.depth_mode, "ZED depth mode (neural_light|neural|neural_plus)") - ->check(CLI::IsMember({"neural_light", "neural", "neural_plus"})); - app.add_option("--depth-size", options.depth_size, "Depth output size (optimal|native|x)") - ->default_val("optimal"); - app.add_option("--bundle-policy", options.bundle_policy, "Bundling policy for multi-camera mode (nearest|strict|copy)") - ->check(CLI::IsMember({"nearest", "strict", "copy"})); - app.add_option( - "--copy-range", - options.copy_range, - "Timestamp range used by --bundle-policy copy (common|full)") - ->default_val("common") - 
->check(CLI::IsMember({"common", "full"})); - auto *start_frame_option = app.add_option( - "--start-frame", - options.start_frame, - "First frame/group to export (inclusive): raw SVO frame in single-camera mode, bundle index in multi-camera mode") - ->check(CLI::NonNegativeNumber); - auto *end_frame_option = app.add_option( - "--end-frame", - options.end_frame, - "Last frame/group to export (inclusive): raw SVO frame in single-camera mode, bundle index in multi-camera mode") - ->check(CLI::NonNegativeNumber); - app.add_option("--frame-id", options.frame_id, "Frame id for image and depth topics"); - app.add_option("--video-topic", options.video_topic, "MCAP topic for foxglove.CompressedVideo"); - app.add_option("--depth-topic", options.depth_topic, "MCAP topic for cvmmap_streamer.DepthMap"); - app.add_option("--calibration-topic", options.calibration_topic, "MCAP topic for foxglove.CameraCalibration"); - auto *bundle_topic_option = app.add_option("--bundle-topic", options.bundle_topic, "MCAP topic for bundled multi-camera manifests"); - app.add_option( - "--depth-calibration-topic", - options.depth_calibration_topic, - "MCAP topic for foxglove.CameraCalibration aligned to the exported depth resolution"); - app.add_option("--pose-topic", options.pose_topic, "MCAP topic for foxglove.PoseInFrame"); - app.add_option( - "--pose-config", - options.pose_config_path, - "TOML config file; reads zed.coordinate_system and zed.body_tracking.{reference_frame,set_floor_as_origin}"); - app.add_option("--world-frame-id", options.world_frame_id, "Reference frame id for pose output") - ->default_val("world"); - auto *sync_tolerance_option = app.add_option( - "--sync-tolerance-ms", - options.sync_tolerance_ms, - "Maximum allowed timestamp delta between cameras in strict multi-camera mode"); - sync_tolerance_option->check(CLI::PositiveNumber); - - try { - app.parse(argc, argv); - } catch (const CLI::ParseError &error) { - return app.exit(error); - } - options.has_start_frame = start_frame_option->count() > 0; - options.has_end_frame = end_frame_option->count() > 0; - options.has_bundle_topic = bundle_topic_option->count() > 0; - options.has_sync_tolerance = sync_tolerance_option->count() > 0; - - auto codec = parse_codec(options.codec); - if (!codec) { - spdlog::error("{}", codec.error()); - return exit_code(ToolExitCode::UsageError); - } - auto encoder_device = parse_encoder_device(options.encoder_device); - if (!encoder_device) { - spdlog::error("{}", encoder_device.error()); - return exit_code(ToolExitCode::UsageError); - } - auto compression = parse_mcap_compression(options.mcap_compression); - if (!compression) { - spdlog::error("{}", compression.error()); - return exit_code(ToolExitCode::UsageError); - } - auto depth_mode = parse_depth_mode(options.depth_mode); - if (!depth_mode) { - spdlog::error("{}", depth_mode.error()); - return exit_code(ToolExitCode::UsageError); - } - auto depth_size = parse_depth_size(options.depth_size); - if (!depth_size) { - spdlog::error("{}", depth_size.error()); - return exit_code(ToolExitCode::UsageError); - } - auto bundle_policy = parse_bundle_policy(options.bundle_policy); - if (!bundle_policy) { - spdlog::error("{}", bundle_policy.error()); - return exit_code(ToolExitCode::UsageError); - } - auto copy_range_mode = parse_copy_range(options.copy_range); - if (!copy_range_mode) { - spdlog::error("{}", copy_range_mode.error()); - return exit_code(ToolExitCode::UsageError); - } - auto pose_tracking = load_pose_tracking_options(options); - if (!pose_tracking) { - 
spdlog::error("{}", pose_tracking.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto sources = resolve_sources(options); - if (!sources) { - spdlog::error("{}", sources.error()); - return exit_code(ToolExitCode::UsageError); - } - - const auto output_path = derive_output_path(options, *sources); - if (sources->size() == 1) { - options.input_paths = {sources->front().path.string()}; - return run_single_source( - options, - output_path, - *codec, - *encoder_device, - *compression, - *depth_mode, - *depth_size, - *pose_tracking); - } - - return run_multi_source( - options, - *sources, - output_path, - *codec, - *encoder_device, - *compression, - *depth_mode, - *depth_size, - *pose_tracking, - *bundle_policy, - *copy_range_mode); -} diff --git a/src/tools/zed_svo_to_mp4.cpp b/src/tools/zed_svo_to_mp4.cpp deleted file mode 100644 index 321a332..0000000 --- a/src/tools/zed_svo_to_mp4.cpp +++ /dev/null @@ -1,320 +0,0 @@ -#include -#include - -#include - -#include "cvmmap_streamer/tools/zed_progress_bar.hpp" -#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp" - -#include -#include -#include -#include -#include -#include - -namespace { - -using cvmmap_streamer::zed_tools::EncodeTuning; -using cvmmap_streamer::zed_tools::Mp4Writer; -using cvmmap_streamer::zed_tools::ProgressBar; -using cvmmap_streamer::zed_tools::derive_output_path; -using cvmmap_streamer::zed_tools::frame_period_ns; -using cvmmap_streamer::zed_tools::parse_codec; -using cvmmap_streamer::zed_tools::parse_encoder_device; -using cvmmap_streamer::zed_tools::parse_preset; -using cvmmap_streamer::zed_tools::parse_tune; - -enum class ToolExitCode : int { - Success = 0, - UsageError = 2, - RuntimeError = 3, -}; - -struct CliOptions { - std::string input_path{}; - std::string output_path{}; - std::string codec{"h265"}; - std::string encoder_device{"auto"}; - std::string preset{"fast"}; - std::string tune{"low-latency"}; - int quality{cvmmap_streamer::zed_tools::kDefaultQuality}; - std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize}; - std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames}; - std::uint32_t start_frame{0}; - std::uint32_t end_frame{0}; - bool has_end_frame{false}; -}; - -[[nodiscard]] -constexpr int exit_code(const ToolExitCode code) { - return static_cast(code); -} - -[[nodiscard]] -std::string zed_string(const sl::String &value) { - return std::string(value.c_str() == nullptr ? 
"" : value.c_str()); -} - -[[nodiscard]] -std::string zed_status_string(const sl::ERROR_CODE code) { - return zed_string(sl::toString(code)); -} - -[[nodiscard]] -std::expected validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) { - if (mat.getDataType() != sl::MAT_TYPE::U8_C3) { - return std::unexpected(std::string(label) + " must be U8_C3"); - } - if (mat.getWidth() == 0 || mat.getHeight() == 0) { - return std::unexpected(std::string(label) + " dimensions must be non-zero"); - } - if (mat.getPtr(sl::MEM::CPU) == nullptr) { - return std::unexpected(std::string(label) + " CPU buffer is null"); - } - return {}; -} - -} // namespace - -int main(int argc, char **argv) { - CliOptions options{}; - - CLI::App app{"zed_svo_to_mp4 - convert ZED SVO/SVO2 playback to MP4"}; - app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required(); - app.add_option("--output", options.output_path, "Output MP4 file (default: input path with .mp4 suffix)"); - app.add_option("--codec", options.codec, "Video codec (h264|h265)") - ->check(CLI::IsMember({"h264", "h265"})); - app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)") - ->check(CLI::IsMember({"auto", "nvidia", "software"})); - app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)") - ->check(CLI::IsMember({"fast", "balanced", "quality"})); - app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)") - ->check(CLI::IsMember({"low-latency", "balanced"})); - app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)") - ->check(CLI::Range(0, 51)); - app.add_option("--gop", options.gop, "Encoder GOP length in frames") - ->check(CLI::PositiveNumber); - app.add_option("--b-frames", options.b_frames, "Encoder B-frame count") - ->check(CLI::NonNegativeNumber); - app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)") - ->check(CLI::NonNegativeNumber); - auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)") - ->check(CLI::NonNegativeNumber); - - try { - app.parse(argc, argv); - } catch (const CLI::ParseError &error) { - return app.exit(error); - } - options.has_end_frame = end_frame_option->count() > 0; - - auto codec = parse_codec(options.codec); - if (!codec) { - spdlog::error("{}", codec.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto encoder_device = parse_encoder_device(options.encoder_device); - if (!encoder_device) { - spdlog::error("{}", encoder_device.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto preset = parse_preset(options.preset); - if (!preset) { - spdlog::error("{}", preset.error()); - return exit_code(ToolExitCode::UsageError); - } - - auto tune = parse_tune(options.tune); - if (!tune) { - spdlog::error("{}", tune.error()); - return exit_code(ToolExitCode::UsageError); - } - - if (options.has_end_frame && options.end_frame < options.start_frame) { - spdlog::error( - "invalid frame range: start-frame={} end-frame={}", - options.start_frame, - options.end_frame); - return exit_code(ToolExitCode::UsageError); - } - if (options.b_frames > options.gop) { - spdlog::error( - "invalid encoder config: b-frames {} must be <= gop {}", - options.b_frames, - options.gop); - return exit_code(ToolExitCode::UsageError); - } - - const auto output_path = options.output_path.empty() - ? 
derive_output_path(std::filesystem::path{options.input_path}) - : std::filesystem::path{options.output_path}; - if (output_path.empty()) { - spdlog::error("output path must not be empty"); - return exit_code(ToolExitCode::UsageError); - } - if (output_path.has_parent_path()) { - std::filesystem::create_directories(output_path.parent_path()); - } - - const EncodeTuning tuning{ - .preset = *preset, - .tune = *tune, - .quality = options.quality, - .gop = options.gop, - .b_frames = options.b_frames, - }; - - sl::Camera camera{}; - auto close_camera = [&]() { - if (camera.isOpened()) { - camera.close(); - } - }; - - sl::InitParameters init{}; - init.input.setFromSVOFile(options.input_path.c_str()); - init.svo_real_time_mode = false; - init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE; - init.coordinate_units = sl::UNIT::METER; - init.depth_mode = sl::DEPTH_MODE::NONE; - init.sdk_verbose = false; - - const auto open_status = camera.open(init); - if (open_status != sl::ERROR_CODE::SUCCESS) { - spdlog::error( - "failed to open SVO '{}': {}", - options.input_path, - zed_status_string(open_status)); - return exit_code(ToolExitCode::RuntimeError); - } - - const auto total_frames = camera.getSVONumberOfFrames(); - if (total_frames <= 0) { - close_camera(); - spdlog::error("input SVO has no frames"); - return exit_code(ToolExitCode::RuntimeError); - } - if (options.start_frame >= static_cast(total_frames)) { - close_camera(); - spdlog::error( - "start-frame {} is out of range for {} frames", - options.start_frame, - total_frames); - return exit_code(ToolExitCode::UsageError); - } - if (options.has_end_frame && options.end_frame >= static_cast(total_frames)) { - close_camera(); - spdlog::error( - "end-frame {} is out of range for {} frames", - options.end_frame, - total_frames); - return exit_code(ToolExitCode::UsageError); - } - - camera.setSVOPosition(static_cast(options.start_frame)); - - const auto camera_info = camera.getCameraInformation(); - const auto &camera_config = camera_info.camera_configuration; - const auto width = static_cast(camera_config.resolution.width); - const auto height = static_cast(camera_config.resolution.height); - if (width == 0 || height == 0) { - close_camera(); - spdlog::error("camera resolution reported by the ZED SDK is invalid"); - return exit_code(ToolExitCode::RuntimeError); - } - - Mp4Writer writer{}; - if (auto open_writer = writer.open(output_path, *codec, *encoder_device, width, height, camera_config.fps, tuning); !open_writer) { - close_camera(); - spdlog::error("failed to initialize MP4 writer: {}", open_writer.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - sl::RuntimeParameters runtime_parameters{}; - sl::Mat left_frame{}; - std::optional first_timestamp_ns{}; - std::optional last_timestamp_ns{}; - std::uint64_t emitted_frames{0}; - const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps); - const auto last_frame = options.has_end_frame - ? 
options.end_frame - : static_cast(total_frames - 1); - const auto total_frames_to_emit = static_cast(last_frame - options.start_frame + 1); - ProgressBar progress{total_frames_to_emit}; - - while (options.start_frame + emitted_frames <= last_frame) { - const auto grab_status = camera.grab(runtime_parameters); - if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) { - break; - } - if (grab_status != sl::ERROR_CODE::SUCCESS) { - progress.finish(emitted_frames, false); - close_camera(); - spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status)); - return exit_code(ToolExitCode::RuntimeError); - } - - const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU); - if (image_status != sl::ERROR_CODE::SUCCESS) { - progress.finish(emitted_frames, false); - close_camera(); - spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status)); - return exit_code(ToolExitCode::RuntimeError); - } - if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) { - progress.finish(emitted_frames, false); - close_camera(); - spdlog::error("{}", valid.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds(); - if (timestamp_ns == 0) { - timestamp_ns = emitted_frames * nominal_frame_period_ns; - } - if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) { - timestamp_ns = *last_timestamp_ns + 1; - } - last_timestamp_ns = timestamp_ns; - - if (!first_timestamp_ns) { - first_timestamp_ns = timestamp_ns; - } - const auto relative_timestamp_ns = timestamp_ns - *first_timestamp_ns; - - if (auto write = writer.write_bgr_frame( - left_frame.getPtr(sl::MEM::CPU), - left_frame.getStepBytes(sl::MEM::CPU), - relative_timestamp_ns); - !write) { - progress.finish(emitted_frames, false); - close_camera(); - spdlog::error("failed to encode or mux frame: {}", write.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - emitted_frames += 1; - progress.update(emitted_frames); - } - - if (auto flush = writer.flush(); !flush) { - progress.finish(emitted_frames, false); - close_camera(); - spdlog::error("failed to finalize MP4 output: {}", flush.error()); - return exit_code(ToolExitCode::RuntimeError); - } - - progress.finish(emitted_frames, true); - close_camera(); - spdlog::info( - "converted {} frames from '{}' to '{}' using codec={} hardware={}", - emitted_frames, - options.input_path, - output_path.string(), - cvmmap_streamer::zed_tools::codec_name(*codec), - writer.using_hardware()); - return exit_code(ToolExitCode::Success); -} diff --git a/tests/test_zed_batch_segment_sources.py b/tests/test_zed_batch_segment_sources.py deleted file mode 100644 index faa04df..0000000 --- a/tests/test_zed_batch_segment_sources.py +++ /dev/null @@ -1,268 +0,0 @@ -from __future__ import annotations - -import dataclasses -import tempfile -import unittest -from pathlib import Path - -import click -from click.testing import CliRunner - -from scripts import zed_batch_segment_sources as segment_sources -from scripts.zed_batch_svo_grid_to_mp4 import main as grid_main -from scripts.zed_batch_svo_to_mcap import main as mcap_main - - -@dataclasses.dataclass(slots=True, frozen=True) -class FakeScan: - segment_dir: Path - matched_files: int - is_valid: bool - reason: str | None = None - - -def fake_scan(segment_dir: Path) -> FakeScan: - if not segment_dir.is_dir(): - return FakeScan(segment_dir=segment_dir, matched_files=0, is_valid=False, reason="missing 
-    if (segment_dir / "valid.segment").is_file():
-        return FakeScan(segment_dir=segment_dir, matched_files=2, is_valid=True)
-    if (segment_dir / "partial.segment").is_file():
-        return FakeScan(segment_dir=segment_dir, matched_files=1, is_valid=False, reason="partial segment")
-    return FakeScan(segment_dir=segment_dir, matched_files=0, is_valid=False, reason="no camera files")
-
-
-def create_multicamera_segment(parent: Path, segment_name: str) -> Path:
-    segment_dir = parent / segment_name
-    segment_dir.mkdir(parents=True)
-    for camera_index in range(1, 5):
-        (segment_dir / f"{segment_name}_zed{camera_index}.svo2").write_bytes(b"")
-    return segment_dir
-
-
-class SharedSourceResolutionTests(unittest.TestCase):
-    def test_dataset_root_recursive_discovers_nested_segments(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            dataset_root = Path(tmp) / "dataset"
-            segment_dir = dataset_root / "run" / "2026-04-08T11-50-32"
-            segment_dir.mkdir(parents=True)
-            (segment_dir / "valid.segment").write_text("", encoding="utf-8")
-
-            sources = segment_sources.resolve_sources(
-                dataset_root,
-                (),
-                None,
-                None,
-                True,
-                scan_segment_dir=fake_scan,
-                no_matches_message=lambda root: f"no segments under {root}",
-            )
-
-            self.assertEqual(sources.mode, "dataset-root")
-            self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),))
-
-    def test_dataset_root_without_recursive_does_not_descend(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            dataset_root = Path(tmp) / "dataset"
-            segment_dir = dataset_root / "run" / "2026-04-08T11-50-32"
-            segment_dir.mkdir(parents=True)
-            (segment_dir / "valid.segment").write_text("", encoding="utf-8")
-
-            with self.assertRaises(click.ClickException) as error:
-                segment_sources.resolve_sources(
-                    dataset_root,
-                    (),
-                    None,
-                    None,
-                    False,
-                    scan_segment_dir=fake_scan,
-                    no_matches_message=lambda root: f"no segments under {root}",
-                )
-
-            self.assertIn("no segments under", str(error.exception))
-
-    def test_explicit_segments_are_deduped(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            segment_dir = Path(tmp) / "2026-04-08T11-50-32"
-            segment_dir.mkdir()
-            (segment_dir / "valid.segment").write_text("", encoding="utf-8")
-
-            sources = segment_sources.resolve_sources(
-                None,
-                (segment_dir, segment_dir),
-                None,
-                None,
-                True,
-                scan_segment_dir=fake_scan,
-                no_matches_message=lambda root: f"no segments under {root}",
-            )
-
-            self.assertEqual(sources.mode, "segments")
-            self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),))
-
-    def test_segments_csv_uses_segment_dir_column(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            tmp_path = Path(tmp)
-            segment_dir = tmp_path / "segments" / "2026-04-08T11-50-32"
-            segment_dir.mkdir(parents=True)
-            (segment_dir / "valid.segment").write_text("", encoding="utf-8")
-            csv_path = tmp_path / "segments.csv"
-            csv_path.write_text("segment_dir\nsegments/2026-04-08T11-50-32\n", encoding="utf-8")
-
-            sources = segment_sources.resolve_sources(
-                None,
-                (),
-                csv_path,
-                None,
-                True,
-                scan_segment_dir=fake_scan,
-                no_matches_message=lambda root: f"no segments under {root}",
-            )
-
-            self.assertEqual(sources.mode, "segments-csv")
-            self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),))
-
-    def test_segment_path_like_dataset_root_has_hint(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            dataset_root = Path(tmp) / "dataset"
-            segment_dir = dataset_root / "run" / "2026-04-08T11-50-32"
-            segment_dir.mkdir(parents=True)
-            (segment_dir / "valid.segment").write_text("", encoding="utf-8")
-
-            with self.assertRaises(click.ClickException) as error:
-                segment_sources.resolve_sources(
-                    None,
-                    (dataset_root,),
-                    None,
-                    None,
-                    True,
-                    scan_segment_dir=fake_scan,
-                    no_matches_message=lambda root: f"no segments under {root}",
-                )
-
-            message = str(error.exception)
-            self.assertIn("looks like a dataset root", message)
-            self.assertIn("--dataset-root", message)
-
-
-class BatchCliSmokeTests(unittest.TestCase):
-    def setUp(self) -> None:
-        self.runner = CliRunner()
-
-    def test_mcap_dataset_root_flag_discovers_segments(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            dataset_root = Path(tmp) / "dataset"
-            create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32")
-
-            result = self.runner.invoke(
-                mcap_main,
-                [
-                    "--dataset-root",
-                    str(dataset_root),
-                    "--recursive",
-                    "--dry-run",
-                    "--zed-bin",
-                    "/bin/true",
-                ],
-            )
-
-            self.assertEqual(result.exit_code, 0, result.output)
-            self.assertIn("source=dataset-root matched=1 pending=1", result.output)
-
-    def test_mcap_segment_flag_rejects_dataset_root_with_hint(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            dataset_root = Path(tmp) / "dataset"
-            create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32")
-
-            result = self.runner.invoke(
-                mcap_main,
-                [
-                    "--segment",
-                    str(dataset_root),
-                    "--dry-run",
-                    "--zed-bin",
-                    "/bin/true",
-                ],
-            )
-
-            self.assertNotEqual(result.exit_code, 0)
-            self.assertIn("looks like a dataset root", result.output)
-            self.assertIn("--dataset-root", result.output)
-
-    def test_mcap_rejects_legacy_positional_dataset_root(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            dataset_root = Path(tmp) / "dataset"
-            create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32")
-
-            result = self.runner.invoke(
-                mcap_main,
-                [
-                    str(dataset_root),
-                    "--dry-run",
-                    "--zed-bin",
-                    "/bin/true",
-                ],
-            )
-
-            self.assertNotEqual(result.exit_code, 0)
-            self.assertIn("positional dataset paths are no longer supported", result.output)
-            self.assertIn("--dataset-root", result.output)
-
-    def test_mcap_rejects_recursive_without_dataset_root(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
-
-            result = self.runner.invoke(
-                mcap_main,
-                [
-                    "--segment",
-                    str(segment_dir),
-                    "--no-recursive",
-                    "--dry-run",
-                    "--zed-bin",
-                    "/bin/true",
-                ],
-            )
-
-            self.assertNotEqual(result.exit_code, 0)
-            self.assertIn("--recursive/--no-recursive can only be used with --dataset-root", result.output)
-
-    def test_grid_segment_flag_discovers_one_segment(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
-
-            result = self.runner.invoke(
-                grid_main,
-                [
-                    "--segment",
-                    str(segment_dir),
-                    "--dry-run",
-                    "--zed-bin",
-                    "/bin/true",
-                ],
-            )
-
-            self.assertEqual(result.exit_code, 0, result.output)
-            self.assertIn("source=segments matched=1 pending=1", result.output)
-
-    def test_grid_rejects_legacy_segment_dir_flag(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
-
-            result = self.runner.invoke(
                grid_main,
-                [
-                    "--segment-dir",
-                    str(segment_dir),
-                    "--dry-run",
-                    "--zed-bin",
-                    "/bin/true",
-                ],
-            )
-
-            self.assertNotEqual(result.exit_code, 0)
-            self.assertIn("--segment-dir is no longer supported", result.output)
-            self.assertIn("--segment", result.output)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_zed_segment_time_index.py b/tests/test_zed_segment_time_index.py
deleted file mode 100644
index 3abc710..0000000
--- a/tests/test_zed_segment_time_index.py
+++ /dev/null
@@ -1,139 +0,0 @@
-from __future__ import annotations
-
-import datetime as dt
-import tempfile
-import unittest
-from pathlib import Path
-
-import duckdb
-
-from scripts.zed_segment_time_index import (
-    BoundsRow,
-    format_ns_iso,
-    infer_dataset_timezone,
-    parse_timestamp_to_ns,
-    parse_timestamp_window,
-    require_query_window,
-    scan_segment_dir,
-    write_index,
-)
-
-
-class TimestampParseTests(unittest.TestCase):
-    def test_parse_folder_style_timestamp(self) -> None:
-        actual = parse_timestamp_to_ns("2026-03-18T12-00-23", "UTC")
-        expected = parse_timestamp_to_ns("2026-03-18T12:00:23+00:00", "UTC")
-        self.assertEqual(actual, expected)
-
-    def test_parse_integer_epoch_milliseconds(self) -> None:
-        self.assertEqual(parse_timestamp_to_ns("1710000000123", "UTC"), 1710000000123 * 1_000_000)
-
-    def test_parse_timestamp_window_for_second_precision_text(self) -> None:
-        start_ns, end_ns = parse_timestamp_window("2026-03-18T12-00-23", "UTC")
-        self.assertEqual(end_ns - start_ns, 999_999_999)
-
-    def test_require_query_window_rejects_mixed_modes(self) -> None:
-        with self.assertRaises(Exception):
-            require_query_window("1", "2", "3", "UTC")
-
-    def test_format_ns_iso_utc(self) -> None:
-        rendered = format_ns_iso(1_710_000_000_123_000_000, dt.timezone.utc)
-        self.assertTrue(rendered.startswith("2024-03-09T16:00:00.123000000"))
-
-
-class SegmentDiscoveryTests(unittest.TestCase):
-    def test_scan_segment_dir_accepts_multicamera_dir(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            segment_dir = Path(tmp)
-            for label in ("zed1", "zed2", "zed3", "zed4"):
-                (segment_dir / f"2026-03-18T12-00-23_{label}.svo2").write_bytes(b"")
-            scan = scan_segment_dir(segment_dir)
-            self.assertTrue(scan.is_valid)
-            self.assertEqual(scan.camera_labels, ("zed1", "zed2", "zed3", "zed4"))

-    def test_scan_segment_dir_rejects_partial_dir(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            segment_dir = Path(tmp)
-            (segment_dir / "2026-03-18T12-00-23_zed1.svo2").write_bytes(b"")
-            scan = scan_segment_dir(segment_dir)
-            self.assertFalse(scan.is_valid)
-
-
-class DuckDbIndexTests(unittest.TestCase):
-    def test_infer_dataset_timezone_from_folder_names(self) -> None:
-        row = BoundsRow(
-            segment_dir=Path("/tmp/bar/2026-03-18T11-59-41"),
-            relative_segment_dir="bar/2026-03-18T11-59-41",
-            group_path="bar",
-            activity="bar",
-            segment_name="2026-03-18T11-59-41",
-            mcap_path=Path("/tmp/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap"),
-            start_ns=1_773_806_381_201_081_000,
-            end_ns=1_773_806_392_268_226_000,
-            duration_ns=11_067_145_000,
-            start_iso_utc="2026-03-18T03:59:41.201081000Z",
-            end_iso_utc="2026-03-18T03:59:52.268226000Z",
-            camera_count=4,
-            camera_labels="zed1,zed2,zed3,zed4",
-            video_message_count=1330,
-            index_source="mcap_video_bounds",
-        )
-        self.assertEqual(infer_dataset_timezone([row]), "UTC+08:00")
-
-    def test_write_index_and_query_overlap(self) -> None:
-        with tempfile.TemporaryDirectory() as tmp:
-            root = Path(tmp) / "dataset"
-            root.mkdir()
-            index_path = root / "segment_time_index.duckdb"
-
-            rows = [
-                BoundsRow(
-                    segment_dir=root / "bar" / "2026-03-18T12-00-23",
-                    relative_segment_dir="bar/2026-03-18T12-00-23",
-                    group_path="bar",
-                    activity="bar",
-                    segment_name="2026-03-18T12-00-23",
-                    mcap_path=root / "bar" / "2026-03-18T12-00-23" / "2026-03-18T12-00-23.mcap",
-                    start_ns=100,
-                    end_ns=200,
-                    duration_ns=100,
-                    start_iso_utc="1970-01-01T00:00:00.000000100Z",
-                    end_iso_utc="1970-01-01T00:00:00.000000200Z",
-                    camera_count=4,
-                    camera_labels="zed1,zed2,zed3,zed4",
-                    video_message_count=1330,
-                    index_source="mcap_video_bounds",
-                ),
-                BoundsRow(
-                    segment_dir=root / "run" / "2026-03-18T12-01-00",
-                    relative_segment_dir="run/2026-03-18T12-01-00",
-                    group_path="run",
-                    activity="run",
-                    segment_name="2026-03-18T12-01-00",
-                    mcap_path=root / "run" / "2026-03-18T12-01-00" / "2026-03-18T12-01-00.mcap",
-                    start_ns=250,
-                    end_ns=400,
-                    duration_ns=150,
-                    start_iso_utc="1970-01-01T00:00:00.000000250Z",
-                    end_iso_utc="1970-01-01T00:00:00.000000400Z",
-                    camera_count=4,
-                    camera_labels="zed1,zed2,zed3,zed4",
-                    video_message_count=1400,
-                    index_source="mcap_video_bounds",
-                ),
-            ]
-            write_index(index_path, root, rows)
-
-            conn = duckdb.connect(str(index_path), read_only=True)
-            try:
-                matches = conn.execute(
-                    "SELECT relative_segment_dir FROM segments WHERE start_ns <= ? AND end_ns >= ? ORDER BY start_ns",
-                    [300, 180],
-                ).fetchall()
-                self.assertEqual(matches, [("bar/2026-03-18T12-00-23",), ("run/2026-03-18T12-01-00",)])
-            finally:
-                conn.close()
-
-
-if __name__ == "__main__":
-    unittest.main()