refactor(zed): remove extracted offline helper tooling

Drop the offline ZED helper implementations that were moved into zed-offline-tools.

This removes the standalone conversion binaries, batch/index/inspection scripts, related configs and tests, and the tool-specific support code that no longer belongs in cvmmap-streamer.

The build files and docs are updated to point at the standalone repo while keeping the streamer runtime surface intact.
2026-04-13 07:50:41 +00:00
parent 30cd956c5c
commit ddea6b0e3d
28 changed files with 16 additions and 12881 deletions
-186
@@ -8,11 +8,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
include(GNUInstallDirs)
option(
CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4
"Build the OpenCV-based zed_svo_grid_to_mp4 tool"
ON)
find_package(Threads REQUIRED)
find_package(OpenSSL REQUIRED)
if (NOT TARGET OpenSSL::SSL AND DEFINED OPENSSL_SSL_LIBRARY)
@@ -80,34 +75,6 @@ find_package(spdlog REQUIRED)
find_package(Protobuf REQUIRED)
find_package(PkgConfig REQUIRED)
find_package(rvl CONFIG QUIET)
set(ZED_DIR "/usr/local/zed" CACHE PATH "Path to the local ZED SDK")
find_package(ZED QUIET)
set(CVMMAP_HAS_ZED_SDK OFF)
if (ZED_FOUND)
find_package(CUDA ${ZED_CUDA_VERSION} REQUIRED)
find_library(CVMMAP_STREAMER_LIBUSB_LIBRARY NAMES usb-1.0 libusb-1.0)
if (CVMMAP_STREAMER_LIBUSB_LIBRARY)
set(_CVMMAP_STREAMER_ZED_LIBRARIES "")
foreach(_zed_lib IN LISTS ZED_LIBRARIES)
if (_zed_lib STREQUAL "/usr/lib/x86_64-linux-gnu/libusb-1.0.so")
list(APPEND _CVMMAP_STREAMER_ZED_LIBRARIES "${CVMMAP_STREAMER_LIBUSB_LIBRARY}")
else()
list(APPEND _CVMMAP_STREAMER_ZED_LIBRARIES "${_zed_lib}")
endif()
endforeach()
set(ZED_LIBRARIES "${_CVMMAP_STREAMER_ZED_LIBRARIES}")
endif()
set(CVMMAP_HAS_ZED_SDK ON)
message(STATUS "ZED SDK found: enabling zed_svo_to_mcap and zed_svo_to_mp4")
else()
message(STATUS "ZED SDK not found: skipping ZED conversion tools")
endif()
if (CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4 AND CVMMAP_HAS_ZED_SDK)
find_package(OpenCV REQUIRED COMPONENTS core imgproc)
elseif (CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4)
message(STATUS "CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4=ON but ZED SDK is unavailable; zed_svo_grid_to_mp4 will not be built")
endif()
add_subdirectory(third_party)
@@ -396,160 +363,7 @@ set_target_properties(mcap_replay_tester PROPERTIES
OUTPUT_NAME "mcap_replay_tester"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
add_executable(mcap_video_bounds src/tools/mcap_video_bounds.cpp)
target_include_directories(mcap_video_bounds
PRIVATE
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(mcap_video_bounds
PRIVATE
CLI11::CLI11
cvmmap_streamer_foxglove_proto
cvmmap_streamer_mcap_runtime
mcap::mcap
PkgConfig::ZSTD
PkgConfig::LZ4)
if (TARGET spdlog::spdlog)
target_link_libraries(mcap_video_bounds PRIVATE spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(mcap_video_bounds PRIVATE spdlog)
endif()
target_link_libraries(mcap_video_bounds PRIVATE cvmmap_streamer_protobuf)
if (TARGET PkgConfig::PROTOBUF_PKG)
target_link_libraries(mcap_video_bounds PRIVATE PkgConfig::PROTOBUF_PKG)
endif()
set_target_properties(mcap_video_bounds PROPERTIES
OUTPUT_NAME "mcap_video_bounds"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
set(CVMMAP_STREAMER_INSTALL_TARGETS cvmmap_streamer)
list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS mcap_video_bounds)
if (CVMMAP_HAS_ZED_SDK)
add_library(
cvmmap_streamer_zed_progress_support
STATIC
src/tools/zed_progress_bar.cpp)
target_include_directories(cvmmap_streamer_zed_progress_support
PUBLIC
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
add_executable(
zed_svo_to_mcap
src/tools/zed_svo_to_mcap.cpp
src/config/runtime_config.cpp)
target_include_directories(zed_svo_to_mcap
PRIVATE
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}"
${ZED_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS})
target_link_directories(zed_svo_to_mcap
PRIVATE
${ZED_LIBRARY_DIR}
${CUDA_LIBRARY_DIRS})
target_link_libraries(zed_svo_to_mcap
PRIVATE
cvmmap_streamer_zed_progress_support
cvmmap_streamer_record_support
CLI11::CLI11
tomlplusplus::tomlplusplus
${ZED_LIBRARIES}
${CUDA_CUDA_LIBRARY}
${CUDA_CUDART_LIBRARY})
if (TARGET spdlog::spdlog)
target_link_libraries(zed_svo_to_mcap PRIVATE spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(zed_svo_to_mcap PRIVATE spdlog)
endif()
set_target_properties(zed_svo_to_mcap PROPERTIES
OUTPUT_NAME "zed_svo_to_mcap"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS zed_svo_to_mcap)
add_library(
cvmmap_streamer_zed_svo_mp4_support
STATIC
src/tools/zed_svo_mp4_support.cpp)
target_include_directories(cvmmap_streamer_zed_svo_mp4_support
PUBLIC
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(cvmmap_streamer_zed_svo_mp4_support
PUBLIC
PkgConfig::FFMPEG)
if (TARGET spdlog::spdlog)
target_link_libraries(cvmmap_streamer_zed_svo_mp4_support PUBLIC spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(cvmmap_streamer_zed_svo_mp4_support PUBLIC spdlog)
endif()
add_executable(
zed_svo_to_mp4
src/tools/zed_svo_to_mp4.cpp)
target_include_directories(zed_svo_to_mp4
PRIVATE
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}"
${ZED_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS})
target_link_directories(zed_svo_to_mp4
PRIVATE
${ZED_LIBRARY_DIR}
${CUDA_LIBRARY_DIRS})
target_link_libraries(zed_svo_to_mp4
PRIVATE
CLI11::CLI11
cvmmap_streamer_zed_progress_support
cvmmap_streamer_zed_svo_mp4_support
${ZED_LIBRARIES}
${CUDA_CUDA_LIBRARY}
${CUDA_CUDART_LIBRARY})
if (TARGET spdlog::spdlog)
target_link_libraries(zed_svo_to_mp4 PRIVATE spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(zed_svo_to_mp4 PRIVATE spdlog)
endif()
set_target_properties(zed_svo_to_mp4 PROPERTIES
OUTPUT_NAME "zed_svo_to_mp4"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS zed_svo_to_mp4)
if (CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4)
add_executable(
zed_svo_grid_to_mp4
src/tools/zed_svo_grid_to_mp4.cpp)
target_include_directories(zed_svo_grid_to_mp4
PRIVATE
"${CMAKE_CURRENT_LIST_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}"
${ZED_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
${OpenCV_INCLUDE_DIRS})
target_link_directories(zed_svo_grid_to_mp4
PRIVATE
${ZED_LIBRARY_DIR}
${CUDA_LIBRARY_DIRS})
target_link_libraries(zed_svo_grid_to_mp4
PRIVATE
CLI11::CLI11
cvmmap_streamer_zed_progress_support
cvmmap_streamer_zed_svo_mp4_support
${ZED_LIBRARIES}
${CUDA_CUDA_LIBRARY}
${CUDA_CUDART_LIBRARY}
${OpenCV_LIBS})
if (TARGET spdlog::spdlog)
target_link_libraries(zed_svo_grid_to_mp4 PRIVATE spdlog::spdlog)
elseif (TARGET spdlog)
target_link_libraries(zed_svo_grid_to_mp4 PRIVATE spdlog)
endif()
set_target_properties(zed_svo_grid_to_mp4 PROPERTIES
OUTPUT_NAME "zed_svo_grid_to_mp4"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
list(APPEND CVMMAP_STREAMER_INSTALL_TARGETS zed_svo_grid_to_mp4)
endif()
endif()
install(
TARGETS ${CVMMAP_STREAMER_INSTALL_TARGETS}
+15 -301
@@ -45,17 +45,6 @@ cmake -B build -S .
cmake --build build
```
When the ZED SDK is available, the build also enables `zed_svo_to_mcap` and
`zed_svo_to_mp4` automatically. When the SDK is absent, those tools are skipped
and the main streamer plus non-ZED testers still build normally.
`zed_svo_grid_to_mp4` remains optional and additionally requires OpenCV. Disable
it explicitly with:
```bash
cmake -B build -S . -DCVMMAP_BUILD_ZED_SVO_GRID_TO_MP4=OFF
```
```bash
# Use a local cv-mmap build tree
cmake -B build -S . \
@@ -69,300 +58,25 @@ cmake --build build
ls -la build/{cvmmap_streamer,rtp_receiver_tester,rtmp_stub_tester}
```
### Offline ZED Tooling
Offline ZED conversion, batch wrappers, dataset indexing, and MCAP inspection helpers moved to the sibling repository `../zed-offline-tools`.
Use that repo for:
- `zed_svo_to_mcap`
- `zed_svo_to_mp4`
- `zed_svo_grid_to_mp4`
- `mcap_video_bounds`
- `scripts/zed_batch_*`
- `scripts/zed_segment_time_index.py`
- `scripts/generate_playlist_config.py`
- `scripts/mcap_bundle_validator.py`
- `scripts/mcap_rgbd_example.py`
- `scripts/mcap_rgbd_viewer.py`
- `scripts/mcap_depth_alignment.py`
### ZED SVO/SVO2 To MP4
This tool is only built when the ZED SDK is detected during CMake configure.
The repo also includes an offline conversion tool for the left ZED color stream:
```bash
CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
./build/bin/zed_svo_to_mp4 \
--input <SVO_INPUT> \
--encoder-device auto \
--preset balanced \
--quality 20 \
--start-frame 0 \
--end-frame 89
```
By default the tool writes `foo.mp4` next to `foo.svo` or `foo.svo2`, defaults to `h265`, and shows a tqdm-like progress bar when stderr is attached to a TTY. `--encoder-device auto` tries NVENC first and falls back to software (`libx264` or `libx265`) if the hardware encoder is unavailable or cannot be opened.
### Batch ZED SVO2 To MP4
Python dependencies for the batch wrapper are managed with `uv`:
```bash
uv sync
```
Expected multi-camera dataset layout:
```text
<DATASET_ROOT>/
├── svo2_segments_sorted.csv
├── bar/
│ └── 2026-03-18T11-59-41/
│ ├── 2026-03-18T11-59-41_zed1.svo2
│ ├── 2026-03-18T11-59-41_zed2.svo2
│ ├── 2026-03-18T11-59-41_zed3.svo2
│ └── 2026-03-18T11-59-41_zed4.svo2
└── jump/
└── experiment/
└── 1/
└── 2026-03-18T11-26-23/
├── 2026-03-18T11-26-23_zed1.svo2
├── 2026-03-18T11-26-23_zed2.svo2
├── 2026-03-18T11-26-23_zed3.svo2
└── 2026-03-18T11-26-23_zed4.svo2
```
Placeholders used below:
- `<DATASET_ROOT>`: dataset root containing multi-camera segment directories
- `<SEGMENT_DIR>`: one multi-camera segment directory containing `*_zedN.svo` or `*_zedN.svo2`
- `<SEGMENT_DIR_A>`, `<SEGMENT_DIR_B>`: explicit segment directories
- `<SEGMENTS_CSV>`: CSV file with a `segment_dir` column, for example `config/svo2_segments_sorted.sample.csv`
- `<SVO_INPUT>`: one single-camera `.svo` or `.svo2` file
- `<POSE_CONFIG>`: TOML file such as `config/zed_pose_config.toml`
Use the wrapper to recurse through a folder, run `zed_svo_to_mp4` on every matched `.svo2`, and show one aggregate tqdm progress bar:
```bash
uv run python scripts/zed_batch_svo_to_mp4.py \
<DATASET_ROOT>/bar \
--pattern '*.svo2' \
--recursive \
--jobs 2 \
--encoder-device auto \
--start-frame 0 \
--end-frame 29 \
--cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6
```
The batch tool mirrors the common encoder options from `zed_svo_to_mp4`, skips existing sibling `.mp4` outputs by default, and continues after failures while returning a nonzero exit code if any conversion fails.
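A minimal sketch of that skip/continue contract (a hypothetical `convert_all` loop for illustration, not the wrapper itself):
```python
import subprocess
from pathlib import Path

def convert_all(svo_paths: list[Path], overwrite: bool = False) -> int:
    failures = 0
    for svo in svo_paths:
        output = svo.with_suffix(".mp4")  # sibling foo.mp4 next to foo.svo2
        if output.exists() and not overwrite:
            continue  # skip existing outputs by default
        result = subprocess.run(["./build/bin/zed_svo_to_mp4", "--input", str(svo)])
        if result.returncode != 0:
            failures += 1  # keep going after a failure; report at the end
    return 1 if failures else 0  # nonzero exit if any conversion failed
```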
### ZED SVO Grid To MP4
This tool is only built when the ZED SDK is detected and
`CVMMAP_BUILD_ZED_SVO_GRID_TO_MP4=ON`.
Use the grid converter to merge four synced ZED recordings into a 2x2 CCTV-style MP4 with a Unix timestamp overlay in the top-left corner:
```bash
./build/bin/zed_svo_grid_to_mp4 \
--segment-dir <SEGMENT_DIR> \
--encoder-device auto \
--codec h265 \
--duration-seconds 2
```
The tool syncs the four inputs using the same common-start timestamp rule as the ZED multi-camera playback sample, defaults to a 2x2 layout ordered as `zed1 zed2 / zed3 zed4`, and writes `<segment>/<segment>_grid.mp4` unless `--output` is provided. By default each tile is scaled to `0.5x`, so a four-camera 1920x1200 segment produces a 1920x1200 composite. Use repeated `--input` flags instead of `--segment-dir` when you want explicit row-major ordering.
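The composite size follows directly from the tile scale; a small sketch of that arithmetic:
```python
def grid_output_size(tile_width: int, tile_height: int, scale: float = 0.5,
                     columns: int = 2, rows: int = 2) -> tuple[int, int]:
    # Each tile is scaled first, then tiles are packed row-major
    # (zed1 zed2 / zed3 zed4) into the grid.
    return int(tile_width * scale) * columns, int(tile_height * scale) * rows

# A four-camera 1920x1200 segment at the default 0.5x yields a 1920x1200 composite.
assert grid_output_size(1920, 1200) == (1920, 1200)
```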
Use the batch wrapper to run `zed_svo_grid_to_mp4` over many segment directories with one aggregate progress bar:
```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--dataset-root <DATASET_ROOT> \
--recursive \
--jobs 2 \
--encoder-device auto \
--duration-seconds 2
```
You can also provide the exact segments to convert:
```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--segment <SEGMENT_DIR_A> \
--segment <SEGMENT_DIR_B> \
--jobs 2
```
Or preserve a precomputed CSV ordering:
```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--segments-csv <SEGMENTS_CSV> \
--jobs 2 \
--duration-seconds 2
```
The batch grid wrapper mirrors the grid encoder options, skips existing `<segment>/<segment>_grid.mp4` outputs by default, and returns a nonzero exit code if any segment fails.
When you suspect a previous run left behind partial MP4 files, opt into `ffprobe` validation so broken existing outputs are treated as missing instead of skipped:
```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--dataset-root <DATASET_ROOT> \
--probe-existing \
--jobs 2
```
Use `--report-existing` to audit existing outputs without launching conversions. The report prints invalid existing files only, while the summary still includes valid and missing counts. This is useful for the partial-write failure mode currently seen as `moov atom not found` in some kindergarten grid MP4s:
```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \
--dataset-root <DATASET_ROOT> \
--report-existing
```
Use `--dry-run` to preview what the batch wrapper would convert after applying skip logic. Combine it with `--probe-existing` when you want to see which broken existing outputs would be requeued:
```bash
uv run python scripts/zed_batch_svo_grid_to_mp4.py \
<DATASET_ROOT> \
--probe-existing \
--dry-run
```
#### Expected CSV Input Format
The `--segments-csv` input expects a header row with at least a `segment_dir` column. Extra columns are allowed and ignored by the batch wrapper. `segment_dir` values may be absolute paths or paths relative to the CSV file's parent directory. Use `--csv-root` to override that base directory.
Repeated rows for the same `segment_dir` are allowed; the wrapper converts each unique segment once, preserving the first-seen CSV order. The repo includes a small example at `config/svo2_segments_sorted.sample.csv`:
```csv
timestamp,activity,group_path,segment_dir,camera,relative_path
2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed1,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed1.svo2
2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed2,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed2.svo2
```
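For consumers that want the same ordering contract outside the wrapper, here is a minimal sketch of the resolution and dedup rules above (a hypothetical `load_segment_dirs` helper, not part of the wrapper):
```python
import csv
from pathlib import Path

def load_segment_dirs(csv_path: Path, csv_root: Path | None = None) -> list[Path]:
    # Relative segment_dir values resolve against --csv-root when given,
    # otherwise against the CSV file's parent directory.
    base = csv_root if csv_root is not None else csv_path.parent
    ordered: dict[Path, None] = {}
    with csv_path.open(newline="") as stream:
        for row in csv.DictReader(stream):  # extra columns are ignored
            path = Path(row["segment_dir"])
            if not path.is_absolute():
                path = base / path
            # dict insertion order keeps first-seen CSV order; repeated rows
            # collapse to one conversion per unique segment.
            ordered.setdefault(path.resolve(), None)
    return list(ordered)
```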
### Batch ZED Segments To MCAP
This workflow depends on the `zed_svo_to_mcap` binary, which is only built when
the ZED SDK is detected during CMake configure.
Use the wrapper to recurse through a dataset root, run `zed_svo_to_mcap --segment-dir` on every matched multi-camera segment, and show interactive table progress on TTYs with durable text logging elsewhere:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--dataset-root <DATASET_ROOT> \
--recursive \
--jobs 2 \
--cuda-visible-devices GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
--start-frame 10 \
--end-frame 29
```
You can also preserve the precomputed kindergarten CSV ordering:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--segments-csv <SEGMENTS_CSV> \
--jobs 2 \
--start-frame 10 \
--end-frame 29
```
Enable per-camera pose export when the segment has valid tracking:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--segment <SEGMENT_DIR> \
--with-pose \
--pose-config <POSE_CONFIG>
```
The batch MCAP wrapper writes `<segment>/<segment>.mcap` by default, skips existing outputs unless told otherwise, and returns a nonzero exit code if any segment fails.
The repo includes a minimal pose config at `config/zed_pose_config.toml` so MCAP conversion does not depend on a separate `cv-mmap` checkout.
In bundled multi-camera timeline mode, `--start-frame` and `--end-frame` mean the first and last emitted bundle indices from the common start timestamp, inclusive.
When stderr is attached to a TTY, `zed_batch_svo_to_mcap.py` uses a `progress-table` view by default; otherwise it emits line-oriented start/completion/failure logs plus periodic heartbeat summaries. Use `--progress-ui table` or `--progress-ui text` to override the automatic mode selection.
Bundled MCAP export now defaults to `--bundle-policy nearest`. That mode emits one `/bundle` manifest message per bundle timestamp on the common timeline and keeps the original per-camera timestamps on `/zedN/video`, `/zedN/depth`, and optional `/zedN/pose`. Faster cameras are sampled onto the slowest common timeline there, so they can end up with the same message count as slower cameras. Consumers that care about grouping should follow `/bundle` instead of inferring bundle membership from identical message timestamps.
Use `--bundle-policy strict` when you want thresholded grouping; `--sync-tolerance-ms` only applies in that strict mode. Use `--bundle-policy copy` when you want one MCAP containing all camera namespaces with their original per-camera cadence and no `/bundle` manifest. `copy` disables `--start-frame`, `--end-frame`, and `--sync-tolerance-ms`; `--copy-range common|full` controls whether it trims to the overlap window or preserves each camera's full timestamp range.
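As a rough sketch of the `nearest` idea only (the exporter's actual tie-breaking and gap handling are not reproduced here), picking the camera sample closest to one bundle timestamp might look like:
```python
import bisect

def nearest_sample_index(bundle_ts_ns: int, camera_ts_ns: list[int]) -> int:
    # camera_ts_ns is assumed sorted ascending and non-empty.
    i = bisect.bisect_left(camera_ts_ns, bundle_ts_ns)
    candidates = [j for j in (i - 1, i) if 0 <= j < len(camera_ts_ns)]
    # The same camera sample can be chosen for several bundle timestamps,
    # which is why faster cameras can match slower cameras' message counts.
    return min(candidates, key=lambda j: abs(camera_ts_ns[j] - bundle_ts_ns))
```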
Single-source `zed_svo_to_mcap` now writes the one-camera `copy` shape by default, so `foo_zed4.svo2` exports namespaced topics like `/zed4/video` and `/zed4/depth` with no `/bundle`. See [docs/mcap_layout.md](./docs/mcap_layout.md) for the current bundled/copy contract and [docs/mcap_legacy_single_camera_layout.md](./docs/mcap_legacy_single_camera_layout.md) for the separate legacy `/camera/*` reference.
For the simple non-GUI path, use `scripts/mcap_rgbd_example.py` and [docs/mcap_recipes.md](./docs/mcap_recipes.md). That helper supports current `bundled` and `copy` MCAPs, and it also accepts the legacy `/camera/*` shape by treating it as a single-camera stream with the literal label `camera`.
For calibration-based depth/RGB mapping, use `scripts/mcap_depth_alignment.py` and [docs/depth_alignment.md](./docs/depth_alignment.md). That helper explains the current affine mapping implied by the exported calibration topics and can export example aligned-depth and overlay PNGs from a chosen MCAP frame.
### MCAP RGBD Viewer
The repo includes an example RGB+depth viewer at `scripts/mcap_rgbd_viewer.py`. It supports legacy standalone `/camera/*` MCAPs, bundled `/bundle` + `/zedN/*` MCAPs, and `copy` MCAPs with namespaced `/{label}/*` topics and no `/bundle`, including the default single-source output from `zed_svo_to_mcap`.
Install the optional viewer dependencies first:
```bash
uv sync --extra viewer
```
Then launch the interactive viewer:
```bash
uv run --extra viewer python scripts/mcap_rgbd_viewer.py \
/workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap \
--camera-label zed1
```
You can also use the same script without a GUI to inspect metadata or render a preview PNG:
```bash
uv run --extra viewer python scripts/mcap_rgbd_viewer.py \
--summary-only \
/workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap
```
```bash
uv run --extra viewer python scripts/mcap_rgbd_viewer.py \
--camera-label zed2 \
--frame-index 150 \
--export-preview /tmp/mcap_bundled_gap_preview.png \
/workspaces/data/kindergarten/throw/2026-03-18T12-58-13/2026-03-18T12-58-13.mcap
```
The viewer depends on `ffmpeg` being on `PATH` so it can build a seek-friendly preview cache for H.264/H.265 MCAP video streams.
This is intentionally a simple preview script: it transcodes only the RGB video stream into a temporary intra-frame `mjpeg` cache and then uses that same cache for both scrubbing and normal playback. Depth data is not transcoded to `mjpeg`; it stays in the temporary raw depth cache and is decoded and color-mapped on demand.
### Why Mixed Hardware/Software Mode Exists
Bundled MCAP export opens one video encoder per camera stream. A four-camera segment therefore consumes four H.264/H.265 encoder sessions at once.
This matters because NVIDIA's NVENC session limit is separate from raw CUDA utilization. In NVIDIA's Video Codec SDK documentation, non-qualified systems are capped at 8 concurrent encode sessions across all non-qualified GPUs in the system, and NVIDIA's SDK readme still calls out a 5-session GeForce limit in some contexts. In practice, consumer/GeForce hosts often hit NVENC session-init failures before the GPUs look "full" in `nvidia-smi`.
That is why the batch wrapper supports mixed pools such as two NVENC workers plus two software-encoded workers:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--dataset-root <DATASET_ROOT> \
--recursive \
--overwrite \
--hardware-jobs 2 \
--hardware-cuda-visible-devices 0,1 \
--software-jobs 2 \
--software-cuda-visible-devices 0,1 \
--depth-mode neural_plus
```
With bundled four-camera segments, `4` all-hardware jobs would try to open about `16` NVENC sessions, which is why mixed mode is the safe default for high-throughput rebuilds on GeForce-class machines. The software workers still use the GPUs for ZED neural depth; only video encoding moves to CPU.
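The session arithmetic is simple enough to sanity-check directly; a sketch under the documented one-encoder-per-camera assumption:
```python
def nvenc_sessions(hardware_jobs: int, cameras_per_segment: int = 4) -> int:
    # Bundled export opens one encoder session per camera stream, so
    # concurrent NVENC sessions scale multiplicatively with hardware jobs.
    return hardware_jobs * cameras_per_segment

assert nvenc_sessions(4) == 16  # well past the 8-session non-qualified cap
assert nvenc_sessions(2) == 8   # why 2 hardware + 2 software jobs is the safer split
```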
If you intentionally want to bypass NVIDIA's consumer NVENC session cap, there is an unofficial driver patch at [`keylase/nvidia-patch`](https://github.com/keylase/nvidia-patch). That can make larger all-hardware batches viable, but it is not NVIDIA-supported and should be treated as an explicit ops decision rather than a project requirement.
Use `--probe-existing` to validate existing MCAPs before skipping them. Invalid outputs are treated as missing and requeued:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--dataset-root <DATASET_ROOT> \
--probe-existing \
--jobs 2
```
Use `--report-existing` to audit existing MCAPs without launching conversions:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--dataset-root <DATASET_ROOT> \
--report-existing
```
Use `--dry-run` to preview what would be converted after applying skip or probe logic:
```bash
uv run python scripts/zed_batch_svo_to_mcap.py \
--segments-csv <SEGMENTS_CSV> \
--probe-existing \
--dry-run
```
This repo keeps the live downstream streamer/runtime plus the MCAP contract docs such as [docs/mcap_layout.md](./docs/mcap_layout.md), [docs/mcap_legacy_single_camera_layout.md](./docs/mcap_legacy_single_camera_layout.md), and [docs/mcap_body_tracking.md](./docs/mcap_body_tracking.md).
### Mandatory Acceptance (Standalone)
-7
@@ -1,7 +0,0 @@
timestamp,activity,group_path,segment_dir,camera,relative_path
2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed1,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed1.svo2
2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed2,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed2.svo2
2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed3,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed3.svo2
2026-03-18T11-23-22,jump,jump/external/recording,jump/external/recording/2026-03-18T11-23-22,zed4,jump/external/recording/2026-03-18T11-23-22/2026-03-18T11-23-22_zed4.svo2
2026-03-18T11-26-23,jump,jump/experiment/1,jump/experiment/1/2026-03-18T11-26-23,zed1,jump/experiment/1/2026-03-18T11-26-23/2026-03-18T11-26-23_zed1.svo2
2026-03-18T11-26-23,jump,jump/experiment/1,jump/experiment/1/2026-03-18T11-26-23,zed2,jump/experiment/1/2026-03-18T11-26-23/2026-03-18T11-26-23_zed2.svo2
-18
View File
@@ -1,18 +0,0 @@
# Minimal pose-tracking config for zed_svo_to_mcap.
# The converter currently reads only:
# - zed.coordinate_system
# - zed.body_tracking.reference_frame
# - zed.body_tracking.set_floor_as_origin
[zed]
# Native ZED 3D/body coordinate system used when reading positional tracking.
# Supported values in this repo are IMAGE and RIGHT_HANDED_Y_UP.
coordinate_system = "IMAGE"
[zed.body_tracking]
# Reference frame used for per-camera pose estimation.
# Supported values are CAMERA and WORLD.
reference_frame = "CAMERA"
# When true, WORLD origin is placed on the floor during positional tracking.
set_floor_as_origin = false
-86
@@ -1,86 +0,0 @@
# Depth Alignment
Exported ZED MCAP files can carry RGB video and depth at different raster sizes.
For the current kindergarten `zed4` exports, the common pair is:
- video: `1920x1200`
- depth: `960x512`
That means RGB and depth do not share aspect ratio. The files stay alignable because the exporter writes two separate calibration topics:
- `/{label}/calibration` for video
- `/{label}/depth_calibration` for depth
See [mcap_layout.md](./mcap_layout.md) for the topic contract.
## What The Mapping Means
The correct way to align depth onto RGB is to use the two calibration matrices, not to assume matching pixel grids.
For the same camera, with zero distortion and identity rectification, the mapping reduces to a 2D affine transform:
```text
u_rgb = (fx_rgb / fx_depth) * u_depth + (cx_rgb - (fx_rgb / fx_depth) * cx_depth)
v_rgb = (fy_rgb / fy_depth) * v_depth + (cy_rgb - (fy_rgb / fy_depth) * cy_depth)
```
and the inverse:
```text
u_depth = (fx_depth / fx_rgb) * u_rgb + (cx_depth - (fx_depth / fx_rgb) * cx_rgb)
v_depth = (fy_depth / fy_rgb) * v_rgb + (cy_depth - (fy_depth / fy_rgb) * cy_rgb)
```
For the sampled kindergarten `zed4` files, those offsets are effectively zero, so the mapping becomes an anisotropic resize:
```text
u_rgb ~= 2.0 * u_depth
v_rgb ~= 2.34375 * v_depth
```
This is why the practical overlay behavior is a stretch, not a crop.
It is still better to derive the mapping from the two calibration topics than to hardcode `2.0` and `2.34375`, because the exact calibration can vary by camera and export settings.
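As a concrete illustration, here is a minimal sketch that derives the depth-to-RGB mapping from two row-major `K` matrices, assuming the zero-distortion, identity-rectification case described above:
```python
def depth_to_rgb_affine(k_rgb: list[float], k_depth: list[float]) -> tuple[float, float, float, float]:
    """Return (scale_x, scale_y, offset_x, offset_y) for depth -> RGB pixels.

    k_rgb and k_depth are row-major 3x3 intrinsics, so fx = K[0], cx = K[2],
    fy = K[4], cy = K[5]. Valid only for zero distortion and identity
    rectification, per the caveat above.
    """
    scale_x = k_rgb[0] / k_depth[0]
    scale_y = k_rgb[4] / k_depth[4]
    offset_x = k_rgb[2] - scale_x * k_depth[2]
    offset_y = k_rgb[5] - scale_y * k_depth[5]
    return scale_x, scale_y, offset_x, offset_y
```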
## Helper Script
Use the alignment helper to inspect the calibration pair and optionally export an example overlay:
```bash
uv run --extra viewer python scripts/mcap_depth_alignment.py \
/workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41_zed4.mcap \
--camera-label zed4
```
To export example images:
```bash
uv run --extra viewer python scripts/mcap_depth_alignment.py \
/workspaces/data/kindergarten/bar/2026-03-18T11-59-41/2026-03-18T11-59-41_zed4.mcap \
--camera-label zed4 \
--frame-index 400 \
--output-dir /tmp/zed4_alignment_demo
```
That command writes:
- `rgb_frame.png`
- `depth_native_colorized.png`
- `depth_aligned_to_rgb_colorized.png`
- `depth_overlay_on_rgb.png`
- `rgb_aligned_to_depth.png`
## What The Helper Actually Does
The script:
1. reads `/{label}/calibration` and `/{label}/depth_calibration`
2. computes the affine mapping implied by the two intrinsic matrices
3. decodes one RGB frame and one depth frame from the MCAP
4. warps depth into RGB space with `cv2.warpAffine`
5. optionally warps RGB into depth space with the inverse mapping
For the current exported ZED MCAP contract, that is the right simple alignment path.
If a future export starts carrying non-zero distortion or non-identity rectification, consumers should switch from this affine shortcut to a full camera-model reprojection path.
+1 -1
@@ -137,4 +137,4 @@ For multi-camera `copy` MCAP files, the current validation contract is:
Legacy `/camera/*` validation expectations are documented in [mcap_legacy_single_camera_layout.md](./mcap_legacy_single_camera_layout.md).
The repository-level Python helper [scripts/mcap_bundle_validator.py](../scripts/mcap_bundle_validator.py) understands bundled, copy, and legacy `/camera/*` layouts and reports which one it found before applying the corresponding validation rules.
The standalone helper [zed-offline-tools/scripts/mcap_bundle_validator.py](../../zed-offline-tools/scripts/mcap_bundle_validator.py) understands bundled, copy, and legacy `/camera/*` layouts and reports which one it found before applying the corresponding validation rules.
-179
@@ -1,179 +0,0 @@
# MCAP Recipes
This guide is the simple, non-GUI path for inspecting RGB+depth MCAP files.
Use it when you want to:
- confirm whether an MCAP is bundled, `copy`, or legacy `/camera/*`
- inspect camera labels, message counts, and timestamp ranges
- export one RGB frame and one decoded depth sample as a concrete example
- understand how `/bundle` changes the meaning of timestamps and sample grouping
For the current bundled/copy layout contract, see [mcap_layout.md](./mcap_layout.md). The older `/camera/*` wire shape is documented separately in [mcap_legacy_single_camera_layout.md](./mcap_legacy_single_camera_layout.md).
## Quick Summary
The repository includes a small example helper:
```bash
uv run python scripts/mcap_rgbd_example.py --help
```
It has two commands:
- `summary`: print layout, per-camera counts, and timestamp ranges
- `export-sample`: write one RGB image plus one depth array/preview
`summary` works with the base Python dependencies:
```bash
uv sync
```
`export-sample` also needs:
- `ffmpeg` on `PATH`
- the optional depth decoder binding:
```bash
uv sync --extra viewer
```
## The Practical Cases
For this helper, there are really two operational cases:
- `bundled`: multiple namespaced camera topics plus `/bundle`
- single-camera stream with no `/bundle`
That second case can appear in two wire shapes:
- `copy`: namespaced topics such as `/zed4/video`
- legacy single-camera: `/camera/video`
Current single-source `zed_svo_to_mcap` output uses the one-camera `copy` shape by default, so even a one-camera file usually looks like namespaced `/{label}/*` topics with no `/bundle`.
The helper treats legacy `/camera/*` as compatible with `copy` by using the implicit camera label `camera`.
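A minimal sketch of that layout decision, assuming only the topic names above (the real helpers also validate schemas and message counts):
```python
def infer_layout(topics: set[str]) -> str:
    # /bundle wins; legacy /camera/* is treated as one-camera copy with the
    # implicit label "camera"; any other namespaced topics mean plain copy.
    if "/bundle" in topics:
        return "bundled"
    if any(topic.startswith("/camera/") for topic in topics):
        return "legacy-single-camera"
    return "copy"
```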
## Recipe: Summarize One MCAP
```bash
uv run python scripts/mcap_rgbd_example.py summary <MCAP_PATH>
```
What the summary prints:
- layout and validation status
- camera labels
- per-camera `video`, `depth`, `pose`, `calibration`, `depth_calibration`, and `body` counts
- per-camera video/depth timestamp ranges
- for bundled files only:
- bundle count
- bundle timestamp range
- bundle policy counts
- per-camera present/corrupted-gap/unknown bundle-member counts
This is the fastest way to answer:
- “is this file bundled, copy, or legacy single-camera?”
- “which camera labels are inside?”
- “do video and depth counts match?”
- “what timestamp range does each camera cover?”
## Recipe: Export One RGB + Depth Sample
```bash
uv run python scripts/mcap_rgbd_example.py export-sample \
<MCAP_PATH> \
--output-dir /tmp/mcap_sample
```
For multi-camera or namespaced one-camera files, choose the camera explicitly when needed:
```bash
uv run python scripts/mcap_rgbd_example.py export-sample \
<MCAP_PATH> \
--camera-label zed2 \
--sample-index 25 \
--output-dir /tmp/mcap_sample_zed2
```
Outputs:
- `rgb.png`
- `depth.npy`
- `depth_preview.png`
- `sample_metadata.json`
`sample_index` always follows the zero-based per-camera RGB+depth sample order.
That means:
- legacy `/camera/*`: sample `N` is `/camera/video[N]` + `/camera/depth[N]`
- `copy`: sample `N` is `/{label}/video[N]` + `/{label}/depth[N]`
- `bundled`: sample `N` is the `N`th present sample for that camera, not bundle index `N`
In bundled files, `sample_metadata.json` also records the matched `/bundle` member metadata for the selected camera sample.
## Recipe: Understand Bundled vs Non-Bundled Timing
Bundled files intentionally separate bundle time from camera sample time:
- `/bundle.timestamp` is the nominal common-timeline bundle timestamp
- `/zedN/video` and `/zedN/depth` keep the original per-camera sample timestamps
Copy and legacy single-camera files do not have bundle time at all:
- there is no `/bundle`
- each camera topic keeps its own original cadence and timestamps
If you care about grouping, use `/bundle` in bundled files.
For `copy` and legacy single-camera files, treat each camera stream independently.
## Recipe: Inspect `/bundle` In Python
The helper script is intentionally small, but sometimes it is easier to inspect `/bundle` directly.
This snippet shows how to print bundle membership for one camera:
```python
from pathlib import Path
import zed_batch_svo_to_mcap as batch
path = Path("<MCAP_PATH>").expanduser().resolve()
camera_label = "zed1"
reader_module = batch.load_mcap_reader()
with path.open("rb") as stream:
reader = reader_module.make_reader(stream)
for schema, channel, message in reader.iter_messages():
if channel.topic != "/bundle":
continue
if schema is None or schema.name != "cvmmap_streamer.BundleManifest":
continue
bundle_class, present_value = batch.load_bundle_manifest_type(schema.data)
bundle = bundle_class()
bundle.ParseFromString(message.data)
for member in bundle.members:
if str(member.camera_label) != camera_label:
continue
status_value = int(getattr(member, "status", 0))
status_field = member.DESCRIPTOR.fields_by_name.get("status")
status_enum = status_field.enum_type if status_field is not None else None
status_name = (
status_enum.values_by_number.get(status_value).name
if status_enum is not None and status_enum.values_by_number.get(status_value) is not None
else str(status_value)
)
print(bundle.bundle_index, status_name)
break
```
This is the important mental model:
- `bundled`: follow `/bundle` for grouping
- `copy`: treat each namespaced camera as an independent stream
- legacy `/camera/*`: same model as one-camera `copy`, with the implicit label `camera`
-97
@@ -1,97 +0,0 @@
# ZED Segment Time Index
`scripts/zed_segment_time_index.py` builds and queries an embedded DuckDB index for bundled ZED segment folders.
Default artifact name:
```text
<DATASET_ROOT>/segment_time_index.duckdb
```
Primary commands:
```bash
uv run python scripts/zed_segment_time_index.py build <DATASET_ROOT>
uv run python scripts/zed_segment_time_index.py query <DATASET_ROOT> --at 2026-03-18T12-00-23
uv run python scripts/zed_segment_time_index.py query <DATASET_ROOT> --start 2026-03-18T12-00-23 --end 2026-03-18T12-00-30
```
## Data Source Rules
- Segment discovery is recursive and follows the same multi-camera layout assumptions as the batch ZED tooling.
- A directory is considered a valid segment when it contains at least two unique `*_zedN.svo` or `*_zedN.svo2` files and no duplicate camera labels.
- Timing is sourced from the segment MCAP, not from the SVO/SVO2 files.
- A valid segment is skipped when it has no `.mcap` file or more than one `.mcap` file in the segment directory.
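A minimal sketch of those discovery rules (a hypothetical `is_indexable_segment` check, assuming only the file-naming contract above):
```python
import re
from pathlib import Path

CAMERA_PATTERN = re.compile(r"_(zed\d+)\.svo2?$")

def is_indexable_segment(segment_dir: Path) -> bool:
    labels = [match.group(1)
              for entry in segment_dir.iterdir()
              if (match := CAMERA_PATTERN.search(entry.name))]
    # At least two unique cameras and no duplicate labels...
    if len(labels) < 2 or len(labels) != len(set(labels)):
        return False
    # ...and exactly one MCAP in the segment directory to source timing from.
    return len(list(segment_dir.glob("*.mcap"))) == 1
```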
## MCAP Bounds Extraction
`build/bin/mcap_video_bounds` scans `foxglove.CompressedVideo` messages in one MCAP and emits:
- `start_ns`
- `end_ns`
- `duration_ns`
- `video_message_count`
- `start_iso_utc`
- `end_iso_utc`
The helper prefers the protobuf `CompressedVideo.timestamp` field and falls back to MCAP `logTime` when that field is zero.
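A minimal sketch of that timestamp-selection rule (a hypothetical helper, not the extractor itself):
```python
def video_bounds_ns(samples: list[tuple[int, int]]) -> tuple[int, int, int]:
    """samples: (compressed_video_timestamp_ns, mcap_log_time_ns) per message.
    Returns (start_ns, end_ns, duration_ns) using the fallback rule above."""
    effective = [ts if ts != 0 else log_ts for ts, log_ts in samples]
    start_ns, end_ns = min(effective), max(effective)
    return start_ns, end_ns, end_ns - start_ns
```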
## DuckDB Layout
The database contains two tables: `meta` and `segments`.
### `meta`
Key-value metadata for the index:
- `schema_version`: current schema version, currently `1`
- `dataset_root`: absolute dataset root used when the index was built
- `built_at_utc`: build timestamp in UTC
- `default_timezone`: inferred dataset wall-clock timezone used when querying with `--timezone dataset`
### `segments`
One row per indexed segment.
| Column | Type | Meaning |
|---|---|---|
| `segment_dir` | `VARCHAR` | Absolute path to the segment directory |
| `relative_segment_dir` | `VARCHAR` | Path relative to the dataset root |
| `group_path` | `VARCHAR` | Parent path of the segment within the dataset |
| `activity` | `VARCHAR` | First path component under the dataset root |
| `segment_name` | `VARCHAR` | Segment directory basename |
| `mcap_path` | `VARCHAR` | Absolute MCAP path used for timing |
| `start_ns` | `BIGINT` | Earliest video timestamp in nanoseconds since Unix epoch |
| `end_ns` | `BIGINT` | Latest video timestamp in nanoseconds since Unix epoch |
| `duration_ns` | `BIGINT` | `end_ns - start_ns` |
| `start_iso_utc` | `VARCHAR` | UTC rendering of `start_ns` |
| `end_iso_utc` | `VARCHAR` | UTC rendering of `end_ns` |
| `camera_count` | `INTEGER` | Number of discovered camera inputs in the segment directory |
| `camera_labels` | `VARCHAR` | Comma-separated camera labels, for example `zed1,zed2,zed3,zed4` |
| `video_message_count` | `BIGINT` | Number of `foxglove.CompressedVideo` messages observed in the MCAP |
| `index_source` | `VARCHAR` | Current extractor label, currently `mcap_video_bounds` |
Indexes are created on `start_ns` and `end_ns`.
## Query Semantics
- `--at` performs an overlap lookup, not just an exact nanosecond equality check.
- Query precision follows the precision supplied by the user.
- A second-precision value like `2026-03-18T12-00-23` is treated as the whole second `[12:00:23.000, 12:00:23.999999999]`.
- Integer epochs are widened similarly by their apparent unit:
- 10 digits or fewer: seconds
- 11-13 digits: milliseconds
- 14-16 digits: microseconds
- 17+ digits: nanoseconds
- `--start/--end` returns every segment whose `[start_ns, end_ns]` overlaps the requested interval.
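A minimal sketch of the widening and overlap rules above (hypothetical helpers; the real script also parses timestamp strings and timezones):
```python
def widen_epoch_to_ns(raw: str) -> tuple[int, int]:
    """Widen an integer epoch to an inclusive [start_ns, end_ns] interval
    by its apparent unit, following the digit-count rules above."""
    digits = len(raw)
    value = int(raw)
    if digits <= 10:        # seconds
        unit = 1_000_000_000
    elif digits <= 13:      # milliseconds
        unit = 1_000_000
    elif digits <= 16:      # microseconds
        unit = 1_000
    else:                   # already nanoseconds
        unit = 1
    start_ns = value * unit
    return start_ns, start_ns + unit - 1

def overlaps(seg_start_ns: int, seg_end_ns: int, q_start_ns: int, q_end_ns: int) -> bool:
    # --at and --start/--end both reduce to an interval-overlap test.
    return seg_start_ns <= q_end_ns and q_start_ns <= seg_end_ns
```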
## Timezone Behavior
- Query default is `--timezone dataset`.
- `dataset` resolves to the `default_timezone` stored in `meta`.
- If inference is unavailable, the script falls back to `local`.
- Explicit values are also accepted:
- `local`
- `UTC`
- fixed offsets such as `UTC+08:00`
- IANA zone names such as `Asia/Shanghai`
@@ -1,30 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
#include <string_view>
namespace cvmmap_streamer::zed_tools {
[[nodiscard]]
bool stderr_supports_progress_bar();
class ProgressBar {
public:
explicit ProgressBar(std::uint64_t total_frames);
~ProgressBar();
[[nodiscard]]
bool enabled() const;
void update(std::uint64_t completed_frames);
void update_fraction(double fraction, std::string_view detail = {});
void finish(std::uint64_t completed_frames, bool success);
void finish_fraction(double fraction, bool success, std::string_view detail = {});
private:
struct Impl;
std::unique_ptr<Impl> impl_{};
};
} // namespace cvmmap_streamer::zed_tools
@@ -1,104 +0,0 @@
#pragma once
#include "cvmmap_streamer/config/runtime_config.hpp"
#include <cstdint>
#include <expected>
#include <filesystem>
#include <memory>
#include <string>
#include <string_view>
namespace cvmmap_streamer::zed_tools {
using cvmmap_streamer::CodecType;
using cvmmap_streamer::EncoderDeviceType;
inline constexpr std::uint32_t kDefaultGopSize = 30;
inline constexpr std::uint32_t kDefaultBFrames = 0;
inline constexpr int kDefaultQuality = 23;
inline constexpr std::uint64_t kNanosPerSecond = 1'000'000'000ull;
enum class PresetKind : std::uint8_t {
Fast,
Balanced,
Quality,
};
enum class TuneKind : std::uint8_t {
LowLatency,
Balanced,
};
struct EncodeTuning {
PresetKind preset{PresetKind::Fast};
TuneKind tune{TuneKind::LowLatency};
int quality{kDefaultQuality};
std::uint32_t gop{kDefaultGopSize};
std::uint32_t b_frames{kDefaultBFrames};
};
[[nodiscard]]
std::expected<CodecType, std::string> parse_codec(std::string_view raw);
[[nodiscard]]
std::expected<EncoderDeviceType, std::string> parse_encoder_device(std::string_view raw);
[[nodiscard]]
std::expected<PresetKind, std::string> parse_preset(std::string_view raw);
[[nodiscard]]
std::expected<TuneKind, std::string> parse_tune(std::string_view raw);
[[nodiscard]]
std::string_view codec_name(CodecType codec);
[[nodiscard]]
std::string_view preset_name(PresetKind preset);
[[nodiscard]]
std::string_view tune_name(TuneKind tune);
[[nodiscard]]
std::uint64_t frame_period_ns(float fps);
[[nodiscard]]
std::filesystem::path derive_output_path(const std::filesystem::path &input_path);
class Mp4Writer {
public:
Mp4Writer();
Mp4Writer(const Mp4Writer &) = delete;
Mp4Writer &operator=(const Mp4Writer &) = delete;
Mp4Writer(Mp4Writer &&) noexcept;
Mp4Writer &operator=(Mp4Writer &&) noexcept;
~Mp4Writer();
[[nodiscard]]
std::expected<void, std::string> open(
const std::filesystem::path &output_path,
CodecType codec,
EncoderDeviceType encoder_device,
std::uint32_t width,
std::uint32_t height,
float fps,
const EncodeTuning &tuning);
[[nodiscard]]
std::expected<void, std::string> write_bgr_frame(
const std::uint8_t *data,
std::size_t row_stride_bytes,
std::uint64_t relative_timestamp_ns);
[[nodiscard]]
std::expected<void, std::string> flush();
[[nodiscard]]
bool using_hardware() const;
private:
struct Impl;
std::unique_ptr<Impl> impl_{};
};
} // namespace cvmmap_streamer::zed_tools
-362
@@ -1,362 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
import re
import click
import zed_batch_svo_to_mcap as batch
BUNDLE_TOPIC = "/bundle"
CAMERA_PREFIX = "/camera/"
NAMESPACED_TOPIC_PATTERN = re.compile(r"^/([^/]+)/([^/]+)$")
SINGLE_TOPIC_SCHEMA_NAMES = {
"/camera/video": "foxglove.CompressedVideo",
"/camera/depth": "cvmmap_streamer.DepthMap",
"/camera/calibration": "foxglove.CameraCalibration",
"/camera/depth_calibration": "foxglove.CameraCalibration",
"/camera/pose": "foxglove.PoseInFrame",
}
@dataclass(slots=True)
class CameraSummary:
video_messages: int = 0
depth_messages: int = 0
pose_messages: int = 0
calibration_messages: int = 0
depth_calibration_messages: int = 0
body_messages: int = 0
present_members: int = 0
corrupted_gap_members: int = 0
unknown_members: int = 0
@dataclass(slots=True)
class McapSummary:
path: Path
layout: str = "unknown"
validation_status: str = "invalid"
validation_reason: str = ""
camera_labels: tuple[str, ...] = ()
bundle_count: int = 0
policy_counts: Counter[str] = field(default_factory=Counter)
camera_stats: dict[str, CameraSummary] = field(default_factory=dict)
schema_mismatches: list[str] = field(default_factory=list)
def iter_mcap_paths(inputs: tuple[Path, ...], recursive: bool) -> list[Path]:
discovered: list[Path] = []
for input_path in inputs:
resolved = input_path.expanduser().resolve()
if resolved.is_file():
discovered.append(resolved)
continue
if resolved.is_dir():
pattern = "*.mcap" if not recursive else "**/*.mcap"
discovered.extend(sorted(resolved.glob(pattern)))
continue
raise click.ClickException(f"path does not exist: {resolved}")
return sorted(dict.fromkeys(discovered))
def policy_name_from_message(bundle_message: object) -> str:
descriptor = bundle_message.DESCRIPTOR.enum_types_by_name.get("BundlePolicy")
if descriptor is None:
return str(bundle_message.policy)
value = descriptor.values_by_number.get(bundle_message.policy)
return value.name if value is not None else str(bundle_message.policy)
def status_name_from_member(member: object, present_value: int | None) -> str:
if present_value is None:
return "PRESENT" if member.HasField("timestamp") else "UNKNOWN"
field_descriptor = member.DESCRIPTOR.fields_by_name.get("status")
descriptor = field_descriptor.enum_type if field_descriptor is not None else None
if descriptor is None:
return "PRESENT" if member.status == present_value else "UNKNOWN"
value = descriptor.values_by_number.get(member.status)
return value.name if value is not None else str(member.status)
def record_single_camera_topic(
summary: McapSummary,
topic: str,
schema_name: str | None,
) -> None:
stats = summary.camera_stats.setdefault("camera", CameraSummary())
if topic == "/camera/video":
stats.video_messages += 1
elif topic == "/camera/depth":
stats.depth_messages += 1
elif topic == "/camera/pose":
stats.pose_messages += 1
elif topic == "/camera/calibration":
stats.calibration_messages += 1
elif topic == "/camera/depth_calibration":
stats.depth_calibration_messages += 1
elif topic == "/camera/body":
stats.body_messages += 1
expected_schema = SINGLE_TOPIC_SCHEMA_NAMES.get(topic)
if expected_schema is not None and schema_name != expected_schema:
summary.schema_mismatches.append(
f"{topic}: expected schema '{expected_schema}', got '{schema_name or 'none'}'"
)
def probe_single_camera_output(path: Path) -> batch.OutputProbeResult:
base_probe = batch.probe_output(path, ("camera",), layout="single-camera", bundle_topic=None)
if base_probe.status != "valid":
return base_probe
reader_module = batch.load_mcap_reader()
stats = CameraSummary()
schema_mismatches: list[str] = []
try:
with path.open("rb") as stream:
reader = reader_module.make_reader(stream)
for schema, channel, _message in reader.iter_messages():
topic = channel.topic
schema_name = schema.name if schema is not None else None
if topic == "/camera/video":
stats.video_messages += 1
elif topic == "/camera/depth":
stats.depth_messages += 1
elif topic == "/camera/pose":
stats.pose_messages += 1
elif topic == "/camera/calibration":
stats.calibration_messages += 1
elif topic == "/camera/depth_calibration":
stats.depth_calibration_messages += 1
elif topic == "/camera/body":
stats.body_messages += 1
expected_schema = SINGLE_TOPIC_SCHEMA_NAMES.get(topic)
if expected_schema is not None and schema_name != expected_schema:
schema_mismatches.append(
f"{topic}: expected schema '{expected_schema}', got '{schema_name or 'none'}'"
)
except Exception as error: # noqa: BLE001
return batch.OutputProbeResult(output_path=path, status="invalid", reason=str(error))
if schema_mismatches:
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason=schema_mismatches[0],
)
if stats.video_messages == 0:
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason="single-camera MCAP has no /camera/video messages",
)
if stats.depth_messages == 0:
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason="single-camera MCAP has no /camera/depth messages",
)
if stats.video_messages != stats.depth_messages:
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason=(
"single-camera video/depth count mismatch: "
f"video_messages={stats.video_messages} depth_messages={stats.depth_messages}"
),
)
if stats.calibration_messages != 1:
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason=(
"single-camera calibration count mismatch: "
f"/camera/calibration={stats.calibration_messages}"
),
)
if stats.depth_calibration_messages not in (0, 1):
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason=(
"single-camera depth calibration count mismatch: "
f"/camera/depth_calibration={stats.depth_calibration_messages}"
),
)
if stats.pose_messages > stats.video_messages:
return batch.OutputProbeResult(
output_path=path,
status="invalid",
reason=(
"single-camera pose count exceeds video count: "
f"pose_messages={stats.pose_messages} video_messages={stats.video_messages}"
),
)
return batch.OutputProbeResult(output_path=path, status="valid")
def summarize_mcap(path: Path) -> McapSummary:
reader_module = batch.load_mcap_reader()
summary = McapSummary(path=path)
camera_labels: set[str] = set()
saw_single_camera_topic = False
saw_namespaced_camera_topic = False
saw_bundle_manifest = False
with path.open("rb") as stream:
reader = reader_module.make_reader(stream)
for schema, channel, message in reader.iter_messages():
topic = channel.topic
schema_name = schema.name if schema is not None else None
if topic == BUNDLE_TOPIC:
summary.layout = "bundled"
saw_bundle_manifest = True
if schema is None or schema.name != "cvmmap_streamer.BundleManifest":
summary.validation_status = "invalid"
summary.validation_reason = f"bundle topic '{BUNDLE_TOPIC}' is missing the BundleManifest schema"
continue
bundle_class, present_value = batch.load_bundle_manifest_type(schema.data)
bundle = bundle_class()
bundle.ParseFromString(message.data)
summary.bundle_count += 1
summary.policy_counts[policy_name_from_message(bundle)] += 1
for member in bundle.members:
label = str(member.camera_label)
camera_labels.add(label)
stats = summary.camera_stats.setdefault(label, CameraSummary())
status_name = status_name_from_member(member, present_value)
if status_name == "BUNDLE_MEMBER_STATUS_PRESENT" or status_name == "PRESENT":
stats.present_members += 1
elif status_name == "BUNDLE_MEMBER_STATUS_CORRUPTED_GAP":
stats.corrupted_gap_members += 1
else:
stats.unknown_members += 1
continue
if topic.startswith(CAMERA_PREFIX):
saw_single_camera_topic = True
if summary.layout == "unknown":
summary.layout = "single-camera"
record_single_camera_topic(summary, topic, schema_name)
continue
match = NAMESPACED_TOPIC_PATTERN.match(topic)
if not match:
continue
label, stream_kind = match.groups()
if label == "camera":
continue
saw_namespaced_camera_topic = True
if summary.layout == "unknown":
summary.layout = "copy"
camera_labels.add(label)
stats = summary.camera_stats.setdefault(label, CameraSummary())
if stream_kind == "video":
stats.video_messages += 1
elif stream_kind == "depth":
stats.depth_messages += 1
elif stream_kind == "pose":
stats.pose_messages += 1
elif stream_kind == "calibration":
stats.calibration_messages += 1
elif stream_kind == "depth_calibration":
stats.depth_calibration_messages += 1
elif stream_kind == "body":
stats.body_messages += 1
if saw_single_camera_topic and saw_namespaced_camera_topic:
summary.layout = "mixed"
summary.validation_status = "invalid"
summary.validation_reason = "MCAP mixes single-camera and multi-camera topic layouts"
return summary
if saw_namespaced_camera_topic and not saw_bundle_manifest and summary.layout == "bundled":
summary.layout = "copy"
if summary.layout == "single-camera":
summary.camera_labels = ("camera",)
probe = probe_single_camera_output(path)
summary.validation_status = probe.status
summary.validation_reason = probe.reason
if summary.schema_mismatches and summary.validation_status == "valid":
summary.validation_status = "invalid"
summary.validation_reason = summary.schema_mismatches[0]
return summary
summary.camera_labels = tuple(sorted(camera_labels))
if summary.camera_labels:
probe = batch.probe_output(
path,
summary.camera_labels,
layout=summary.layout,
bundle_topic=BUNDLE_TOPIC if summary.layout == "bundled" else None,
)
summary.validation_status = probe.status
summary.validation_reason = probe.reason
else:
summary.validation_status = "invalid"
summary.validation_reason = "could not infer a supported MCAP layout from topics"
return summary
def print_summary(summary: McapSummary) -> None:
status_text = summary.validation_status
layout_text = summary.layout
cameras_text = ",".join(summary.camera_labels) if summary.camera_labels else "-"
policy_text = ",".join(
f"{policy}={count}"
for policy, count in sorted(summary.policy_counts.items())
) or "-"
click.echo(
f"{status_text}: {summary.path} [{layout_text}] bundles={summary.bundle_count} "
f"cameras={cameras_text} policies={policy_text}"
)
for label in summary.camera_labels:
stats = summary.camera_stats[label]
click.echo(
" "
f"{label}: video={stats.video_messages} depth={stats.depth_messages} pose={stats.pose_messages} "
f"calibration={stats.calibration_messages} depth_calibration={stats.depth_calibration_messages} "
f"body={stats.body_messages} present={stats.present_members} "
f"corrupted_gap={stats.corrupted_gap_members} unknown={stats.unknown_members}"
)
if summary.validation_reason:
click.echo(f" reason: {summary.validation_reason}")
@click.command()
@click.argument("paths", nargs=-1, type=click.Path(path_type=Path))
@click.option("--recursive", is_flag=True, help="Recursively discover *.mcap files under directory inputs.")
def main(paths: tuple[Path, ...], recursive: bool) -> None:
"""Summarize and validate legacy single-camera, bundled, or copy-layout MCAP files."""
if not paths:
raise click.ClickException("provide at least one MCAP file or directory")
mcap_paths = iter_mcap_paths(paths, recursive=recursive)
if not mcap_paths:
raise click.ClickException("no .mcap files matched the provided inputs")
invalid_count = 0
for path in mcap_paths:
summary = summarize_mcap(path)
print_summary(summary)
if summary.validation_status != "valid":
invalid_count += 1
if invalid_count:
raise SystemExit(1)
if __name__ == "__main__":
main()
-400
@@ -1,400 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import BinaryIO
import click
import cv2
import numpy as np
import mcap_rgbd_viewer as viewer
@dataclass(slots=True, frozen=True)
class CameraCalibration:
width: int
height: int
distortion_model: str
distortion: tuple[float, float, float, float, float]
intrinsic_matrix: tuple[float, float, float, float, float, float, float, float, float]
rectification_matrix: tuple[float, float, float, float, float, float, float, float, float]
projection_matrix: tuple[float, float, float, float, float, float, float, float, float, float, float, float]
@property
def fx(self) -> float:
return self.intrinsic_matrix[0]
@property
def fy(self) -> float:
return self.intrinsic_matrix[4]
@property
def cx(self) -> float:
return self.intrinsic_matrix[2]
@property
def cy(self) -> float:
return self.intrinsic_matrix[5]
@dataclass(slots=True, frozen=True)
class CalibrationPair:
video: CameraCalibration
depth: CameraCalibration
@dataclass(slots=True, frozen=True)
class AffineMapping:
scale_x: float
scale_y: float
offset_x: float
offset_y: float
def matrix(self) -> np.ndarray:
return np.array(
[
[self.scale_x, 0.0, self.offset_x],
[0.0, self.scale_y, self.offset_y],
],
dtype=np.float32,
)
def select_camera_label(layout_info: viewer.McapLayoutInfo, camera_label: str | None) -> str:
if camera_label is None:
return layout_info.camera_labels[0]
if camera_label not in layout_info.camera_labels:
available = ", ".join(layout_info.camera_labels)
raise click.ClickException(f"camera label '{camera_label}' not found; available: {available}")
return camera_label
def load_calibration(path: Path, topic: str) -> CameraCalibration:
reader_module = viewer.load_mcap_reader()
with path.open("rb") as stream:
reader = reader_module.make_reader(stream)
for schema, channel, message in reader.iter_messages():
if channel.topic != topic:
continue
if schema is None or schema.name != "foxglove.CameraCalibration":
raise click.ClickException(f"unexpected schema on {topic}: {schema.name if schema else 'none'}")
message_class = viewer.load_message_class(schema.data, "foxglove.CameraCalibration")
payload = message_class()
payload.ParseFromString(message.data)
return CameraCalibration(
width=int(payload.width),
height=int(payload.height),
distortion_model=str(payload.distortion_model),
distortion=tuple(float(value) for value in payload.D[:5]),
intrinsic_matrix=tuple(float(value) for value in payload.K[:9]),
rectification_matrix=tuple(float(value) for value in payload.R[:9]),
projection_matrix=tuple(float(value) for value in payload.P[:12]),
)
raise click.ClickException(f"missing calibration topic {topic} in {path}")
def load_calibration_pair(path: Path, layout_info: viewer.McapLayoutInfo, camera_label: str) -> CalibrationPair:
video_topic = viewer.topic_for(layout_info.layout, camera_label, "calibration")
depth_topic = viewer.topic_for(layout_info.layout, camera_label, "depth_calibration")
return CalibrationPair(
video=load_calibration(path, video_topic),
depth=load_calibration(path, depth_topic),
)
def mapping_from_depth_to_rgb(pair: CalibrationPair) -> AffineMapping:
scale_x = pair.video.fx / pair.depth.fx
scale_y = pair.video.fy / pair.depth.fy
offset_x = pair.video.cx - (scale_x * pair.depth.cx)
offset_y = pair.video.cy - (scale_y * pair.depth.cy)
return AffineMapping(scale_x=scale_x, scale_y=scale_y, offset_x=offset_x, offset_y=offset_y)
def mapping_from_rgb_to_depth(pair: CalibrationPair) -> AffineMapping:
scale_x = pair.depth.fx / pair.video.fx
scale_y = pair.depth.fy / pair.video.fy
offset_x = pair.depth.cx - (scale_x * pair.video.cx)
offset_y = pair.depth.cy - (scale_y * pair.video.cy)
return AffineMapping(scale_x=scale_x, scale_y=scale_y, offset_x=offset_x, offset_y=offset_y)
def describe_mapping(pair: CalibrationPair) -> str:
depth_to_rgb = mapping_from_depth_to_rgb(pair)
rgb_to_depth = mapping_from_rgb_to_depth(pair)
anisotropic = abs(depth_to_rgb.scale_x - depth_to_rgb.scale_y) > 1e-6
has_offset = abs(depth_to_rgb.offset_x) > 1e-3 or abs(depth_to_rgb.offset_y) > 1e-3
shape = "anisotropic stretch" if anisotropic else "uniform scale"
if has_offset:
shape += " with offset"
else:
shape += " with zero offset"
return (
f"mapping type: {shape}\n"
f"depth->rgb: u_rgb = {depth_to_rgb.scale_x:.9f} * u_depth + {depth_to_rgb.offset_x:.9f}\n"
f"depth->rgb: v_rgb = {depth_to_rgb.scale_y:.9f} * v_depth + {depth_to_rgb.offset_y:.9f}\n"
f"rgb->depth: u_depth = {rgb_to_depth.scale_x:.9f} * u_rgb + {rgb_to_depth.offset_x:.9f}\n"
f"rgb->depth: v_depth = {rgb_to_depth.scale_y:.9f} * v_rgb + {rgb_to_depth.offset_y:.9f}"
)
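# Worked example with hypothetical intrinsics: fx_rgb=700, fx_depth=350 gives
# scale_x = 700 / 350 = 2.0, and cx_rgb=640, cx_depth=320 gives
# offset_x = 640 - 2.0 * 320 = 0.0, i.e. a pure 2x upscale along u.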
def is_identity_rectification(calibration: CameraCalibration) -> bool:
expected = (1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)
return max(abs(value - target) for value, target in zip(calibration.rectification_matrix, expected, strict=True)) < 1e-6
def has_zero_distortion(calibration: CameraCalibration) -> bool:
return max(abs(value) for value in calibration.distortion) < 1e-9
def depth_pixel_to_rgb(depth_u: float, depth_v: float, pair: CalibrationPair) -> tuple[float, float]:
mapping = mapping_from_depth_to_rgb(pair)
return (
(mapping.scale_x * depth_u) + mapping.offset_x,
(mapping.scale_y * depth_v) + mapping.offset_y,
)
def rgb_pixel_to_depth(rgb_u: float, rgb_v: float, pair: CalibrationPair) -> tuple[float, float]:
mapping = mapping_from_rgb_to_depth(pair)
return (
(mapping.scale_x * rgb_u) + mapping.offset_x,
(mapping.scale_y * rgb_v) + mapping.offset_y,
)
def align_depth_to_rgb(
depth_image: np.ndarray,
pair: CalibrationPair,
*,
interpolation: int = cv2.INTER_NEAREST,
) -> np.ndarray:
mapping = mapping_from_depth_to_rgb(pair)
return cv2.warpAffine(
depth_image,
mapping.matrix(),
(pair.video.width, pair.video.height),
flags=interpolation,
borderMode=cv2.BORDER_CONSTANT,
borderValue=0,
)
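# Depth above defaults to INTER_NEAREST so resampling never blends foreground
# and background values across a depth discontinuity; the RGB warp below can
# safely default to INTER_LINEAR because color tolerates interpolation.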
def align_rgb_to_depth(
rgb_image: np.ndarray,
pair: CalibrationPair,
*,
interpolation: int = cv2.INTER_LINEAR,
) -> np.ndarray:
mapping = mapping_from_rgb_to_depth(pair)
return cv2.warpAffine(
rgb_image,
mapping.matrix(),
(pair.depth.width, pair.depth.height),
flags=interpolation,
borderMode=cv2.BORDER_CONSTANT,
borderValue=0,
)
def load_depth_array(state: viewer.CameraViewState, depth_index: int, depth_cache_stream: BinaryIO | None = None) -> np.ndarray:
try:
import rvl
except ModuleNotFoundError as error:
raise click.ClickException(
"image export needs the optional rvl-impl binding; run `uv sync --extra viewer`"
) from error
ref = state.depth_frames[depth_index]
if depth_cache_stream is None:
with state.depth_cache_path.open("rb") as stream:
stream.seek(ref.offset)
payload = stream.read(ref.length)
else:
depth_cache_stream.seek(ref.offset)
payload = depth_cache_stream.read(ref.length)
if ref.encoding_name == "RVL_U16_LOSSLESS":
depth = rvl.decompress_u16(payload).reshape(ref.height, ref.width)
return depth.astype(np.float32)
if ref.encoding_name == "RVL_F32":
return rvl.decompress_f32(payload).reshape(ref.height, ref.width).astype(np.float32)
raise click.ClickException(f"unsupported depth encoding '{ref.encoding_name}'")
def resolve_present_slot(state: viewer.CameraViewState, frame_index: int) -> tuple[int, viewer.BundleSlot]:
if not state.slots:
raise click.ClickException("MCAP does not contain any viewable RGB+depth pairs")
clamped = max(0, min(frame_index, len(state.slots) - 1))
slot = state.slots[clamped]
if slot.video_index is not None and slot.depth_index is not None:
return clamped, slot
for delta in range(1, len(state.slots)):
left = clamped - delta
if left >= 0:
candidate = state.slots[left]
if candidate.video_index is not None and candidate.depth_index is not None:
return left, candidate
right = clamped + delta
if right < len(state.slots):
candidate = state.slots[right]
if candidate.video_index is not None and candidate.depth_index is not None:
return right, candidate
raise click.ClickException("could not find a present RGB+depth slot")
def colorize_depth(depth_m: np.ndarray, palette_name: str) -> np.ndarray:
valid = np.isfinite(depth_m) & (depth_m > 0.0)
normalized = np.zeros(depth_m.shape, dtype=np.uint8)
if valid.any():
lo = float(np.percentile(depth_m[valid], 5.0))
hi = float(np.percentile(depth_m[valid], 95.0))
span = max(hi - lo, 1e-6)
scaled = np.clip((depth_m - lo) / span, 0.0, 1.0)
normalized[valid] = np.round((1.0 - scaled[valid]) * 255.0).astype(np.uint8)
colormap = viewer.DEPTH_PALETTE_TO_OPENCV[palette_name]
if colormap is None:
colored = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR)
else:
colored = cv2.applyColorMap(normalized, colormap)
colored[~valid] = 0
return colored
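# colorize_depth above normalizes against the 5th..95th percentile of the
# valid depths so a few outliers do not wash out the preview, inverts the
# scale so nearer pixels render brighter, and paints invalid pixels black.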
def export_example_images(
path: Path,
*,
layout_info: viewer.McapLayoutInfo,
camera_label: str,
pair: CalibrationPair,
frame_index: int,
ffmpeg_bin: str,
output_dir: Path,
palette_name: str,
) -> None:
state = viewer.read_camera_state(
path,
layout_info=layout_info,
camera_label=camera_label,
ffmpeg_bin=ffmpeg_bin,
preview_width=pair.video.width,
)
try:
resolved_index, slot = resolve_present_slot(state, frame_index)
capture = cv2.VideoCapture(str(state.preview_video_path))
capture.set(cv2.CAP_PROP_POS_FRAMES, float(slot.video_index))
ok, rgb_bgr = capture.read()
capture.release()
if not ok or rgb_bgr is None:
raise click.ClickException(f"could not decode RGB frame {slot.video_index}")
depth_native = load_depth_array(state, slot.depth_index) / 1000.0
depth_aligned = align_depth_to_rgb(depth_native, pair, interpolation=cv2.INTER_NEAREST)
rgb_aligned = align_rgb_to_depth(rgb_bgr, pair, interpolation=cv2.INTER_LINEAR)
output_dir.mkdir(parents=True, exist_ok=True)
rgb_path = output_dir / "rgb_frame.png"
depth_native_path = output_dir / "depth_native_colorized.png"
depth_aligned_path = output_dir / "depth_aligned_to_rgb_colorized.png"
overlay_path = output_dir / "depth_overlay_on_rgb.png"
rgb_to_depth_path = output_dir / "rgb_aligned_to_depth.png"
depth_native_color = colorize_depth(depth_native, palette_name)
depth_aligned_color = colorize_depth(depth_aligned, palette_name)
overlay = cv2.addWeighted(rgb_bgr, 0.72, depth_aligned_color, 0.28, 0.0)
cv2.imwrite(str(rgb_path), rgb_bgr)
cv2.imwrite(str(depth_native_path), depth_native_color)
cv2.imwrite(str(depth_aligned_path), depth_aligned_color)
cv2.imwrite(str(overlay_path), overlay)
cv2.imwrite(str(rgb_to_depth_path), rgb_aligned)
click.echo(f"exported slot index: {resolved_index}")
click.echo(f"rgb frame: {rgb_path}")
click.echo(f"native depth: {depth_native_path}")
click.echo(f"depth aligned to rgb: {depth_aligned_path}")
click.echo(f"depth overlay on rgb: {overlay_path}")
click.echo(f"rgb aligned to depth: {rgb_to_depth_path}")
finally:
state.close()
@click.command()
@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True, dir_okay=False))
@click.option("--camera-label", type=str, help="Camera label to inspect; defaults to the first camera in the MCAP.")
@click.option("--frame-index", type=int, default=0, show_default=True, help="Frame or bundle index used for example image export.")
@click.option("--output-dir", type=click.Path(path_type=Path, file_okay=False), help="When set, export an aligned depth example and overlay PNGs here.")
@click.option("--ffmpeg-bin", default="ffmpeg", show_default=True, help="ffmpeg binary used to decode MCAP video for the example export.")
@click.option(
"--depth-palette",
type=click.Choice(tuple(viewer.DEPTH_PALETTE_TO_OPENCV.keys()), case_sensitive=False),
default="Turbo",
show_default=True,
help="Depth palette used for exported example PNGs.",
)
def main(
mcap_path: Path,
camera_label: str | None,
frame_index: int,
output_dir: Path | None,
ffmpeg_bin: str,
depth_palette: str,
) -> None:
"""Explain and demonstrate how depth/rgb alignment works for an exported MCAP."""
layout_info = viewer.infer_layout(mcap_path)
selected_camera = select_camera_label(layout_info, camera_label)
pair = load_calibration_pair(mcap_path, layout_info, selected_camera)
click.echo(f"path: {mcap_path}")
click.echo(f"layout: {layout_info.layout}")
click.echo(f"camera: {selected_camera}")
click.echo(f"video calibration: {pair.video.width}x{pair.video.height}")
click.echo(f"depth calibration: {pair.depth.width}x{pair.depth.height}")
click.echo(
"video intrinsics: "
f"fx={pair.video.fx:.6f} fy={pair.video.fy:.6f} cx={pair.video.cx:.6f} cy={pair.video.cy:.6f}"
)
click.echo(
"depth intrinsics: "
f"fx={pair.depth.fx:.6f} fy={pair.depth.fy:.6f} cx={pair.depth.cx:.6f} cy={pair.depth.cy:.6f}"
)
click.echo(
"zero distortion / identity rectification: "
f"video={has_zero_distortion(pair.video) and is_identity_rectification(pair.video)} "
f"depth={has_zero_distortion(pair.depth) and is_identity_rectification(pair.depth)}"
)
click.echo(describe_mapping(pair))
sample_depth_u = pair.depth.width * 0.5
sample_depth_v = pair.depth.height * 0.5
mapped_rgb_u, mapped_rgb_v = depth_pixel_to_rgb(sample_depth_u, sample_depth_v, pair)
click.echo(
"sample center mapping: "
f"depth({sample_depth_u:.3f}, {sample_depth_v:.3f}) -> rgb({mapped_rgb_u:.3f}, {mapped_rgb_v:.3f})"
)
sample_rgb_u = pair.video.width * 0.5
sample_rgb_v = pair.video.height * 0.5
mapped_depth_u, mapped_depth_v = rgb_pixel_to_depth(sample_rgb_u, sample_rgb_v, pair)
click.echo(
"sample inverse mapping: "
f"rgb({sample_rgb_u:.3f}, {sample_rgb_v:.3f}) -> depth({mapped_depth_u:.3f}, {mapped_depth_v:.3f})"
)
if output_dir is not None:
export_example_images(
mcap_path,
layout_info=layout_info,
camera_label=selected_camera,
pair=pair,
frame_index=frame_index,
ffmpeg_bin=ffmpeg_bin,
output_dir=output_dir,
palette_name=depth_palette,
)
if __name__ == "__main__":
main()
-630
@@ -1,630 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
from dataclasses import asdict, dataclass, field
import json
from pathlib import Path
import subprocess
import tempfile
import click
import cv2
import numpy as np
from google.protobuf import descriptor_pb2, descriptor_pool, message_factory, timestamp_pb2
import mcap_bundle_validator as bundle_validator
import zed_batch_svo_to_mcap as batch
BUNDLE_TOPIC = "/bundle"
DEPTH_PALETTE_TO_OPENCV = {
"Gray": None,
"Turbo": cv2.COLORMAP_TURBO,
"Inferno": cv2.COLORMAP_INFERNO,
"Plasma": cv2.COLORMAP_PLASMA,
"Viridis": cv2.COLORMAP_VIRIDIS,
"Cividis": cv2.COLORMAP_CIVIDIS,
"Magma": cv2.COLORMAP_MAGMA,
"Parula": cv2.COLORMAP_PARULA,
}
VIDEO_INPUT_FORMATS = {"h264": "h264", "h265": "hevc"}
_MESSAGE_CLASS_CACHE: dict[tuple[bytes, str], object] = {}
@dataclass(slots=True)
class TimestampRange:
start_ns: int | None = None
end_ns: int | None = None
def update(self, timestamp_ns: int) -> None:
if self.start_ns is None or timestamp_ns < self.start_ns:
self.start_ns = timestamp_ns
if self.end_ns is None or timestamp_ns > self.end_ns:
self.end_ns = timestamp_ns
@dataclass(slots=True)
class CameraRanges:
video: TimestampRange = field(default_factory=TimestampRange)
depth: TimestampRange = field(default_factory=TimestampRange)
@dataclass(slots=True)
class RecipeSummary:
base: bundle_validator.McapSummary
bundle_timestamps: TimestampRange = field(default_factory=TimestampRange)
camera_ranges: dict[str, CameraRanges] = field(default_factory=dict)
@dataclass(slots=True)
class VideoSample:
timestamp_ns: int
format_name: str
stream_index: int
@dataclass(slots=True)
class DepthSample:
timestamp_ns: int
payload: bytes
stream_index: int
width: int
height: int
encoding_name: str
source_unit_name: str
storage_unit_name: str
@dataclass(slots=True)
class BundleMemberSample:
bundle_index: int
bundle_timestamp_ns: int
member_timestamp_ns: int | None
status_name: str
corrupted_frames_skipped: int
member_stream_index: int
def load_message_class(schema_data: bytes, message_type_name: str):
cache_key = (schema_data, message_type_name)
cached = _MESSAGE_CLASS_CACHE.get(cache_key)
if cached is not None:
return cached
descriptor_set = descriptor_pb2.FileDescriptorSet()
descriptor_set.ParseFromString(schema_data)
pool = descriptor_pool.DescriptorPool()
has_embedded_timestamp = any(
file_descriptor.name == "google/protobuf/timestamp.proto"
for file_descriptor in descriptor_set.file
)
if has_embedded_timestamp:
for file_descriptor in descriptor_set.file:
if file_descriptor.name == "google/protobuf/timestamp.proto":
pool.Add(file_descriptor)
break
else:
pool.AddSerializedFile(timestamp_pb2.DESCRIPTOR.serialized_pb)
for file_descriptor in descriptor_set.file:
if file_descriptor.name == "google/protobuf/timestamp.proto":
continue
pool.Add(file_descriptor)
message_descriptor = pool.FindMessageTypeByName(message_type_name)
message_class = message_factory.GetMessageClass(message_descriptor)
_MESSAGE_CLASS_CACHE[cache_key] = message_class
return message_class
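# load_message_class above builds a private DescriptorPool per schema blob,
# which sidesteps duplicate-registration errors in the default pool when
# multiple MCAPs embed their own copies of the same .proto files; the
# well-known Timestamp descriptor is taken from the local protobuf runtime
# whenever the schema does not embed it, and results are cached by
# (schema bytes, message type name).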
def parse_timestamp_ns(timestamp_message: object, fallback_log_time_ns: int) -> int:
seconds = int(getattr(timestamp_message, "seconds", 0))
nanos = int(getattr(timestamp_message, "nanos", 0))
if seconds == 0 and nanos == 0:
return fallback_log_time_ns
return seconds * 1_000_000_000 + nanos
def format_timestamp_ns(timestamp_ns: int | None) -> str:
if timestamp_ns is None:
return "-"
seconds, nanos = divmod(timestamp_ns, 1_000_000_000)
return f"{seconds}.{nanos:09d}"
def format_range(timestamp_range: TimestampRange) -> str:
return f"{format_timestamp_ns(timestamp_range.start_ns)} .. {format_timestamp_ns(timestamp_range.end_ns)}"
def enum_name(message: object, field_name: str) -> str:
field_descriptor = message.DESCRIPTOR.fields_by_name[field_name]
value = int(getattr(message, field_name))
resolved = field_descriptor.enum_type.values_by_number.get(value)
return resolved.name if resolved is not None else str(value)
def is_present_status(status_name: str) -> bool:
return status_name in {"PRESENT", "BUNDLE_MEMBER_STATUS_PRESENT"}
def topic_for(layout: str, camera_label: str, kind: str) -> str:
if layout == "single-camera":
return f"/camera/{kind}"
if layout not in {"copy", "bundled"}:
raise click.ClickException(f"unsupported layout '{layout}'")
return f"/{camera_label}/{kind}"
def selected_camera_label(base_summary: bundle_validator.McapSummary, camera_label: str | None) -> str:
if camera_label is None:
return base_summary.camera_labels[0]
if camera_label not in base_summary.camera_labels:
available = ", ".join(base_summary.camera_labels)
raise click.ClickException(f"camera label '{camera_label}' not found. available: {available}")
return camera_label
def ensure_supported_layout(base_summary: bundle_validator.McapSummary) -> None:
if base_summary.layout not in {"single-camera", "copy", "bundled"}:
reason = base_summary.validation_reason or "unsupported MCAP layout"
raise click.ClickException(reason)
def summarize_mcap(path: Path) -> RecipeSummary:
base_summary = bundle_validator.summarize_mcap(path)
camera_ranges = {
label: CameraRanges()
for label in (base_summary.camera_labels or ("camera",))
}
bundle_timestamps = TimestampRange()
reader_module = batch.load_mcap_reader()
with path.open("rb") as stream:
reader = reader_module.make_reader(stream)
for schema, channel, message in reader.iter_messages():
topic = channel.topic
if topic == BUNDLE_TOPIC and schema is not None and schema.name == "cvmmap_streamer.BundleManifest":
bundle_class, _present_value = batch.load_bundle_manifest_type(schema.data)
bundle_message = bundle_class()
bundle_message.ParseFromString(message.data)
bundle_timestamps.update(parse_timestamp_ns(bundle_message.timestamp, int(message.log_time)))
continue
if topic.endswith("/video"):
if topic == "/camera/video":
label = "camera"
else:
label = topic.removeprefix("/").removesuffix("/video")
if schema is None or schema.name != "foxglove.CompressedVideo" or label not in camera_ranges:
continue
message_class = load_message_class(schema.data, "foxglove.CompressedVideo")
payload = message_class()
payload.ParseFromString(message.data)
camera_ranges[label].video.update(parse_timestamp_ns(payload.timestamp, int(message.log_time)))
continue
if topic.endswith("/depth"):
if topic == "/camera/depth":
label = "camera"
else:
label = topic.removeprefix("/").removesuffix("/depth")
if schema is None or schema.name != "cvmmap_streamer.DepthMap" or label not in camera_ranges:
continue
message_class = load_message_class(schema.data, "cvmmap_streamer.DepthMap")
payload = message_class()
payload.ParseFromString(message.data)
camera_ranges[label].depth.update(parse_timestamp_ns(payload.timestamp, int(message.log_time)))
return RecipeSummary(
base=base_summary,
bundle_timestamps=bundle_timestamps,
camera_ranges=camera_ranges,
)
def print_summary(summary: RecipeSummary) -> None:
base = summary.base
click.echo(f"path: {base.path}")
click.echo(f"validation: {base.validation_status}")
if base.validation_reason:
click.echo(f"validation reason: {base.validation_reason}")
click.echo(f"layout: {base.layout}")
click.echo(f"camera labels: {', '.join(base.camera_labels) if base.camera_labels else '-'}")
if base.layout == "bundled":
click.echo(f"bundle count: {base.bundle_count}")
click.echo(f"bundle timestamp range: {format_range(summary.bundle_timestamps)}")
policy_text = ", ".join(
f"{policy}={count}"
for policy, count in sorted(base.policy_counts.items())
) or "-"
click.echo(f"bundle policies: {policy_text}")
for label in base.camera_labels:
stats = base.camera_stats[label]
ranges = summary.camera_ranges[label]
click.echo(f"camera: {label}")
click.echo(f" video messages: {stats.video_messages}")
click.echo(f" video timestamp range: {format_range(ranges.video)}")
click.echo(f" depth messages: {stats.depth_messages}")
click.echo(f" depth timestamp range: {format_range(ranges.depth)}")
click.echo(f" pose messages: {stats.pose_messages}")
click.echo(f" calibration messages: {stats.calibration_messages}")
click.echo(f" depth calibration messages: {stats.depth_calibration_messages}")
click.echo(f" body messages: {stats.body_messages}")
if base.layout == "bundled":
click.echo(f" present bundle members: {stats.present_members}")
click.echo(f" corrupted gap members: {stats.corrupted_gap_members}")
click.echo(f" unknown bundle members: {stats.unknown_members}")
def decode_depth_array(depth_sample: DepthSample) -> np.ndarray:
try:
import rvl
except ModuleNotFoundError as error:
raise click.ClickException(
"depth export needs the optional rvl-impl binding; run `uv sync --extra viewer`"
) from error
if depth_sample.encoding_name == "RVL_U16_LOSSLESS":
depth = rvl.decompress_u16(depth_sample.payload).astype(np.float32)
if (
depth_sample.storage_unit_name == "STORAGE_UNIT_MILLIMETER"
or depth_sample.source_unit_name == "DEPTH_UNIT_MILLIMETER"
):
return depth / 1000.0
return depth
if depth_sample.encoding_name == "RVL_F32":
return rvl.decompress_f32(depth_sample.payload).astype(np.float32)
raise click.ClickException(f"unsupported depth encoding '{depth_sample.encoding_name}'")
def colorize_depth(
depth_m: np.ndarray,
*,
depth_min_m: float,
depth_max_m: float,
depth_palette_name: str,
) -> np.ndarray:
valid = np.isfinite(depth_m) & (depth_m > 0.0)
span = max(depth_max_m - depth_min_m, 1e-6)
clipped = np.clip((depth_m - depth_min_m) / span, 0.0, 1.0)
normalized = np.zeros(depth_m.shape, dtype=np.uint8)
normalized[valid] = np.round((1.0 - clipped[valid]) * 255.0).astype(np.uint8)
colormap = DEPTH_PALETTE_TO_OPENCV[depth_palette_name]
if colormap is None:
colored = cv2.cvtColor(normalized, cv2.COLOR_GRAY2BGR)
else:
colored = cv2.applyColorMap(normalized, colormap)
colored[~valid] = 0
return colored
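# Unlike a percentile-based auto-range, this variant clips to the fixed
# --depth-min-m/--depth-max-m window passed by the caller, so previews
# exported from different samples share one comparable scale.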
def export_rgb_frame(
*,
ffmpeg_bin: str,
raw_video_path: Path,
video_format: str,
frame_index: int,
output_path: Path,
) -> None:
input_format = VIDEO_INPUT_FORMATS.get(video_format)
if input_format is None:
raise click.ClickException(f"unsupported video format '{video_format}'")
command = [
ffmpeg_bin,
"-hide_banner",
"-loglevel",
"error",
"-y",
"-fflags",
"+genpts",
"-f",
input_format,
"-i",
str(raw_video_path),
"-vf",
f"select=eq(n\\,{frame_index})",
"-frames:v",
"1",
str(output_path),
]
try:
completed = subprocess.run(command, check=False, capture_output=True, text=True)
except FileNotFoundError as error:
raise click.ClickException(f"ffmpeg binary not found: {ffmpeg_bin}") from error
if completed.returncode != 0:
reason = completed.stderr.strip() or completed.stdout.strip() or "ffmpeg failed to export the RGB frame"
raise click.ClickException(reason)
if not output_path.is_file():
raise click.ClickException(f"ffmpeg did not write {output_path}")
def collect_sample_data(
path: Path,
*,
layout: str,
camera_label: str,
sample_index: int,
) -> tuple[VideoSample, DepthSample, BundleMemberSample | None, bytes]:
reader_module = batch.load_mcap_reader()
video_topic = topic_for(layout, camera_label, "video")
depth_topic = topic_for(layout, camera_label, "depth")
video_sample: VideoSample | None = None
depth_sample: DepthSample | None = None
bundle_sample: BundleMemberSample | None = None
video_index = 0
depth_index = 0
bundle_member_index = 0
video_format: str | None = None
with tempfile.TemporaryDirectory(prefix="mcap_rgbd_example_") as temp_dir_name:
raw_video_path = Path(temp_dir_name) / "stream.bin"
with raw_video_path.open("wb") as raw_video_stream:
with path.open("rb") as stream:
reader = reader_module.make_reader(stream)
for schema, channel, message in reader.iter_messages():
topic = channel.topic
if layout == "bundled" and topic == BUNDLE_TOPIC and bundle_sample is None:
if schema is None or schema.name != "cvmmap_streamer.BundleManifest":
continue
bundle_class, present_value = batch.load_bundle_manifest_type(schema.data)
bundle_message = bundle_class()
bundle_message.ParseFromString(message.data)
for member in bundle_message.members:
if str(member.camera_label) != camera_label:
continue
status_name = bundle_validator.status_name_from_member(member, present_value)
member_timestamp_ns = None
if member.HasField("timestamp"):
member_timestamp_ns = parse_timestamp_ns(member.timestamp, int(message.log_time))
if is_present_status(status_name):
if bundle_member_index == sample_index:
bundle_sample = BundleMemberSample(
bundle_index=int(bundle_message.bundle_index),
bundle_timestamp_ns=parse_timestamp_ns(bundle_message.timestamp, int(message.log_time)),
member_timestamp_ns=member_timestamp_ns,
status_name=status_name,
corrupted_frames_skipped=int(getattr(member, "corrupted_frames_skipped", 0)),
member_stream_index=bundle_member_index,
)
bundle_member_index += 1
break
continue
if topic == video_topic:
if schema is None or schema.name != "foxglove.CompressedVideo":
raise click.ClickException(f"unexpected schema on {video_topic}: {schema.name if schema else 'none'}")
message_class = load_message_class(schema.data, "foxglove.CompressedVideo")
payload = message_class()
payload.ParseFromString(message.data)
frame_format = str(payload.format)
if frame_format not in VIDEO_INPUT_FORMATS:
raise click.ClickException(f"unsupported video format '{frame_format}' on {video_topic}")
if video_format is None:
video_format = frame_format
elif video_format != frame_format:
raise click.ClickException(
f"inconsistent video format on {video_topic}: {video_format} then {frame_format}"
)
if video_index <= sample_index:
raw_video_stream.write(bytes(payload.data))
if video_index == sample_index:
video_sample = VideoSample(
timestamp_ns=parse_timestamp_ns(payload.timestamp, int(message.log_time)),
format_name=frame_format,
stream_index=video_index,
)
video_index += 1
continue
if topic == depth_topic:
if schema is None or schema.name != "cvmmap_streamer.DepthMap":
raise click.ClickException(f"unexpected schema on {depth_topic}: {schema.name if schema else 'none'}")
message_class = load_message_class(schema.data, "cvmmap_streamer.DepthMap")
payload = message_class()
payload.ParseFromString(message.data)
if depth_index == sample_index:
depth_sample = DepthSample(
timestamp_ns=parse_timestamp_ns(payload.timestamp, int(message.log_time)),
payload=bytes(payload.data),
stream_index=depth_index,
width=int(payload.width),
height=int(payload.height),
encoding_name=enum_name(payload, "encoding"),
source_unit_name=enum_name(payload, "source_unit"),
storage_unit_name=enum_name(payload, "storage_unit"),
)
depth_index += 1
continue
if (
video_sample is not None
and depth_sample is not None
and (layout != "bundled" or bundle_sample is not None)
):
break
raw_video_bytes = raw_video_path.read_bytes()
if video_sample is None:
raise click.ClickException(f"sample index {sample_index} exceeded available video samples")
if depth_sample is None:
raise click.ClickException(f"sample index {sample_index} exceeded available depth samples")
if layout == "bundled" and bundle_sample is None:
raise click.ClickException(
f"could not map per-camera sample index {sample_index} to a bundle member for {camera_label}"
)
return video_sample, depth_sample, bundle_sample, raw_video_bytes
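# collect_sample_data above makes a single pass over the MCAP: every video
# access unit up to the requested index is appended to a raw elementary
# stream so the decoder still sees the keyframe and reference frames that
# precede the target, and iteration stops as soon as the video, depth, and
# (for bundled layouts) bundle-member samples have all been captured.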
def write_sample_outputs(
*,
path: Path,
layout: str,
output_dir: Path,
camera_label: str,
sample_index: int,
video_sample: VideoSample,
depth_sample: DepthSample,
bundle_sample: BundleMemberSample | None,
raw_video_bytes: bytes,
ffmpeg_bin: str,
depth_min_m: float,
depth_max_m: float,
depth_palette_name: str,
) -> None:
output_dir.mkdir(parents=True, exist_ok=True)
rgb_output_path = output_dir / "rgb.png"
depth_output_path = output_dir / "depth.npy"
depth_preview_path = output_dir / "depth_preview.png"
metadata_path = output_dir / "sample_metadata.json"
with tempfile.TemporaryDirectory(prefix="mcap_rgbd_example_export_") as temp_dir_name:
raw_video_path = Path(temp_dir_name) / f"sample.{video_sample.format_name}"
raw_video_path.write_bytes(raw_video_bytes)
export_rgb_frame(
ffmpeg_bin=ffmpeg_bin,
raw_video_path=raw_video_path,
video_format=video_sample.format_name,
frame_index=sample_index,
output_path=rgb_output_path,
)
depth_m = decode_depth_array(depth_sample)
np.save(depth_output_path, depth_m)
depth_preview = colorize_depth(
depth_m,
depth_min_m=depth_min_m,
depth_max_m=depth_max_m,
depth_palette_name=depth_palette_name,
)
if not cv2.imwrite(str(depth_preview_path), depth_preview):
raise click.ClickException(f"failed to write depth preview to {depth_preview_path}")
metadata = {
"mcap_path": str(path),
"layout": layout,
}
metadata.update(
{
"camera_label": camera_label,
"sample_index": sample_index,
"video_stream_index": video_sample.stream_index,
"video_timestamp_ns": video_sample.timestamp_ns,
"video_timestamp": format_timestamp_ns(video_sample.timestamp_ns),
"video_format": video_sample.format_name,
"depth_stream_index": depth_sample.stream_index,
"depth_timestamp_ns": depth_sample.timestamp_ns,
"depth_timestamp": format_timestamp_ns(depth_sample.timestamp_ns),
"depth_width": depth_sample.width,
"depth_height": depth_sample.height,
"depth_encoding": depth_sample.encoding_name,
"depth_source_unit": depth_sample.source_unit_name,
"depth_storage_unit": depth_sample.storage_unit_name,
"depth_palette": depth_palette_name,
"depth_min_m": depth_min_m,
"depth_max_m": depth_max_m,
"rgb_output_path": str(rgb_output_path),
"depth_output_path": str(depth_output_path),
"depth_preview_path": str(depth_preview_path),
}
)
if bundle_sample is not None:
metadata["bundle"] = asdict(bundle_sample)
metadata["bundle"]["bundle_timestamp"] = format_timestamp_ns(bundle_sample.bundle_timestamp_ns)
metadata["bundle"]["member_timestamp"] = format_timestamp_ns(bundle_sample.member_timestamp_ns)
metadata_path.write_text(json.dumps(metadata, indent=2, sort_keys=True) + "\n", encoding="utf-8")
@click.group()
def main() -> None:
"""Small MCAP RGBD example helper for bundled, copy, and legacy single-camera MCAP files."""
@main.command("summary")
@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True))
def summary_command(mcap_path: Path) -> None:
"""Print a compact metadata summary for a single MCAP file."""
summary = summarize_mcap(mcap_path.resolve())
ensure_supported_layout(summary.base)
print_summary(summary)
@main.command("export-sample")
@click.argument("mcap_path", type=click.Path(path_type=Path, exists=True))
@click.option("--camera-label", help="Camera label to export. Defaults to `camera` for legacy files or the first sorted namespaced label.")
@click.option("--sample-index", default=0, show_default=True, type=click.IntRange(min=0), help="Zero-based per-camera RGB+depth sample index.")
@click.option("--output-dir", required=True, type=click.Path(path_type=Path), help="Directory to write rgb.png, depth.npy, depth_preview.png, and sample_metadata.json.")
@click.option("--ffmpeg-bin", default="ffmpeg", show_default=True, help="ffmpeg binary used to decode the selected RGB frame.")
@click.option("--depth-min-m", default=0.2, show_default=True, type=float, help="Minimum displayed depth in meters for depth_preview.png.")
@click.option("--depth-max-m", default=5.0, show_default=True, type=float, help="Maximum displayed depth in meters for depth_preview.png.")
@click.option(
"--depth-palette",
default="Turbo",
show_default=True,
type=click.Choice(tuple(DEPTH_PALETTE_TO_OPENCV.keys()), case_sensitive=False),
help="Depth color palette for depth_preview.png.",
)
def export_sample_command(
mcap_path: Path,
camera_label: str | None,
sample_index: int,
output_dir: Path,
ffmpeg_bin: str,
depth_min_m: float,
depth_max_m: float,
depth_palette: str,
) -> None:
"""Export one per-camera RGB/depth sample from a bundled, copy, or legacy single-camera MCAP file."""
summary = summarize_mcap(mcap_path.resolve())
ensure_supported_layout(summary.base)
if summary.base.validation_status != "valid":
raise click.ClickException(
f"refusing to export from invalid MCAP: {summary.base.validation_reason or summary.base.validation_status}"
)
label = selected_camera_label(summary.base, camera_label)
stats = summary.base.camera_stats[label]
pair_count = min(stats.video_messages, stats.depth_messages)
if pair_count <= 0:
raise click.ClickException(f"camera '{label}' has no paired RGB+depth samples")
if sample_index >= pair_count:
raise click.ClickException(
f"--sample-index {sample_index} is outside 0..{pair_count - 1} for camera '{label}'"
)
selected_palette = next(
palette_name
for palette_name in DEPTH_PALETTE_TO_OPENCV
if palette_name.lower() == depth_palette.lower()
)
video_sample, depth_sample, bundle_sample, raw_video_bytes = collect_sample_data(
mcap_path.resolve(),
layout=summary.base.layout,
camera_label=label,
sample_index=sample_index,
)
write_sample_outputs(
path=mcap_path.resolve(),
layout=summary.base.layout,
output_dir=output_dir.expanduser().resolve(),
camera_label=label,
sample_index=sample_index,
video_sample=video_sample,
depth_sample=depth_sample,
bundle_sample=bundle_sample,
raw_video_bytes=raw_video_bytes,
ffmpeg_bin=ffmpeg_bin,
depth_min_m=depth_min_m,
depth_max_m=depth_max_m,
depth_palette_name=selected_palette,
)
click.echo(f"wrote sample export: {output_dir.expanduser().resolve()}")
if __name__ == "__main__":
main()
File diff suppressed because it is too large
-255
@@ -1,255 +0,0 @@
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Generic, Protocol, TypeVar
import click
from click.core import ParameterSource
class SegmentScanLike(Protocol):
segment_dir: Path
matched_files: int
is_valid: bool
ScanT = TypeVar("ScanT", bound=SegmentScanLike)
@dataclass(slots=True, frozen=True)
class SourceResolution(Generic[ScanT]):
mode: str
segment_dirs: tuple[Path, ...]
ignored_partial_dirs: tuple[ScanT, ...]
def dedupe_paths(paths: list[Path]) -> list[Path]:
ordered: list[Path] = []
seen: set[Path] = set()
for path in paths:
resolved = path.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
ordered.append(resolved)
return ordered
def parse_segments_csv(csv_path: Path, csv_root: Path | None) -> tuple[Path, ...]:
csv_path = csv_path.expanduser().resolve()
if not csv_path.is_file():
raise click.ClickException(f"CSV not found: {csv_path}")
if csv_root is not None:
base_dir = csv_root.expanduser().resolve()
if not base_dir.is_dir():
raise click.ClickException(f"CSV root is not a directory: {base_dir}")
else:
base_dir = csv_path.parent
segment_dirs: list[Path] = []
seen: set[Path] = set()
with csv_path.open(newline="") as stream:
reader = csv.DictReader(stream)
if reader.fieldnames is None or "segment_dir" not in reader.fieldnames:
raise click.ClickException(f"{csv_path} must contain a 'segment_dir' header")
for row_number, row in enumerate(reader, start=2):
raw_segment_dir = (row.get("segment_dir") or "").strip()
if not raw_segment_dir:
raise click.ClickException(f"{csv_path}:{row_number} has an empty segment_dir value")
segment_dir = Path(raw_segment_dir)
resolved = segment_dir if segment_dir.is_absolute() else base_dir / segment_dir
resolved = resolved.expanduser().resolve()
if resolved in seen:
continue
seen.add(resolved)
segment_dirs.append(resolved)
if not segment_dirs:
raise click.ClickException(f"{csv_path} did not contain any segment_dir rows")
return tuple(segment_dirs)
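# Relative segment_dir values are resolved against --csv-root when given and
# against the CSV's own directory otherwise; duplicates are dropped while
# preserving first-seen order.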
def discover_segment_dirs(
root: Path,
recursive: bool,
*,
scan_segment_dir: Callable[[Path], ScanT],
no_matches_message: Callable[[Path], str],
) -> SourceResolution[ScanT]:
resolved_root = root.expanduser().resolve()
if not resolved_root.is_dir():
raise click.ClickException(f"dataset root does not exist: {resolved_root}")
candidate_dirs = {resolved_root}
iterator = resolved_root.rglob("*") if recursive else resolved_root.iterdir()
for path in iterator:
if path.is_dir():
candidate_dirs.add(path.resolve())
valid_dirs: list[Path] = []
ignored_partial_dirs: list[ScanT] = []
for segment_dir in sorted(candidate_dirs):
scan = scan_segment_dir(segment_dir)
if scan.is_valid:
valid_dirs.append(segment_dir)
elif scan.matched_files > 0:
ignored_partial_dirs.append(scan)
if not valid_dirs:
raise click.ClickException(no_matches_message(resolved_root))
return SourceResolution(
mode="dataset-root",
segment_dirs=tuple(valid_dirs),
ignored_partial_dirs=tuple(ignored_partial_dirs),
)
def raise_if_recursive_flag_is_incompatible(
ctx: click.Context,
dataset_root: Path | None,
*,
dataset_root_flag: str = "--dataset-root",
) -> None:
if ctx.get_parameter_source("recursive") is ParameterSource.DEFAULT:
return
if dataset_root is None:
raise click.ClickException(f"--recursive/--no-recursive can only be used with {dataset_root_flag}")
def raise_for_legacy_source_args(
legacy_input_dir: Path | None,
legacy_segment_dirs: tuple[Path, ...],
*,
dataset_root_flag: str = "--dataset-root",
segment_flag: str = "--segment",
) -> None:
if legacy_input_dir is not None:
resolved = legacy_input_dir.expanduser().resolve()
raise click.ClickException(
f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}"
)
if legacy_segment_dirs:
resolved = legacy_segment_dirs[0].expanduser().resolve()
raise click.ClickException(
f"--segment-dir is no longer supported in this batch wrapper; use {segment_flag} {resolved} "
f"for an explicit segment directory, or {dataset_root_flag} <DATASET_ROOT> --recursive for discovery"
)
def raise_for_legacy_extra_args(
extra_args: list[str],
*,
dataset_root_flag: str = "--dataset-root",
) -> None:
if not extra_args:
return
first = extra_args[0]
if first.startswith("-"):
extras_text = " ".join(extra_args)
raise click.ClickException(f"unexpected extra arguments: {extras_text}")
resolved = Path(first).expanduser().resolve()
raise click.ClickException(
f"positional dataset paths are no longer supported; use {dataset_root_flag} {resolved}"
)
def raise_if_segment_path_looks_like_dataset_root(
segment_dir: Path,
*,
scan_segment_dir: Callable[[Path], ScanT],
dataset_root_flag: str = "--dataset-root",
segment_flag: str = "--segment",
) -> None:
resolved = segment_dir.expanduser().resolve()
if not resolved.is_dir():
return
scan = scan_segment_dir(resolved)
if scan.is_valid or scan.matched_files > 0:
return
nested_segments = _find_nested_valid_segment_dirs(resolved, scan_segment_dir=scan_segment_dir)
if not nested_segments:
return
example = nested_segments[0]
raise click.ClickException(
f"{resolved} looks like a dataset root, not a segment directory. "
f"{segment_flag} expects a directory that directly contains *_zedN.svo or *_zedN.svo2 files. "
f"Use {dataset_root_flag} {resolved} to discover nested segments such as {example}"
)
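# Heuristic: a --segment path with no matching SVO files of its own but with
# valid segment directories nested underneath was almost certainly meant as a
# dataset root, so fail early with a concrete --dataset-root suggestion
# instead of converting nothing.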
def resolve_sources(
dataset_root: Path | None,
segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
*,
scan_segment_dir: Callable[[Path], ScanT],
no_matches_message: Callable[[Path], str],
) -> SourceResolution[ScanT]:
source_count = sum(
(
1 if dataset_root is not None else 0,
1 if segment_dirs else 0,
1 if segments_csv is not None else 0,
)
)
if source_count != 1:
raise click.ClickException(
"provide exactly one source mode: --dataset-root, --segment, or --segments-csv"
)
if dataset_root is not None:
return discover_segment_dirs(
dataset_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=no_matches_message,
)
if segment_dirs:
ordered_dirs = dedupe_paths(list(segment_dirs))
for segment_dir in ordered_dirs:
raise_if_segment_path_looks_like_dataset_root(
segment_dir,
scan_segment_dir=scan_segment_dir,
)
return SourceResolution(mode="segments", segment_dirs=tuple(ordered_dirs), ignored_partial_dirs=())
return SourceResolution(
mode="segments-csv",
segment_dirs=parse_segments_csv(segments_csv, csv_root),
ignored_partial_dirs=(),
)
def _find_nested_valid_segment_dirs(
root: Path,
*,
scan_segment_dir: Callable[[Path], ScanT],
limit: int = 3,
) -> tuple[Path, ...]:
matches: list[Path] = []
for path in sorted(root.rglob("*")):
if not path.is_dir():
continue
resolved = path.resolve()
if resolved == root:
continue
scan = scan_segment_dir(resolved)
if scan.is_valid:
matches.append(resolved)
if len(matches) >= limit:
break
return tuple(matches)
-747
@@ -1,747 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import concurrent.futures
import json
import math
import os
import re
import shutil
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
import click
from tqdm import tqdm
try:
from scripts import zed_batch_segment_sources as segment_sources
except ModuleNotFoundError:
import zed_batch_segment_sources as segment_sources
SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1]
SEGMENT_FILE_PATTERN = re.compile(r".*_zed([1-4])\.svo2?$", re.IGNORECASE)
EXPECTED_CAMERAS = ("zed1", "zed2", "zed3", "zed4")
@dataclass(slots=True, frozen=True)
class BatchConfig:
zed_bin: Path | None
ffprobe_bin: Path | None
probe_existing: bool
cuda_visible_devices: str | None
overwrite: bool
fail_fast: bool
codec: str
encoder_device: str
preset: str
tune: str
quality: int
gop: int
b_frames: int
start_offset_seconds: float
duration_seconds: float | None
output_fps: float | None
tile_scale: float
@dataclass(slots=True, frozen=True)
class ConversionJob:
segment_dir: Path
output_path: Path
@dataclass(slots=True, frozen=True)
class JobResult:
status: str
segment_dir: Path
output_path: Path
command: tuple[str, ...]
return_code: int = 0
stdout: str = ""
stderr: str = ""
@dataclass(slots=True, frozen=True)
class SegmentScan:
segment_dir: Path
matched_files: int
is_valid: bool
reason: str | None = None
@dataclass(slots=True, frozen=True)
class OutputProbeResult:
output_path: Path
status: str
reason: str = ""
duration_seconds: float | None = None
def locate_binary(override: Path | None) -> Path:
if override is not None:
candidate = override.expanduser().resolve()
if not candidate.is_file():
raise click.ClickException(f"binary not found: {candidate}")
return candidate
candidates = (
REPO_ROOT / "build" / "bin" / "zed_svo_grid_to_mp4",
REPO_ROOT / "build" / "zed_svo_grid_to_mp4",
)
for candidate in candidates:
if candidate.is_file():
return candidate
raise click.ClickException(f"could not find zed_svo_grid_to_mp4 under {REPO_ROOT / 'build'}")
def locate_ffprobe(override: Path | None) -> Path:
if override is not None:
candidate = override.expanduser().resolve()
if not candidate.is_file():
raise click.ClickException(f"ffprobe binary not found: {candidate}")
return candidate
resolved = shutil.which("ffprobe")
if resolved is None:
raise click.ClickException("could not find ffprobe on PATH")
return Path(resolved).resolve()
def scan_segment_dir(segment_dir: Path) -> SegmentScan:
if not segment_dir.is_dir():
return SegmentScan(
segment_dir=segment_dir,
matched_files=0,
is_valid=False,
reason=f"segment directory does not exist: {segment_dir}",
)
matched_by_camera: dict[str, list[Path]] = {camera: [] for camera in EXPECTED_CAMERAS}
for child in segment_dir.iterdir():
if not child.is_file():
continue
match = SEGMENT_FILE_PATTERN.fullmatch(child.name)
if match is None:
continue
matched_by_camera[f"zed{match.group(1)}"].append(child)
matched_files = sum(len(paths) for paths in matched_by_camera.values())
duplicate_cameras = [camera for camera, paths in matched_by_camera.items() if len(paths) > 1]
missing_cameras = [camera for camera, paths in matched_by_camera.items() if len(paths) == 0]
if duplicate_cameras:
duplicate_text = ", ".join(duplicate_cameras)
return SegmentScan(
segment_dir=segment_dir,
matched_files=matched_files,
is_valid=False,
reason=f"duplicate camera inputs under {segment_dir}: {duplicate_text}",
)
if missing_cameras:
missing_text = ", ".join(missing_cameras)
return SegmentScan(
segment_dir=segment_dir,
matched_files=matched_files,
is_valid=False,
reason=f"missing camera inputs under {segment_dir}: {missing_text}",
)
return SegmentScan(segment_dir=segment_dir, matched_files=matched_files, is_valid=True)
def output_path_for(segment_dir: Path) -> Path:
return segment_dir / f"{segment_dir.name}_grid.mp4"
def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]:
if config.zed_bin is None:
raise RuntimeError("zed_svo_grid_to_mp4 binary is not configured")
command = [
str(config.zed_bin),
"--segment-dir",
str(job.segment_dir),
"--codec",
config.codec,
"--encoder-device",
config.encoder_device,
"--preset",
config.preset,
"--tune",
config.tune,
"--quality",
str(config.quality),
"--gop",
str(config.gop),
"--b-frames",
str(config.b_frames),
"--start-offset-seconds",
str(config.start_offset_seconds),
"--tile-scale",
str(config.tile_scale),
]
if config.duration_seconds is not None:
command.extend(["--duration-seconds", str(config.duration_seconds)])
if config.output_fps is not None:
command.extend(["--output-fps", str(config.output_fps)])
return command
def env_for_job(config: BatchConfig) -> dict[str, str]:
env = dict(os.environ)
if config.cuda_visible_devices is not None:
env["CUDA_VISIBLE_DEVICES"] = config.cuda_visible_devices
return env
def probe_output(output_path: Path, ffprobe_bin: Path | None) -> OutputProbeResult:
if not output_path.is_file():
return OutputProbeResult(output_path=output_path, status="missing")
if ffprobe_bin is None:
raise RuntimeError("ffprobe binary is not configured")
completed = subprocess.run(
[
str(ffprobe_bin),
"-v",
"error",
"-print_format",
"json",
"-show_entries",
"format=duration,size:stream=codec_type,codec_name,width,height,nb_frames",
str(output_path),
],
check=False,
capture_output=True,
text=True,
)
if completed.returncode != 0:
reason = completed.stderr.strip() or completed.stdout.strip() or "ffprobe failed"
return OutputProbeResult(output_path=output_path, status="invalid", reason=reason)
try:
payload = json.loads(completed.stdout)
except json.JSONDecodeError as error:
return OutputProbeResult(
output_path=output_path,
status="invalid",
reason=f"ffprobe returned invalid JSON: {error}",
)
streams = payload.get("streams", [])
has_video_stream = any(stream.get("codec_type") == "video" for stream in streams)
if not has_video_stream:
return OutputProbeResult(
output_path=output_path,
status="invalid",
reason="ffprobe found no video stream",
)
format_payload = payload.get("format", {})
duration_text = format_payload.get("duration")
if duration_text in (None, ""):
return OutputProbeResult(
output_path=output_path,
status="invalid",
reason="ffprobe did not report a duration",
)
try:
duration_seconds = float(duration_text)
except (TypeError, ValueError):
return OutputProbeResult(
output_path=output_path,
status="invalid",
reason=f"ffprobe reported a non-numeric duration: {duration_text!r}",
)
if not math.isfinite(duration_seconds) or duration_seconds <= 0.0:
return OutputProbeResult(
output_path=output_path,
status="invalid",
reason=f"ffprobe reported a non-positive duration: {duration_seconds}",
)
return OutputProbeResult(
output_path=output_path,
status="valid",
duration_seconds=duration_seconds,
)
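# Rationale (assumption): an interrupted conversion typically leaves an MP4
# that still opens but reports no video stream or no positive duration, so
# probing lets such outputs be redone rather than silently skipped.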
def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult:
command = command_for_job(job, config)
completed = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
env=env_for_job(config),
)
status = "converted" if completed.returncode == 0 else "failed"
return JobResult(
status=status,
segment_dir=job.segment_dir,
output_path=job.output_path,
command=tuple(command),
return_code=completed.returncode,
stdout=completed.stdout,
stderr=completed.stderr,
)
def summarize_failures(results: list[JobResult]) -> None:
failed_results = [result for result in results if result.status == "failed"]
if not failed_results:
return
click.echo("\nFailed conversions:", err=True)
for result in failed_results:
click.echo(f"- {result.segment_dir} (exit {result.return_code})", err=True)
if result.stderr.strip():
click.echo(result.stderr.rstrip(), err=True)
elif result.stdout.strip():
click.echo(result.stdout.rstrip(), err=True)
def report_invalid_existing_outputs(
invalid_existing: list[tuple[ConversionJob, OutputProbeResult]],
) -> None:
if not invalid_existing:
return
click.echo("\nInvalid existing outputs:", err=True)
for job, probe in invalid_existing:
click.echo(f"- {job.segment_dir}", err=True)
click.echo(f" output: {probe.output_path}", err=True)
reason_lines = probe.reason.splitlines() or [probe.reason]
click.echo(f" reason: {reason_lines[0]}", err=True)
for line in reason_lines[1:]:
click.echo(f" {line}", err=True)
def report_dry_run_plan(
pending_jobs: list[ConversionJob],
pending_reasons: dict[Path, str],
pending_details: dict[Path, str],
) -> None:
if not pending_jobs:
click.echo("dry-run: no conversions would be launched", err=True)
return
click.echo("\nDry-run plan:", err=True)
for job in pending_jobs:
reason = pending_reasons[job.segment_dir]
detail = pending_details.get(job.segment_dir)
line = f"- {job.segment_dir} [{reason}]"
if detail:
line = f"{line}: {detail.replace(chr(10), ' | ')}"
click.echo(line, err=True)
def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -> tuple[list[JobResult], int]:
results: list[JobResult] = []
aborted_count = 0
if not jobs:
return results, aborted_count
future_to_job: dict[concurrent.futures.Future[JobResult], ConversionJob] = {}
job_iter = iter(jobs)
stop_submitting = False
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs_limit) as executor:
with tqdm(total=len(jobs), unit="segment", dynamic_ncols=True) as progress:
def submit_next() -> bool:
if stop_submitting:
return False
try:
job = next(job_iter)
except StopIteration:
return False
future = executor.submit(run_conversion, job, config)
future_to_job[future] = job
return True
for _ in range(min(jobs_limit, len(jobs))):
submit_next()
while future_to_job:
done, _ = concurrent.futures.wait(
future_to_job,
return_when=concurrent.futures.FIRST_COMPLETED,
)
for future in done:
job = future_to_job.pop(future)
result = future.result()
results.append(result)
progress.update(1)
if result.status == "failed":
tqdm.write(
f"failed: {job.segment_dir} (exit {result.return_code})",
file=sys.stderr,
)
if config.fail_fast:
stop_submitting = True
if not stop_submitting:
submit_next()
if stop_submitting:
remaining = sum(1 for _ in job_iter)
aborted_count = remaining
progress.total = progress.n + len(future_to_job)
progress.refresh()
return results, aborted_count
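# Bounded submission: at most jobs_limit futures are ever in flight, and a
# replacement is submitted only as each one completes, so --fail-fast can
# stop scheduling immediately and report the never-submitted jobs as aborted.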
@click.command(context_settings={"allow_extra_args": True})
@click.option(
"--dataset-root",
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
help="Dataset root containing segment directories. Mutually exclusive with --segment and --segments-csv.",
)
@click.option(
"--segment",
"segment_dirs",
multiple=True,
type=click.Path(exists=True, path_type=Path, file_okay=False, dir_okay=True),
help=(
"Explicit segment directory. Repeatable. The directory must directly contain "
"*_zedN.svo or *_zedN.svo2 files. Mutually exclusive with --dataset-root and --segments-csv."
),
)
@click.option(
"--segment-dir",
"legacy_segment_dirs",
multiple=True,
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
hidden=True,
)
@click.option(
"--segments-csv",
type=click.Path(path_type=Path, dir_okay=False),
help="CSV file containing a segment_dir column. Mutually exclusive with --dataset-root and --segment.",
)
@click.option(
"--csv-root",
type=click.Path(path_type=Path, file_okay=False, dir_okay=True),
help="Base directory for relative segment_dir entries in --segments-csv. Defaults to the CSV parent directory.",
)
@click.option(
"--recursive/--no-recursive",
default=True,
show_default=True,
help="Recurse when discovering segment directories from --dataset-root.",
)
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option(
"--zed-bin",
type=click.Path(path_type=Path, dir_okay=False),
help="Explicit path to the zed_svo_grid_to_mp4 binary.",
)
@click.option(
"--ffprobe-bin",
type=click.Path(path_type=Path, dir_okay=False),
help="Explicit path to ffprobe. Required when probing existing outputs and ffprobe is not on PATH.",
)
@click.option(
"--cuda-visible-devices",
help="Optional CUDA_VISIBLE_DEVICES value exported for each conversion subprocess.",
)
@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing grid MP4 files.")
@click.option(
"--probe-existing/--trust-existing",
default=False,
show_default=True,
help="Validate existing grid MP4 files with ffprobe before skipping them. Invalid outputs are treated as missing.",
)
@click.option(
"--report-existing",
is_flag=True,
help="Probe existing grid MP4 files with ffprobe, report invalid ones, and do not launch conversions.",
)
@click.option(
"--dry-run",
is_flag=True,
help="Show which segments would be converted after applying skip/probe logic, without launching conversions.",
)
@click.option(
"--fail-fast/--continue-on-error",
default=False,
show_default=True,
help="Stop submitting new work after the first failed conversion.",
)
@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True)
@click.option(
"--encoder-device",
type=click.Choice(("auto", "nvidia", "software")),
default="auto",
show_default=True,
)
@click.option("--preset", type=click.Choice(("fast", "balanced", "quality")), default="fast", show_default=True)
@click.option(
"--tune",
type=click.Choice(("low-latency", "balanced")),
default="low-latency",
show_default=True,
)
@click.option(
"--quality",
type=click.IntRange(min=0, max=51),
default=23,
show_default=True,
help="Lower values mean higher quality.",
)
@click.option("--gop", type=click.IntRange(min=1), default=30, show_default=True)
@click.option("--b-frames", "b_frames", type=click.IntRange(min=0), default=0, show_default=True)
@click.option(
"--start-offset-seconds",
type=click.FloatRange(min=0.0),
default=0.0,
show_default=True,
help="Offset applied after the synced common start time.",
)
@click.option(
"--duration-seconds",
type=click.FloatRange(min=0.0, min_open=True),
default=None,
help="Limit export duration in seconds after sync.",
)
@click.option(
"--output-fps",
type=click.FloatRange(min=0.0, min_open=True),
default=None,
help="Composite output frame rate. Defaults to the grid tool's native behavior.",
)
@click.option(
"--tile-scale",
type=click.FloatRange(min=0.1, max=1.0),
default=0.5,
show_default=True,
help="Scale each tile relative to the source resolution.",
)
@click.pass_context
def main(
ctx: click.Context,
dataset_root: Path | None,
segment_dirs: tuple[Path, ...],
legacy_segment_dirs: tuple[Path, ...],
segments_csv: Path | None,
csv_root: Path | None,
recursive: bool,
jobs: int,
zed_bin: Path | None,
ffprobe_bin: Path | None,
cuda_visible_devices: str | None,
overwrite: bool,
probe_existing: bool,
report_existing: bool,
dry_run: bool,
fail_fast: bool,
codec: str,
encoder_device: str,
preset: str,
tune: str,
quality: int,
gop: int,
b_frames: int,
start_offset_seconds: float,
duration_seconds: float | None,
output_fps: float | None,
tile_scale: float,
) -> None:
"""Batch-convert synced four-camera ZED segments into grid MP4 files."""
segment_sources.raise_for_legacy_extra_args(ctx.args)
segment_sources.raise_for_legacy_source_args(None, legacy_segment_dirs)
segment_sources.raise_if_recursive_flag_is_incompatible(ctx, dataset_root)
if b_frames > gop:
raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames")
if report_existing and dry_run:
raise click.ClickException("--report-existing and --dry-run are mutually exclusive")
sources = segment_sources.resolve_sources(
dataset_root,
segment_dirs,
segments_csv,
csv_root,
recursive,
scan_segment_dir=scan_segment_dir,
no_matches_message=lambda root: f"no complete four-camera segments found under {root}",
)
ffprobe_path = locate_ffprobe(ffprobe_bin) if (probe_existing or report_existing) else None
binary_path = None if report_existing else locate_binary(zed_bin)
config = BatchConfig(
zed_bin=binary_path,
ffprobe_bin=ffprobe_path,
probe_existing=probe_existing or report_existing,
cuda_visible_devices=cuda_visible_devices,
overwrite=overwrite,
fail_fast=fail_fast,
codec=codec,
encoder_device=encoder_device,
preset=preset,
tune=tune,
quality=quality,
gop=gop,
b_frames=b_frames,
start_offset_seconds=start_offset_seconds,
duration_seconds=duration_seconds,
output_fps=output_fps,
tile_scale=tile_scale,
)
skipped_results: list[JobResult] = []
failed_results: list[JobResult] = []
pending_jobs: list[ConversionJob] = []
pending_reasons: dict[Path, str] = {}
pending_details: dict[Path, str] = {}
valid_existing: list[OutputProbeResult] = []
invalid_existing: list[tuple[ConversionJob, OutputProbeResult]] = []
missing_outputs: list[ConversionJob] = []
for segment_dir in sources.segment_dirs:
output_path = output_path_for(segment_dir)
job = ConversionJob(segment_dir=segment_dir, output_path=output_path)
command = tuple(command_for_job(job, config)) if config.zed_bin is not None else ()
scan = scan_segment_dir(segment_dir)
if not scan.is_valid:
failed_results.append(
JobResult(
status="failed",
segment_dir=segment_dir,
output_path=output_path,
command=command,
return_code=2,
stderr=scan.reason or "",
)
)
continue
if report_existing:
probe_result = probe_output(output_path, config.ffprobe_bin)
if probe_result.status == "valid":
valid_existing.append(probe_result)
elif probe_result.status == "invalid":
invalid_existing.append((job, probe_result))
else:
missing_outputs.append(job)
continue
if overwrite:
pending_jobs.append(job)
pending_reasons[segment_dir] = "overwrite"
continue
if config.probe_existing:
probe_result = probe_output(output_path, config.ffprobe_bin)
if probe_result.status == "valid":
valid_existing.append(probe_result)
skipped_results.append(
JobResult(
status="skipped",
segment_dir=segment_dir,
output_path=output_path,
command=command,
)
)
continue
if probe_result.status == "invalid":
invalid_existing.append((job, probe_result))
pending_jobs.append(job)
pending_reasons[segment_dir] = "invalid-existing-output"
pending_details[segment_dir] = probe_result.reason
continue
missing_outputs.append(job)
pending_jobs.append(job)
pending_reasons[segment_dir] = "missing-output"
continue
if output_path.exists():
skipped_results.append(
JobResult(
status="skipped",
segment_dir=segment_dir,
output_path=output_path,
command=command,
)
)
continue
pending_jobs.append(job)
pending_reasons[segment_dir] = "missing-output"
if report_existing:
click.echo(
(
f"source={sources.mode} matched={len(sources.segment_dirs)} valid={len(valid_existing)} "
f"invalid={len(invalid_existing)} missing={len(missing_outputs)} "
f"invalid-segments={len(failed_results)}"
),
err=True,
)
if sources.ignored_partial_dirs:
click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True)
report_invalid_existing_outputs(invalid_existing)
summarize_failures(failed_results)
if failed_results or invalid_existing:
raise SystemExit(1)
return
click.echo(
(
f"source={sources.mode} matched={len(sources.segment_dirs)} pending={len(pending_jobs)} "
f"skipped={len(skipped_results)} invalid={len(failed_results)} jobs={jobs} "
f"dry_run={'yes' if dry_run else 'no'}"
),
err=True,
)
if sources.ignored_partial_dirs:
click.echo(f"ignored_incomplete={len(sources.ignored_partial_dirs)}", err=True)
if config.probe_existing:
click.echo(
(
f"probed-existing: valid={len(valid_existing)} invalid={len(invalid_existing)} "
f"missing={len(missing_outputs)}"
),
err=True,
)
if dry_run:
report_dry_run_plan(pending_jobs, pending_reasons, pending_details)
summarize_failures(failed_results)
if failed_results:
raise SystemExit(1)
return
results = list(skipped_results)
results.extend(failed_results)
conversion_results, aborted_count = run_batch(pending_jobs, config, jobs)
results.extend(conversion_results)
converted_count = sum(1 for result in results if result.status == "converted")
skipped_count = sum(1 for result in results if result.status == "skipped")
failed_count = sum(1 for result in results if result.status == "failed")
click.echo(
(
f"summary: matched={len(sources.segment_dirs)} converted={converted_count} "
f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}"
),
err=True,
)
summarize_failures(results)
if failed_count > 0 or aborted_count > 0:
raise SystemExit(1)
if __name__ == "__main__":
main()
File diff suppressed because it is too large
-361
View File
@@ -1,361 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import concurrent.futures
import os
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
import click
from tqdm import tqdm
SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1]
DEFAULT_PATTERNS = ("*.svo2",)
SUPPORTED_SUFFIXES = {".svo", ".svo2"}
@dataclass(slots=True, frozen=True)
class BatchConfig:
zed_bin: Path
cuda_visible_devices: str | None
overwrite: bool
fail_fast: bool
codec: str
encoder_device: str
preset: str
tune: str
quality: int
gop: int
b_frames: int
start_frame: int
end_frame: int | None
@dataclass(slots=True, frozen=True)
class ConversionJob:
input_path: Path
output_path: Path
@dataclass(slots=True, frozen=True)
class JobResult:
status: str
input_path: Path
output_path: Path
command: tuple[str, ...]
return_code: int = 0
stdout: str = ""
stderr: str = ""
def locate_binary(override: Path | None) -> Path:
if override is not None:
candidate = override.expanduser().resolve()
if not candidate.is_file():
raise click.ClickException(f"binary not found: {candidate}")
return candidate
candidates = (
REPO_ROOT / "build" / "bin" / "zed_svo_to_mp4",
REPO_ROOT / "build" / "zed_svo_to_mp4",
)
for candidate in candidates:
if candidate.is_file():
return candidate
raise click.ClickException(f"could not find zed_svo_to_mp4 under {REPO_ROOT / 'build'}")
def discover_inputs(root: Path, patterns: Iterable[str], recursive: bool) -> list[Path]:
discovered: set[Path] = set()
for pattern in patterns:
iterator = root.rglob(pattern) if recursive else root.glob(pattern)
for path in iterator:
if path.is_file() and path.suffix.lower() in SUPPORTED_SUFFIXES:
discovered.add(path.absolute())
return sorted(discovered)
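# output_path_for maps e.g. /data/run_zed1.svo2 -> /data/run_zed1.mp4
# (a sibling file with the suffix swapped for .mp4).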
def output_path_for(input_path: Path) -> Path:
if input_path.suffix:
return input_path.with_suffix(".mp4")
return input_path.with_name(f"{input_path.name}.mp4")
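# command_for_job builds the zed_svo_to_mp4 invocation; with the CLI defaults
# defined below this yields, for example:
#   zed_svo_to_mp4 --input /data/run_zed1.svo2 --codec h265 --encoder-device auto \
#     --preset fast --tune low-latency --quality 23 --gop 30 --b-frames 0 --start-frame 0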
def command_for_job(job: ConversionJob, config: BatchConfig) -> list[str]:
command = [
str(config.zed_bin),
"--input",
str(job.input_path),
"--codec",
config.codec,
"--encoder-device",
config.encoder_device,
"--preset",
config.preset,
"--tune",
config.tune,
"--quality",
str(config.quality),
"--gop",
str(config.gop),
"--b-frames",
str(config.b_frames),
"--start-frame",
str(config.start_frame),
]
if config.end_frame is not None:
command.extend(["--end-frame", str(config.end_frame)])
return command
def env_for_job(config: BatchConfig) -> dict[str, str]:
env = dict(os.environ)
if config.cuda_visible_devices is not None:
env["CUDA_VISIBLE_DEVICES"] = config.cuda_visible_devices
return env
def run_conversion(job: ConversionJob, config: BatchConfig) -> JobResult:
command = command_for_job(job, config)
completed = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
env=env_for_job(config),
)
status = "converted" if completed.returncode == 0 else "failed"
return JobResult(
status=status,
input_path=job.input_path,
output_path=job.output_path,
command=tuple(command),
return_code=completed.returncode,
stdout=completed.stdout,
stderr=completed.stderr,
)
def summarize_failures(results: list[JobResult]) -> None:
failed_results = [result for result in results if result.status == "failed"]
if not failed_results:
return
click.echo("\nFailed conversions:", err=True)
for result in failed_results:
click.echo(f"- {result.input_path} (exit {result.return_code})", err=True)
if result.stderr.strip():
click.echo(result.stderr.rstrip(), err=True)
elif result.stdout.strip():
click.echo(result.stdout.rstrip(), err=True)
def run_batch(jobs: list[ConversionJob], config: BatchConfig, jobs_limit: int) -> tuple[list[JobResult], int]:
results: list[JobResult] = []
aborted_count = 0
if not jobs:
return results, aborted_count
future_to_job: dict[concurrent.futures.Future[JobResult], ConversionJob] = {}
job_iter = iter(jobs)
stop_submitting = False
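# Sliding-window submission: keep at most jobs_limit conversions in flight and
# only submit more work as completed futures drain, so --fail-fast can stop early.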
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs_limit) as executor:
with tqdm(total=len(jobs), unit="file", dynamic_ncols=True) as progress:
def submit_next() -> bool:
if stop_submitting:
return False
try:
job = next(job_iter)
except StopIteration:
return False
future = executor.submit(run_conversion, job, config)
future_to_job[future] = job
return True
for _ in range(min(jobs_limit, len(jobs))):
submit_next()
while future_to_job:
done, _ = concurrent.futures.wait(
future_to_job,
return_when=concurrent.futures.FIRST_COMPLETED,
)
for future in done:
job = future_to_job.pop(future)
result = future.result()
results.append(result)
progress.update(1)
if result.status == "failed":
tqdm.write(f"failed: {job.input_path} (exit {result.return_code})", file=sys.stderr)
if config.fail_fast:
stop_submitting = True
if not stop_submitting:
submit_next()
if stop_submitting:
# Everything still in job_iter was never submitted.
aborted_count = sum(1 for _ in job_iter)
# future_to_job is empty once the wait loop exits, so shrink the bar to the
# number of jobs that actually ran.
progress.total = progress.n
progress.refresh()
return results, aborted_count
@click.command()
@click.argument("input_dir", type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
@click.option(
"--pattern",
"patterns",
multiple=True,
default=DEFAULT_PATTERNS,
show_default=True,
help="Glob pattern to match under the input directory. Repeatable.",
)
@click.option("--recursive/--no-recursive", default=True, show_default=True, help="Use rglob instead of glob.")
@click.option("--jobs", default=1, show_default=True, type=click.IntRange(min=1), help="Parallel conversion jobs.")
@click.option(
"--zed-bin",
type=click.Path(path_type=Path, dir_okay=False),
help="Explicit path to the zed_svo_to_mp4 binary.",
)
@click.option(
"--cuda-visible-devices",
help="Optional CUDA_VISIBLE_DEVICES value exported for each conversion subprocess.",
)
@click.option("--overwrite/--skip-existing", default=False, show_default=True, help="Overwrite existing MP4 files.")
@click.option(
"--fail-fast/--continue-on-error",
default=False,
show_default=True,
help="Stop submitting new work after the first failed conversion.",
)
@click.option("--codec", type=click.Choice(("h264", "h265")), default="h265", show_default=True)
@click.option(
"--encoder-device",
type=click.Choice(("auto", "nvidia", "software")),
default="auto",
show_default=True,
)
@click.option("--preset", type=click.Choice(("fast", "balanced", "quality")), default="fast", show_default=True)
@click.option(
"--tune",
type=click.Choice(("low-latency", "balanced")),
default="low-latency",
show_default=True,
)
@click.option(
"--quality",
type=click.IntRange(min=0, max=51),
default=23,
show_default=True,
help="Lower values mean higher quality.",
)
@click.option("--gop", type=click.IntRange(min=1), default=30, show_default=True)
@click.option("--b-frames", "b_frames", type=click.IntRange(min=0), default=0, show_default=True)
@click.option("--start-frame", type=click.IntRange(min=0), default=0, show_default=True)
@click.option("--end-frame", type=click.IntRange(min=0), default=None)
def main(
input_dir: Path,
patterns: tuple[str, ...],
recursive: bool,
jobs: int,
zed_bin: Path | None,
cuda_visible_devices: str | None,
overwrite: bool,
fail_fast: bool,
codec: str,
encoder_device: str,
preset: str,
tune: str,
quality: int,
gop: int,
b_frames: int,
start_frame: int,
end_frame: int | None,
) -> None:
"""Batch-convert ZED SVO/SVO2 recordings in a folder to MP4."""
if b_frames > gop:
raise click.BadParameter(f"b-frames {b_frames} must be <= gop {gop}", param_hint="--b-frames")
if end_frame is not None and end_frame < start_frame:
raise click.BadParameter(
f"end-frame {end_frame} must be >= start-frame {start_frame}",
param_hint="--end-frame",
)
binary_path = locate_binary(zed_bin)
inputs = discover_inputs(input_dir.absolute(), patterns, recursive)
if not inputs:
raise click.ClickException(f"no .svo/.svo2 files matched under {input_dir}")
config = BatchConfig(
zed_bin=binary_path,
cuda_visible_devices=cuda_visible_devices,
overwrite=overwrite,
fail_fast=fail_fast,
codec=codec,
encoder_device=encoder_device,
preset=preset,
tune=tune,
quality=quality,
gop=gop,
b_frames=b_frames,
start_frame=start_frame,
end_frame=end_frame,
)
skipped_results: list[JobResult] = []
pending_jobs: list[ConversionJob] = []
for input_path in inputs:
output_path = output_path_for(input_path)
command = tuple(command_for_job(ConversionJob(input_path, output_path), config))
if output_path.exists() and not overwrite:
skipped_results.append(
JobResult(
status="skipped",
input_path=input_path,
output_path=output_path,
command=command,
)
)
continue
pending_jobs.append(ConversionJob(input_path=input_path, output_path=output_path))
click.echo(
f"matched={len(inputs)} pending={len(pending_jobs)} skipped={len(skipped_results)} jobs={jobs}",
err=True,
)
results = list(skipped_results)
conversion_results, aborted_count = run_batch(pending_jobs, config, jobs)
results.extend(conversion_results)
converted_count = sum(1 for result in results if result.status == "converted")
skipped_count = sum(1 for result in results if result.status == "skipped")
failed_count = sum(1 for result in results if result.status == "failed")
click.echo(
(
f"summary: matched={len(inputs)} converted={converted_count} "
f"skipped={skipped_count} failed={failed_count} aborted={aborted_count}"
),
err=True,
)
summarize_failures(results)
if failed_count > 0 or aborted_count > 0:
raise SystemExit(1)
if __name__ == "__main__":
main()
-374
View File
@@ -1,374 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import math
import os
import shlex
import subprocess
import sys
import tempfile
from collections import Counter
from pathlib import Path
from typing import Iterable
import cv2
import numpy as np
SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1]
WORKSPACE_ROOT = REPO_ROOT.parent
MCAP_PYTHON_ROOT = WORKSPACE_ROOT / "mcap" / "python" / "mcap"
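# Make the sibling mcap checkout importable without installing the package.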
if str(MCAP_PYTHON_ROOT) not in sys.path:
sys.path.insert(0, str(MCAP_PYTHON_ROOT))
from mcap.reader import make_reader # noqa: E402
VIDEO_FORMATS = ("h264", "h265")
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description=(
"Convert ZED SVO/SVO2 recordings to MCAP and generate a lightweight preview. "
"If the input is already an MCAP file, conversion is skipped."
)
)
parser.add_argument("input", help="Input .svo/.svo2 file, .mcap file, or a directory containing SVO files")
parser.add_argument("--output-dir", help="Directory for generated MCAP files and previews")
parser.add_argument(
"--preview-all",
action="store_true",
help="When the input is a directory, generate a preview for every converted MCAP instead of just the first one",
)
parser.add_argument("--no-preview", action="store_true", help="Convert only, do not generate preview images")
parser.add_argument(
"--format",
choices=("auto", "h264", "h265"),
default="auto",
help="CompressedVideo format to extract from MCAP during preview",
)
parser.add_argument("--codec", choices=VIDEO_FORMATS, default="h264", help="Video codec for SVO to MCAP conversion")
parser.add_argument(
"--encoder-device",
choices=("auto", "nvidia", "software"),
default="software",
help="Encoder device passed to zed_svo_to_mcap",
)
parser.add_argument(
"--mcap-compression",
choices=("none", "lz4", "zstd"),
default="none",
help="MCAP chunk compression passed to zed_svo_to_mcap",
)
parser.add_argument(
"--depth-mode",
choices=("neural_light", "neural", "neural_plus"),
default="neural_plus",
help="Depth mode passed to zed_svo_to_mcap",
)
parser.add_argument(
"--depth-size",
default="optimal",
help="Depth size passed to zed_svo_to_mcap (optimal|native|<width>x<height>)",
)
parser.add_argument("--start-frame", type=int, default=0, help="First SVO frame to convert")
parser.add_argument("--end-frame", type=int, help="Last SVO frame to convert")
parser.add_argument(
"--sample-count",
type=int,
default=9,
help="Number of decoded frames to place in the preview contact sheet",
)
parser.add_argument(
"--frame-step",
type=int,
default=15,
help="Decode every Nth frame for the contact sheet",
)
parser.add_argument(
"--contact-sheet-width",
type=int,
default=480,
help="Width of each preview tile in pixels",
)
parser.add_argument(
"--cuda-visible-devices",
help=(
"Optional CUDA_VISIBLE_DEVICES value to export while running zed_svo_to_mcap. "
"Useful when the ZED SDK must be pinned to a specific GPU UUID."
),
)
parser.add_argument("--zed-bin", help="Explicit path to zed_svo_to_mcap")
parser.add_argument("--reader-bin", help="Explicit path to mcap_reader_tester")
return parser.parse_args()
def locate_binary(name: str, override: str | None) -> Path:
if override:
path = Path(override).expanduser().resolve()
if not path.is_file():
raise FileNotFoundError(f"binary not found: {path}")
return path
candidates = (
REPO_ROOT / "build" / "bin" / name,
REPO_ROOT / "build" / name,
)
for candidate in candidates:
if candidate.is_file():
return candidate
raise FileNotFoundError(f"could not find {name} under {REPO_ROOT / 'build'}")
def quote_command(args: Iterable[str]) -> str:
return " ".join(shlex.quote(arg) for arg in args)
def run(args: list[str], env: dict[str, str] | None = None) -> None:
print(f"$ {quote_command(args)}", flush=True)
subprocess.run(args, check=True, env=env)
def summarize_mcap(mcap_path: Path) -> list[tuple[str, str, str, int]]:
counts: Counter[tuple[str, str, str]] = Counter()
with mcap_path.open("rb") as stream:
reader = make_reader(stream)
for schema, channel, _message in reader.iter_messages():
schema_name = schema.name if schema is not None else "<none>"
counts[(channel.topic, channel.message_encoding, schema_name)] += 1
summary_rows = [
(topic, encoding, schema_name, count)
for (topic, encoding, schema_name), count in sorted(counts.items())
]
print(f"MCAP summary: {mcap_path}")
for topic, encoding, schema_name, count in summary_rows:
print(f" {count:6d} topic={topic} encoding={encoding} schema={schema_name}")
return summary_rows
def infer_video_format(reader_bin: Path, mcap_path: Path, requested: str) -> str:
if requested != "auto":
return requested
for candidate in VIDEO_FORMATS:
result = subprocess.run(
[str(reader_bin), str(mcap_path), "--expect-format", candidate, "--min-messages", "1"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
if result.returncode == 0:
return candidate
raise RuntimeError(f"could not infer video format from {mcap_path}")
def dump_annexb(reader_bin: Path, mcap_path: Path, video_format: str, output_path: Path) -> None:
run(
[
str(reader_bin),
str(mcap_path),
"--expect-format",
video_format,
"--min-messages",
"1",
"--dump-annexb-output",
str(output_path),
]
)
def make_contact_sheet(stream_path: Path, image_path: Path, sample_count: int, frame_step: int, tile_width: int) -> int:
capture = cv2.VideoCapture(str(stream_path))
if not capture.isOpened():
raise RuntimeError(f"OpenCV could not open decoded stream {stream_path}")
frames: list[np.ndarray] = []
frame_index = 0
while len(frames) < sample_count:
ok, frame = capture.read()
if not ok:
break
if frame_index % frame_step == 0:
annotated = frame.copy()
cv2.putText(
annotated,
f"frame {frame_index}",
(20, 40),
cv2.FONT_HERSHEY_SIMPLEX,
1.0,
(0, 255, 0),
2,
cv2.LINE_AA,
)
frames.append(annotated)
frame_index += 1
capture.release()
if not frames:
raise RuntimeError(f"no frames decoded from {stream_path}")
tile_width = max(64, tile_width)
resized: list[np.ndarray] = []
for frame in frames:
scale = tile_width / frame.shape[1]
tile_height = max(1, int(round(frame.shape[0] * scale)))
resized.append(cv2.resize(frame, (tile_width, tile_height), interpolation=cv2.INTER_AREA))
max_height = max(frame.shape[0] for frame in resized)
padded: list[np.ndarray] = []
for frame in resized:
if frame.shape[0] == max_height:
padded.append(frame)
continue
canvas = np.zeros((max_height, frame.shape[1], 3), dtype=np.uint8)
canvas[: frame.shape[0], :, :] = frame
padded.append(canvas)
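# Arrange the tiles in a near-square grid (ceil(sqrt(n)) columns), padding
# short rows with blank tiles.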
columns = max(1, math.ceil(math.sqrt(len(padded))))
rows = math.ceil(len(padded) / columns)
blank = np.zeros_like(padded[0])
row_images: list[np.ndarray] = []
for row_index in range(rows):
row_frames = padded[row_index * columns : (row_index + 1) * columns]
while len(row_frames) < columns:
row_frames.append(blank)
row_images.append(np.concatenate(row_frames, axis=1))
sheet = np.concatenate(row_images, axis=0)
image_path.parent.mkdir(parents=True, exist_ok=True)
if not cv2.imwrite(str(image_path), sheet):
raise RuntimeError(f"failed to write preview image {image_path}")
print(f"Preview contact sheet: {image_path}")
return len(frames)
def collect_svo_inputs(input_path: Path) -> list[Path]:
if input_path.is_file():
if input_path.suffix.lower() in {".svo", ".svo2"}:
return [input_path]
if input_path.suffix.lower() == ".mcap":
return []
raise ValueError(f"unsupported input file: {input_path}")
if input_path.is_dir():
return sorted(
path for path in input_path.rglob("*") if path.suffix.lower() in {".svo", ".svo2"}
)
raise FileNotFoundError(f"input not found: {input_path}")
def default_output_dir(input_path: Path) -> Path:
if input_path.is_dir():
return input_path / "mcap_preview"
return input_path.parent / "mcap_preview"
def convert_svo(
zed_bin: Path,
svo_path: Path,
mcap_path: Path,
args: argparse.Namespace,
) -> None:
env = os.environ.copy()
if args.cuda_visible_devices:
env["CUDA_VISIBLE_DEVICES"] = args.cuda_visible_devices
command = [
str(zed_bin),
"--input",
str(svo_path),
"--output",
str(mcap_path),
"--codec",
args.codec,
"--encoder-device",
args.encoder_device,
"--mcap-compression",
args.mcap_compression,
"--depth-mode",
args.depth_mode,
"--depth-size",
args.depth_size,
"--start-frame",
str(args.start_frame),
]
if args.end_frame is not None:
command.extend(["--end-frame", str(args.end_frame)])
mcap_path.parent.mkdir(parents=True, exist_ok=True)
run(command, env=env)
def preview_mcap(reader_bin: Path, mcap_path: Path, args: argparse.Namespace) -> None:
summarize_mcap(mcap_path)
video_format = infer_video_format(reader_bin, mcap_path, args.format)
print(f"Detected video format: {video_format}")
stream_extension = ".h265" if video_format == "h265" else ".h264"
with tempfile.TemporaryDirectory(prefix="zed_mcap_preview_") as temp_dir:
temp_root = Path(temp_dir)
stream_path = temp_root / f"preview{stream_extension}"
dump_annexb(reader_bin, mcap_path, video_format, stream_path)
preview_path = mcap_path.with_suffix(".preview.png")
decoded = make_contact_sheet(
stream_path,
preview_path,
sample_count=args.sample_count,
frame_step=args.frame_step,
tile_width=args.contact_sheet_width,
)
print(f"Decoded {decoded} preview frame(s)")
def main() -> int:
args = parse_args()
input_path = Path(args.input).expanduser().resolve()
output_dir = Path(args.output_dir).expanduser().resolve() if args.output_dir else default_output_dir(input_path)
output_dir.mkdir(parents=True, exist_ok=True)
reader_bin = locate_binary("mcap_reader_tester", args.reader_bin)
zed_bin = locate_binary("zed_svo_to_mcap", args.zed_bin) if input_path.suffix.lower() != ".mcap" or input_path.is_dir() else None
if input_path.is_file() and input_path.suffix.lower() == ".mcap":
if not args.no_preview:
preview_mcap(reader_bin, input_path, args)
return 0
svo_inputs = collect_svo_inputs(input_path)
if not svo_inputs:
raise RuntimeError(f"no .svo/.svo2 files found under {input_path}")
converted_paths: list[Path] = []
for svo_path in svo_inputs:
output_name = f"{svo_path.stem}.mcap"
mcap_path = output_dir / output_name
convert_svo(zed_bin, svo_path, mcap_path, args)
converted_paths.append(mcap_path)
if args.no_preview:
return 0
preview_targets = converted_paths if args.preview_all else converted_paths[:1]
for mcap_path in preview_targets:
preview_mcap(reader_bin, mcap_path, args)
print("Generated MCAP files:")
for mcap_path in converted_paths:
print(f" {mcap_path}")
return 0
if __name__ == "__main__":
try:
raise SystemExit(main())
except KeyboardInterrupt:
raise SystemExit(130)
-658
View File
@@ -1,658 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import concurrent.futures
import datetime as dt
import json
import os
import re
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from zoneinfo import ZoneInfo
import click
import duckdb
SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = SCRIPT_PATH.parents[1]
DEFAULT_INDEX_NAME = "segment_time_index.duckdb"
INDEX_SCHEMA_VERSION = "1"
SEGMENT_FILE_PATTERN = re.compile(r".*_zed([0-9]+)\.svo2?$", re.IGNORECASE)
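# Folder names look like "2026-04-13T07-50-41", "2026-04-13 07-50-41.500", or
# "2026-04-13T07-50-41+02:00": a date, a dash-separated time, then an optional
# fraction and timezone suffix.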
FOLDER_TIMESTAMP_PATTERN = re.compile(
r"^(?P<date>\d{4}-\d{2}-\d{2})[T ](?P<hour>\d{2})-(?P<minute>\d{2})-(?P<second>\d{2})(?P<fraction>\.\d+)?(?P<timezone>Z|[+-]\d{2}:\d{2})?$"
)
@dataclass(slots=True, frozen=True)
class SegmentScan:
segment_dir: Path
matched_files: int
camera_labels: tuple[str, ...]
is_valid: bool
reason: str | None = None
@dataclass(slots=True, frozen=True)
class BoundsRow:
segment_dir: Path
relative_segment_dir: str
group_path: str
activity: str
segment_name: str
mcap_path: Path
start_ns: int
end_ns: int
duration_ns: int
start_iso_utc: str
end_iso_utc: str
camera_count: int
camera_labels: str
video_message_count: int
index_source: str
def sorted_camera_labels(labels: set[str]) -> tuple[str, ...]:
return tuple(sorted(labels, key=lambda label: int(label[3:])))
def scan_segment_dir(segment_dir: Path) -> SegmentScan:
if not segment_dir.is_dir():
return SegmentScan(
segment_dir=segment_dir,
matched_files=0,
camera_labels=(),
is_valid=False,
reason=f"segment directory does not exist: {segment_dir}",
)
matched_by_camera: dict[str, list[Path]] = {}
for child in segment_dir.iterdir():
if not child.is_file():
continue
match = SEGMENT_FILE_PATTERN.fullmatch(child.name)
if match is None:
continue
label = f"zed{int(match.group(1))}"
matched_by_camera.setdefault(label, []).append(child)
matched_files = sum(len(paths) for paths in matched_by_camera.values())
camera_labels = sorted_camera_labels(set(matched_by_camera))
duplicate_cameras = [label for label, paths in sorted(matched_by_camera.items()) if len(paths) > 1]
if duplicate_cameras:
return SegmentScan(
segment_dir=segment_dir,
matched_files=matched_files,
camera_labels=camera_labels,
is_valid=False,
reason=f"duplicate camera inputs under {segment_dir}: {', '.join(duplicate_cameras)}",
)
if len(camera_labels) < 2:
return SegmentScan(
segment_dir=segment_dir,
matched_files=matched_files,
camera_labels=camera_labels,
is_valid=False,
reason=f"expected at least 2 camera inputs under {segment_dir}, found {len(camera_labels)}",
)
return SegmentScan(
segment_dir=segment_dir,
matched_files=matched_files,
camera_labels=camera_labels,
is_valid=True,
)
def discover_segment_dirs(root: Path, recursive: bool) -> tuple[list[SegmentScan], list[SegmentScan]]:
if not root.is_dir():
raise click.ClickException(f"input directory does not exist: {root}")
candidate_dirs = {root.resolve()}
iterator = root.rglob("*") if recursive else root.iterdir()
for path in iterator:
if path.is_dir():
candidate_dirs.add(path.resolve())
valid_scans: list[SegmentScan] = []
ignored_partial_scans: list[SegmentScan] = []
for segment_dir in sorted(candidate_dirs):
scan = scan_segment_dir(segment_dir)
if scan.is_valid:
valid_scans.append(scan)
elif scan.matched_files > 0:
ignored_partial_scans.append(scan)
if not valid_scans:
raise click.ClickException(f"no multi-camera segments found under {root}")
return valid_scans, ignored_partial_scans
def locate_binary(name: str, override: Path | None) -> Path:
if override is not None:
candidate = override.expanduser().resolve()
if not candidate.is_file():
raise click.ClickException(f"binary not found: {candidate}")
return candidate
candidates = (
REPO_ROOT / "build" / "bin" / name,
REPO_ROOT / "build" / name,
)
for candidate in candidates:
if candidate.is_file():
return candidate
raise click.ClickException(f"could not find {name} under {REPO_ROOT / 'build'}")
def default_index_path(dataset_root: Path) -> Path:
return dataset_root / DEFAULT_INDEX_NAME
def find_unique_mcap(segment_dir: Path) -> Path | None:
matches = sorted(path for path in segment_dir.iterdir() if path.is_file() and path.suffix.lower() == ".mcap")
if len(matches) == 1:
return matches[0]
return None
def format_ns_iso(ns: int, tzinfo: dt.tzinfo) -> str:
seconds, nanos = divmod(ns, 1_000_000_000)
stamp = dt.datetime.fromtimestamp(seconds, tz=dt.timezone.utc).astimezone(tzinfo)
offset = stamp.strftime("%z")
offset = f"{offset[:3]}:{offset[3:]}" if offset else ""
return f"{stamp.strftime('%Y-%m-%dT%H:%M:%S')}.{nanos:09d}{offset}"
def format_ns_utc(ns: int) -> str:
return format_ns_iso(ns, dt.timezone.utc).replace("+00:00", "Z")
def resolve_timezone(name: str) -> dt.tzinfo:
if name == "local":
local = dt.datetime.now().astimezone().tzinfo
if local is None:
raise click.ClickException("could not resolve local timezone")
return local
if name == "UTC":
return dt.timezone.utc
if name.startswith("UTC") and len(name) == len("UTC+00:00"):
try:
sign = 1 if name[3] == "+" else -1
hours = int(name[4:6])
minutes = int(name[7:9])
except ValueError as exc:
raise click.ClickException(f"invalid fixed UTC offset '{name}'") from exc
return dt.timezone(sign * dt.timedelta(hours=hours, minutes=minutes))
try:
return ZoneInfo(name)
except Exception as exc: # pragma: no cover - defensive wrapper around system tzdb
raise click.ClickException(f"unknown timezone '{name}': {exc}") from exc
def normalize_timestamp_text(value: str) -> str:
match = FOLDER_TIMESTAMP_PATTERN.fullmatch(value)
if match is None:
return value
parts = match.groupdict()
fraction = parts["fraction"] or ""
timezone_text = parts["timezone"] or ""
return f"{parts['date']}T{parts['hour']}:{parts['minute']}:{parts['second']}{fraction}{timezone_text}"
def parse_folder_name_naive(value: str) -> dt.datetime | None:
normalized = normalize_timestamp_text(value)
try:
parsed = dt.datetime.fromisoformat(normalized)
except ValueError:
return None
if parsed.tzinfo is not None:
return None
return parsed
def datetime_to_ns(value: dt.datetime) -> int:
utc_value = value.astimezone(dt.timezone.utc)
return int(utc_value.timestamp()) * 1_000_000_000 + utc_value.microsecond * 1_000
def parse_timestamp_to_ns(value: str, timezone_name: str) -> int:
stripped = value.strip()
if not stripped:
raise click.ClickException("timestamp value is empty")
digit_text = stripped.lstrip("+-")
if digit_text.isdigit():
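# Bare integers are interpreted by digit count: <=10 digits -> seconds,
# <=13 -> milliseconds, <=16 -> microseconds, otherwise nanoseconds.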
raw_value = int(stripped)
digits = len(digit_text)
if digits <= 10:
return raw_value * 1_000_000_000
if digits <= 13:
return raw_value * 1_000_000
if digits <= 16:
return raw_value * 1_000
return raw_value
normalized = normalize_timestamp_text(stripped)
if normalized.endswith("Z"):
normalized = normalized[:-1] + "+00:00"
try:
parsed = dt.datetime.fromisoformat(normalized)
except ValueError as exc:
raise click.ClickException(f"invalid timestamp '{value}': {exc}") from exc
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=resolve_timezone(timezone_name))
return datetime_to_ns(parsed)
def parse_timestamp_window(value: str, timezone_name: str) -> tuple[int, int]:
stripped = value.strip()
if not stripped:
raise click.ClickException("timestamp value is empty")
digit_text = stripped.lstrip("+-")
if digit_text.isdigit():
base_ns = parse_timestamp_to_ns(stripped, timezone_name)
digits = len(digit_text)
if digits <= 10:
precision_ns = 1_000_000_000
elif digits <= 13:
precision_ns = 1_000_000
elif digits <= 16:
precision_ns = 1_000
else:
precision_ns = 1
return base_ns, base_ns + precision_ns - 1
normalized = normalize_timestamp_text(stripped)
base_ns = parse_timestamp_to_ns(stripped, timezone_name)
fraction_match = re.search(r"\.(\d+)", normalized)
if fraction_match is None:
precision_ns = 1_000_000_000
else:
digits = min(len(fraction_match.group(1)), 9)
precision_ns = 10 ** (9 - digits)
return base_ns, base_ns + precision_ns - 1
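# Window examples: --at "2026-04-13T07:50:41" expands to that whole second,
# while a 13-digit epoch value such as 1713167530123 covers just that millisecond.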
def probe_mcap_bounds(bounds_bin: Path, mcap_path: Path) -> dict[str, Any]:
result = subprocess.run(
[str(bounds_bin), str(mcap_path), "--json"],
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
if result.returncode != 0:
stderr = result.stderr.strip() or result.stdout.strip() or f"exit {result.returncode}"
raise RuntimeError(f"{mcap_path}: {stderr}")
try:
return json.loads(result.stdout)
except json.JSONDecodeError as exc:
raise RuntimeError(f"{mcap_path}: failed to parse helper JSON: {exc}") from exc
def build_row(dataset_root: Path, scan: SegmentScan, bounds_bin: Path) -> BoundsRow | None:
mcap_path = find_unique_mcap(scan.segment_dir)
if mcap_path is None:
return None
bounds = probe_mcap_bounds(bounds_bin, mcap_path)
relative_segment_dir = scan.segment_dir.relative_to(dataset_root).as_posix()
parent = Path(relative_segment_dir).parent
group_path = "" if str(parent) == "." else parent.as_posix()
parts = Path(relative_segment_dir).parts
activity = parts[0] if parts else scan.segment_dir.name
start_ns = int(bounds["start_ns"])
end_ns = int(bounds["end_ns"])
return BoundsRow(
segment_dir=scan.segment_dir,
relative_segment_dir=relative_segment_dir,
group_path=group_path,
activity=activity,
segment_name=scan.segment_dir.name,
mcap_path=mcap_path,
start_ns=start_ns,
end_ns=end_ns,
duration_ns=max(0, end_ns - start_ns),
start_iso_utc=str(bounds["start_iso_utc"]),
end_iso_utc=str(bounds["end_iso_utc"]),
camera_count=len(scan.camera_labels),
camera_labels=",".join(scan.camera_labels),
video_message_count=int(bounds["video_message_count"]),
index_source="mcap_video_bounds",
)
def init_db(conn: duckdb.DuckDBPyConnection) -> None:
conn.execute(
"""
CREATE TABLE meta (
key VARCHAR PRIMARY KEY,
value VARCHAR NOT NULL
);
"""
)
conn.execute(
"""
CREATE TABLE segments (
segment_dir VARCHAR PRIMARY KEY,
relative_segment_dir VARCHAR NOT NULL,
group_path VARCHAR NOT NULL,
activity VARCHAR NOT NULL,
segment_name VARCHAR NOT NULL,
mcap_path VARCHAR NOT NULL,
start_ns BIGINT NOT NULL,
end_ns BIGINT NOT NULL,
duration_ns BIGINT NOT NULL,
start_iso_utc VARCHAR NOT NULL,
end_iso_utc VARCHAR NOT NULL,
camera_count INTEGER NOT NULL,
camera_labels VARCHAR NOT NULL,
video_message_count BIGINT NOT NULL,
index_source VARCHAR NOT NULL
);
"""
)
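# The start_ns/end_ns indexes back the overlap predicate used by `query`:
# start_ns <= window_end AND end_ns >= window_start.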
conn.execute("CREATE INDEX segments_start_ns_idx ON segments(start_ns);")
conn.execute("CREATE INDEX segments_end_ns_idx ON segments(end_ns);")
def write_index(index_path: Path, dataset_root: Path, rows: list[BoundsRow]) -> None:
index_path.parent.mkdir(parents=True, exist_ok=True)
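# Reserve a unique sibling filename, then delete it so DuckDB can create a
# fresh database there; the final Path.replace() publishes the index atomically.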
with tempfile.NamedTemporaryFile(prefix=f"{index_path.name}.", suffix=".tmp", dir=index_path.parent, delete=False) as handle:
temp_path = Path(handle.name)
temp_path.unlink(missing_ok=True)
inferred_timezone = infer_dataset_timezone(rows)
try:
conn = duckdb.connect(str(temp_path))
try:
init_db(conn)
conn.executemany(
"INSERT INTO meta (key, value) VALUES (?, ?)",
[
("schema_version", INDEX_SCHEMA_VERSION),
("dataset_root", str(dataset_root)),
("built_at_utc", dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")),
("default_timezone", inferred_timezone),
],
)
conn.executemany(
"""
INSERT INTO segments (
segment_dir,
relative_segment_dir,
group_path,
activity,
segment_name,
mcap_path,
start_ns,
end_ns,
duration_ns,
start_iso_utc,
end_iso_utc,
camera_count,
camera_labels,
video_message_count,
index_source
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
[
(
str(row.segment_dir),
row.relative_segment_dir,
row.group_path,
row.activity,
row.segment_name,
str(row.mcap_path),
row.start_ns,
row.end_ns,
row.duration_ns,
row.start_iso_utc,
row.end_iso_utc,
row.camera_count,
row.camera_labels,
row.video_message_count,
row.index_source,
)
for row in rows
],
)
finally:
conn.close()
temp_path.replace(index_path)
except Exception:
temp_path.unlink(missing_ok=True)
raise
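# infer_dataset_timezone compares naive folder-name timestamps with the MCAP
# UTC start times and returns the most common fixed offset (or "local" when
# no folder name parses as a timestamp).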
def infer_dataset_timezone(rows: list[BoundsRow]) -> str:
offset_counts: dict[int, int] = {}
for row in rows:
folder_time = parse_folder_name_naive(row.segment_name)
if folder_time is None:
continue
actual_utc = dt.datetime.fromtimestamp(row.start_ns / 1_000_000_000, tz=dt.timezone.utc).replace(tzinfo=None)
offset_minutes = round((folder_time - actual_utc).total_seconds() / 60.0)
offset_counts[offset_minutes] = offset_counts.get(offset_minutes, 0) + 1
if not offset_counts:
return "local"
minutes = max(offset_counts.items(), key=lambda item: item[1])[0]
if minutes == 0:
return "UTC"
sign = "+" if minutes >= 0 else "-"
absolute_minutes = abs(minutes)
hours, mins = divmod(absolute_minutes, 60)
return f"UTC{sign}{hours:02d}:{mins:02d}"
def require_query_window(at: str | None, start: str | None, end: str | None, timezone_name: str) -> tuple[int, int]:
if at is not None and (start is not None or end is not None):
raise click.ClickException("use either --at or --start/--end, not both")
if at is not None:
return parse_timestamp_window(at, timezone_name)
if start is None or end is None:
raise click.ClickException("provide --at or both --start and --end")
start_ns = parse_timestamp_to_ns(start, timezone_name)
end_ns = parse_timestamp_to_ns(end, timezone_name)
if start_ns > end_ns:
raise click.ClickException("query start must be before or equal to query end")
return start_ns, end_ns
def load_meta(conn: duckdb.DuckDBPyConnection) -> dict[str, str]:
rows = conn.execute("SELECT key, value FROM meta").fetchall()
return {str(key): str(value) for key, value in rows}
def format_duration(duration_ns: int) -> str:
return f"{duration_ns / 1_000_000_000:.3f}s"
@click.group()
def cli() -> None:
"""Build and query a DuckDB index of bundled ZED segment timestamps."""
@cli.command()
@click.argument("dataset_root", type=click.Path(path_type=Path, file_okay=False))
@click.option("--index", "index_path", type=click.Path(path_type=Path, dir_okay=False))
@click.option("--recursive/--no-recursive", default=True, show_default=True)
@click.option("--jobs", type=click.IntRange(min=1), default=min(8, os.cpu_count() or 1), show_default=True)
@click.option("--bounds-bin", type=click.Path(path_type=Path, dir_okay=False))
def build(dataset_root: Path, index_path: Path | None, recursive: bool, jobs: int, bounds_bin: Path | None) -> None:
"""Build or replace the embedded DuckDB time index for DATASET_ROOT."""
dataset_root = dataset_root.expanduser().resolve()
index_path = (index_path or default_index_path(dataset_root)).expanduser().resolve()
bounds_binary = locate_binary("mcap_video_bounds", bounds_bin)
valid_scans, ignored_partial_scans = discover_segment_dirs(dataset_root, recursive)
click.echo(
f"discovered {len(valid_scans)} valid segment directories under {dataset_root}",
err=True,
)
if ignored_partial_scans:
click.echo(f"ignored {len(ignored_partial_scans)} partial segment directories", err=True)
rows: list[BoundsRow] = []
skipped_missing_mcap: list[Path] = []
errors: list[str] = []
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
future_to_scan: dict[concurrent.futures.Future[BoundsRow | None], SegmentScan] = {
executor.submit(build_row, dataset_root, scan, bounds_binary): scan for scan in valid_scans
}
for future in concurrent.futures.as_completed(future_to_scan):
scan = future_to_scan[future]
try:
row = future.result()
except Exception as exc:
errors.append(f"{scan.segment_dir}: {exc}")
continue
if row is None:
skipped_missing_mcap.append(scan.segment_dir)
continue
rows.append(row)
rows.sort(key=lambda row: (row.start_ns, row.segment_dir.as_posix()))
if skipped_missing_mcap:
click.echo(f"skipped {len(skipped_missing_mcap)} segments with missing or ambiguous MCAP files", err=True)
if errors:
for error in errors:
click.echo(f"error: {error}", err=True)
raise click.ClickException(f"failed to probe {len(errors)} segment(s)")
if not rows:
raise click.ClickException("no indexable MCAP segments were found")
write_index(index_path, dataset_root, rows)
click.echo(
f"wrote {len(rows)} segments to {index_path} (skipped_missing_mcap={len(skipped_missing_mcap)})",
err=True,
)
@cli.command()
@click.argument("dataset_root", type=click.Path(path_type=Path, file_okay=False))
@click.option("--index", "index_path", type=click.Path(path_type=Path, dir_okay=False))
@click.option("--at")
@click.option("--start")
@click.option("--end")
@click.option("--json", "as_json", is_flag=True)
@click.option("--timezone", "timezone_name", default="dataset", show_default=True)
def query(
dataset_root: Path,
index_path: Path | None,
at: str | None,
start: str | None,
end: str | None,
as_json: bool,
timezone_name: str,
) -> None:
"""Query the embedded time index for matching segment folders."""
dataset_root = dataset_root.expanduser().resolve()
index_path = (index_path or default_index_path(dataset_root)).expanduser().resolve()
if not index_path.is_file():
raise click.ClickException(f"index not found: {index_path}")
conn = duckdb.connect(str(index_path), read_only=True)
try:
meta = load_meta(conn)
indexed_root = Path(meta.get("dataset_root", "")).expanduser().resolve()
if indexed_root != dataset_root:
raise click.ClickException(
f"index root mismatch: index was built for {indexed_root}, not {dataset_root}"
)
effective_timezone_name = meta.get("default_timezone", "local") if timezone_name == "dataset" else timezone_name
query_start_ns, query_end_ns = require_query_window(at, start, end, effective_timezone_name)
display_timezone = resolve_timezone(effective_timezone_name)
result_rows = conn.execute(
"""
SELECT
segment_dir,
relative_segment_dir,
group_path,
activity,
segment_name,
mcap_path,
start_ns,
end_ns,
duration_ns,
start_iso_utc,
end_iso_utc,
camera_count,
camera_labels,
video_message_count,
index_source
FROM segments
WHERE start_ns <= ? AND end_ns >= ?
ORDER BY start_ns, segment_dir
""",
[query_end_ns, query_start_ns],
).fetchall()
finally:
conn.close()
payload = [
{
"segment_dir": row[0],
"relative_segment_dir": row[1],
"group_path": row[2],
"activity": row[3],
"segment_name": row[4],
"mcap_path": row[5],
"start_ns": row[6],
"end_ns": row[7],
"duration_ns": row[8],
"start_iso_utc": row[9],
"end_iso_utc": row[10],
"camera_count": row[11],
"camera_labels": row[12].split(",") if row[12] else [],
"video_message_count": row[13],
"index_source": row[14],
"start_display": format_ns_iso(row[6], display_timezone),
"end_display": format_ns_iso(row[7], display_timezone),
}
for row in result_rows
]
if as_json:
click.echo(json.dumps(payload, indent=2, ensure_ascii=False))
return
if not payload:
click.echo("no matching segments")
return
click.echo(f"matched {len(payload)} segment(s)")
for row in payload:
click.echo(
" | ".join(
(
row["start_display"],
row["end_display"],
format_duration(int(row["duration_ns"])),
row["segment_dir"],
row["mcap_path"],
)
)
)
if __name__ == "__main__":
cli()
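# Example usage (script name hypothetical; subcommands and options as defined above):
#   python segment_time_index.py build /data/dataset --jobs 8
#   python segment_time_index.py query /data/dataset --at "2026-04-13T07:50:41" --json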
-219
View File
@@ -1,219 +0,0 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <foxglove/CompressedVideo.pb.h>
#include <mcap/reader.hpp>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
namespace {
enum class ToolExitCode : int {
Success = 0,
UsageError = 2,
OpenError = 3,
SchemaError = 4,
ParseError = 5,
EmptyError = 6,
};
struct Config {
std::string input_path{};
bool json{false};
};
struct BoundsSummary {
std::uint64_t start_ns{std::numeric_limits<std::uint64_t>::max()};
std::uint64_t end_ns{0};
std::uint64_t message_count{0};
};
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
return static_cast<int>(code);
}
[[nodiscard]]
std::uint64_t proto_timestamp_ns(const google::protobuf::Timestamp &timestamp) {
return static_cast<std::uint64_t>(timestamp.seconds()) * 1000000000ull + static_cast<std::uint64_t>(timestamp.nanos());
}
[[nodiscard]]
std::string json_escape(const std::string &input) {
std::ostringstream output;
for (const unsigned char ch : input) {
switch (ch) {
case '\\':
output << "\\\\";
break;
case '"':
output << "\\\"";
break;
case '\b':
output << "\\b";
break;
case '\f':
output << "\\f";
break;
case '\n':
output << "\\n";
break;
case '\r':
output << "\\r";
break;
case '\t':
output << "\\t";
break;
default:
if (ch < 0x20) {
output << "\\u" << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(ch) << std::dec;
} else {
output << static_cast<char>(ch);
}
break;
}
}
return output.str();
}
[[nodiscard]]
std::string format_iso_utc(const std::uint64_t timestamp_ns) {
const auto seconds = static_cast<std::time_t>(timestamp_ns / 1000000000ull);
const auto nanos = timestamp_ns % 1000000000ull;
std::tm tm{};
#if defined(_WIN32)
gmtime_s(&tm, &seconds);
#else
gmtime_r(&seconds, &tm);
#endif
std::ostringstream output;
output << std::put_time(&tm, "%Y-%m-%dT%H:%M:%S") << '.' << std::setw(9) << std::setfill('0') << nanos << 'Z';
return output.str();
}
[[nodiscard]]
bool is_video_message(const auto &view) {
if (view.channel == nullptr || view.schema == nullptr) {
return false;
}
return view.schema->encoding == "protobuf" &&
view.schema->name == "foxglove.CompressedVideo" &&
view.channel->messageEncoding == "protobuf";
}
[[nodiscard]]
BoundsSummary collect_bounds(const Config &config, ToolExitCode &error_code) {
mcap::McapReader reader{};
const auto open_status = reader.open(config.input_path);
if (!open_status.ok()) {
spdlog::error("failed to open MCAP file '{}': {}", config.input_path, open_status.message);
error_code = ToolExitCode::OpenError;
return {};
}
BoundsSummary summary{};
auto messages = reader.readMessages();
for (auto it = messages.begin(); it != messages.end(); ++it) {
if (it->channel == nullptr) {
spdlog::error("MCAP message missing channel metadata");
reader.close();
error_code = ToolExitCode::SchemaError;
return {};
}
if (it->schema == nullptr) {
continue;
}
if (!is_video_message(*it)) {
continue;
}
foxglove::CompressedVideo message{};
if (!message.ParseFromArray(it->message.data, static_cast<int>(it->message.dataSize))) {
spdlog::error("failed to parse foxglove.CompressedVideo payload from '{}'", config.input_path);
reader.close();
error_code = ToolExitCode::ParseError;
return {};
}
auto timestamp_ns = proto_timestamp_ns(message.timestamp());
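// A zero protobuf timestamp means "unset"; fall back to the MCAP log time.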
if (timestamp_ns == 0) {
timestamp_ns = it->message.logTime;
}
summary.start_ns = std::min(summary.start_ns, timestamp_ns);
summary.end_ns = std::max(summary.end_ns, timestamp_ns);
summary.message_count += 1;
}
reader.close();
if (summary.message_count == 0) {
spdlog::error("no foxglove.CompressedVideo messages found in '{}'", config.input_path);
error_code = ToolExitCode::EmptyError;
return {};
}
error_code = ToolExitCode::Success;
return summary;
}
void print_json(const Config &config, const BoundsSummary &summary) {
std::cout
<< '{'
<< "\"input_path\":\"" << json_escape(config.input_path) << "\","
<< "\"start_ns\":" << summary.start_ns << ','
<< "\"end_ns\":" << summary.end_ns << ','
<< "\"duration_ns\":" << (summary.end_ns - summary.start_ns) << ','
<< "\"video_message_count\":" << summary.message_count << ','
<< "\"start_iso_utc\":\"" << format_iso_utc(summary.start_ns) << "\","
<< "\"end_iso_utc\":\"" << format_iso_utc(summary.end_ns) << "\""
<< "}\n";
}
void print_text(const Config &config, const BoundsSummary &summary) {
std::cout
<< config.input_path << '\t'
<< summary.start_ns << '\t'
<< summary.end_ns << '\t'
<< summary.message_count << '\t'
<< format_iso_utc(summary.start_ns) << '\t'
<< format_iso_utc(summary.end_ns)
<< '\n';
}
} // namespace
int main(int argc, char **argv) {
Config config{};
CLI::App app{"mcap_video_bounds - emit bundled video timestamp bounds from an MCAP"};
app.add_option("input", config.input_path, "Input MCAP path")->required();
app.add_flag("--json", config.json, "Emit a JSON object instead of tab-separated text");
try {
app.parse(argc, argv);
} catch (const CLI::ParseError &e) {
return app.exit(e);
}
auto error_code = ToolExitCode::Success;
const auto summary = collect_bounds(config, error_code);
if (error_code != ToolExitCode::Success) {
return exit_code(error_code);
}
if (config.json) {
print_json(config, summary);
} else {
print_text(config, summary);
}
return exit_code(ToolExitCode::Success);
}
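// Example invocations (path illustrative):
//   mcap_video_bounds recording.mcap
//   mcap_video_bounds recording.mcap --json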
-178
View File
@@ -1,178 +0,0 @@
#include "cvmmap_streamer/tools/zed_progress_bar.hpp"
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <string>
#include <unistd.h>
namespace cvmmap_streamer::zed_tools {
namespace {
[[nodiscard]]
std::string format_duration(const double seconds_raw) {
const auto seconds = seconds_raw > 0.0 ? static_cast<long long>(std::llround(seconds_raw)) : 0ll;
const auto hours = seconds / 3600;
const auto minutes = (seconds % 3600) / 60;
const auto secs = seconds % 60;
char buffer[32]{};
if (hours > 0) {
std::snprintf(buffer, sizeof(buffer), "%02lld:%02lld:%02lld", hours, minutes, secs);
} else {
std::snprintf(buffer, sizeof(buffer), "%02lld:%02lld", minutes, secs);
}
return std::string(buffer);
}
} // namespace
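// Only draw the progress bar when stderr is a TTY so redirected logs stay clean.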
bool stderr_supports_progress_bar() {
return ::isatty(STDERR_FILENO) == 1;
}
struct ProgressBar::Impl {
using Clock = std::chrono::steady_clock;
explicit Impl(const std::uint64_t total_frames_arg)
: total_frames(total_frames_arg),
enabled(stderr_supports_progress_bar()),
started_at(Clock::now()),
last_render_at(started_at) {}
void render_prefix(const double ratio, const Clock::time_point now, char *line, const std::size_t line_size) {
const auto filled = static_cast<std::size_t>(std::llround(ratio * 24.0));
std::string bar{};
bar.reserve(24);
for (std::size_t index = 0; index < 24; ++index) {
bar.push_back(index < filled ? '#' : '-');
}
const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
const auto eta_seconds = ratio > 0.0 ? elapsed_seconds * (1.0 - ratio) / ratio : 0.0;
std::snprintf(
line,
line_size,
"\r[%s] %6.2f%% | %s elapsed | %s ETA",
bar.c_str(),
ratio * 100.0,
format_duration(elapsed_seconds).c_str(),
format_duration(eta_seconds).c_str());
}
void render(const std::uint64_t completed_frames, const bool force) {
if (!enabled || total_frames == 0) {
return;
}
const auto now = Clock::now();
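// Throttle redraws to one every 125 ms unless the caller forces a render.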
if (!force && rendered && now - last_render_at < std::chrono::milliseconds(125)) {
return;
}
last_render_at = now;
rendered = true;
const auto bounded_completed = completed_frames > total_frames ? total_frames : completed_frames;
const double ratio = static_cast<double>(bounded_completed) / static_cast<double>(total_frames);
const auto elapsed_seconds = std::chrono::duration<double>(now - started_at).count();
const auto fps = elapsed_seconds > 0.0 ? static_cast<double>(bounded_completed) / elapsed_seconds : 0.0;
char line[256]{};
render_prefix(ratio, now, line, sizeof(line));
const auto written = std::char_traits<char>::length(line);
std::snprintf(
line + written,
sizeof(line) - written,
" | %llu/%llu | %5.1f fps\x1b[K",
static_cast<unsigned long long>(bounded_completed),
static_cast<unsigned long long>(total_frames),
fps);
std::fprintf(stderr, "%s", line);
std::fflush(stderr);
}
void render_fraction(const double fraction, const std::string_view detail, const bool force) {
if (!enabled) {
return;
}
const auto now = Clock::now();
if (!force && rendered && now - last_render_at < std::chrono::milliseconds(125)) {
return;
}
last_render_at = now;
rendered = true;
const double bounded_fraction = std::clamp(fraction, 0.0, 1.0);
char line[256]{};
render_prefix(bounded_fraction, now, line, sizeof(line));
if (!detail.empty()) {
const auto written = std::char_traits<char>::length(line);
std::snprintf(line + written, sizeof(line) - written, " | %.*s\x1b[K", static_cast<int>(detail.size()), detail.data());
} else {
const auto written = std::char_traits<char>::length(line);
std::snprintf(line + written, sizeof(line) - written, "\x1b[K");
}
std::fprintf(stderr, "%s", line);
std::fflush(stderr);
}
std::uint64_t total_frames{0};
bool enabled{false};
bool rendered{false};
Clock::time_point started_at{};
Clock::time_point last_render_at{};
};
ProgressBar::ProgressBar(const std::uint64_t total_frames)
: impl_(std::make_unique<Impl>(total_frames)) {}
ProgressBar::~ProgressBar() = default;
bool ProgressBar::enabled() const {
return impl_ != nullptr && impl_->enabled;
}
void ProgressBar::update(const std::uint64_t completed_frames) {
impl_->render(completed_frames, false);
}
void ProgressBar::update_fraction(const double fraction, const std::string_view detail) {
impl_->render_fraction(fraction, detail, false);
}
void ProgressBar::finish(const std::uint64_t completed_frames, const bool success) {
if (impl_ == nullptr || !impl_->enabled) {
return;
}
if (!(success && impl_->rendered && completed_frames >= impl_->total_frames)) {
impl_->render(completed_frames, true);
if (!impl_->rendered) {
return;
}
}
std::fprintf(stderr, "%s", success ? "\n" : " [failed]\n");
std::fflush(stderr);
}
void ProgressBar::finish_fraction(const double fraction, const bool success, const std::string_view detail) {
if (impl_ == nullptr || !impl_->enabled) {
return;
}
if (!(success && impl_->rendered && fraction >= 1.0)) {
impl_->render_fraction(fraction, detail, true);
if (!impl_->rendered) {
return;
}
}
std::fprintf(stderr, "%s", success ? "\n" : " [failed]\n");
std::fflush(stderr);
}
} // namespace cvmmap_streamer::zed_tools
-728
View File
@@ -1,728 +0,0 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <sl/Camera.hpp>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include "cvmmap_streamer/tools/zed_progress_bar.hpp"
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <memory>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace {
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;
constexpr std::size_t kExpectedInputCount = 4;
enum class ToolExitCode : int {
Success = 0,
UsageError = 2,
RuntimeError = 3,
};
struct CliOptions {
std::vector<std::string> input_paths{};
std::string segment_dir{};
std::string output_path{};
std::string codec{"h265"};
std::string encoder_device{"auto"};
std::string preset{"fast"};
std::string tune{"low-latency"};
int quality{cvmmap_streamer::zed_tools::kDefaultQuality};
std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};
std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames};
double start_offset_seconds{0.0};
double duration_seconds{0.0};
bool has_duration{false};
double output_fps{0.0};
bool has_output_fps{false};
double tile_scale{0.5};
};
struct SourceSpec {
std::filesystem::path path{};
std::string label{};
};
struct CameraStream {
SourceSpec source{};
std::unique_ptr<sl::Camera> camera{};
sl::RuntimeParameters runtime{};
sl::Mat current_frame{};
sl::Mat next_frame{};
std::uint64_t current_timestamp_ns{0};
std::uint64_t next_timestamp_ns{0};
std::uint64_t first_timestamp_ns{0};
std::uint64_t last_timestamp_ns{0};
std::uint64_t total_frames{0};
std::uint64_t nominal_frame_period_ns{0};
float fps{0.0f};
std::uint32_t width{0};
std::uint32_t height{0};
int sync_position{-1};
bool has_next{false};
};
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
return static_cast<int>(code);
}
[[nodiscard]]
std::string zed_string(const sl::String &value) {
return std::string(value.c_str() == nullptr ? "" : value.c_str());
}
[[nodiscard]]
std::string zed_status_string(const sl::ERROR_CODE code) {
return zed_string(sl::toString(code));
}
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
return std::unexpected(std::string(label) + " must be U8_C3");
}
if (mat.getWidth() == 0 || mat.getHeight() == 0) {
return std::unexpected(std::string(label) + " dimensions must be non-zero");
}
if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
return std::unexpected(std::string(label) + " CPU buffer is null");
}
return {};
}
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> discover_segment_inputs(const std::filesystem::path &segment_dir) {
if (!std::filesystem::is_directory(segment_dir)) {
return std::unexpected("segment directory does not exist: " + segment_dir.string());
}
const std::regex pattern{R"(.*_zed([1-4])\.svo2?$)", std::regex::icase};
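// Accept files named like "<prefix>_zed1.svo" through "<prefix>_zed4.svo2",
// case-insensitively; the captured digit determines the source order.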
std::vector<std::pair<int, std::filesystem::path>> ordered_paths{};
for (const auto &entry : std::filesystem::directory_iterator{segment_dir}) {
if (!entry.is_regular_file()) {
continue;
}
std::smatch match{};
const auto filename = entry.path().filename().string();
if (!std::regex_match(filename, match, pattern)) {
continue;
}
ordered_paths.emplace_back(std::stoi(match[1].str()), entry.path());
}
std::sort(
ordered_paths.begin(),
ordered_paths.end(),
[](const auto &left, const auto &right) {
return left.first < right.first;
});
if (ordered_paths.size() != kExpectedInputCount) {
return std::unexpected(
"expected exactly 4 SVO inputs under '" + segment_dir.string() + "', found " + std::to_string(ordered_paths.size()));
}
std::vector<SourceSpec> sources{};
sources.reserve(ordered_paths.size());
for (const auto &[camera_index, path] : ordered_paths) {
sources.push_back(SourceSpec{
.path = path,
.label = "zed" + std::to_string(camera_index),
});
}
return sources;
}
[[nodiscard]]
std::expected<std::vector<SourceSpec>, std::string> resolve_sources(const CliOptions &options) {
if (!options.segment_dir.empty()) {
return discover_segment_inputs(std::filesystem::path{options.segment_dir});
}
if (options.input_paths.size() != kExpectedInputCount) {
return std::unexpected("repeat --input exactly 4 times");
}
std::vector<SourceSpec> sources{};
sources.reserve(options.input_paths.size());
for (std::size_t index = 0; index < options.input_paths.size(); ++index) {
const auto path = std::filesystem::path{options.input_paths[index]};
if (!std::filesystem::is_regular_file(path)) {
return std::unexpected("input file does not exist: " + path.string());
}
sources.push_back(SourceSpec{
.path = path,
.label = "view" + std::to_string(index + 1),
});
}
return sources;
}
[[nodiscard]]
std::filesystem::path derive_grid_output_path(const CliOptions &options, const std::vector<SourceSpec> &sources) {
if (!options.output_path.empty()) {
return std::filesystem::path{options.output_path};
}
if (!options.segment_dir.empty()) {
const auto segment_dir = std::filesystem::path{options.segment_dir};
return segment_dir / (segment_dir.filename().string() + "_grid.mp4");
}
auto output_path = sources.front().path;
output_path.replace_extension("");
output_path += "_grid.mp4";
return output_path;
}
[[nodiscard]]
std::string format_unix_timestamp(const std::uint64_t timestamp_ns) {
const auto seconds = timestamp_ns / cvmmap_streamer::zed_tools::kNanosPerSecond;
const auto milliseconds = (timestamp_ns % cvmmap_streamer::zed_tools::kNanosPerSecond) / 1'000'000ull;
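// Zero-pad the millisecond component to three digits, e.g. 1713167530.007.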
return std::to_string(seconds) + "." + (milliseconds < 100 ? (milliseconds < 10 ? "00" : "0") : "") + std::to_string(milliseconds);
}
void draw_timestamp_overlay(cv::Mat &canvas, const std::uint64_t timestamp_ns) {
const auto text = format_unix_timestamp(timestamp_ns);
int baseline = 0;
const auto font_face = cv::FONT_HERSHEY_SIMPLEX;
const double font_scale = 0.8;
const int thickness = 2;
const auto text_size = cv::getTextSize(text, font_face, font_scale, thickness, &baseline);
const cv::Point origin{16, 16 + text_size.height};
const cv::Rect background{
8,
8,
text_size.width + 16,
text_size.height + baseline + 16,
};
cv::rectangle(canvas, background, cv::Scalar(0, 0, 0), cv::FILLED);
cv::putText(
canvas,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness,
cv::LINE_AA);
}
[[nodiscard]]
std::expected<std::uint64_t, std::string> read_image_timestamp_ns(
sl::Camera &camera,
const std::optional<std::uint64_t> fallback_timestamp_ns,
const std::uint64_t nominal_frame_period_ns) {
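// The SDK can report a zero image timestamp mid-stream; synthesize one from
// the previous frame plus the nominal frame period instead of failing.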
auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
if (timestamp_ns == 0) {
if (!fallback_timestamp_ns) {
return std::unexpected("ZED SDK returned a zero image timestamp for the first frame");
}
timestamp_ns = *fallback_timestamp_ns + nominal_frame_period_ns;
}
return timestamp_ns;
}
[[nodiscard]]
std::expected<void, std::string> read_into_mat(
sl::Camera &camera,
sl::RuntimeParameters &runtime,
sl::Mat &target,
std::optional<std::uint64_t> fallback_timestamp_ns,
std::uint64_t nominal_frame_period_ns,
std::uint64_t &timestamp_ns_out,
const std::string_view label) {
const auto grab_status = camera.grab(runtime);
if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
return std::unexpected("end-of-svo");
}
if (grab_status != sl::ERROR_CODE::SUCCESS) {
return std::unexpected("failed to grab frame for " + std::string(label) + ": " + zed_status_string(grab_status));
}
const auto image_status = camera.retrieveImage(target, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
if (image_status != sl::ERROR_CODE::SUCCESS) {
return std::unexpected("failed to retrieve left image for " + std::string(label) + ": " + zed_status_string(image_status));
}
if (auto valid = validate_u8c3_mat(target, label); !valid) {
return std::unexpected(valid.error());
}
auto timestamp_ns = read_image_timestamp_ns(camera, fallback_timestamp_ns, nominal_frame_period_ns);
if (!timestamp_ns) {
return std::unexpected(timestamp_ns.error());
}
timestamp_ns_out = *timestamp_ns;
return {};
}
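// Buffers one frame of lookahead into stream.next_frame; reaching end-of-svo simply clears has_next.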
[[nodiscard]]
std::expected<void, std::string> fill_next_frame(CameraStream &stream) {
std::uint64_t timestamp_ns = 0;
auto next = read_into_mat(
*stream.camera,
stream.runtime,
stream.next_frame,
stream.current_timestamp_ns,
stream.nominal_frame_period_ns,
timestamp_ns,
stream.source.label);
if (!next) {
if (next.error() == "end-of-svo") {
stream.has_next = false;
return {};
}
return std::unexpected(next.error());
}
stream.next_timestamp_ns = timestamp_ns;
stream.has_next = true;
return {};
}
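// Promotes the buffered lookahead frame to current (via swap) and immediately refills the lookahead.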
[[nodiscard]]
std::expected<void, std::string> promote_next_frame(CameraStream &stream) {
if (!stream.has_next) {
return std::unexpected("no buffered next frame is available for " + stream.source.label);
}
std::swap(stream.current_frame, stream.next_frame);
std::swap(stream.current_timestamp_ns, stream.next_timestamp_ns);
stream.has_next = false;
return fill_next_frame(stream);
}
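// Walks backwards from the final SVO position until a frame decodes, warning about any unreadable tail frames that were skipped.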
[[nodiscard]]
std::expected<std::uint64_t, std::string> read_last_readable_timestamp(CameraStream &stream) {
const auto last_candidate = static_cast<int>(stream.total_frames - 1);
std::string last_error{};
for (int position = last_candidate; position >= 0; --position) {
stream.camera->setSVOPosition(position);
std::uint64_t timestamp_ns = 0;
auto frame = read_into_mat(
*stream.camera,
stream.runtime,
stream.current_frame,
std::nullopt,
stream.nominal_frame_period_ns,
timestamp_ns,
stream.source.label);
if (frame) {
const auto skipped_tail_frames = static_cast<std::uint64_t>(last_candidate - position);
if (skipped_tail_frames > 0) {
spdlog::warn(
"skipping {} unreadable tail frame(s) for {} last_error={}",
skipped_tail_frames,
stream.source.path.string(),
last_error);
}
return timestamp_ns;
}
last_error = frame.error();
}
return std::unexpected(
"failed to read any trailing frame for " + stream.source.path.string() + ": " + last_error);
}
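// Opens an SVO with depth disabled, records resolution/fps/frame count, and reads the first and last readable timestamps to establish the sync bounds.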
[[nodiscard]]
std::expected<CameraStream, std::string> open_camera_stream(const SourceSpec &source) {
CameraStream stream{};
stream.source = source;
stream.camera = std::make_unique<sl::Camera>();
sl::InitParameters init{};
init.input.setFromSVOFile(source.path.c_str());
init.svo_real_time_mode = false;
init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
init.coordinate_units = sl::UNIT::METER;
init.depth_mode = sl::DEPTH_MODE::NONE;
init.sdk_verbose = false;
const auto open_status = stream.camera->open(init);
if (open_status != sl::ERROR_CODE::SUCCESS) {
return std::unexpected("failed to open SVO '" + source.path.string() + "': " + zed_status_string(open_status));
}
const auto total_frames = stream.camera->getSVONumberOfFrames();
if (total_frames <= 0) {
return std::unexpected("input SVO has no frames: " + source.path.string());
}
stream.total_frames = static_cast<std::uint64_t>(total_frames);
const auto camera_info = stream.camera->getCameraInformation().camera_configuration;
stream.width = static_cast<std::uint32_t>(camera_info.resolution.width);
stream.height = static_cast<std::uint32_t>(camera_info.resolution.height);
stream.fps = camera_info.fps;
stream.nominal_frame_period_ns = frame_period_ns(camera_info.fps);
if (stream.width == 0 || stream.height == 0) {
return std::unexpected("camera resolution reported by the ZED SDK is invalid for " + source.path.string());
}
std::uint64_t first_timestamp_ns = 0;
auto first_frame = read_into_mat(
*stream.camera,
stream.runtime,
stream.current_frame,
std::nullopt,
stream.nominal_frame_period_ns,
first_timestamp_ns,
source.label);
if (!first_frame) {
return std::unexpected(first_frame.error());
}
stream.first_timestamp_ns = first_timestamp_ns;
auto last_timestamp_ns = read_last_readable_timestamp(stream);
if (!last_timestamp_ns) {
return std::unexpected(last_timestamp_ns.error());
}
stream.last_timestamp_ns = *last_timestamp_ns;
return stream;
}
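// Closes every camera that is still open; safe to call on a partially opened stream list.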
void close_camera_streams(std::vector<CameraStream> &streams) {
for (auto &stream : streams) {
if (stream.camera != nullptr && stream.camera->isOpened()) {
stream.camera->close();
}
}
}
} // namespace
int main(int argc, char **argv) {
CliOptions options{};
CLI::App app{"zed_svo_grid_to_mp4 - merge four synced ZED SVO/SVO2 inputs into a CCTV-style grid MP4"};
auto *input_option = app.add_option("--input", options.input_paths, "Input SVO/SVO2 file in row-major order (repeat exactly 4 times)");
auto *segment_dir_option = app.add_option("--segment-dir", options.segment_dir, "Segment directory containing *_zed[1-4].svo or *_zed[1-4].svo2 files");
input_option->excludes(segment_dir_option);
segment_dir_option->excludes(input_option);
app.add_option("--output", options.output_path, "Output MP4 file");
app.add_option("--codec", options.codec, "Video codec (h264|h265)")
->check(CLI::IsMember({"h264", "h265"}));
app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
->check(CLI::IsMember({"auto", "nvidia", "software"}));
app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
->check(CLI::IsMember({"fast", "balanced", "quality"}));
app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
->check(CLI::IsMember({"low-latency", "balanced"}));
app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
->check(CLI::Range(0, 51));
app.add_option("--gop", options.gop, "Encoder GOP length in frames")
->check(CLI::PositiveNumber);
app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
->check(CLI::NonNegativeNumber);
app.add_option("--start-offset-seconds", options.start_offset_seconds, "Offset to apply after the synced common start time in seconds")
->check(CLI::NonNegativeNumber);
auto *duration_option = app.add_option("--duration-seconds", options.duration_seconds, "Limit export duration in seconds after sync")
->check(CLI::PositiveNumber);
auto *output_fps_option = app.add_option("--output-fps", options.output_fps, "Composite output frame rate (default: max input fps)")
->check(CLI::PositiveNumber);
app.add_option("--tile-scale", options.tile_scale, "Scale each tile relative to the source resolution")
->check(CLI::Range(0.1, 1.0));
try {
app.parse(argc, argv);
} catch (const CLI::ParseError &error) {
return app.exit(error);
}
options.has_duration = duration_option->count() > 0;
options.has_output_fps = output_fps_option->count() > 0;
if (options.input_paths.empty() && options.segment_dir.empty()) {
spdlog::error("provide either --segment-dir or repeat --input exactly 4 times");
return exit_code(ToolExitCode::UsageError);
}
if (options.b_frames > options.gop) {
spdlog::error(
"invalid encoder config: b-frames {} must be <= gop {}",
options.b_frames,
options.gop);
return exit_code(ToolExitCode::UsageError);
}
auto codec = parse_codec(options.codec);
if (!codec) {
spdlog::error("{}", codec.error());
return exit_code(ToolExitCode::UsageError);
}
auto encoder_device = parse_encoder_device(options.encoder_device);
if (!encoder_device) {
spdlog::error("{}", encoder_device.error());
return exit_code(ToolExitCode::UsageError);
}
auto preset = parse_preset(options.preset);
if (!preset) {
spdlog::error("{}", preset.error());
return exit_code(ToolExitCode::UsageError);
}
auto tune = parse_tune(options.tune);
if (!tune) {
spdlog::error("{}", tune.error());
return exit_code(ToolExitCode::UsageError);
}
auto sources = resolve_sources(options);
if (!sources) {
spdlog::error("{}", sources.error());
return exit_code(ToolExitCode::UsageError);
}
const auto output_path = derive_grid_output_path(options, *sources);
if (output_path.has_parent_path()) {
std::filesystem::create_directories(output_path.parent_path());
}
const EncodeTuning tuning{
.preset = *preset,
.tune = *tune,
.quality = options.quality,
.gop = options.gop,
.b_frames = options.b_frames,
};
std::vector<CameraStream> streams{};
streams.reserve(sources->size());
for (const auto &source : *sources) {
auto stream = open_camera_stream(source);
if (!stream) {
close_camera_streams(streams);
spdlog::error("{}", stream.error());
return exit_code(ToolExitCode::RuntimeError);
}
streams.push_back(std::move(*stream));
}
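// Synced window: start at the latest first timestamp across inputs (plus --start-offset-seconds), end at the earliest last timestamp, optionally capped by --duration-seconds.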
const auto sync_start_ts = std::max_element(
streams.begin(),
streams.end(),
[](const auto &left, const auto &right) {
return left.first_timestamp_ns < right.first_timestamp_ns;
})->first_timestamp_ns;
const auto start_offset_ns = static_cast<std::uint64_t>(std::llround(options.start_offset_seconds * 1'000'000'000.0));
const auto effective_start_ts = sync_start_ts + start_offset_ns;
const auto common_end_ts = std::min_element(
streams.begin(),
streams.end(),
[](const auto &left, const auto &right) {
return left.last_timestamp_ns < right.last_timestamp_ns;
})->last_timestamp_ns;
const auto requested_end_exclusive_ts = options.has_duration
? effective_start_ts + static_cast<std::uint64_t>(std::llround(options.duration_seconds * 1'000'000'000.0))
: common_end_ts + 1;
const auto output_end_exclusive_ts = std::min(requested_end_exclusive_ts, common_end_ts + 1);
if (effective_start_ts >= output_end_exclusive_ts) {
close_camera_streams(streams);
spdlog::error(
"synced time window is empty: start_ts={} end_ts={}",
effective_start_ts,
output_end_exclusive_ts);
return exit_code(ToolExitCode::UsageError);
}
std::uint32_t source_width = streams.front().width;
std::uint32_t source_height = streams.front().height;
float max_input_fps = streams.front().fps;
for (const auto &stream : streams) {
if (stream.width != source_width || stream.height != source_height) {
close_camera_streams(streams);
spdlog::error(
"all inputs must share the same resolution: expected {}x{}, got {}x{} for {}",
source_width,
source_height,
stream.width,
stream.height,
stream.source.path.string());
return exit_code(ToolExitCode::UsageError);
}
max_input_fps = std::max(max_input_fps, stream.fps);
}
const auto output_fps = options.has_output_fps ? static_cast<float>(options.output_fps) : max_input_fps;
const auto output_period_ns = frame_period_ns(output_fps);
const auto total_frames_to_emit =
static_cast<std::uint64_t>((output_end_exclusive_ts - effective_start_ts + output_period_ns - 1) / output_period_ns);
for (auto &stream : streams) {
stream.sync_position = stream.camera->getSVOPositionAtTimestamp(sl::Timestamp{effective_start_ts});
if (stream.sync_position < 0) {
close_camera_streams(streams);
spdlog::error(
"failed to compute synced start frame for {} at timestamp {}",
stream.source.path.string(),
effective_start_ts);
return exit_code(ToolExitCode::RuntimeError);
}
stream.camera->setSVOPosition(stream.sync_position);
std::uint64_t current_timestamp_ns = 0;
auto current = read_into_mat(
*stream.camera,
stream.runtime,
stream.current_frame,
std::nullopt,
stream.nominal_frame_period_ns,
current_timestamp_ns,
stream.source.label);
if (!current) {
close_camera_streams(streams);
spdlog::error("{}", current.error());
return exit_code(ToolExitCode::RuntimeError);
}
stream.current_timestamp_ns = current_timestamp_ns;
auto next = fill_next_frame(stream);
if (!next) {
close_camera_streams(streams);
spdlog::error("{}", next.error());
return exit_code(ToolExitCode::RuntimeError);
}
while (stream.current_timestamp_ns < effective_start_ts && stream.has_next) {
auto promote = promote_next_frame(stream);
if (!promote) {
close_camera_streams(streams);
spdlog::error("{}", promote.error());
return exit_code(ToolExitCode::RuntimeError);
}
}
spdlog::info(
"ZED_SVO_GRID_SYNC input={} label={} sync_position={} first_timestamp_ns={} current_timestamp_ns={} next_timestamp_ns={}",
stream.source.path.string(),
stream.source.label,
stream.sync_position,
stream.first_timestamp_ns,
stream.current_timestamp_ns,
stream.has_next ? stream.next_timestamp_ns : 0);
}
const auto tile_width = static_cast<int>(std::llround(static_cast<double>(source_width) * options.tile_scale));
const auto tile_height = static_cast<int>(std::llround(static_cast<double>(source_height) * options.tile_scale));
if (tile_width <= 0 || tile_height <= 0) {
close_camera_streams(streams);
spdlog::error("tile-scale {} produced invalid tile dimensions", options.tile_scale);
return exit_code(ToolExitCode::UsageError);
}
const auto composite_width = tile_width * 2;
const auto composite_height = tile_height * 2;
Mp4Writer writer{};
if (auto open_writer = writer.open(
output_path,
*codec,
*encoder_device,
static_cast<std::uint32_t>(composite_width),
static_cast<std::uint32_t>(composite_height),
output_fps,
tuning);
!open_writer) {
close_camera_streams(streams);
spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
return exit_code(ToolExitCode::RuntimeError);
}
cv::Mat composite(composite_height, composite_width, CV_8UC3);
std::vector<cv::Mat> resized_tiles(streams.size());
ProgressBar progress{total_frames_to_emit};
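// Emit loop: for each output tick, advance every stream while its next buffered frame is at or before the target timestamp, then compose the 2x2 grid and encode.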
for (std::uint64_t emitted_frames = 0; emitted_frames < total_frames_to_emit; ++emitted_frames) {
const auto target_timestamp_ns = effective_start_ts + emitted_frames * output_period_ns;
if (target_timestamp_ns >= output_end_exclusive_ts) {
break;
}
for (auto &stream : streams) {
while (stream.has_next && stream.next_timestamp_ns <= target_timestamp_ns) {
auto promote = promote_next_frame(stream);
if (!promote) {
progress.finish(emitted_frames, false);
close_camera_streams(streams);
spdlog::error("{}", promote.error());
return exit_code(ToolExitCode::RuntimeError);
}
}
}
composite.setTo(cv::Scalar(0, 0, 0));
for (std::size_t index = 0; index < streams.size(); ++index) {
auto &stream = streams[index];
cv::Mat source_view(
static_cast<int>(stream.current_frame.getHeight()),
static_cast<int>(stream.current_frame.getWidth()),
CV_8UC3,
stream.current_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
stream.current_frame.getStepBytes(sl::MEM::CPU));
cv::resize(source_view, resized_tiles[index], cv::Size(tile_width, tile_height), 0.0, 0.0, cv::INTER_AREA);
const int row = static_cast<int>(index / 2);
const int col = static_cast<int>(index % 2);
const cv::Rect roi{col * tile_width, row * tile_height, tile_width, tile_height};
resized_tiles[index].copyTo(composite(roi));
}
draw_timestamp_overlay(composite, target_timestamp_ns);
if (auto write = writer.write_bgr_frame(
composite.data,
static_cast<std::size_t>(composite.step),
target_timestamp_ns - effective_start_ts);
!write) {
progress.finish(emitted_frames, false);
close_camera_streams(streams);
spdlog::error("failed to encode or mux frame: {}", write.error());
return exit_code(ToolExitCode::RuntimeError);
}
progress.update(emitted_frames + 1);
}
if (auto flush = writer.flush(); !flush) {
progress.finish(total_frames_to_emit, false);
close_camera_streams(streams);
spdlog::error("failed to finalize MP4 output: {}", flush.error());
return exit_code(ToolExitCode::RuntimeError);
}
progress.finish(total_frames_to_emit, true);
close_camera_streams(streams);
spdlog::info(
"converted {} synced frames to '{}' using codec={} hardware={}",
total_frames_to_emit,
output_path.string(),
cvmmap_streamer::zed_tools::codec_name(*codec),
writer.using_hardware());
return exit_code(ToolExitCode::Success);
}
-707
View File
@@ -1,707 +0,0 @@
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <spdlog/spdlog.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>
#include <libswscale/swscale.h>
}
#include <cmath>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace cvmmap_streamer::zed_tools {
namespace {
struct EncoderCandidate {
std::string name{};
bool using_hardware{false};
AVPixelFormat pixel_format{AV_PIX_FMT_NONE};
};
struct ResolvedEncoderSettings {
std::string requested_preset{};
std::string requested_tune{};
std::string mapped_preset{};
std::optional<std::string> mapped_tune{};
std::optional<std::string> rate_control_mode{};
std::string quality_key{};
int quality_value{kDefaultQuality};
std::uint32_t gop{kDefaultGopSize};
std::uint32_t b_frames{kDefaultBFrames};
};
[[nodiscard]]
std::string av_error_string(const int error_code) {
char buffer[AV_ERROR_MAX_STRING_SIZE]{};
av_strerror(error_code, buffer, sizeof(buffer));
return std::string(buffer);
}
[[nodiscard]]
AVCodecID codec_id(const CodecType codec) {
return codec == CodecType::H265 ? AV_CODEC_ID_HEVC : AV_CODEC_ID_H264;
}
[[nodiscard]]
AVRational frame_rate_rational(const float fps) {
if (!(fps > 0.0f)) {
return AVRational{30, 1};
}
const auto scaled = static_cast<int>(std::llround(static_cast<double>(fps) * 1000.0));
if (scaled <= 0) {
return AVRational{30, 1};
}
return AVRational{scaled, 1000};
}
[[nodiscard]]
std::uint64_t frame_period_ns(const AVRational frame_rate) {
if (frame_rate.num <= 0 || frame_rate.den <= 0) {
return 33'333'333ull;
}
const auto numerator =
static_cast<std::uint64_t>(frame_rate.den) * 1'000'000'000ull;
const auto denominator = static_cast<std::uint64_t>(frame_rate.num);
if (denominator == 0) {
return 33'333'333ull;
}
const auto interval = numerator / denominator;
return interval == 0 ? 1ull : interval;
}
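// Candidate order matters: NVENC is preferred and libx264/libx265 is the software fallback; auto mode tries both, explicit modes try exactly one.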
[[nodiscard]]
std::vector<EncoderCandidate> encoder_candidates(const CodecType codec, const EncoderDeviceType device) {
const std::string hardware_name = codec == CodecType::H265 ? "hevc_nvenc" : "h264_nvenc";
const std::string software_name = codec == CodecType::H265 ? "libx265" : "libx264";
switch (device) {
case EncoderDeviceType::Auto:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
case EncoderDeviceType::Nvidia:
return {
EncoderCandidate{.name = hardware_name, .using_hardware = true, .pixel_format = AV_PIX_FMT_NV12},
};
case EncoderDeviceType::Software:
return {
EncoderCandidate{.name = software_name, .using_hardware = false, .pixel_format = AV_PIX_FMT_YUV420P},
};
}
return {};
}
[[nodiscard]]
std::string mapped_preset_value(const EncoderCandidate &candidate, const PresetKind preset) {
if (candidate.using_hardware) {
switch (preset) {
case PresetKind::Fast:
return "p1";
case PresetKind::Balanced:
return "p4";
case PresetKind::Quality:
return "p7";
}
}
switch (preset) {
case PresetKind::Fast:
return "veryfast";
case PresetKind::Balanced:
return "medium";
case PresetKind::Quality:
return "slow";
}
return "veryfast";
}
[[nodiscard]]
std::optional<std::string> mapped_tune_value(const EncoderCandidate &candidate, const TuneKind tune) {
if (candidate.using_hardware) {
return tune == TuneKind::LowLatency ? std::optional<std::string>{"ull"} : std::optional<std::string>{"hq"};
}
if (candidate.name == "libx264" && tune == TuneKind::LowLatency) {
return std::optional<std::string>{"zerolatency"};
}
return std::nullopt;
}
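// libx265-only extra params: repeat-headers keeps parameter sets in-band with each keyframe; scenecut=0 keeps the GOP cadence fixed for the low-latency tune.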
[[nodiscard]]
std::optional<std::string> x265_params_value(const EncoderCandidate &candidate, const TuneKind tune) {
if (candidate.name != "libx265") {
return std::nullopt;
}
if (tune == TuneKind::LowLatency) {
return std::optional<std::string>{"repeat-headers=1:scenecut=0"};
}
return std::optional<std::string>{"repeat-headers=1"};
}
[[nodiscard]]
std::expected<void, std::string> set_string_option(AVCodecContext *context, const char *key, const std::string &value) {
const auto result = av_opt_set(context->priv_data, key, value.c_str(), 0);
if (result < 0) {
return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + value + "': " + av_error_string(result));
}
return {};
}
[[nodiscard]]
std::expected<void, std::string> set_int_option(AVCodecContext *context, const char *key, const std::int64_t value) {
const auto result = av_opt_set_int(context->priv_data, key, value, 0);
if (result < 0) {
return std::unexpected("failed to set encoder option '" + std::string(key) + "=" + std::to_string(value) + "': " + av_error_string(result));
}
return {};
}
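// Applies shared context settings (nanosecond time base, GOP, B-frames) and maps preset/tune per encoder: NVENC gets rc=vbr with cq (plus zerolatency for the low-latency tune), software encoders get crf.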
[[nodiscard]]
std::expected<ResolvedEncoderSettings, std::string> configure_codec_context(
AVCodecContext *context,
const EncoderCandidate &candidate,
const CodecType codec,
const std::uint32_t width,
const std::uint32_t height,
const AVRational framerate,
const EncodeTuning &tuning) {
context->codec_type = AVMEDIA_TYPE_VIDEO;
context->codec_id = codec_id(codec);
context->width = static_cast<int>(width);
context->height = static_cast<int>(height);
context->pix_fmt = candidate.pixel_format;
context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
context->time_base = AVRational{1, static_cast<int>(kNanosPerSecond)};
context->framerate = framerate;
context->gop_size = static_cast<int>(tuning.gop);
context->max_b_frames = static_cast<int>(tuning.b_frames);
context->thread_count = 1;
ResolvedEncoderSettings resolved{
.requested_preset = std::string(preset_name(tuning.preset)),
.requested_tune = std::string(tune_name(tuning.tune)),
.mapped_preset = mapped_preset_value(candidate, tuning.preset),
.mapped_tune = mapped_tune_value(candidate, tuning.tune),
.quality_value = tuning.quality,
.gop = tuning.gop,
.b_frames = tuning.b_frames,
};
if (auto set = set_string_option(context, "preset", resolved.mapped_preset); !set) {
return std::unexpected(set.error());
}
if (resolved.mapped_tune) {
if (auto set = set_string_option(context, "tune", *resolved.mapped_tune); !set) {
return std::unexpected(set.error());
}
}
if (candidate.using_hardware) {
resolved.rate_control_mode = "vbr";
resolved.quality_key = "cq";
if (auto set = set_string_option(context, "rc", *resolved.rate_control_mode); !set) {
return std::unexpected(set.error());
}
if (auto set = set_int_option(context, "cq", resolved.quality_value); !set) {
return std::unexpected(set.error());
}
if (tuning.tune == TuneKind::LowLatency) {
if (auto set = set_string_option(context, "zerolatency", "1"); !set) {
return std::unexpected(set.error());
}
if (auto set = set_string_option(context, "rc-lookahead", "0"); !set) {
return std::unexpected(set.error());
}
}
} else {
resolved.quality_key = "crf";
if (auto set = set_int_option(context, "crf", resolved.quality_value); !set) {
return std::unexpected(set.error());
}
if (const auto x265_params = x265_params_value(candidate, tuning.tune); x265_params) {
if (auto set = set_string_option(context, "x265-params", *x265_params); !set) {
return std::unexpected(set.error());
}
}
}
if (auto set = set_int_option(context, "forced-idr", 1); !set) {
return std::unexpected(set.error());
}
return resolved;
}
struct OpenedEncoder {
AVCodecContext *context{nullptr};
EncoderCandidate candidate{};
ResolvedEncoderSettings resolved{};
};
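// Tries each candidate in order; in auto mode a missing or failing encoder falls through to the next candidate instead of aborting.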
[[nodiscard]]
std::expected<OpenedEncoder, std::string> open_encoder(
const CodecType codec,
const EncoderDeviceType device,
const std::uint32_t width,
const std::uint32_t height,
const AVRational framerate,
const EncodeTuning &tuning) {
std::string last_error{};
for (const auto &candidate : encoder_candidates(codec, device)) {
const auto *encoder = avcodec_find_encoder_by_name(candidate.name.c_str());
if (encoder == nullptr) {
last_error = "FFmpeg encoder '" + candidate.name + "' is unavailable";
if (device == EncoderDeviceType::Auto) {
spdlog::warn(
"encoder '{}' unavailable for codec={} in auto mode, trying next candidate",
candidate.name,
codec_name(codec));
continue;
}
return std::unexpected(last_error);
}
auto *context = avcodec_alloc_context3(encoder);
if (context == nullptr) {
return std::unexpected("failed to allocate FFmpeg encoder context");
}
auto resolved = configure_codec_context(context, candidate, codec, width, height, framerate, tuning);
if (!resolved) {
avcodec_free_context(&context);
return std::unexpected(resolved.error());
}
const auto open_result = avcodec_open2(context, encoder, nullptr);
if (open_result < 0) {
last_error = "failed to open FFmpeg encoder '" + candidate.name + "': " + av_error_string(open_result);
avcodec_free_context(&context);
if (device == EncoderDeviceType::Auto) {
spdlog::warn(
"encoder '{}' failed to open in auto mode: {}. trying software fallback",
candidate.name,
av_error_string(open_result));
continue;
}
return std::unexpected(last_error);
}
return OpenedEncoder{
.context = context,
.candidate = candidate,
.resolved = std::move(*resolved),
};
}
if (last_error.empty()) {
last_error = "no usable FFmpeg encoder candidates were configured";
}
return std::unexpected(last_error);
}
} // namespace
struct Mp4Writer::Impl {
[[nodiscard]]
std::expected<void, std::string> open(
const std::filesystem::path &output_path,
const CodecType codec_arg,
const EncoderDeviceType encoder_device,
const std::uint32_t width,
const std::uint32_t height,
const float fps,
const EncodeTuning &tuning) {
close();
codec = codec_arg;
frame_rate = frame_rate_rational(fps);
frame_period = frame_period_ns(frame_rate);
last_frame_pts_ns.reset();
auto encoder = open_encoder(codec, encoder_device, width, height, frame_rate, tuning);
if (!encoder) {
return std::unexpected(encoder.error());
}
encoder_context = encoder->context;
encoder_name = encoder->candidate.name;
using_hardware = encoder->candidate.using_hardware;
encoder_pixel_format = encoder->candidate.pixel_format;
resolved_settings = std::move(encoder->resolved);
scaler = sws_getCachedContext(
nullptr,
static_cast<int>(width),
static_cast<int>(height),
AV_PIX_FMT_BGR24,
static_cast<int>(width),
static_cast<int>(height),
encoder_pixel_format,
SWS_BILINEAR,
nullptr,
nullptr,
nullptr);
if (scaler == nullptr) {
return std::unexpected("failed to create swscale conversion context");
}
frame = av_frame_alloc();
if (frame == nullptr) {
return std::unexpected("failed to allocate FFmpeg frame");
}
frame->format = encoder_pixel_format;
frame->width = encoder_context->width;
frame->height = encoder_context->height;
const auto frame_buffer_result = av_frame_get_buffer(frame, 32);
if (frame_buffer_result < 0) {
return std::unexpected("failed to allocate FFmpeg frame buffer: " + av_error_string(frame_buffer_result));
}
packet = av_packet_alloc();
if (packet == nullptr) {
return std::unexpected("failed to allocate FFmpeg packet");
}
const auto alloc_result = avformat_alloc_output_context2(
&format_context,
nullptr,
"mp4",
output_path.string().c_str());
if (alloc_result < 0 || format_context == nullptr) {
return std::unexpected("failed to allocate MP4 output context: " + av_error_string(alloc_result));
}
video_stream = avformat_new_stream(format_context, nullptr);
if (video_stream == nullptr) {
return std::unexpected("failed to allocate MP4 video stream");
}
video_stream->time_base = encoder_context->time_base;
video_stream->avg_frame_rate = frame_rate;
const auto params_result = avcodec_parameters_from_context(video_stream->codecpar, encoder_context);
if (params_result < 0) {
return std::unexpected("failed to copy encoder parameters into MP4 stream: " + av_error_string(params_result));
}
if ((format_context->oformat->flags & AVFMT_NOFILE) == 0) {
const auto open_result = avio_open2(
&format_context->pb,
output_path.string().c_str(),
AVIO_FLAG_WRITE,
nullptr,
nullptr);
if (open_result < 0) {
return std::unexpected("failed to open output MP4 '" + output_path.string() + "': " + av_error_string(open_result));
}
}
AVDictionary *muxer_options = nullptr;
av_dict_set(&muxer_options, "movflags", "+faststart", 0);
const auto header_result = avformat_write_header(format_context, &muxer_options);
av_dict_free(&muxer_options);
if (header_result < 0) {
return std::unexpected("failed to write MP4 header: " + av_error_string(header_result));
}
spdlog::info(
"ZED_SVO_MP4_READY codec={} encoder={} hardware={} width={} height={} fps={}/{} requested_preset={} requested_tune={} mapped_preset={} mapped_tune={} rc={} {}={} gop={} b_frames={} output={}",
codec_name(codec),
encoder_name,
using_hardware,
width,
height,
frame_rate.num,
frame_rate.den,
resolved_settings.requested_preset,
resolved_settings.requested_tune,
resolved_settings.mapped_preset,
resolved_settings.mapped_tune.value_or("none"),
resolved_settings.rate_control_mode.value_or("auto"),
resolved_settings.quality_key,
resolved_settings.quality_value,
resolved_settings.gop,
resolved_settings.b_frames,
output_path.string());
return {};
}
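// Converts the BGR24 input to the encoder pixel format via swscale, nudges non-monotonic timestamps forward by one frame period, and submits the frame.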
[[nodiscard]]
std::expected<void, std::string> write_bgr_frame(
const std::uint8_t *data,
const std::size_t row_stride_bytes,
const std::uint64_t relative_timestamp_ns) {
if (encoder_context == nullptr || frame == nullptr || scaler == nullptr || packet == nullptr || video_stream == nullptr) {
return std::unexpected("MP4 writer is not initialized");
}
const auto writable_result = av_frame_make_writable(frame);
if (writable_result < 0) {
return std::unexpected("failed to make FFmpeg frame writable: " + av_error_string(writable_result));
}
const std::uint8_t *source_planes[4]{data, nullptr, nullptr, nullptr};
const int source_strides[4]{static_cast<int>(row_stride_bytes), 0, 0, 0};
sws_scale(
scaler,
source_planes,
source_strides,
0,
encoder_context->height,
frame->data,
frame->linesize);
auto normalized_timestamp_ns = relative_timestamp_ns;
if (last_frame_pts_ns && normalized_timestamp_ns <= *last_frame_pts_ns) {
normalized_timestamp_ns = *last_frame_pts_ns + frame_period;
}
frame->pts = static_cast<std::int64_t>(normalized_timestamp_ns);
last_frame_pts_ns = normalized_timestamp_ns;
const auto send_result = avcodec_send_frame(encoder_context, frame);
if (send_result < 0) {
return std::unexpected("failed to send frame to FFmpeg encoder: " + av_error_string(send_result));
}
return drain_packets();
}
[[nodiscard]]
std::expected<void, std::string> flush() {
if (encoder_context == nullptr) {
return {};
}
const auto flush_result = avcodec_send_frame(encoder_context, nullptr);
if (flush_result < 0 && flush_result != AVERROR_EOF) {
return std::unexpected("failed to flush FFmpeg encoder: " + av_error_string(flush_result));
}
auto drained = drain_packets();
if (!drained) {
return drained;
}
return close_output();
}
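// Drains every ready packet from the encoder, rescaling pts/dts from the encoder time base to the stream time base before muxing.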
[[nodiscard]]
std::expected<void, std::string> drain_packets() {
while (true) {
const auto receive_result = avcodec_receive_packet(encoder_context, packet);
if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) {
break;
}
if (receive_result < 0) {
return std::unexpected("failed to receive FFmpeg packet: " + av_error_string(receive_result));
}
packet->stream_index = video_stream->index;
av_packet_rescale_ts(packet, encoder_context->time_base, video_stream->time_base);
const auto write_result = av_interleaved_write_frame(format_context, packet);
av_packet_unref(packet);
if (write_result < 0) {
return std::unexpected("failed to write MP4 packet: " + av_error_string(write_result));
}
}
return {};
}
[[nodiscard]]
std::expected<void, std::string> close_output() {
if (format_context == nullptr || trailer_written) {
return {};
}
const auto trailer_result = av_write_trailer(format_context);
if (trailer_result < 0) {
return std::unexpected("failed to write MP4 trailer: " + av_error_string(trailer_result));
}
trailer_written = true;
return {};
}
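// Tears down in dependency order: trailer first, then packet/frame/encoder/scaler, and finally the muxer and its IO context.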
void close() {
(void)close_output();
if (packet != nullptr) {
av_packet_free(&packet);
}
if (frame != nullptr) {
av_frame_free(&frame);
}
if (encoder_context != nullptr) {
avcodec_free_context(&encoder_context);
}
if (scaler != nullptr) {
sws_freeContext(scaler);
scaler = nullptr;
}
if (format_context != nullptr) {
if ((format_context->oformat->flags & AVFMT_NOFILE) == 0 && format_context->pb != nullptr) {
avio_closep(&format_context->pb);
}
avformat_free_context(format_context);
format_context = nullptr;
}
video_stream = nullptr;
encoder_name.clear();
using_hardware = false;
trailer_written = false;
frame_period = 33'333'333ull;
last_frame_pts_ns.reset();
resolved_settings = ResolvedEncoderSettings{};
}
~Impl() {
close();
}
CodecType codec{CodecType::H265};
AVCodecContext *encoder_context{nullptr};
AVFormatContext *format_context{nullptr};
AVStream *video_stream{nullptr};
AVFrame *frame{nullptr};
AVPacket *packet{nullptr};
SwsContext *scaler{nullptr};
AVPixelFormat encoder_pixel_format{AV_PIX_FMT_NONE};
AVRational frame_rate{30, 1};
std::uint64_t frame_period{33'333'333ull};
std::optional<std::uint64_t> last_frame_pts_ns{};
std::string encoder_name{};
ResolvedEncoderSettings resolved_settings{};
bool using_hardware{false};
bool trailer_written{false};
};
std::expected<CodecType, std::string> parse_codec(const std::string_view raw) {
if (raw == "h264") {
return CodecType::H264;
}
if (raw == "h265") {
return CodecType::H265;
}
return std::unexpected("invalid codec: '" + std::string(raw) + "' (expected: h264|h265)");
}
std::expected<EncoderDeviceType, std::string> parse_encoder_device(const std::string_view raw) {
if (raw == "auto") {
return EncoderDeviceType::Auto;
}
if (raw == "nvidia") {
return EncoderDeviceType::Nvidia;
}
if (raw == "software") {
return EncoderDeviceType::Software;
}
return std::unexpected("invalid encoder device: '" + std::string(raw) + "' (expected: auto|nvidia|software)");
}
std::expected<PresetKind, std::string> parse_preset(const std::string_view raw) {
if (raw == "fast") {
return PresetKind::Fast;
}
if (raw == "balanced") {
return PresetKind::Balanced;
}
if (raw == "quality") {
return PresetKind::Quality;
}
return std::unexpected("invalid preset: '" + std::string(raw) + "' (expected: fast|balanced|quality)");
}
std::expected<TuneKind, std::string> parse_tune(const std::string_view raw) {
if (raw == "low-latency") {
return TuneKind::LowLatency;
}
if (raw == "balanced") {
return TuneKind::Balanced;
}
return std::unexpected("invalid tune: '" + std::string(raw) + "' (expected: low-latency|balanced)");
}
std::string_view codec_name(const CodecType codec) {
return codec == CodecType::H265 ? "h265" : "h264";
}
std::string_view preset_name(const PresetKind preset) {
switch (preset) {
case PresetKind::Fast:
return "fast";
case PresetKind::Balanced:
return "balanced";
case PresetKind::Quality:
return "quality";
}
return "fast";
}
std::string_view tune_name(const TuneKind tune) {
switch (tune) {
case TuneKind::LowLatency:
return "low-latency";
case TuneKind::Balanced:
return "balanced";
}
return "low-latency";
}
std::filesystem::path derive_output_path(const std::filesystem::path &input_path) {
auto output_path = input_path;
output_path.replace_extension(".mp4");
return output_path;
}
Mp4Writer::Mp4Writer()
: impl_(std::make_unique<Impl>()) {}
Mp4Writer::Mp4Writer(Mp4Writer &&) noexcept = default;
Mp4Writer &Mp4Writer::operator=(Mp4Writer &&) noexcept = default;
Mp4Writer::~Mp4Writer() = default;
std::expected<void, std::string> Mp4Writer::open(
const std::filesystem::path &output_path,
const CodecType codec,
const EncoderDeviceType encoder_device,
const std::uint32_t width,
const std::uint32_t height,
const float fps,
const EncodeTuning &tuning) {
return impl_->open(output_path, codec, encoder_device, width, height, fps, tuning);
}
std::expected<void, std::string> Mp4Writer::write_bgr_frame(
const std::uint8_t *data,
const std::size_t row_stride_bytes,
const std::uint64_t relative_timestamp_ns) {
return impl_->write_bgr_frame(data, row_stride_bytes, relative_timestamp_ns);
}
std::expected<void, std::string> Mp4Writer::flush() {
return impl_->flush();
}
bool Mp4Writer::using_hardware() const {
return impl_ != nullptr && impl_->using_hardware;
}
} // namespace cvmmap_streamer::zed_tools
File diff suppressed because it is too large
-320
View File
@@ -1,320 +0,0 @@
#include <CLI/CLI.hpp>
#include <spdlog/spdlog.h>
#include <sl/Camera.hpp>
#include "cvmmap_streamer/tools/zed_progress_bar.hpp"
#include "cvmmap_streamer/tools/zed_svo_mp4_support.hpp"
#include <cstdint>
#include <expected>
#include <filesystem>
#include <optional>
#include <string>
#include <utility>
namespace {
using cvmmap_streamer::zed_tools::EncodeTuning;
using cvmmap_streamer::zed_tools::Mp4Writer;
using cvmmap_streamer::zed_tools::ProgressBar;
using cvmmap_streamer::zed_tools::derive_output_path;
using cvmmap_streamer::zed_tools::frame_period_ns;
using cvmmap_streamer::zed_tools::parse_codec;
using cvmmap_streamer::zed_tools::parse_encoder_device;
using cvmmap_streamer::zed_tools::parse_preset;
using cvmmap_streamer::zed_tools::parse_tune;
enum class ToolExitCode : int {
Success = 0,
UsageError = 2,
RuntimeError = 3,
};
struct CliOptions {
std::string input_path{};
std::string output_path{};
std::string codec{"h265"};
std::string encoder_device{"auto"};
std::string preset{"fast"};
std::string tune{"low-latency"};
int quality{cvmmap_streamer::zed_tools::kDefaultQuality};
std::uint32_t gop{cvmmap_streamer::zed_tools::kDefaultGopSize};
std::uint32_t b_frames{cvmmap_streamer::zed_tools::kDefaultBFrames};
std::uint32_t start_frame{0};
std::uint32_t end_frame{0};
bool has_end_frame{false};
};
[[nodiscard]]
constexpr int exit_code(const ToolExitCode code) {
return static_cast<int>(code);
}
[[nodiscard]]
std::string zed_string(const sl::String &value) {
return std::string(value.c_str() == nullptr ? "" : value.c_str());
}
[[nodiscard]]
std::string zed_status_string(const sl::ERROR_CODE code) {
return zed_string(sl::toString(code));
}
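// Sanity-checks a retrieved frame: 8-bit 3-channel, non-zero dimensions, and a valid CPU buffer.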
[[nodiscard]]
std::expected<void, std::string> validate_u8c3_mat(const sl::Mat &mat, const std::string_view label) {
if (mat.getDataType() != sl::MAT_TYPE::U8_C3) {
return std::unexpected(std::string(label) + " must be U8_C3");
}
if (mat.getWidth() == 0 || mat.getHeight() == 0) {
return std::unexpected(std::string(label) + " dimensions must be non-zero");
}
if (mat.getPtr<sl::uchar1>(sl::MEM::CPU) == nullptr) {
return std::unexpected(std::string(label) + " CPU buffer is null");
}
return {};
}
} // namespace
int main(int argc, char **argv) {
CliOptions options{};
CLI::App app{"zed_svo_to_mp4 - convert ZED SVO/SVO2 playback to MP4"};
app.add_option("--input", options.input_path, "Input SVO/SVO2 file")->required();
app.add_option("--output", options.output_path, "Output MP4 file (default: input path with .mp4 suffix)");
app.add_option("--codec", options.codec, "Video codec (h264|h265)")
->check(CLI::IsMember({"h264", "h265"}));
app.add_option("--encoder-device", options.encoder_device, "Encoder device (auto|nvidia|software)")
->check(CLI::IsMember({"auto", "nvidia", "software"}));
app.add_option("--preset", options.preset, "Encoding preset (fast|balanced|quality)")
->check(CLI::IsMember({"fast", "balanced", "quality"}));
app.add_option("--tune", options.tune, "Encoding tune (low-latency|balanced)")
->check(CLI::IsMember({"low-latency", "balanced"}));
app.add_option("--quality", options.quality, "Encoder quality target (0-51, lower is better)")
->check(CLI::Range(0, 51));
app.add_option("--gop", options.gop, "Encoder GOP length in frames")
->check(CLI::PositiveNumber);
app.add_option("--b-frames", options.b_frames, "Encoder B-frame count")
->check(CLI::NonNegativeNumber);
app.add_option("--start-frame", options.start_frame, "First SVO frame to export (inclusive)")
->check(CLI::NonNegativeNumber);
auto *end_frame_option = app.add_option("--end-frame", options.end_frame, "Last SVO frame to export (inclusive)")
->check(CLI::NonNegativeNumber);
try {
app.parse(argc, argv);
} catch (const CLI::ParseError &error) {
return app.exit(error);
}
options.has_end_frame = end_frame_option->count() > 0;
auto codec = parse_codec(options.codec);
if (!codec) {
spdlog::error("{}", codec.error());
return exit_code(ToolExitCode::UsageError);
}
auto encoder_device = parse_encoder_device(options.encoder_device);
if (!encoder_device) {
spdlog::error("{}", encoder_device.error());
return exit_code(ToolExitCode::UsageError);
}
auto preset = parse_preset(options.preset);
if (!preset) {
spdlog::error("{}", preset.error());
return exit_code(ToolExitCode::UsageError);
}
auto tune = parse_tune(options.tune);
if (!tune) {
spdlog::error("{}", tune.error());
return exit_code(ToolExitCode::UsageError);
}
if (options.has_end_frame && options.end_frame < options.start_frame) {
spdlog::error(
"invalid frame range: start-frame={} end-frame={}",
options.start_frame,
options.end_frame);
return exit_code(ToolExitCode::UsageError);
}
if (options.b_frames > options.gop) {
spdlog::error(
"invalid encoder config: b-frames {} must be <= gop {}",
options.b_frames,
options.gop);
return exit_code(ToolExitCode::UsageError);
}
const auto output_path = options.output_path.empty()
? derive_output_path(std::filesystem::path{options.input_path})
: std::filesystem::path{options.output_path};
if (output_path.empty()) {
spdlog::error("output path must not be empty");
return exit_code(ToolExitCode::UsageError);
}
if (output_path.has_parent_path()) {
std::filesystem::create_directories(output_path.parent_path());
}
const EncodeTuning tuning{
.preset = *preset,
.tune = *tune,
.quality = options.quality,
.gop = options.gop,
.b_frames = options.b_frames,
};
sl::Camera camera{};
auto close_camera = [&]() {
if (camera.isOpened()) {
camera.close();
}
};
sl::InitParameters init{};
init.input.setFromSVOFile(options.input_path.c_str());
init.svo_real_time_mode = false;
init.coordinate_system = sl::COORDINATE_SYSTEM::IMAGE;
init.coordinate_units = sl::UNIT::METER;
init.depth_mode = sl::DEPTH_MODE::NONE;
init.sdk_verbose = false;
const auto open_status = camera.open(init);
if (open_status != sl::ERROR_CODE::SUCCESS) {
spdlog::error(
"failed to open SVO '{}': {}",
options.input_path,
zed_status_string(open_status));
return exit_code(ToolExitCode::RuntimeError);
}
const auto total_frames = camera.getSVONumberOfFrames();
if (total_frames <= 0) {
close_camera();
spdlog::error("input SVO has no frames");
return exit_code(ToolExitCode::RuntimeError);
}
if (options.start_frame >= static_cast<std::uint32_t>(total_frames)) {
close_camera();
spdlog::error(
"start-frame {} is out of range for {} frames",
options.start_frame,
total_frames);
return exit_code(ToolExitCode::UsageError);
}
if (options.has_end_frame && options.end_frame >= static_cast<std::uint32_t>(total_frames)) {
close_camera();
spdlog::error(
"end-frame {} is out of range for {} frames",
options.end_frame,
total_frames);
return exit_code(ToolExitCode::UsageError);
}
camera.setSVOPosition(static_cast<int>(options.start_frame));
const auto camera_info = camera.getCameraInformation();
const auto &camera_config = camera_info.camera_configuration;
const auto width = static_cast<std::uint32_t>(camera_config.resolution.width);
const auto height = static_cast<std::uint32_t>(camera_config.resolution.height);
if (width == 0 || height == 0) {
close_camera();
spdlog::error("camera resolution reported by the ZED SDK is invalid");
return exit_code(ToolExitCode::RuntimeError);
}
Mp4Writer writer{};
if (auto open_writer = writer.open(output_path, *codec, *encoder_device, width, height, camera_config.fps, tuning); !open_writer) {
close_camera();
spdlog::error("failed to initialize MP4 writer: {}", open_writer.error());
return exit_code(ToolExitCode::RuntimeError);
}
sl::RuntimeParameters runtime_parameters{};
sl::Mat left_frame{};
std::optional<std::uint64_t> first_timestamp_ns{};
std::optional<std::uint64_t> last_timestamp_ns{};
std::uint64_t emitted_frames{0};
const auto nominal_frame_period_ns = frame_period_ns(camera_config.fps);
const auto last_frame = options.has_end_frame
? options.end_frame
: static_cast<std::uint32_t>(total_frames - 1);
const auto total_frames_to_emit = static_cast<std::uint64_t>(last_frame - options.start_frame + 1);
ProgressBar progress{total_frames_to_emit};
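// Export loop: grab each frame, retrieve the left BGR view, force timestamps to be strictly increasing, and encode relative to the first frame's timestamp.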
while (options.start_frame + emitted_frames <= last_frame) {
const auto grab_status = camera.grab(runtime_parameters);
if (grab_status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
break;
}
if (grab_status != sl::ERROR_CODE::SUCCESS) {
progress.finish(emitted_frames, false);
close_camera();
spdlog::error("failed to grab SVO frame: {}", zed_status_string(grab_status));
return exit_code(ToolExitCode::RuntimeError);
}
const auto image_status = camera.retrieveImage(left_frame, sl::VIEW::LEFT_BGR, sl::MEM::CPU);
if (image_status != sl::ERROR_CODE::SUCCESS) {
progress.finish(emitted_frames, false);
close_camera();
spdlog::error("failed to retrieve left image: {}", zed_status_string(image_status));
return exit_code(ToolExitCode::RuntimeError);
}
if (auto valid = validate_u8c3_mat(left_frame, "left image"); !valid) {
progress.finish(emitted_frames, false);
close_camera();
spdlog::error("{}", valid.error());
return exit_code(ToolExitCode::RuntimeError);
}
auto timestamp_ns = camera.getTimestamp(sl::TIME_REFERENCE::IMAGE).getNanoseconds();
if (timestamp_ns == 0) {
timestamp_ns = emitted_frames * nominal_frame_period_ns;
}
if (last_timestamp_ns && timestamp_ns <= *last_timestamp_ns) {
timestamp_ns = *last_timestamp_ns + 1;
}
last_timestamp_ns = timestamp_ns;
if (!first_timestamp_ns) {
first_timestamp_ns = timestamp_ns;
}
const auto relative_timestamp_ns = timestamp_ns - *first_timestamp_ns;
if (auto write = writer.write_bgr_frame(
left_frame.getPtr<sl::uchar1>(sl::MEM::CPU),
left_frame.getStepBytes(sl::MEM::CPU),
relative_timestamp_ns);
!write) {
progress.finish(emitted_frames, false);
close_camera();
spdlog::error("failed to encode or mux frame: {}", write.error());
return exit_code(ToolExitCode::RuntimeError);
}
emitted_frames += 1;
progress.update(emitted_frames);
}
if (auto flush = writer.flush(); !flush) {
progress.finish(emitted_frames, false);
close_camera();
spdlog::error("failed to finalize MP4 output: {}", flush.error());
return exit_code(ToolExitCode::RuntimeError);
}
progress.finish(emitted_frames, true);
close_camera();
spdlog::info(
"converted {} frames from '{}' to '{}' using codec={} hardware={}",
emitted_frames,
options.input_path,
output_path.string(),
cvmmap_streamer::zed_tools::codec_name(*codec),
writer.using_hardware());
return exit_code(ToolExitCode::Success);
}
-268
View File
@@ -1,268 +0,0 @@
from __future__ import annotations
import dataclasses
import tempfile
import unittest
from pathlib import Path
import click
from click.testing import CliRunner
from scripts import zed_batch_segment_sources as segment_sources
from scripts.zed_batch_svo_grid_to_mp4 import main as grid_main
from scripts.zed_batch_svo_to_mcap import main as mcap_main
@dataclasses.dataclass(slots=True, frozen=True)
class FakeScan:
segment_dir: Path
matched_files: int
is_valid: bool
reason: str | None = None
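# Stand-in scanner used by these tests: classifies a segment dir by marker files instead of probing real SVO inputs.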
def fake_scan(segment_dir: Path) -> FakeScan:
if not segment_dir.is_dir():
return FakeScan(segment_dir=segment_dir, matched_files=0, is_valid=False, reason="missing directory")
if (segment_dir / "valid.segment").is_file():
return FakeScan(segment_dir=segment_dir, matched_files=2, is_valid=True)
if (segment_dir / "partial.segment").is_file():
return FakeScan(segment_dir=segment_dir, matched_files=1, is_valid=False, reason="partial segment")
return FakeScan(segment_dir=segment_dir, matched_files=0, is_valid=False, reason="no camera files")
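# Builds a directory containing the four <segment>_zed[1-4].svo2 files the batch CLIs expect.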
def create_multicamera_segment(parent: Path, segment_name: str) -> Path:
segment_dir = parent / segment_name
segment_dir.mkdir(parents=True)
for camera_index in range(1, 5):
(segment_dir / f"{segment_name}_zed{camera_index}.svo2").write_bytes(b"")
return segment_dir
class SharedSourceResolutionTests(unittest.TestCase):
def test_dataset_root_recursive_discovers_nested_segments(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
dataset_root = Path(tmp) / "dataset"
segment_dir = dataset_root / "run" / "2026-04-08T11-50-32"
segment_dir.mkdir(parents=True)
(segment_dir / "valid.segment").write_text("", encoding="utf-8")
sources = segment_sources.resolve_sources(
dataset_root,
(),
None,
None,
True,
scan_segment_dir=fake_scan,
no_matches_message=lambda root: f"no segments under {root}",
)
self.assertEqual(sources.mode, "dataset-root")
self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),))
def test_dataset_root_without_recursive_does_not_descend(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
dataset_root = Path(tmp) / "dataset"
segment_dir = dataset_root / "run" / "2026-04-08T11-50-32"
segment_dir.mkdir(parents=True)
(segment_dir / "valid.segment").write_text("", encoding="utf-8")
with self.assertRaises(click.ClickException) as error:
segment_sources.resolve_sources(
dataset_root,
(),
None,
None,
False,
scan_segment_dir=fake_scan,
no_matches_message=lambda root: f"no segments under {root}",
)
self.assertIn("no segments under", str(error.exception))
def test_explicit_segments_are_deduped(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
segment_dir = Path(tmp) / "2026-04-08T11-50-32"
segment_dir.mkdir()
(segment_dir / "valid.segment").write_text("", encoding="utf-8")
sources = segment_sources.resolve_sources(
None,
(segment_dir, segment_dir),
None,
None,
True,
scan_segment_dir=fake_scan,
no_matches_message=lambda root: f"no segments under {root}",
)
self.assertEqual(sources.mode, "segments")
self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),))
def test_segments_csv_uses_segment_dir_column(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
segment_dir = tmp_path / "segments" / "2026-04-08T11-50-32"
segment_dir.mkdir(parents=True)
(segment_dir / "valid.segment").write_text("", encoding="utf-8")
csv_path = tmp_path / "segments.csv"
csv_path.write_text("segment_dir\nsegments/2026-04-08T11-50-32\n", encoding="utf-8")
sources = segment_sources.resolve_sources(
None,
(),
csv_path,
None,
True,
scan_segment_dir=fake_scan,
no_matches_message=lambda root: f"no segments under {root}",
)
self.assertEqual(sources.mode, "segments-csv")
self.assertEqual(sources.segment_dirs, (segment_dir.resolve(),))
def test_segment_path_like_dataset_root_has_hint(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
dataset_root = Path(tmp) / "dataset"
segment_dir = dataset_root / "run" / "2026-04-08T11-50-32"
segment_dir.mkdir(parents=True)
(segment_dir / "valid.segment").write_text("", encoding="utf-8")
with self.assertRaises(click.ClickException) as error:
segment_sources.resolve_sources(
None,
(dataset_root,),
None,
None,
True,
scan_segment_dir=fake_scan,
no_matches_message=lambda root: f"no segments under {root}",
)
message = str(error.exception)
self.assertIn("looks like a dataset root", message)
self.assertIn("--dataset-root", message)
class BatchCliSmokeTests(unittest.TestCase):
def setUp(self) -> None:
self.runner = CliRunner()
def test_mcap_dataset_root_flag_discovers_segments(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
dataset_root = Path(tmp) / "dataset"
create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32")
result = self.runner.invoke(
mcap_main,
[
"--dataset-root",
str(dataset_root),
"--recursive",
"--dry-run",
"--zed-bin",
"/bin/true",
],
)
self.assertEqual(result.exit_code, 0, result.output)
self.assertIn("source=dataset-root matched=1 pending=1", result.output)
def test_mcap_segment_flag_rejects_dataset_root_with_hint(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
dataset_root = Path(tmp) / "dataset"
create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32")
result = self.runner.invoke(
mcap_main,
[
"--segment",
str(dataset_root),
"--dry-run",
"--zed-bin",
"/bin/true",
],
)
self.assertNotEqual(result.exit_code, 0)
self.assertIn("looks like a dataset root", result.output)
self.assertIn("--dataset-root", result.output)
def test_mcap_rejects_legacy_positional_dataset_root(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
dataset_root = Path(tmp) / "dataset"
create_multicamera_segment(dataset_root / "run", "2026-04-08T11-50-32")
result = self.runner.invoke(
mcap_main,
[
str(dataset_root),
"--dry-run",
"--zed-bin",
"/bin/true",
],
)
self.assertNotEqual(result.exit_code, 0)
self.assertIn("positional dataset paths are no longer supported", result.output)
self.assertIn("--dataset-root", result.output)
def test_mcap_rejects_recursive_without_dataset_root(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
result = self.runner.invoke(
mcap_main,
[
"--segment",
str(segment_dir),
"--no-recursive",
"--dry-run",
"--zed-bin",
"/bin/true",
],
)
self.assertNotEqual(result.exit_code, 0)
self.assertIn("--recursive/--no-recursive can only be used with --dataset-root", result.output)
def test_grid_segment_flag_discovers_one_segment(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
result = self.runner.invoke(
grid_main,
[
"--segment",
str(segment_dir),
"--dry-run",
"--zed-bin",
"/bin/true",
],
)
self.assertEqual(result.exit_code, 0, result.output)
self.assertIn("source=segments matched=1 pending=1", result.output)
def test_grid_rejects_legacy_segment_dir_flag(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
segment_dir = create_multicamera_segment(Path(tmp), "2026-04-08T11-50-32")
result = self.runner.invoke(
grid_main,
[
"--segment-dir",
str(segment_dir),
"--dry-run",
"--zed-bin",
"/bin/true",
],
)
self.assertNotEqual(result.exit_code, 0)
self.assertIn("--segment-dir is no longer supported", result.output)
self.assertIn("--segment", result.output)
if __name__ == "__main__":
unittest.main()
-139
View File
@@ -1,139 +0,0 @@
from __future__ import annotations
import datetime as dt
import tempfile
import unittest
from pathlib import Path
import duckdb
from scripts.zed_segment_time_index import (
BoundsRow,
format_ns_iso,
infer_dataset_timezone,
parse_timestamp_to_ns,
parse_timestamp_window,
require_query_window,
scan_segment_dir,
write_index,
)
class TimestampParseTests(unittest.TestCase):
def test_parse_folder_style_timestamp(self) -> None:
actual = parse_timestamp_to_ns("2026-03-18T12-00-23", "UTC")
expected = parse_timestamp_to_ns("2026-03-18T12:00:23+00:00", "UTC")
self.assertEqual(actual, expected)
def test_parse_integer_epoch_milliseconds(self) -> None:
self.assertEqual(parse_timestamp_to_ns("1710000000123", "UTC"), 1710000000123 * 1_000_000)
def test_parse_timestamp_window_for_second_precision_text(self) -> None:
start_ns, end_ns = parse_timestamp_window("2026-03-18T12-00-23", "UTC")
self.assertEqual(end_ns - start_ns, 999_999_999)
def test_require_query_window_rejects_mixed_modes(self) -> None:
with self.assertRaises(Exception):
require_query_window("1", "2", "3", "UTC")
def test_format_ns_iso_utc(self) -> None:
rendered = format_ns_iso(1_710_000_000_123_000_000, dt.timezone.utc)
self.assertTrue(rendered.startswith("2024-03-09T16:00:00.123000000"))
class SegmentDiscoveryTests(unittest.TestCase):
def test_scan_segment_dir_accepts_multicamera_dir(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
segment_dir = Path(tmp)
for label in ("zed1", "zed2", "zed3", "zed4"):
(segment_dir / f"2026-03-18T12-00-23_{label}.svo2").write_bytes(b"")
scan = scan_segment_dir(segment_dir)
self.assertTrue(scan.is_valid)
self.assertEqual(scan.camera_labels, ("zed1", "zed2", "zed3", "zed4"))
def test_scan_segment_dir_rejects_partial_dir(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
segment_dir = Path(tmp)
(segment_dir / "2026-03-18T12-00-23_zed1.svo2").write_bytes(b"")
scan = scan_segment_dir(segment_dir)
self.assertFalse(scan.is_valid)
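# Exercises the DuckDB index end to end: timezone inference from folder-name offsets and overlap queries against start_ns/end_ns.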
class DuckDbIndexTests(unittest.TestCase):
def test_infer_dataset_timezone_from_folder_names(self) -> None:
row = BoundsRow(
segment_dir=Path("/tmp/bar/2026-03-18T11-59-41"),
relative_segment_dir="bar/2026-03-18T11-59-41",
group_path="bar",
activity="bar",
segment_name="2026-03-18T11-59-41",
mcap_path=Path("/tmp/bar/2026-03-18T11-59-41/2026-03-18T11-59-41.mcap"),
start_ns=1_773_806_381_201_081_000,
end_ns=1_773_806_392_268_226_000,
duration_ns=11_067_145_000,
start_iso_utc="2026-03-18T03:59:41.201081000Z",
end_iso_utc="2026-03-18T03:59:52.268226000Z",
camera_count=4,
camera_labels="zed1,zed2,zed3,zed4",
video_message_count=1330,
index_source="mcap_video_bounds",
)
self.assertEqual(infer_dataset_timezone([row]), "UTC+08:00")
def test_write_index_and_query_overlap(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
root = Path(tmp) / "dataset"
root.mkdir()
index_path = root / "segment_time_index.duckdb"
rows = [
BoundsRow(
segment_dir=root / "bar" / "2026-03-18T12-00-23",
relative_segment_dir="bar/2026-03-18T12-00-23",
group_path="bar",
activity="bar",
segment_name="2026-03-18T12-00-23",
mcap_path=root / "bar" / "2026-03-18T12-00-23" / "2026-03-18T12-00-23.mcap",
start_ns=100,
end_ns=200,
duration_ns=100,
start_iso_utc="1970-01-01T00:00:00.000000100Z",
end_iso_utc="1970-01-01T00:00:00.000000200Z",
camera_count=4,
camera_labels="zed1,zed2,zed3,zed4",
video_message_count=1330,
index_source="mcap_video_bounds",
),
BoundsRow(
segment_dir=root / "run" / "2026-03-18T12-01-00",
relative_segment_dir="run/2026-03-18T12-01-00",
group_path="run",
activity="run",
segment_name="2026-03-18T12-01-00",
mcap_path=root / "run" / "2026-03-18T12-01-00" / "2026-03-18T12-01-00.mcap",
start_ns=250,
end_ns=400,
duration_ns=150,
start_iso_utc="1970-01-01T00:00:00.000000250Z",
end_iso_utc="1970-01-01T00:00:00.000000400Z",
camera_count=4,
camera_labels="zed1,zed2,zed3,zed4",
video_message_count=1400,
index_source="mcap_video_bounds",
),
]
write_index(index_path, root, rows)
conn = duckdb.connect(str(index_path), read_only=True)
try:
matches = conn.execute(
"SELECT relative_segment_dir FROM segments WHERE start_ns <= ? AND end_ns >= ? ORDER BY start_ns",
[300, 180],
).fetchall()
self.assertEqual(matches, [("bar/2026-03-18T12-00-23",), ("run/2026-03-18T12-01-00",)])
finally:
conn.close()
if __name__ == "__main__":
unittest.main()