BREAKING CHANGE: ClientPublisher::open() signature changed from open(sl::InputType input, Trigger* ref, int sdk_gpu_id) to open(const std::string& ip, int port, Trigger* ref, int sdk_gpu_id) The ClientPublisher now receives camera streams over network using setFromStream() instead of opening local USB cameras. This allows the host to receive video from edge devices running enableStreaming(). Changes: - Use init_parameters.input.setFromStream(ip, port) for stream input - Keep body tracking and Fusion publishing (INTRA_PROCESS) unchanged - Add getSerialNumber() getter for logging/debugging - Improve error messages with context (ip:port) Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
128 lines
4.2 KiB
Markdown
128 lines
4.2 KiB
Markdown
# GLViewer CUDA-OpenGL Interop Fix
|
|
|
|
## Problem
|
|
|
|
The GLViewer had CUDA-OpenGL interop issues where the application would crash or display corrupted graphics when rendering point clouds and camera views. This was caused by improper resource mapping between CUDA and OpenGL.
|
|
|
|
## Root Cause
|
|
|
|
The original code kept CUDA graphics resources permanently mapped:
|
|
- Resources were mapped once during initialization
|
|
- Never unmapped until cleanup/resize
|
|
- CUDA and OpenGL were competing for the same memory resources
|
|
|
|
This violates CUDA-OpenGL interop best practices which require:
|
|
1. **Map** resource before CUDA operations
|
|
2. **Use** the mapped pointer/array
|
|
3. **Unmap** resource before OpenGL uses it
|
|
|
|
## Solution Applied
|
|
|
|
### 1. PointCloud::pushNewPC() Fix
|
|
|
|
**Before:**
|
|
```cpp
|
|
void PointCloud::pushNewPC() {
|
|
// ... initialization code ...
|
|
// Resource mapped once, never unmapped
|
|
cudaMemcpy(xyzrgbaMappedBuf_, refMat.getPtr<sl::float4>(sl::MEM::CPU),
|
|
numBytes_, cudaMemcpyHostToDevice);
|
|
}
|
|
```
|
|
|
|
**After:**
|
|
```cpp
|
|
void PointCloud::pushNewPC() {
|
|
// ... initialization code ...
|
|
|
|
// Map the resource for CUDA access
|
|
cudaError_t err = cudaGraphicsMapResources(1, &bufferCudaID_, 0);
|
|
|
|
err = cudaGraphicsResourceGetMappedPointer((void**)&xyzrgbaMappedBuf_,
|
|
&numBytes_, bufferCudaID_);
|
|
|
|
// Copy data from ZED SDK to mapped OpenGL buffer
|
|
cudaMemcpy(xyzrgbaMappedBuf_, refMat.getPtr<sl::float4>(sl::MEM::CPU),
|
|
numBytes_, cudaMemcpyHostToDevice);
|
|
|
|
// Synchronize to ensure copy completes
|
|
cudaStreamSynchronize(0);
|
|
|
|
// Unmap so OpenGL can use it
|
|
err = cudaGraphicsUnmapResources(1, &bufferCudaID_, 0);
|
|
}
|
|
```
|
|
|
|
### 2. CameraViewer::pushNewImage() Fix
|
|
|
|
**Before:**
|
|
```cpp
|
|
void CameraViewer::pushNewImage() {
|
|
if (!ref.isInit()) return;
|
|
// ArrIm was permanently mapped, direct copy
|
|
cudaMemcpy2DToArray(ArrIm, 0, 0, ref.getPtr<sl::uchar1>(sl::MEM::CPU),
|
|
ref.getStepBytes(sl::MEM::CPU),
|
|
ref.getPixelBytes() * ref.getWidth(),
|
|
ref.getHeight(), cudaMemcpyHostToDevice);
|
|
}
|
|
```
|
|
|
|
**After:**
|
|
```cpp
|
|
void CameraViewer::pushNewImage() {
|
|
if (!ref.isInit()) return;
|
|
|
|
// Map the resource for CUDA access
|
|
auto err = cudaGraphicsMapResources(1, &cuda_gl_ressource, 0);
|
|
|
|
err = cudaGraphicsSubResourceGetMappedArray(&ArrIm, cuda_gl_ressource, 0, 0);
|
|
|
|
// Copy data to mapped array
|
|
err = cudaMemcpy2DToArray(ArrIm, 0, 0, ref.getPtr<sl::uchar1>(sl::MEM::CPU),
|
|
ref.getStepBytes(sl::MEM::CPU),
|
|
ref.getPixelBytes() * ref.getWidth(),
|
|
ref.getHeight(), cudaMemcpyHostToDevice);
|
|
|
|
// Synchronize to ensure copy completes
|
|
cudaStreamSynchronize(0);
|
|
|
|
// Unmap so OpenGL can use it
|
|
err = cudaGraphicsUnmapResources(1, &cuda_gl_ressource, 0);
|
|
}
|
|
```
|
|
|
|
### 3. CameraViewer Initialization Cleanup
|
|
|
|
Removed permanent mapping from initialization:
|
|
|
|
**Before:**
|
|
```cpp
|
|
// In CameraViewer::initialize():
|
|
cudaGraphicsGLRegisterImage(&cuda_gl_ressource, texture, GL_TEXTURE_2D,
|
|
cudaGraphicsRegisterFlagsWriteDiscard);
|
|
cudaGraphicsMapResources(1, &cuda_gl_ressource, 0); // REMOVED
|
|
cudaGraphicsSubResourceGetMappedArray(&ArrIm, cuda_gl_ressource, 0, 0); // REMOVED
|
|
```
|
|
|
|
**After:**
|
|
```cpp
|
|
// In CameraViewer::initialize():
|
|
cudaGraphicsGLRegisterImage(&cuda_gl_ressource, texture, GL_TEXTURE_2D,
|
|
cudaGraphicsRegisterFlagsWriteDiscard);
|
|
// Mapping now done in pushNewImage()
|
|
```
|
|
|
|
## Key CUDA-OpenGL Interop Rules
|
|
|
|
1. **Register once** - `cudaGraphicsGLRegisterBuffer/RegisterImage`
|
|
2. **Map before CUDA** - `cudaGraphicsMapResources`
|
|
3. **Get pointer/array** - `cudaGraphicsResourceGetMappedPointer/SubResourceGetMappedArray`
|
|
4. **Copy data** - `cudaMemcpy/cudaMemcpy2DToArray`
|
|
5. **Synchronize** - `cudaStreamSynchronize` (ensures copy completes)
|
|
6. **Unmap before GL** - `cudaGraphicsUnmapResources`
|
|
7. **Unregister on cleanup** - `cudaGraphicsUnregisterResource`
|
|
|
|
## Reference
|
|
|
|
This fix is based on commit `45fcb88e9dcc7a1b1fb86b928b884bfcdc1da0f2` in the zed-sdk repository which applied the same fix to the depth sensing sample.
|