2026-02-04 03:03:16 +00:00
commit 53a443d120
9 changed files with 2353 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
build

61
CMakeLists.txt Executable file

@@ -0,0 +1,61 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.5)
PROJECT(ZED_BodyFusion)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
SET(CMAKE_BUILD_TYPE "Release")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
option(LINK_SHARED_ZED "Link with the ZED SDK shared executable" ON)
if (NOT LINK_SHARED_ZED AND MSVC)
message(FATAL_ERROR "LINK_SHARED_ZED OFF : ZED SDK static libraries not available on Windows")
endif()
find_package(ZED REQUIRED)
find_package(CUDA REQUIRED)
find_package(GLUT REQUIRED)
find_package(GLEW REQUIRED)
SET(OpenGL_GL_PREFERENCE GLVND)
find_package(OpenGL REQUIRED)
find_package(OpenCV)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${ZED_INCLUDE_DIRS})
include_directories(${GLEW_INCLUDE_DIRS})
include_directories(${GLUT_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
include_directories(${OpenCV_INCLUDE_DIRS})
link_directories(${ZED_LIBRARY_DIR})
link_directories(${CUDA_LIBRARY_DIRS})
link_directories(${GLEW_LIBRARY_DIRS})
link_directories(${GLUT_LIBRARY_DIRS})
link_directories(${OpenGL_LIBRARY_DIRS})
link_directories(${OpenCV_LIBRARY_DIRS})
IF(NOT WIN32)
SET(SPECIAL_OS_LIBS "pthread")
IF (CMAKE_SYSTEM_PROCESSOR MATCHES aarch64)
add_definitions(-DJETSON_STYLE)
ENDIF()
ENDIF()
FILE(GLOB_RECURSE SRC_FILES src/*.c*)
FILE(GLOB_RECURSE HDR_FILES include/*.h*)
add_executable(${PROJECT_NAME} ${HDR_FILES} ${SRC_FILES})
if (LINK_SHARED_ZED)
SET(ZED_LIBS ${ZED_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUDART_LIBRARY})
else()
SET(ZED_LIBS ${ZED_STATIC_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_LIBRARY})
endif()
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${ZED_LIBS} ${UTILS_LIB} ${SPECIAL_OS_LIBS} ${OpenCV_LIBRARIES} ${OPENGL_LIBRARIES} ${GLUT_LIBRARIES} ${GLEW_LIBRARIES})
if(INSTALL_SAMPLES)
LIST(APPEND SAMPLE_LIST ${PROJECT_NAME})
SET(SAMPLE_LIST "${SAMPLE_LIST}" PARENT_SCOPE)
endif()

349
ZED_SDK_ARCHITECTURE.md Normal file

@@ -0,0 +1,349 @@
# ZED SDK Architecture: Streaming vs Fusion API
## Overview
The ZED SDK provides two distinct APIs for transmitting camera data over a network:
1. **Streaming API** (`enableStreaming`) - Video streaming
2. **Fusion API** (`startPublishing`) - Metadata publishing
These serve fundamentally different use cases and have different compute/bandwidth tradeoffs.
## API Comparison
| Feature | Streaming API | Fusion API |
|---------|---------------|------------|
| **Primary Use Case** | Remote camera access | Multi-camera data fusion |
| **Data Transmitted** | Compressed video (H264/H265) | Metadata only (bodies, objects, poses) |
| **Bandwidth per Camera** | 10-40 Mbps | <100 Kbps |
| **Edge Compute** | Video encoding only (NVENC) | Full depth NN + tracking + detection |
| **Host Compute** | Full depth NN + tracking + detection | Lightweight fusion only |
| **Synchronization** | None | Time-synced + geometric calibration |
| **360° Coverage** | No | Yes (fuses overlapping views) |
| **Receiver API** | `zed.open()` with `INPUT_TYPE::STREAM` | `fusion.subscribe()` |
## Architecture Diagrams
### Streaming API (Single Camera Remote Access)
```
┌─────────────────────┐ ┌─────────────────────┐
│ Edge (Jetson) │ │ Host (Server) │
│ │ │ │
│ ┌───────────────┐ │ H264/H265 RTP │ ┌───────────────┐ │
│ │ ZED Camera │ │ (10-40 Mbps) │ │ Decode │ │
│ └───────┬───────┘ │ ───────────────────► │ │ (NVDEC) │ │
│ │ │ │ └───────┬───────┘ │
│ ┌───────▼───────┐ │ │ │ │
│ │ NVENC │ │ │ ┌───────▼───────┐ │
│ │ Encode │──┘ │ │ Neural Depth │ │
│ │ (hardware) │ │ │ (NN on GPU) │ │
│ └───────────────┘ │ └───────┬───────┘ │
│ │ │ │
│ │ ┌───────▼───────┐ │
│ │ │ Tracking / │ │
│ │ │ Detection │ │
│ │ └───────┬───────┘ │
│ │ │ │
│ │ ┌───────▼───────┐ │
│ │ │ Point Cloud │ │
│ │ └───────────────┘ │
└─────────────────────┘ └─────────────────────┘
Edge: Lightweight (encode only)
Host: Heavy (NN depth + all processing)
```
### Fusion API (Multi-Camera 360° Coverage)
```
┌─────────────────────┐
│ Edge #1 (Jetson) │
│ ┌───────────────┐ │
│ │ ZED Camera │ │
│ └───────┬───────┘ │ Metadata Only
│ ┌───────▼───────┐ │ (bodies, poses)
│ │ Neural Depth │ │ (<100 Kbps) ┌─────────────────────┐
│ │ (NN on GPU) │ │ ──────────────────────►│ │
│ └───────┬───────┘ │ │ Fusion Server │
│ ┌───────▼───────┐ │ │ │
│ │ Body Track │──┘ │ ┌───────────────┐ │
│ └───────────────┘ │ │ Subscribe │ │
└─────────────────────┘ │ │ to all │ │
│ │ cameras │ │
┌─────────────────────┐ │ └───────┬───────┘ │
│ Edge #2 (Jetson) │ │ │ │
│ ┌───────────────┐ │ Metadata Only │ ┌───────▼───────┐ │
│ │ ZED Camera │ │ ──────────────────────►│ │ Time Sync │ │
│ └───────┬───────┘ │ │ │ + Geometric │ │
│ ┌───────▼───────┐ │ │ │ Calibration │ │
│ │ Neural Depth │ │ │ └───────┬───────┘ │
│ └───────┬───────┘ │ │ │ │
│ ┌───────▼───────┐ │ │ ┌───────▼───────┐ │
│ │ Body Track │──┘ │ │ 360° Fusion │ │
│ └───────────────┘ │ │ (merge views)│ │
└─────────────────────┘ │ └───────────────┘ │
│ │
┌─────────────────────┐ │ Lightweight GPU │
│ Edge #3 (Jetson) │ Metadata Only │ requirements │
│ ... │ ──────────────────────►│ │
└─────────────────────┘ └─────────────────────┘
Each Edge: Heavy (NN depth + tracking)
Fusion Server: Lightweight (data fusion only)
```
## Communication Modes
### Streaming API
| Mode | Description |
|------|-------------|
| **H264** | AVC encoding, wider GPU support |
| **H265** | HEVC encoding, better compression, requires Pascal+ GPU |
Port: Even number (default 30000), uses RTP protocol.
### Fusion API
| Mode | Description |
|------|-------------|
| **INTRA_PROCESS** | Same machine, shared memory (zero-copy) |
| **LOCAL_NETWORK** | Different machines, RTP over network |
Port: Default 30000, configurable per camera.
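The mode is chosen on the publisher side through `sl::CommunicationParameters`. A minimal sketch, assuming the setter names of ZED SDK 4.x (check your SDK version for the exact signatures):
```cpp
// Sketch only - assumes sl::CommunicationParameters from ZED SDK 4.x
sl::CommunicationParameters comm_params;

// INTRA_PROCESS: publisher and Fusion in the same process, shared memory (zero-copy)
comm_params.setForSharedMemory();

// ...or LOCAL_NETWORK: publisher and Fusion on different machines, RTP on the given port
// comm_params.setForLocalNetwork(30000);

zed.startPublishing(comm_params);
```
The Fusion receiver must subscribe with communication parameters that match what each publisher uses.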
## Bandwidth Requirements
### Streaming (H265 Compressed Video)
| Resolution | FPS | Bitrate per Camera | 4 Cameras |
|------------|-----|-------------------|-----------|
| 2K | 15 | 7 Mbps | 28 Mbps |
| HD1080 | 30 | 11 Mbps | 44 Mbps |
| HD720 | 60 | 6 Mbps | 24 Mbps |
| HD1200 | 30 | ~12 Mbps | ~48 Mbps |
### Fusion (Metadata Only)
| Data Type | Size per Frame | @ 30 FPS | 4 Cameras |
|-----------|---------------|----------|-----------|
| Body (18 keypoints) | ~2 KB | ~60 KB/s | ~240 KB/s |
| Object detection | ~1 KB | ~30 KB/s | ~120 KB/s |
| Pose/Transform | ~100 B | ~3 KB/s | ~12 KB/s |
**Fusion uses 100-1000x less bandwidth than Streaming.**
## The Architectural Gap
### What You CAN Do
| Scenario | API | Edge Computes | Host Receives |
|----------|-----|---------------|---------------|
| Remote camera access | Streaming | Video encoding | Video (host computes depth/tracking) |
| Multi-camera fusion | Fusion | Depth + tracking | Metadata only (bodies, poses) |
| Local processing | Direct | Everything | N/A (same machine) |
### What You CANNOT Do
**There is no ZED SDK mode for:**
```
┌─────────────────────┐ ┌─────────────────────┐
│ Edge (Jetson) │ │ Host (Server) │
│ │ │ │
│ ┌───────────────┐ │ Depth Map / │ ┌───────────────┐ │
│ │ ZED Camera │ │ Point Cloud │ │ Receive │ │
│ └───────┬───────┘ │ │ │ Depth/PC │ │
│ │ │ ??? │ └───────┬───────┘ │
│ ┌───────▼───────┐ │ ─────────────────X─► │ │ │
│ │ Neural Depth │ │ NOT SUPPORTED │ ┌───────▼───────┐ │
│ │ (NN on GPU) │ │ │ │ Further │ │
│ └───────┬───────┘ │ │ │ Processing │ │
│ │ │ │ └───────────────┘ │
│ ┌───────▼───────┐ │ │ │
│ │ Point Cloud │──┘ │ │
│ └───────────────┘ │ │
└─────────────────────┘ └─────────────────────┘
❌ Edge computes depth → streams depth map → Host receives depth
❌ Edge computes point cloud → streams point cloud → Host receives point cloud
```
## Why This Architecture?
### 1. Bandwidth Economics
Point cloud streaming would require significantly more bandwidth than video:
| Data Type | Size per Frame (HD1080) | @ 30 FPS |
|-----------|------------------------|----------|
| Raw stereo video | ~12 MB | 360 MB/s |
| H265 compressed | ~46 KB | 11 Mbps |
| Depth map (16-bit) | ~4 MB | 120 MB/s |
| Point cloud (XYZ float) | ~12 MB | 360 MB/s |
Compressed depth/point cloud is lossy and still large (~50-100 Mbps).
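As a sanity check on these figures, a short sketch of the frame-size-to-bandwidth arithmetic for HD1080 at 30 FPS (approximate, ignoring protocol overhead):
```cpp
#include <cstdio>

int main() {
    const double width = 1920, height = 1080, fps = 30;
    // 16-bit depth map: 2 bytes per pixel
    const double depth_mbps  = width * height * 2 * fps * 8 / 1e6;      // ~995 Mbps
    // Raw stereo video: 2 views x 3 bytes (BGR) per pixel
    const double stereo_mbps = 2 * width * height * 3 * fps * 8 / 1e6;  // ~2986 Mbps
    std::printf("16-bit depth : ~%.0f Mbps uncompressed\n", depth_mbps);
    std::printf("raw stereo   : ~%.0f Mbps uncompressed\n", stereo_mbps);
    // versus ~11 Mbps for the same camera as an H265-compressed video stream
    return 0;
}
```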
### 2. Compute Distribution Philosophy
ZED SDK follows: **"Compute entirely at edge OR entirely at host, not split"**
| Scenario | Solution |
|----------|----------|
| Low bandwidth, multi-camera | Fusion (edge computes all, sends metadata) |
| High bandwidth, single camera | Streaming (host computes all) |
| Same machine | INTRA_PROCESS (shared memory) |
### 3. Fusion API Design Goals
From Stereolabs documentation:
> "The Fusion module is **lightweight** (in computation resources requirements) compared to the requirements for camera publishers."
The Fusion receiver is intentionally lightweight because:
- It only needs to fuse pre-computed metadata
- It handles time synchronization and geometric calibration
- It can run on modest hardware while edges do heavy compute
### 4. Product Strategy
Stereolabs sells:
- **ZED cameras** (hardware)
- **ZED Box** (edge compute appliances)
- **ZED Hub** (cloud management)
The Fusion API encourages purchasing ZED Boxes for edge compute rather than building custom streaming solutions.
## Workarounds for Custom Point Cloud Streaming
If you need to stream point clouds from edge to host (outside ZED SDK):
### Option 1: Custom Compression + Streaming
```cpp
// On edge: compute the point cloud, compress it, and send it yourself
sl::Mat point_cloud;
zed.retrieveMeasure(point_cloud, sl::MEASURE::XYZRGBA);
// Compress with Draco or a PCL octree codec
// (draco_compress is a placeholder for your compression call)
std::vector<uint8_t> compressed = draco_compress(point_cloud);
// Send via ZeroMQ/gRPC/raw UDP (socket is a placeholder for your transport)
socket.send(compressed);
```
### Option 2: Depth Map Streaming
```cpp
// On edge: get depth, convert to 16-bit, compress as lossless PNG, send
sl::Mat depth;
zed.retrieveMeasure(depth, sl::MEASURE::DEPTH); // 32-bit float, in meters
// slMat2cvMat is the usual ZED-sample helper wrapping the sl::Mat buffer
cv::Mat depth_cv = slMat2cvMat(depth);
// PNG cannot store 32-bit floats: convert to 16-bit millimeters first
cv::Mat depth_mm;
depth_cv.convertTo(depth_mm, CV_16U, 1000.0);
std::vector<uint8_t> png;
cv::imencode(".png", depth_mm, png);
// Send via network (socket is a placeholder for your transport)
socket.send(png);
```
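On the receiving side, the depth image can be reconstructed with plain OpenCV. A sketch, assuming the 16-bit millimeter encoding used above:
```cpp
// On host: decode the received PNG back into a float depth map in meters
// (png is the std::vector<uint8_t> received from the edge device)
cv::Mat depth_mm = cv::imdecode(png, cv::IMREAD_UNCHANGED); // CV_16U, millimeters
cv::Mat depth_m;
depth_mm.convertTo(depth_m, CV_32F, 1.0 / 1000.0);          // back to meters
// depth_m can now be re-projected into a point cloud with the camera intrinsics
```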
### Bandwidth Estimate for Custom Streaming
| Method | Compression | Bandwidth (HD1080@30fps) |
|--------|-------------|-------------------------|
| Depth PNG (lossless) | ~4:1 | ~240 Mbps |
| Depth JPEG (lossy) | ~20:1 | ~48 Mbps |
| Point cloud Draco | ~10:1 | ~100 Mbps |
**At ~240 Mbps per camera for lossless depth, four cameras need roughly 1 Gbps, so 10 Gbps Ethernet handles custom depth streaming comfortably (a single 1 GbE link would be saturated).**
## Recommendations
| Use Case | Recommended API |
|----------|-----------------|
| Single camera, remote development | Streaming |
| Multi-camera body tracking | Fusion |
| Multi-camera 360° coverage | Fusion |
| Custom point cloud pipeline | Manual (ZeroMQ + Draco) |
| Low latency, same machine | INTRA_PROCESS |
## Code Examples
### Streaming Sender (Edge)
```cpp
sl::StreamingParameters stream_params;
stream_params.codec = sl::STREAMING_CODEC::H265;
stream_params.bitrate = 12000;
stream_params.port = 30000;
zed.enableStreaming(stream_params);
while (running) {
zed.grab(); // Encodes and sends frame
}
```
### Streaming Receiver (Host)
```cpp
sl::InitParameters init_params;
init_params.input.setFromStream("192.168.1.100", 30000);
zed.open(init_params);
sl::Mat depth, point_cloud;
while (running) {
if (zed.grab() == sl::ERROR_CODE::SUCCESS) {
// Full ZED SDK available on the receiver - depth, tracking, etc.
zed.retrieveMeasure(depth, sl::MEASURE::DEPTH);
zed.retrieveMeasure(point_cloud, sl::MEASURE::XYZRGBA);
}
}
```
### Fusion Sender (Edge)
```cpp
// Enable body tracking on the edge camera
sl::BodyTrackingParameters body_params;
zed.enableBodyTracking(body_params);
// Start publishing metadata
sl::CommunicationParameters comm_params;
comm_params.setForLocalNetwork(30000);
zed.startPublishing(comm_params);
sl::Bodies bodies;
while (running) {
if (zed.grab() == sl::ERROR_CODE::SUCCESS) {
zed.retrieveBodies(bodies); // Computes and publishes the bodies
}
}
```
### Fusion Receiver (Host)
```cpp
sl::InitFusionParameters init_fusion_params;
sl::Fusion fusion;
fusion.init(init_fusion_params);
sl::CameraIdentifier cam1(serial_number); // serial number of the publishing camera
sl::CommunicationParameters comm_params;
comm_params.setForLocalNetwork(30000);    // must match the publisher's settings
sl::Transform pose;                       // camera world pose, e.g. from a ZED360 calibration file
fusion.subscribe(cam1, comm_params, pose);
sl::Bodies fused_bodies;
while (running) {
if (fusion.process() == sl::FUSION_ERROR_CODE::SUCCESS) {
fusion.retrieveBodies(fused_bodies); // Already computed by the edges
}
}
```
## Summary
The ZED SDK architecture forces a choice:
1. **Streaming**: Edge sends video → Host computes depth (NN inference on host)
2. **Fusion**: Edge computes depth → sends metadata only (no point cloud)
There is **no built-in support** for streaming computed depth maps or point clouds from edge to host. This is by design for bandwidth efficiency and to encourage use of ZED Box edge compute products.
For custom depth/point cloud streaming, you must implement your own compression and network layer outside the ZED SDK.

74
include/ClientPublisher.hpp Executable file

@@ -0,0 +1,74 @@
#ifndef __SENDER_RUNNER_HDR__
#define __SENDER_RUNNER_HDR__
#include <sl/Camera.hpp>
#include <sl/Fusion.hpp>
#include <thread>
#include <condition_variable>
// Hard-coded camera stream configuration from inside_network.json
struct CameraStreamConfig {
int serial_number;
std::string ip_address;
int port;
};
// Predefined camera stream configurations
const std::vector<CameraStreamConfig> CAMERA_STREAM_CONFIGS = {
{44289123, "192.168.128.2", 30000},
{44435674, "192.168.128.2", 30002},
{41831756, "192.168.128.2", 30004},
{46195029, "192.168.128.2", 30006}
};
struct Trigger{
// Wake every camera worker thread, then block until each one has grabbed
// a frame (its entry in `states` set to true), and finally reset the flags.
void notifyZED(){
cv.notify_all();
if(running){
bool wait_for_zed = true;
const int nb_zed = states.size();
while(wait_for_zed){
int count_r = 0;
for(auto &it:states)
count_r += it.second;
wait_for_zed = count_r != nb_zed;
sl::sleep_ms(1);
}
// every camera has produced a frame: clear the flags for the next cycle
for(auto &it:states)
it.second = false;
}
}
std::condition_variable cv;
bool running = true;
std::map<int, bool> states; // serial number -> "frame grabbed this cycle"
};
class ClientPublisher{
public:
ClientPublisher();
~ClientPublisher();
// Open camera from stream (IP:port) instead of local input
bool open(const std::string& ip, int port, Trigger* ref, int sdk_gpu_id);
void start();
void stop();
void setStartSVOPosition(unsigned pos);
int getSerialNumber() const { return serial; }
private:
sl::Camera zed;
void work();
std::thread runner;
int serial;
std::mutex mtx;
Trigger *p_trigger;
};
#endif // ! __SENDER_RUNNER_HDR__

336
include/GLViewer.hpp Executable file

@@ -0,0 +1,336 @@
#ifndef __VIEWER_INCLUDE__
#define __VIEWER_INCLUDE__
#include <random>
#include <sl/Camera.hpp>
#include <sl/Fusion.hpp>
#include <GL/glew.h>
#include <GL/freeglut.h>
#include <cuda.h>
#include <cuda_gl_interop.h>
#ifndef M_PI
#define M_PI 3.141592653f
#endif
#define MOUSE_R_SENSITIVITY 0.03f
#define MOUSE_UZ_SENSITIVITY 0.75f
#define MOUSE_DZ_SENSITIVITY 1.25f
#define MOUSE_T_SENSITIVITY 0.05f
#define KEY_T_SENSITIVITY 0.1f
///////////////////////////////////////////////////////////////////////////////////////////////
class Shader {
public:
Shader() : verterxId_(0), fragmentId_(0), programId_(0) {}
Shader(const GLchar* vs, const GLchar* fs);
~Shader();
// Delete the move constructor and move assignment operator
Shader(Shader&&) = delete;
Shader& operator=(Shader&&) = delete;
// Delete the copy constructor and copy assignment operator
Shader(const Shader&) = delete;
Shader& operator=(const Shader&) = delete;
void set(const GLchar* vs, const GLchar* fs);
GLuint getProgramId();
static const GLint ATTRIB_VERTICES_POS = 0;
static const GLint ATTRIB_COLOR_POS = 1;
static const GLint ATTRIB_NORMAL = 2;
private:
bool compile(GLuint &shaderId, GLenum type, const GLchar* src);
GLuint verterxId_;
GLuint fragmentId_;
GLuint programId_;
};
struct ShaderData {
Shader it;
GLuint MVP_Mat;
};
class Simple3DObject {
public:
Simple3DObject();
~Simple3DObject();
void addPoint(sl::float3 pt, sl::float3 clr);
void addLine(sl::float3 pt1, sl::float3 pt2, sl::float3 clr);
void addFace(sl::float3 p1, sl::float3 p2, sl::float3 p3, sl::float3 clr);
void pushToGPU();
void clear();
void setStatic(bool _static) {
isStatic_ = _static;
}
void setDrawingType(GLenum type);
void draw();
private:
std::vector<sl::float3> vertices_;
std::vector<sl::float3> colors_;
std::vector<unsigned int> indices_;
bool isStatic_;
bool need_update;
GLenum drawingType_;
GLuint vaoID_;
GLuint vboID_[3];
};
class CameraGL {
public:
CameraGL() {
}
enum DIRECTION {
UP, DOWN, LEFT, RIGHT, FORWARD, BACK
};
CameraGL(sl::Translation position, sl::Translation direction, sl::Translation vertical = sl::Translation(0, 1, 0)); // vertical = Eigen::Vector3f(0, 1, 0)
~CameraGL();
void update();
void setProjection(float horizontalFOV, float verticalFOV, float znear, float zfar);
const sl::Transform& getViewProjectionMatrix() const;
float getHorizontalFOV() const;
float getVerticalFOV() const;
// Set an offset between the eye of the camera and its position
// Note: Useful to use the camera as a trackball camera with z>0 and x = 0, y = 0
// Note: coordinates are in local space
void setOffsetFromPosition(const sl::Translation& offset);
const sl::Translation& getOffsetFromPosition() const;
void setDirection(const sl::Translation& direction, const sl::Translation &vertical);
void translate(const sl::Translation& t);
void setPosition(const sl::Translation& p);
void rotate(const sl::Orientation& rot);
void rotate(const sl::Rotation& m);
void setRotation(const sl::Orientation& rot);
void setRotation(const sl::Rotation& m);
const sl::Translation& getPosition() const;
const sl::Translation& getForward() const;
const sl::Translation& getRight() const;
const sl::Translation& getUp() const;
const sl::Translation& getVertical() const;
float getZNear() const;
float getZFar() const;
static const sl::Translation ORIGINAL_FORWARD;
static const sl::Translation ORIGINAL_UP;
static const sl::Translation ORIGINAL_RIGHT;
sl::Transform projection_;
bool usePerspective_;
private:
void updateVectors();
void updateView();
void updateVPMatrix();
sl::Translation offset_;
sl::Translation position_;
sl::Translation forward_;
sl::Translation up_;
sl::Translation right_;
sl::Translation vertical_;
sl::Orientation rotation_;
sl::Transform view_;
sl::Transform vpMatrix_;
float horizontalFieldOfView_;
float verticalFieldOfView_;
float znear_;
float zfar_;
};
class PointCloud {
public:
PointCloud();
~PointCloud();
// Initialize Opengl and Cuda buffers
// Warning: must be called in the Opengl thread
void initialize(sl::Mat&, sl::float3 clr);
// Push a new point cloud
// Warning: can be called from any thread but the mutex "mutexData" must be locked
void pushNewPC();
// Draw the point cloud
// Warning: must be called in the Opengl thread
void draw(const sl::Transform& vp, bool draw_clr);
// Close (disable update)
void close();
private:
sl::Mat refMat;
sl::float3 clr;
Shader shader_;
GLuint shMVPMatrixLoc_;
GLuint shDrawColor;
GLuint shColor;
size_t numBytes_;
float* xyzrgbaMappedBuf_;
GLuint bufferGLID_;
cudaGraphicsResource* bufferCudaID_;
};
class CameraViewer {
public:
CameraViewer();
~CameraViewer();
// Initialize Opengl and Cuda buffers
bool initialize(sl::Mat& image, sl::float3 clr);
// Push a new Image + Z buffer and transform into a point cloud
void pushNewImage();
// Draw the Image
void draw(sl::Transform vpMatrix);
// Close (disable update)
void close();
Simple3DObject frustum;
private:
sl::Mat ref;
cudaArray_t ArrIm;
cudaGraphicsResource* cuda_gl_ressource;//cuda GL resource
Shader shader;
GLuint shMVPMatrixLocTex_;
GLuint texture;
GLuint vaoID_;
GLuint vboID_[3];
std::vector<sl::uint3> faces;
std::vector<sl::float3> vert;
std::vector<sl::float2> uv;
};
struct ObjectClassName {
sl::float3 position;
std::string name_lineA;
std::string name_lineB;
sl::float3 color;
};
// This class manages input events, window and Opengl rendering pipeline
class GLViewer {
public:
GLViewer();
~GLViewer();
bool isAvailable();
void init(int argc, char **argv);
void updateCamera(int, sl::Mat &, sl::Mat &);
void updateCamera(sl::Mat &);
void updateBodies(sl::Bodies &objs,std::map<sl::CameraIdentifier, sl::Bodies>& singledata, sl::FusionMetrics& metrics);
void setCameraPose(int, sl::Transform);
int getKey() {
const int key = last_key;
last_key = -1;
return key;
}
void exit();
private:
void render();
void update();
void draw();
void clearInputs();
void setRenderCameraProjection(sl::CameraParameters params, float znear, float zfar);
void printText();
// Glut functions callbacks
static void drawCallback();
static void mouseButtonCallback(int button, int state, int x, int y);
static void mouseMotionCallback(int x, int y);
static void reshapeCallback(int width, int height);
static void keyPressedCallback(unsigned char c, int x, int y);
static void keyReleasedCallback(unsigned char c, int x, int y);
static void idle();
sl::float3 getColor(int, bool);
bool available;
bool drawBbox = false;
enum MOUSE_BUTTON {
LEFT = 0,
MIDDLE = 1,
RIGHT = 2,
WHEEL_UP = 3,
WHEEL_DOWN = 4
};
enum KEY_STATE {
UP = 'u',
DOWN = 'd',
FREE = 'f'
};
unsigned char lastPressedKey;
bool mouseButton_[3];
int mouseWheelPosition_;
int mouseCurrentPosition_[2];
int mouseMotion_[2];
int previousMouseMotion_[2];
KEY_STATE keyStates_[256];
std::mutex mtx;
std::mutex mtx_clr;
ShaderData shader;
sl::Transform projection_;
sl::float3 bckgrnd_clr;
std::map<int, PointCloud> point_clouds;
std::map<int, CameraViewer> viewers;
std::map<int, sl::Transform> poses;
std::map<int, Simple3DObject> skeletons_raw;
std::map<int, sl::float3> colors;
std::map<int, sl::float3> colors_sk;
std::vector<ObjectClassName> fusionStats;
CameraGL camera_;
Simple3DObject skeletons;
Simple3DObject floor_grid;
bool show_pc = true;
bool show_raw = false;
bool draw_flat_color = false;
std::uniform_int_distribution<uint16_t> uint_dist360;
std::mt19937 rng;
int last_key = -1;
};
#endif /* __VIEWER_INCLUDE__ */

56
include/utils.hpp Executable file

@@ -0,0 +1,56 @@
#pragma once
#include <sl/Camera.hpp>
/**
* @brief Compute the start frame of each SVO for playback to be synced
*
* @param svo_files Map camera index to SVO file path
* @return Map camera index to starting SVO frame for synced playback
*/
inline std::map<int, int> syncDATA(std::map<int, std::string> svo_files) {
std::map<int, int> output; // map of camera index and frame index of the starting point for each
// Open all SVO
std::map<int, std::shared_ptr<sl::Camera>> p_zeds;
for (auto &it : svo_files) {
auto p_zed = std::make_shared<sl::Camera>();
sl::InitParameters init_param;
init_param.depth_mode = sl::DEPTH_MODE::NONE;
init_param.camera_disable_self_calib = true;
init_param.input.setFromSVOFile(it.second.c_str());
auto error = p_zed->open(init_param);
if (error == sl::ERROR_CODE::SUCCESS)
p_zeds.insert(std::make_pair(it.first, p_zed));
else {
std::cerr << "Could not open file " << it.second.c_str() << ": " << sl::toString(error) << ". Skipping" << std::endl;
}
}
// Compute the starting point: we have to take the latest (most recent) first timestamp
sl::Timestamp start_ts = 0;
for (auto &it : p_zeds) {
it.second->grab();
auto ts = it.second->getTimestamp(sl::TIME_REFERENCE::IMAGE);
if (ts > start_ts)
start_ts = ts;
}
std::cout << "Found SVOs common starting time: " << start_ts << std::endl;
// The starting point is now known; find the corresponding frame index in each SVO
for (auto &it : p_zeds) {
auto frame_position_at_ts = it.second->getSVOPositionAtTimestamp(start_ts);
if (frame_position_at_ts != -1)
output.insert(std::make_pair(it.first, frame_position_at_ts));
}
for (auto &it : p_zeds) it.second->close();
return output;
}

104
src/ClientPublisher.cpp Executable file

@@ -0,0 +1,104 @@
#include "ClientPublisher.hpp"
ClientPublisher::ClientPublisher() { }
ClientPublisher::~ClientPublisher()
{
zed.close();
}
bool ClientPublisher::open(const std::string& ip, int port, Trigger* ref, int sdk_gpu_id) {
p_trigger = ref;
sl::InitParameters init_parameters;
init_parameters.depth_mode = sl::DEPTH_MODE::NEURAL_PLUS;
// Use stream input instead of local camera
init_parameters.input.setFromStream(sl::String(ip.c_str()), port);
init_parameters.coordinate_units = sl::UNIT::METER;
init_parameters.depth_stabilization = 30;
init_parameters.sdk_gpu_id = sdk_gpu_id;
auto state = zed.open(init_parameters);
if (state != sl::ERROR_CODE::SUCCESS)
{
std::cout << "Error opening stream " << ip << ":" << port << " - " << state << std::endl;
return false;
}
serial = zed.getCameraInformation().serial_number;
p_trigger->states[serial] = false;
// In most body tracking setups the cameras are static:
sl::PositionalTrackingParameters positional_tracking_parameters;
positional_tracking_parameters.set_as_static = true;
state = zed.enablePositionalTracking(positional_tracking_parameters);
if (state != sl::ERROR_CODE::SUCCESS)
{
std::cout << "Error enabling positional tracking: " << state << std::endl;
return false;
}
// Define the body tracking parameters. Since Fusion does the tracking and fitting, you don't need to enable them here unless your application requires it.
sl::BodyTrackingParameters body_tracking_parameters;
body_tracking_parameters.detection_model = sl::BODY_TRACKING_MODEL::HUMAN_BODY_ACCURATE;
body_tracking_parameters.body_format = sl::BODY_FORMAT::BODY_18;
body_tracking_parameters.enable_body_fitting = false;
body_tracking_parameters.enable_tracking = false;
state = zed.enableBodyTracking(body_tracking_parameters);
if (state != sl::ERROR_CODE::SUCCESS)
{
std::cout << "Error enabling body tracking: " << state << std::endl;
return false;
}
return true;
}
void ClientPublisher::start()
{
if (zed.isOpened()) {
// the camera must publish its data so that Fusion can subscribe to it and gather
// the detected bodies and the other metadata needed for the fusion process.
zed.startPublishing();
// the worker thread can now process camera grabs in the background
runner = std::thread(&ClientPublisher::work, this);
}
}
void ClientPublisher::stop()
{
if (runner.joinable())
runner.join();
zed.close();
}
void ClientPublisher::work()
{
sl::Bodies bodies;
sl::BodyTrackingRuntimeParameters body_runtime_parameters;
body_runtime_parameters.detection_confidence_threshold = 40;
zed.setBodyTrackingRuntimeParameters(body_runtime_parameters);
sl::RuntimeParameters rt;
rt.confidence_threshold = 50;
// In this sample we use a dummy thread to process the ZED data.
// You can replace it with your own application and use the ZED as usual: retrieve images, depth, sensor data, and so on.
// As long as you call the grab method, and since the camera is subscribed to Fusion, the detection runs
// and the camera seamlessly transmits its data to the Fusion module.
while (p_trigger->running) {
std::unique_lock<std::mutex> lk(mtx);
p_trigger->cv.wait(lk);
if (p_trigger->running) {
if (zed.grab(rt) == sl::ERROR_CODE::SUCCESS) {
// nothing else to do here: grab() runs the detection and publishes to Fusion
}
}
p_trigger->states[serial] = true;
}
}
void ClientPublisher::setStartSVOPosition(unsigned pos) {
zed.setSVOPosition(pos);
}

1161
src/GLViewer.cpp Executable file

File diff suppressed because it is too large

211
src/main.cpp Executable file

@@ -0,0 +1,211 @@
///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2025, STEREOLABS.
//
// All rights reserved.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////
// ZED include
#include "ClientPublisher.hpp"
#include "GLViewer.hpp"
#include <memory>
int main(int argc, char **argv) {
#ifdef _SL_JETSON_
const bool isJetson = true;
#else
const bool isJetson = false;
#endif
if (argc != 2) {
// the configuration file should be generated with the ZED360 calibration tool
std::cout << "Needs a configuration file as input" << std::endl;
return 1;
}
// Defines the Coordinate system and unit used in this sample
constexpr sl::COORDINATE_SYSTEM COORDINATE_SYSTEM =
sl::COORDINATE_SYSTEM::RIGHT_HANDED_Y_UP;
constexpr sl::UNIT UNIT = sl::UNIT::METER;
// Read json file containing the configuration of your multicamera setup.
auto configurations =
sl::readFusionConfigurationFile(argv[1], COORDINATE_SYSTEM, UNIT);
if (configurations.empty()) {
std::cout << "Empty configuration File." << std::endl;
return EXIT_FAILURE;
}
Trigger trigger;
// Open camera streams from hard-coded configuration
std::vector<std::unique_ptr<ClientPublisher>> clients;
int id_ = 0;
int gpu_id = 0;
int nb_gpu = 0;
if (!isJetson)
cudaGetDeviceCount(&nb_gpu);
// Use hard-coded camera stream configurations
for (const auto& cam_config : CAMERA_STREAM_CONFIGS) {
std::cout << "Try to open stream for ZED " << cam_config.serial_number
<< " at " << cam_config.ip_address << ":" << cam_config.port << ".."
<< std::flush;
gpu_id = (nb_gpu == 0) ? 0 : id_ % nb_gpu;
auto client = std::make_unique<ClientPublisher>();
auto state = client->open(cam_config.ip_address, cam_config.port, &trigger, gpu_id);
if (!state) {
std::cerr << "Could not open stream for ZED: " << cam_config.serial_number
<< ". Skipping..." << std::endl;
continue;
}
std::cout << ". ready ! (Serial: " << client->getSerialNumber() << ")" << std::endl;
clients.push_back(std::move(client));
id_++;
}
// start camera threads
for (auto &it : clients) {
it->start();
}
// Now that the ZED cameras are running, we need to initialize the Fusion
// module
sl::InitFusionParameters init_params;
init_params.coordinate_units = UNIT;
init_params.coordinate_system = COORDINATE_SYSTEM;
sl::Resolution low_res(512, 360);
init_params.maximum_working_resolution = low_res;
// create and initialize it
sl::Fusion fusion;
fusion.init(init_params);
// subscribe to every camera of the setup so Fusion can internally gather their data
std::vector<sl::CameraIdentifier> cameras;
for (auto &it : configurations) {
sl::CameraIdentifier uuid(it.serial_number);
// to subscribe to a camera you must give its serial number, the way to
// communicate with it (shared memory or local network), and its world pose
// in the setup.
auto state = fusion.subscribe(uuid, it.communication_parameters, it.pose,
it.override_gravity);
if (state != sl::FUSION_ERROR_CODE::SUCCESS)
std::cout << "Unable to subscribe to " << std::to_string(uuid.sn) << " . "
<< state << std::endl;
else
cameras.push_back(uuid);
}
// check that at least one camera is connected
if (cameras.empty()) {
std::cout << "no connections " << std::endl;
return EXIT_FAILURE;
}
// as this sample shows how to fuse body detections from a multi-camera setup,
// we enable the Body Tracking module with its options
sl::BodyTrackingFusionParameters body_fusion_init_params;
body_fusion_init_params.enable_tracking = true;
body_fusion_init_params.enable_body_fitting =
!isJetson; // skeletons will look more natural but require more computation
fusion.enableBodyTracking(body_fusion_init_params);
// define fusion behavior
sl::BodyTrackingFusionRuntimeParameters body_tracking_runtime_parameters;
// make sure the detected skeleton is complete enough
body_tracking_runtime_parameters.skeleton_minimum_allowed_keypoints = 7;
// we may also want to keep only skeletons seen by multiple cameras,
// in this case by at least half of them
body_tracking_runtime_parameters.skeleton_minimum_allowed_camera =
cameras.size() / 2.;
body_tracking_runtime_parameters.skeleton_smoothing = 0.1f;
// creation of a 3D viewer
GLViewer viewer;
viewer.init(argc, argv);
std::cout << "Viewer Shortcuts\n"
<< "\t- 'q': quit the application\n"
<< "\t- 'r': switch on/off for raw skeleton display\n"
<< "\t- 'p': switch on/off for live point cloud display\n"
<< "\t- 'c': switch on/off point cloud display with raw color\n"
<< std::endl;
// fusion outputs
sl::Bodies fused_bodies;
std::map<sl::CameraIdentifier, sl::Bodies> camera_raw_data;
sl::FusionMetrics metrics;
std::map<sl::CameraIdentifier, sl::Mat> views;
std::map<sl::CameraIdentifier, sl::Mat> pointClouds;
sl::CameraIdentifier fused_camera(0);
// run the fusion as long as the viewer is available.
while (viewer.isAvailable()) {
trigger.notifyZED();
// run the fusion process (which gathers data from all cameras, syncs it and
// processes it)
if (fusion.process() == sl::FUSION_ERROR_CODE::SUCCESS) {
// Retrieve fused body
fusion.retrieveBodies(fused_bodies, body_tracking_runtime_parameters);
// for debug, you can retrieve the data sent by each camera
for (auto &id : cameras) {
fusion.retrieveBodies(camera_raw_data[id],
body_tracking_runtime_parameters, id);
auto state_view = fusion.retrieveImage(views[id], id, low_res);
auto state_pc = fusion.retrieveMeasure(pointClouds[id], id,
sl::MEASURE::XYZBGRA, low_res);
if (state_view == sl::FUSION_ERROR_CODE::SUCCESS &&
state_pc == sl::FUSION_ERROR_CODE::SUCCESS)
viewer.updateCamera(id.sn, views[id], pointClouds[id]);
sl::Pose pose;
if (fusion.getPosition(pose, sl::REFERENCE_FRAME::WORLD, id,
sl::POSITION_TYPE::RAW) ==
sl::POSITIONAL_TRACKING_STATE::OK)
viewer.setCameraPose(id.sn, pose.pose_data);
}
// get metrics about the fusion process for monitoring purposes
fusion.getProcessMetrics(metrics);
}
// update the 3D view
viewer.updateBodies(fused_bodies, camera_raw_data, metrics);
}
trigger.running = false;
trigger.notifyZED();
for (auto &it : clients)
it->stop();
fusion.close();
return EXIT_SUCCESS;
}