feat(demo): add real-time visualization to the demo pipeline
Integrate an opt-in OpenCV visualizer into the demo runtime so operators can monitor tracking, segmentation, and inference confidence in real time without changing the default non-visual execution path.
This commit is contained in:
@@ -0,0 +1,446 @@
|
||||
"""OpenCV-based visualizer for demo pipeline.
|
||||
|
||||
Provides real-time visualization of detection, segmentation, and classification results
|
||||
with interactive mode switching for mask display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import cast
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from numpy.typing import NDArray
|
||||
|
||||
logger = logging.getLogger(__name__)

# --- Window titles -------------------------------------------------------
MAIN_WINDOW = "Scoliosis Detection"
SEG_WINDOW = "Segmentation"

# --- Silhouette geometry (must match preprocess.py) ----------------------
SIL_HEIGHT = 64
SIL_WIDTH = 44

# --- On-screen size of the upscaled silhouette ---------------------------
DISPLAY_HEIGHT = 256
DISPLAY_WIDTH = 176

# --- Drawing colors, in OpenCV's BGR channel order -----------------------
COLOR_GREEN = (0, 255, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLACK = (0, 0, 0)
COLOR_RED = (0, 0, 255)
COLOR_YELLOW = (0, 255, 255)

# Human-readable names for the mask display modes, indexed by mask_mode.
MODE_LABELS = ["Both", "Raw Mask", "Normalized"]

# Alias for the uint8 image arrays passed to and returned from OpenCV calls.
ImageArray = NDArray[np.uint8]
||||
class OpenCVVisualizer:
    """Interactive OpenCV front-end for the gait analysis demo.

    Two windows are shown:

    - Main stream: original frame annotated with bounding box and metadata.
    - Segmentation: raw mask, normalized silhouette, or both side by side.

    The mask display mode can be cycled at runtime from the keyboard.
    """

    def __init__(self) -> None:
        """Start in "Both" mask mode with no windows created yet."""
        # 0: Both, 1: Raw, 2: Normalized
        self.mask_mode: int = 0
        # Windows are created lazily by _ensure_windows().
        self._windows_created: bool = False
||||
def _ensure_windows(self) -> None:
|
||||
"""Create OpenCV windows if not already created."""
|
||||
if not self._windows_created:
|
||||
cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL)
|
||||
cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL)
|
||||
self._windows_created = True
|
||||
|
||||
def _draw_bbox(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: tuple[int, int, int, int] | None,
|
||||
) -> None:
|
||||
"""Draw bounding box on frame if present.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
bbox: Bounding box as (x1, y1, x2, y2) or None
|
||||
"""
|
||||
if bbox is None:
|
||||
return
|
||||
|
||||
x1, y1, x2, y2 = bbox
|
||||
# Draw rectangle with green color, thickness 2
|
||||
_ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2)
|
||||
|
||||
def _draw_text_overlay(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
) -> None:
|
||||
"""Draw text overlay with track info, FPS, label, and confidence.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
"""
|
||||
# Prepare text lines
|
||||
lines: list[str] = []
|
||||
lines.append(f"ID: {track_id}")
|
||||
lines.append(f"FPS: {fps:.1f}")
|
||||
|
||||
if label is not None:
|
||||
if confidence is not None:
|
||||
lines.append(f"{label}: {confidence:.2%}")
|
||||
else:
|
||||
lines.append(label)
|
||||
|
||||
# Draw text with background for readability
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.6
|
||||
thickness = 1
|
||||
line_height = 25
|
||||
margin = 10
|
||||
|
||||
for i, text in enumerate(lines):
|
||||
y_pos = margin + (i + 1) * line_height
|
||||
|
||||
# Draw background rectangle
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
text, font, font_scale, thickness
|
||||
)
|
||||
_ = cv2.rectangle(
|
||||
frame,
|
||||
(margin, y_pos - text_height - 5),
|
||||
(margin + text_width + 10, y_pos + 5),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
frame,
|
||||
text,
|
||||
(margin + 5, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_WHITE,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def _prepare_main_frame(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: tuple[int, int, int, int] | None,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare main display frame with bbox and text overlay.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, C) uint8
|
||||
bbox: Bounding box or None
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
|
||||
Returns:
|
||||
Processed frame ready for display
|
||||
"""
|
||||
# Ensure BGR format (convert grayscale if needed)
|
||||
if len(frame.shape) == 2:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 1:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 3:
|
||||
display_frame = frame.copy()
|
||||
elif frame.shape[2] == 4:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))
|
||||
else:
|
||||
display_frame = frame.copy()
|
||||
|
||||
# Draw bbox and text (modifies in place)
|
||||
self._draw_bbox(display_frame, bbox)
|
||||
self._draw_text_overlay(display_frame, track_id, fps, label, confidence)
|
||||
|
||||
return display_frame
|
||||
|
||||
def _upscale_silhouette(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | NDArray[np.uint8],
|
||||
) -> ImageArray:
|
||||
"""Upscale silhouette to display size.
|
||||
|
||||
Args:
|
||||
silhouette: Input silhouette (64, 44) float32 [0,1] or uint8 [0,255]
|
||||
|
||||
Returns:
|
||||
Upscaled silhouette (256, 176) uint8
|
||||
"""
|
||||
# Normalize to uint8 if needed
|
||||
if silhouette.dtype == np.float32 or silhouette.dtype == np.float64:
|
||||
sil_u8 = (silhouette * 255).astype(np.uint8)
|
||||
else:
|
||||
sil_u8 = silhouette.astype(np.uint8)
|
||||
|
||||
# Upscale using nearest neighbor to preserve pixelation
|
||||
upscaled = cast(
|
||||
ImageArray,
|
||||
cv2.resize(
|
||||
sil_u8,
|
||||
(DISPLAY_WIDTH, DISPLAY_HEIGHT),
|
||||
interpolation=cv2.INTER_NEAREST,
|
||||
),
|
||||
)
|
||||
|
||||
return upscaled
|
||||
|
||||
def _prepare_segmentation_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare segmentation window content based on current mode.
|
||||
|
||||
Args:
|
||||
mask_raw: Raw binary mask (H, W) uint8 or None
|
||||
silhouette: Normalized silhouette (64, 44) float32 or None
|
||||
|
||||
Returns:
|
||||
Displayable image (H, W, 3) uint8
|
||||
"""
|
||||
if self.mask_mode == 0:
|
||||
# Mode 0: Both (side by side)
|
||||
return self._prepare_both_view(mask_raw, silhouette)
|
||||
elif self.mask_mode == 1:
|
||||
# Mode 1: Raw mask only
|
||||
return self._prepare_raw_view(mask_raw)
|
||||
else:
|
||||
# Mode 2: Normalized silhouette only
|
||||
return self._prepare_normalized_view(silhouette)
|
||||
|
||||
def _prepare_raw_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare raw mask view.
|
||||
|
||||
Args:
|
||||
mask_raw: Raw binary mask or None
|
||||
|
||||
Returns:
|
||||
Displayable image with mode indicator
|
||||
"""
|
||||
if mask_raw is None:
|
||||
# Create placeholder
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Raw Mask (No Data)")
|
||||
return placeholder
|
||||
|
||||
# Ensure single channel
|
||||
if len(mask_raw.shape) == 3:
|
||||
mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY))
|
||||
else:
|
||||
mask_gray = mask_raw
|
||||
|
||||
# Resize to display size
|
||||
mask_resized = cast(
|
||||
ImageArray,
|
||||
cv2.resize(
|
||||
mask_gray,
|
||||
(DISPLAY_WIDTH, DISPLAY_HEIGHT),
|
||||
interpolation=cv2.INTER_NEAREST,
|
||||
),
|
||||
)
|
||||
|
||||
# Convert to BGR for display
|
||||
mask_bgr = cast(ImageArray, cv2.cvtColor(mask_resized, cv2.COLOR_GRAY2BGR))
|
||||
self._draw_mode_indicator(mask_bgr, "Raw Mask")
|
||||
|
||||
return mask_bgr
|
||||
|
||||
def _prepare_normalized_view(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare normalized silhouette view.
|
||||
|
||||
Args:
|
||||
silhouette: Normalized silhouette (64, 44) or None
|
||||
|
||||
Returns:
|
||||
Displayable image with mode indicator
|
||||
"""
|
||||
if silhouette is None:
|
||||
# Create placeholder
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Normalized (No Data)")
|
||||
return placeholder
|
||||
|
||||
# Upscale and convert
|
||||
upscaled = self._upscale_silhouette(silhouette)
|
||||
sil_bgr = cast(ImageArray, cv2.cvtColor(upscaled, cv2.COLOR_GRAY2BGR))
|
||||
self._draw_mode_indicator(sil_bgr, "Normalized")
|
||||
|
||||
return sil_bgr
|
||||
|
||||
def _prepare_both_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare side-by-side view of both masks.
|
||||
|
||||
Args:
|
||||
mask_raw: Raw binary mask or None
|
||||
silhouette: Normalized silhouette or None
|
||||
|
||||
Returns:
|
||||
Displayable side-by-side image
|
||||
"""
|
||||
# Prepare individual views
|
||||
raw_view = self._prepare_raw_view(mask_raw)
|
||||
norm_view = self._prepare_normalized_view(silhouette)
|
||||
|
||||
# Convert to grayscale for side-by-side composition
|
||||
if len(raw_view.shape) == 3:
|
||||
raw_gray = cast(ImageArray, cv2.cvtColor(raw_view, cv2.COLOR_BGR2GRAY))
|
||||
else:
|
||||
raw_gray = raw_view
|
||||
|
||||
if len(norm_view.shape) == 3:
|
||||
norm_gray = cast(ImageArray, cv2.cvtColor(norm_view, cv2.COLOR_BGR2GRAY))
|
||||
else:
|
||||
norm_gray = norm_view
|
||||
|
||||
# Stack horizontally
|
||||
combined = np.hstack([raw_gray, norm_gray])
|
||||
|
||||
# Convert back to BGR
|
||||
combined_bgr = cast(ImageArray, cv2.cvtColor(combined, cv2.COLOR_GRAY2BGR))
|
||||
|
||||
# Add mode indicator
|
||||
self._draw_mode_indicator(combined_bgr, "Both: Raw | Normalized")
|
||||
|
||||
return combined_bgr
|
||||
|
||||
def _draw_mode_indicator(
|
||||
self,
|
||||
image: ImageArray,
|
||||
label: str,
|
||||
) -> None:
|
||||
"""Draw mode indicator text on image.
|
||||
|
||||
Args:
|
||||
image: Image to draw on (modified in place)
|
||||
label: Mode label text
|
||||
"""
|
||||
h, w = image.shape[:2]
|
||||
|
||||
# Mode text at bottom
|
||||
mode_text = f"Mode: {MODE_LABELS[self.mask_mode]} ({self.mask_mode}) - {label}"
|
||||
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.5
|
||||
thickness = 1
|
||||
|
||||
# Get text size for background
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
mode_text, font, font_scale, thickness
|
||||
)
|
||||
|
||||
# Draw background at bottom center
|
||||
x_pos = (w - text_width) // 2
|
||||
y_pos = h - 10
|
||||
|
||||
_ = cv2.rectangle(
|
||||
image,
|
||||
(x_pos - 5, y_pos - text_height - 5),
|
||||
(x_pos + text_width + 5, y_pos + 5),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
image,
|
||||
mode_text,
|
||||
(x_pos, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_YELLOW,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def update(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: tuple[int, int, int, int] | None,
|
||||
track_id: int,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
fps: float,
|
||||
) -> bool:
|
||||
"""Update visualization with new frame data.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, C) uint8
|
||||
bbox: Bounding box as (x1, y1, x2, y2) or None
|
||||
track_id: Tracking ID
|
||||
mask_raw: Raw binary mask (H, W) uint8 or None
|
||||
silhouette: Normalized silhouette (64, 44) float32 [0,1] or None
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence [0,1] or None
|
||||
fps: Current FPS
|
||||
|
||||
Returns:
|
||||
False if user requested quit (pressed 'q'), True otherwise
|
||||
"""
|
||||
self._ensure_windows()
|
||||
|
||||
# Prepare and show main window
|
||||
main_display = self._prepare_main_frame(
|
||||
frame, bbox, track_id, fps, label, confidence
|
||||
)
|
||||
cv2.imshow(MAIN_WINDOW, main_display)
|
||||
|
||||
# Prepare and show segmentation window
|
||||
seg_display = self._prepare_segmentation_view(mask_raw, silhouette)
|
||||
cv2.imshow(SEG_WINDOW, seg_display)
|
||||
|
||||
# Handle keyboard input
|
||||
key = cv2.waitKey(1) & 0xFF
|
||||
|
||||
if key == ord("q"):
|
||||
return False
|
||||
elif key == ord("m"):
|
||||
# Cycle through modes: 0 -> 1 -> 2 -> 0
|
||||
self.mask_mode = (self.mask_mode + 1) % 3
|
||||
logger.debug("Switched to mask mode: %s", MODE_LABELS[self.mask_mode])
|
||||
|
||||
return True
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close all OpenCV windows and cleanup."""
|
||||
if self._windows_created:
|
||||
cv2.destroyAllWindows()
|
||||
self._windows_created = False
|
||||
Reference in New Issue
Block a user