Files
OpenGait/opengait/demo/visualizer.py
T
crosstyan 7f073179d7 fix(demo): stabilize visualizer bbox and mask rendering
Align bbox coordinate handling across primary and fallback paths, normalize Both-mode raw mask rendering, and tighten demo result typing to reduce runtime/display inconsistencies.
2026-02-28 18:05:33 +08:00

464 lines
14 KiB
Python

"""OpenCV-based visualizer for demo pipeline.
Provides real-time visualization of detection, segmentation, and classification results
with interactive mode switching for mask display.
"""
from __future__ import annotations
import logging
from typing import cast
import cv2
import numpy as np
from numpy.typing import NDArray
from .preprocess import BBoxXYXY
logger = logging.getLogger(__name__)

# Titles of the two OpenCV windows managed by the visualizer
MAIN_WINDOW = "Scoliosis Detection"
SEG_WINDOW = "Segmentation"

# Silhouette size in pixels (height, width), as defined in preprocess.py
SIL_HEIGHT, SIL_WIDTH = 64, 44

# On-screen size of the upscaled silhouette: four times the silhouette size
DISPLAY_HEIGHT, DISPLAY_WIDTH = 4 * SIL_HEIGHT, 4 * SIL_WIDTH

# Drawing colors, expressed in OpenCV's BGR channel order
COLOR_GREEN = (0, 255, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLACK = (0, 0, 0)
COLOR_RED = (0, 0, 255)
COLOR_YELLOW = (0, 255, 255)

# Human-readable names of the segmentation display modes, indexed by mode id
MODE_LABELS = ["Both", "Raw Mask", "Normalized"]

# Alias for uint8 image arrays (NDArray or cv2.Mat)
ImageArray = NDArray[np.uint8]
class OpenCVVisualizer:
    """Real-time visualizer for gait analysis demo.

    Displays two windows:

    - Main stream: original frame with bounding box and metadata overlay
    - Segmentation: raw mask, normalized silhouette, or side-by-side view

    Supports interactive mode switching via keyboard: ``m`` cycles the
    segmentation mode and ``q`` requests quit (both handled in ``update``).
    """

    def __init__(self) -> None:
        """Initialize visualizer with default mask mode."""
        self.mask_mode: int = 0  # 0: Both, 1: Raw, 2: Normalized
        self._windows_created: bool = False

    def _ensure_windows(self) -> None:
        """Create OpenCV windows if not already created."""
        if self._windows_created:
            return
        cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL)
        cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL)
        self._windows_created = True

    def _draw_bbox(
        self,
        frame: ImageArray,
        bbox: BBoxXYXY | None,
    ) -> None:
        """Draw bounding box on frame if present.

        Args:
            frame: Input frame (H, W, 3) uint8 - modified in place
            bbox: Bounding box in XYXY format as (x1, y1, x2, y2) or None
        """
        if bbox is None:
            return
        # cv2.rectangle expects integer pixel coordinates; coerce so that
        # float or numpy-scalar boxes from any detection path are accepted.
        x1, y1, x2, y2 = (int(v) for v in bbox)
        # Green rectangle, thickness 2
        _ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2)

    def _draw_text_overlay(
        self,
        frame: ImageArray,
        track_id: int,
        fps: float,
        label: str | None,
        confidence: float | None,
    ) -> None:
        """Draw text overlay with track info, FPS, label, and confidence.

        Args:
            frame: Input frame (H, W, 3) uint8 - modified in place
            track_id: Tracking ID
            fps: Current FPS
            label: Classification label or None
            confidence: Classification confidence or None
        """
        lines: list[str] = [f"ID: {track_id}", f"FPS: {fps:.1f}"]
        if label is not None:
            if confidence is not None:
                lines.append(f"{label}: {confidence:.2%}")
            else:
                lines.append(label)

        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.6
        thickness = 1
        line_height = 25
        margin = 10

        for row, text in enumerate(lines, start=1):
            y_pos = margin + row * line_height
            (text_width, text_height), _ = cv2.getTextSize(
                text, font, font_scale, thickness
            )
            # Filled black box behind the text keeps it readable on any frame
            _ = cv2.rectangle(
                frame,
                (margin, y_pos - text_height - 5),
                (margin + text_width + 10, y_pos + 5),
                COLOR_BLACK,
                -1,
            )
            _ = cv2.putText(
                frame,
                text,
                (margin + 5, y_pos),
                font,
                font_scale,
                COLOR_WHITE,
                thickness,
            )

    def _prepare_main_frame(
        self,
        frame: ImageArray,
        bbox: BBoxXYXY | None,
        track_id: int,
        fps: float,
        label: str | None,
        confidence: float | None,
    ) -> ImageArray:
        """Prepare main display frame with bbox and text overlay.

        The input frame is never modified; drawing happens on a converted
        copy.

        Args:
            frame: Input frame (H, W) or (H, W, C) uint8
            bbox: Bounding box in XYXY format (x1, y1, x2, y2) or None
            track_id: Tracking ID
            fps: Current FPS
            label: Classification label or None
            confidence: Classification confidence or None

        Returns:
            Processed frame ready for display
        """
        # Ensure BGR format (convert grayscale / BGRA if needed)
        if frame.ndim == 2 or frame.shape[2] == 1:
            display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
        elif frame.shape[2] == 4:
            display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))
        else:
            # 3-channel frames (and any unexpected layout) are drawn on a copy
            display_frame = frame.copy()

        # Both helpers draw in place on the copy
        self._draw_bbox(display_frame, bbox)
        self._draw_text_overlay(display_frame, track_id, fps, label, confidence)
        return display_frame

    @staticmethod
    def _to_uint8(image: NDArray[np.generic]) -> ImageArray:
        """Convert a mask or silhouette array to uint8 for display.

        Args:
            image: Array of bool, floating (expected range [0, 1]) or integer
                dtype

        Returns:
            uint8 array of the same shape. Bool maps to {0, 255}; floating
            values are clipped to [0, 1] and scaled by 255; other dtypes are
            cast as-is.
        """
        if image.dtype == np.bool_:
            return image.astype(np.uint8) * 255
        if np.issubdtype(image.dtype, np.floating):
            # Clip first: values outside [0, 1] would otherwise wrap around
            # when cast to uint8 and render as dark speckles.
            return (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)
        return image.astype(np.uint8, copy=False)

    def _upscale_silhouette(
        self,
        silhouette: NDArray[np.float32] | NDArray[np.uint8],
    ) -> ImageArray:
        """Upscale silhouette to display size.

        Args:
            silhouette: Input silhouette (64, 44) float [0,1] or uint8 [0,255]

        Returns:
            Upscaled silhouette (256, 176) uint8
        """
        sil_u8 = self._to_uint8(silhouette)
        # Nearest neighbor preserves the pixelated look of the silhouette
        return cast(
            ImageArray,
            cv2.resize(
                sil_u8,
                (DISPLAY_WIDTH, DISPLAY_HEIGHT),
                interpolation=cv2.INTER_NEAREST,
            ),
        )

    def _mask_to_display_gray(
        self,
        mask_raw: ImageArray | None,
    ) -> ImageArray | None:
        """Convert a raw mask to a single-channel uint8 image at display size.

        Shared by the raw-only and side-by-side views so both render the
        same mask identically.

        Args:
            mask_raw: Raw mask (H, W), (H, W, 1) or multi-channel, or None

        Returns:
            (DISPLAY_HEIGHT, DISPLAY_WIDTH) uint8 image, or None when there is
            no mask data (None or zero-sized input)
        """
        if mask_raw is None or mask_raw.size == 0:
            return None

        # Normalize dtype before any cv2 call so float and bool masks are
        # handled uniformly.
        mask_u8 = self._to_uint8(mask_raw)

        if mask_u8.ndim == 3:
            if mask_u8.shape[2] == 1:
                # BGR2GRAY needs 3 or 4 channels; a trailing singleton
                # channel is simply dropped.
                mask_u8 = mask_u8[:, :, 0]
            else:
                mask_u8 = cast(ImageArray, cv2.cvtColor(mask_u8, cv2.COLOR_BGR2GRAY))

        return cast(
            ImageArray,
            cv2.resize(
                mask_u8,
                (DISPLAY_WIDTH, DISPLAY_HEIGHT),
                interpolation=cv2.INTER_NEAREST,
            ),
        )

    def _prepare_segmentation_view(
        self,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
    ) -> ImageArray:
        """Prepare segmentation window content based on current mode.

        Args:
            mask_raw: Raw binary mask (H, W) uint8 or None
            silhouette: Normalized silhouette (64, 44) float32 or None

        Returns:
            Displayable image (H, W, 3) uint8
        """
        if self.mask_mode == 0:
            # Mode 0: Both (side by side)
            return self._prepare_both_view(mask_raw, silhouette)
        if self.mask_mode == 1:
            # Mode 1: Raw mask only
            return self._prepare_raw_view(mask_raw)
        # Mode 2: Normalized silhouette only
        return self._prepare_normalized_view(silhouette)

    def _prepare_raw_view(
        self,
        mask_raw: ImageArray | None,
    ) -> ImageArray:
        """Prepare raw mask view.

        Args:
            mask_raw: Raw binary mask or None

        Returns:
            Displayable image with mode indicator
        """
        mask_gray = self._mask_to_display_gray(mask_raw)
        if mask_gray is None:
            # No mask available: black placeholder with an explanatory label
            placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
            self._draw_mode_indicator(placeholder, "Raw Mask (No Data)")
            return placeholder

        mask_bgr = cast(ImageArray, cv2.cvtColor(mask_gray, cv2.COLOR_GRAY2BGR))
        self._draw_mode_indicator(mask_bgr, "Raw Mask")
        return mask_bgr

    def _prepare_normalized_view(
        self,
        silhouette: NDArray[np.float32] | None,
    ) -> ImageArray:
        """Prepare normalized silhouette view.

        Args:
            silhouette: Normalized silhouette (64, 44) or None

        Returns:
            Displayable image with mode indicator
        """
        if silhouette is None or silhouette.size == 0:
            # No silhouette available: black placeholder with a label
            placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
            self._draw_mode_indicator(placeholder, "Normalized (No Data)")
            return placeholder

        upscaled = self._upscale_silhouette(silhouette)
        sil_bgr = cast(ImageArray, cv2.cvtColor(upscaled, cv2.COLOR_GRAY2BGR))
        self._draw_mode_indicator(sil_bgr, "Normalized")
        return sil_bgr

    def _prepare_both_view(
        self,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
    ) -> ImageArray:
        """Prepare side-by-side view of both masks.

        Args:
            mask_raw: Raw binary mask or None
            silhouette: Normalized silhouette or None

        Returns:
            Displayable side-by-side image (raw on the left, normalized on
            the right)
        """
        # Each half is built without its own indicator; a single indicator is
        # drawn on the combined image. Missing data renders as a black half.
        raw_gray = self._mask_to_display_gray(mask_raw)
        if raw_gray is None:
            raw_gray = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)

        if silhouette is None or silhouette.size == 0:
            norm_gray = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
        else:
            norm_gray = self._upscale_silhouette(silhouette)

        combined = np.hstack([raw_gray, norm_gray])
        combined_bgr = cast(ImageArray, cv2.cvtColor(combined, cv2.COLOR_GRAY2BGR))
        self._draw_mode_indicator(combined_bgr, "Both: Raw | Normalized")
        return combined_bgr

    def _draw_mode_indicator(
        self,
        image: ImageArray,
        label: str,
    ) -> None:
        """Draw mode indicator text on image.

        The text is centered at the bottom of the image. If it would be
        wider than the image, the font is scaled down so it still fits.

        Args:
            image: Image to draw on (modified in place)
            label: Mode label text
        """
        h, w = image.shape[:2]
        mode_text = f"Mode: {MODE_LABELS[self.mask_mode]} ({self.mask_mode}) - {label}"
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.5
        thickness = 1
        pad = 5

        (text_width, text_height), _ = cv2.getTextSize(
            mode_text, font, font_scale, thickness
        )

        # The single-view images are narrow; shrink the font rather than let
        # the text run off both edges.
        max_width = w - 2 * pad
        if text_width > max_width > 0:
            font_scale *= max_width / text_width
            (text_width, text_height), _ = cv2.getTextSize(
                mode_text, font, font_scale, thickness
            )

        # Never start left of the padding, even if rounding leaves the text
        # a pixel or two too wide.
        x_pos = max(pad, (w - text_width) // 2)
        y_pos = h - 10

        # Filled background box, then the text on top
        _ = cv2.rectangle(
            image,
            (x_pos - pad, y_pos - text_height - pad),
            (x_pos + text_width + pad, y_pos + pad),
            COLOR_BLACK,
            -1,
        )
        _ = cv2.putText(
            image,
            mode_text,
            (x_pos, y_pos),
            font,
            font_scale,
            COLOR_YELLOW,
            thickness,
        )

    def update(
        self,
        frame: ImageArray,
        bbox: BBoxXYXY | None,
        track_id: int,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
        label: str | None,
        confidence: float | None,
        fps: float,
    ) -> bool:
        """Update visualization with new frame data.

        Args:
            frame: Input frame (H, W, C) uint8
            bbox: Bounding box in XYXY format (x1, y1, x2, y2) or None
            track_id: Tracking ID
            mask_raw: Raw binary mask (H, W) uint8 or None
            silhouette: Normalized silhouette (64, 44) float32 [0,1] or None
            label: Classification label or None
            confidence: Classification confidence [0,1] or None
            fps: Current FPS

        Returns:
            False if user requested quit (pressed 'q'), True otherwise
        """
        self._ensure_windows()

        main_display = self._prepare_main_frame(
            frame, bbox, track_id, fps, label, confidence
        )
        cv2.imshow(MAIN_WINDOW, main_display)

        seg_display = self._prepare_segmentation_view(mask_raw, silhouette)
        cv2.imshow(SEG_WINDOW, seg_display)

        # waitKey also pumps the GUI event loop; mask to the low byte so the
        # comparison with ord(...) works across platforms.
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            return False
        if key == ord("m"):
            # Cycle through modes: 0 -> 1 -> 2 -> 0
            self.mask_mode = (self.mask_mode + 1) % len(MODE_LABELS)
            logger.debug("Switched to mask mode: %s", MODE_LABELS[self.mask_mode])
        return True

    def close(self) -> None:
        """Close all OpenCV windows and cleanup."""
        if self._windows_created:
            cv2.destroyAllWindows()
            self._windows_created = False