"""OpenCV-based visualizer for demo pipeline. Provides real-time visualization of detection, segmentation, and classification results with interactive mode switching for mask display. """ from __future__ import annotations import logging from typing import cast import cv2 import numpy as np from numpy.typing import NDArray logger = logging.getLogger(__name__) # Window names MAIN_WINDOW = "Scoliosis Detection" SEG_WINDOW = "Segmentation" # Silhouette dimensions (from preprocess.py) SIL_HEIGHT = 64 SIL_WIDTH = 44 # Display dimensions for upscaled silhouette DISPLAY_HEIGHT = 256 DISPLAY_WIDTH = 176 # Colors (BGR) COLOR_GREEN = (0, 255, 0) COLOR_WHITE = (255, 255, 255) COLOR_BLACK = (0, 0, 0) COLOR_RED = (0, 0, 255) COLOR_YELLOW = (0, 255, 255) # Mode labels MODE_LABELS = ["Both", "Raw Mask", "Normalized"] # Type alias for image arrays (NDArray or cv2.Mat) ImageArray = NDArray[np.uint8] class OpenCVVisualizer: """Real-time visualizer for gait analysis demo. Displays two windows: - Main stream: Original frame with bounding box and metadata overlay - Segmentation: Raw mask, normalized silhouette, or side-by-side view Supports interactive mode switching via keyboard. """ def __init__(self) -> None: """Initialize visualizer with default mask mode.""" self.mask_mode: int = 0 # 0: Both, 1: Raw, 2: Normalized self._windows_created: bool = False def _ensure_windows(self) -> None: """Create OpenCV windows if not already created.""" if not self._windows_created: cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL) cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL) self._windows_created = True def _draw_bbox( self, frame: ImageArray, bbox: tuple[int, int, int, int] | None, ) -> None: """Draw bounding box on frame if present. Args: frame: Input frame (H, W, 3) uint8 - modified in place bbox: Bounding box as (x1, y1, x2, y2) or None """ if bbox is None: return x1, y1, x2, y2 = bbox # Draw rectangle with green color, thickness 2 _ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2) def _draw_text_overlay( self, frame: ImageArray, track_id: int, fps: float, label: str | None, confidence: float | None, ) -> None: """Draw text overlay with track info, FPS, label, and confidence. Args: frame: Input frame (H, W, 3) uint8 - modified in place track_id: Tracking ID fps: Current FPS label: Classification label or None confidence: Classification confidence or None """ # Prepare text lines lines: list[str] = [] lines.append(f"ID: {track_id}") lines.append(f"FPS: {fps:.1f}") if label is not None: if confidence is not None: lines.append(f"{label}: {confidence:.2%}") else: lines.append(label) # Draw text with background for readability font = cv2.FONT_HERSHEY_SIMPLEX font_scale = 0.6 thickness = 1 line_height = 25 margin = 10 for i, text in enumerate(lines): y_pos = margin + (i + 1) * line_height # Draw background rectangle (text_width, text_height), _ = cv2.getTextSize( text, font, font_scale, thickness ) _ = cv2.rectangle( frame, (margin, y_pos - text_height - 5), (margin + text_width + 10, y_pos + 5), COLOR_BLACK, -1, ) # Draw text _ = cv2.putText( frame, text, (margin + 5, y_pos), font, font_scale, COLOR_WHITE, thickness, ) def _prepare_main_frame( self, frame: ImageArray, bbox: tuple[int, int, int, int] | None, track_id: int, fps: float, label: str | None, confidence: float | None, ) -> ImageArray: """Prepare main display frame with bbox and text overlay. Args: frame: Input frame (H, W, C) uint8 bbox: Bounding box or None track_id: Tracking ID fps: Current FPS label: Classification label or None confidence: Classification confidence or None Returns: Processed frame ready for display """ # Ensure BGR format (convert grayscale if needed) if len(frame.shape) == 2: display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)) elif frame.shape[2] == 1: display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)) elif frame.shape[2] == 3: display_frame = frame.copy() elif frame.shape[2] == 4: display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)) else: display_frame = frame.copy() # Draw bbox and text (modifies in place) self._draw_bbox(display_frame, bbox) self._draw_text_overlay(display_frame, track_id, fps, label, confidence) return display_frame def _upscale_silhouette( self, silhouette: NDArray[np.float32] | NDArray[np.uint8], ) -> ImageArray: """Upscale silhouette to display size. Args: silhouette: Input silhouette (64, 44) float32 [0,1] or uint8 [0,255] Returns: Upscaled silhouette (256, 176) uint8 """ # Normalize to uint8 if needed if silhouette.dtype == np.float32 or silhouette.dtype == np.float64: sil_u8 = (silhouette * 255).astype(np.uint8) else: sil_u8 = silhouette.astype(np.uint8) # Upscale using nearest neighbor to preserve pixelation upscaled = cast( ImageArray, cv2.resize( sil_u8, (DISPLAY_WIDTH, DISPLAY_HEIGHT), interpolation=cv2.INTER_NEAREST, ), ) return upscaled def _prepare_segmentation_view( self, mask_raw: ImageArray | None, silhouette: NDArray[np.float32] | None, ) -> ImageArray: """Prepare segmentation window content based on current mode. Args: mask_raw: Raw binary mask (H, W) uint8 or None silhouette: Normalized silhouette (64, 44) float32 or None Returns: Displayable image (H, W, 3) uint8 """ if self.mask_mode == 0: # Mode 0: Both (side by side) return self._prepare_both_view(mask_raw, silhouette) elif self.mask_mode == 1: # Mode 1: Raw mask only return self._prepare_raw_view(mask_raw) else: # Mode 2: Normalized silhouette only return self._prepare_normalized_view(silhouette) def _prepare_raw_view( self, mask_raw: ImageArray | None, ) -> ImageArray: """Prepare raw mask view. Args: mask_raw: Raw binary mask or None Returns: Displayable image with mode indicator """ if mask_raw is None: # Create placeholder placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8) self._draw_mode_indicator(placeholder, "Raw Mask (No Data)") return placeholder # Ensure single channel if len(mask_raw.shape) == 3: mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY)) else: mask_gray = mask_raw # Resize to display size mask_resized = cast( ImageArray, cv2.resize( mask_gray, (DISPLAY_WIDTH, DISPLAY_HEIGHT), interpolation=cv2.INTER_NEAREST, ), ) # Convert to BGR for display mask_bgr = cast(ImageArray, cv2.cvtColor(mask_resized, cv2.COLOR_GRAY2BGR)) self._draw_mode_indicator(mask_bgr, "Raw Mask") return mask_bgr def _prepare_normalized_view( self, silhouette: NDArray[np.float32] | None, ) -> ImageArray: """Prepare normalized silhouette view. Args: silhouette: Normalized silhouette (64, 44) or None Returns: Displayable image with mode indicator """ if silhouette is None: # Create placeholder placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8) self._draw_mode_indicator(placeholder, "Normalized (No Data)") return placeholder # Upscale and convert upscaled = self._upscale_silhouette(silhouette) sil_bgr = cast(ImageArray, cv2.cvtColor(upscaled, cv2.COLOR_GRAY2BGR)) self._draw_mode_indicator(sil_bgr, "Normalized") return sil_bgr def _prepare_both_view( self, mask_raw: ImageArray | None, silhouette: NDArray[np.float32] | None, ) -> ImageArray: """Prepare side-by-side view of both masks. Args: mask_raw: Raw binary mask or None silhouette: Normalized silhouette or None Returns: Displayable side-by-side image """ # Prepare individual views raw_view = self._prepare_raw_view(mask_raw) norm_view = self._prepare_normalized_view(silhouette) # Convert to grayscale for side-by-side composition if len(raw_view.shape) == 3: raw_gray = cast(ImageArray, cv2.cvtColor(raw_view, cv2.COLOR_BGR2GRAY)) else: raw_gray = raw_view if len(norm_view.shape) == 3: norm_gray = cast(ImageArray, cv2.cvtColor(norm_view, cv2.COLOR_BGR2GRAY)) else: norm_gray = norm_view # Stack horizontally combined = np.hstack([raw_gray, norm_gray]) # Convert back to BGR combined_bgr = cast(ImageArray, cv2.cvtColor(combined, cv2.COLOR_GRAY2BGR)) # Add mode indicator self._draw_mode_indicator(combined_bgr, "Both: Raw | Normalized") return combined_bgr def _draw_mode_indicator( self, image: ImageArray, label: str, ) -> None: """Draw mode indicator text on image. Args: image: Image to draw on (modified in place) label: Mode label text """ h, w = image.shape[:2] # Mode text at bottom mode_text = f"Mode: {MODE_LABELS[self.mask_mode]} ({self.mask_mode}) - {label}" font = cv2.FONT_HERSHEY_SIMPLEX font_scale = 0.5 thickness = 1 # Get text size for background (text_width, text_height), _ = cv2.getTextSize( mode_text, font, font_scale, thickness ) # Draw background at bottom center x_pos = (w - text_width) // 2 y_pos = h - 10 _ = cv2.rectangle( image, (x_pos - 5, y_pos - text_height - 5), (x_pos + text_width + 5, y_pos + 5), COLOR_BLACK, -1, ) # Draw text _ = cv2.putText( image, mode_text, (x_pos, y_pos), font, font_scale, COLOR_YELLOW, thickness, ) def update( self, frame: ImageArray, bbox: tuple[int, int, int, int] | None, track_id: int, mask_raw: ImageArray | None, silhouette: NDArray[np.float32] | None, label: str | None, confidence: float | None, fps: float, ) -> bool: """Update visualization with new frame data. Args: frame: Input frame (H, W, C) uint8 bbox: Bounding box as (x1, y1, x2, y2) or None track_id: Tracking ID mask_raw: Raw binary mask (H, W) uint8 or None silhouette: Normalized silhouette (64, 44) float32 [0,1] or None label: Classification label or None confidence: Classification confidence [0,1] or None fps: Current FPS Returns: False if user requested quit (pressed 'q'), True otherwise """ self._ensure_windows() # Prepare and show main window main_display = self._prepare_main_frame( frame, bbox, track_id, fps, label, confidence ) cv2.imshow(MAIN_WINDOW, main_display) # Prepare and show segmentation window seg_display = self._prepare_segmentation_view(mask_raw, silhouette) cv2.imshow(SEG_WINDOW, seg_display) # Handle keyboard input key = cv2.waitKey(1) & 0xFF if key == ord("q"): return False elif key == ord("m"): # Cycle through modes: 0 -> 1 -> 2 -> 0 self.mask_mode = (self.mask_mode + 1) % 3 logger.debug("Switched to mask mode: %s", MODE_LABELS[self.mask_mode]) return True def close(self) -> None: """Close all OpenCV windows and cleanup.""" if self._windows_created: cv2.destroyAllWindows() self._windows_created = False