feat(demo): add real-time visualization to the demo pipeline
Integrate an opt-in OpenCV visualizer into the demo runtime so operators can monitor tracking, segmentation, and inference confidence in real time without changing the default non-visual execution path.
This commit is contained in:
@@ -0,0 +1,446 @@
|
||||
"""OpenCV-based visualizer for demo pipeline.
|
||||
|
||||
Provides real-time visualization of detection, segmentation, and classification results
|
||||
with interactive mode switching for mask display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import cast
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from numpy.typing import NDArray
|
||||
|
||||
logger = logging.getLogger(__name__)

# --- Window titles -------------------------------------------------------
MAIN_WINDOW = "Scoliosis Detection"
SEG_WINDOW = "Segmentation"

# --- Silhouette geometry (must match preprocess.py) ----------------------
SIL_HEIGHT = 64
SIL_WIDTH = 44

# --- On-screen size of the upscaled silhouette ---------------------------
DISPLAY_HEIGHT = 256
DISPLAY_WIDTH = 176

# --- Drawing colors, in OpenCV's BGR channel order -----------------------
COLOR_GREEN = (0, 255, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLACK = (0, 0, 0)
COLOR_RED = (0, 0, 255)
COLOR_YELLOW = (0, 255, 255)

# Human-readable names for the mask display modes, indexed by mask_mode.
MODE_LABELS = ["Both", "Raw Mask", "Normalized"]

# Alias for the uint8 image arrays passed to and returned from OpenCV calls.
ImageArray = NDArray[np.uint8]
||||
class OpenCVVisualizer:
    """Interactive OpenCV front-end for the gait analysis demo.

    Two windows are shown:

    - Main stream: original frame annotated with bounding box and metadata.
    - Segmentation: raw mask, normalized silhouette, or both side by side.

    The mask display mode can be cycled at runtime from the keyboard.
    """

    def __init__(self) -> None:
        """Start in "Both" mask mode with no windows created yet."""
        # 0: Both, 1: Raw, 2: Normalized
        self.mask_mode: int = 0
        # Windows are created lazily by _ensure_windows().
        self._windows_created: bool = False
||||
def _ensure_windows(self) -> None:
|
||||
"""Create OpenCV windows if not already created."""
|
||||
if not self._windows_created:
|
||||
cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL)
|
||||
cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL)
|
||||
self._windows_created = True
|
||||
|
||||
def _draw_bbox(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: tuple[int, int, int, int] | None,
|
||||
) -> None:
|
||||
"""Draw bounding box on frame if present.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
bbox: Bounding box as (x1, y1, x2, y2) or None
|
||||
"""
|
||||
if bbox is None:
|
||||
return
|
||||
|
||||
x1, y1, x2, y2 = bbox
|
||||
# Draw rectangle with green color, thickness 2
|
||||
_ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2)
|
||||
|
||||
def _draw_text_overlay(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
) -> None:
|
||||
"""Draw text overlay with track info, FPS, label, and confidence.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
"""
|
||||
# Prepare text lines
|
||||
lines: list[str] = []
|
||||
lines.append(f"ID: {track_id}")
|
||||
lines.append(f"FPS: {fps:.1f}")
|
||||
|
||||
if label is not None:
|
||||
if confidence is not None:
|
||||
lines.append(f"{label}: {confidence:.2%}")
|
||||
else:
|
||||
lines.append(label)
|
||||
|
||||
# Draw text with background for readability
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.6
|
||||
thickness = 1
|
||||
line_height = 25
|
||||
margin = 10
|
||||
|
||||
for i, text in enumerate(lines):
|
||||
y_pos = margin + (i + 1) * line_height
|
||||
|
||||
# Draw background rectangle
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
text, font, font_scale, thickness
|
||||
)
|
||||
_ = cv2.rectangle(
|
||||
frame,
|
||||
(margin, y_pos - text_height - 5),
|
||||
(margin + text_width + 10, y_pos + 5),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
frame,
|
||||
text,
|
||||
(margin + 5, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_WHITE,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def _prepare_main_frame(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: tuple[int, int, int, int] | None,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare main display frame with bbox and text overlay.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, C) uint8
|
||||
bbox: Bounding box or None
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
|
||||
Returns:
|
||||
Processed frame ready for display
|
||||
"""
|
||||
# Ensure BGR format (convert grayscale if needed)
|
||||
if len(frame.shape) == 2:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 1:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 3:
|
||||
display_frame = frame.copy()
|
||||
elif frame.shape[2] == 4:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))
|
||||
else:
|
||||
display_frame = frame.copy()
|
||||
|
||||
# Draw bbox and text (modifies in place)
|
||||
self._draw_bbox(display_frame, bbox)
|
||||
self._draw_text_overlay(display_frame, track_id, fps, label, confidence)
|
||||
|
||||
return display_frame
|
||||
|
||||
def _upscale_silhouette(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | NDArray[np.uint8],
|
||||
) -> ImageArray:
|
||||
"""Upscale silhouette to display size.
|
||||
|
||||
Args:
|
||||
silhouette: Input silhouette (64, 44) float32 [0,1] or uint8 [0,255]
|
||||
|
||||
Returns:
|
||||
Upscaled silhouette (256, 176) uint8
|
||||
"""
|
||||
# Normalize to uint8 if needed
|
||||
if silhouette.dtype == np.float32 or silhouette.dtype == np.float64:
|
||||
sil_u8 = (silhouette * 255).astype(np.uint8)
|
||||
else:
|
||||
sil_u8 = silhouette.astype(np.uint8)
|
||||
|
||||
# Upscale using nearest neighbor to preserve pixelation
|
||||
upscaled = cast(
|
||||
ImageArray,
|
||||
cv2.resize(
|
||||
sil_u8,
|
||||
(DISPLAY_WIDTH, DISPLAY_HEIGHT),
|
||||
interpolation=cv2.INTER_NEAREST,
|
||||
),
|
||||
)
|
||||
|
||||
return upscaled
|
||||
|
||||
def _prepare_segmentation_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare segmentation window content based on current mode.
|
||||
|
||||
Args:
|
||||
mask_raw: Raw binary mask (H, W) uint8 or None
|
||||
silhouette: Normalized silhouette (64, 44) float32 or None
|
||||
|
||||
Returns:
|
||||
Displayable image (H, W, 3) uint8
|
||||
"""
|
||||
if self.mask_mode == 0:
|
||||
# Mode 0: Both (side by side)
|
||||
return self._prepare_both_view(mask_raw, silhouette)
|
||||
elif self.mask_mode == 1:
|
||||
# Mode 1: Raw mask only
|
||||
return self._prepare_raw_view(mask_raw)
|
||||
else:
|
||||
# Mode 2: Normalized silhouette only
|
||||
return self._prepare_normalized_view(silhouette)
|
||||
|
||||
def _prepare_raw_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare raw mask view.
|
||||
|
||||
Args:
|
||||
mask_raw: Raw binary mask or None
|
||||
|
||||
Returns:
|
||||
Displayable image with mode indicator
|
||||
"""
|
||||
if mask_raw is None:
|
||||
# Create placeholder
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Raw Mask (No Data)")
|
||||
return placeholder
|
||||
|
||||
# Ensure single channel
|
||||
if len(mask_raw.shape) == 3:
|
||||
mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY))
|
||||
else:
|
||||
mask_gray = mask_raw
|
||||
|
||||
# Resize to display size
|
||||
mask_resized = cast(
|
||||
ImageArray,
|
||||
cv2.resize(
|
||||
mask_gray,
|
||||
(DISPLAY_WIDTH, DISPLAY_HEIGHT),
|
||||
interpolation=cv2.INTER_NEAREST,
|
||||
),
|
||||
)
|
||||
|
||||
# Convert to BGR for display
|
||||
mask_bgr = cast(ImageArray, cv2.cvtColor(mask_resized, cv2.COLOR_GRAY2BGR))
|
||||
self._draw_mode_indicator(mask_bgr, "Raw Mask")
|
||||
|
||||
return mask_bgr
|
||||
|
||||
def _prepare_normalized_view(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare normalized silhouette view.
|
||||
|
||||
Args:
|
||||
silhouette: Normalized silhouette (64, 44) or None
|
||||
|
||||
Returns:
|
||||
Displayable image with mode indicator
|
||||
"""
|
||||
if silhouette is None:
|
||||
# Create placeholder
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Normalized (No Data)")
|
||||
return placeholder
|
||||
|
||||
# Upscale and convert
|
||||
upscaled = self._upscale_silhouette(silhouette)
|
||||
sil_bgr = cast(ImageArray, cv2.cvtColor(upscaled, cv2.COLOR_GRAY2BGR))
|
||||
self._draw_mode_indicator(sil_bgr, "Normalized")
|
||||
|
||||
return sil_bgr
|
||||
|
||||
def _prepare_both_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare side-by-side view of both masks.
|
||||
|
||||
Args:
|
||||
mask_raw: Raw binary mask or None
|
||||
silhouette: Normalized silhouette or None
|
||||
|
||||
Returns:
|
||||
Displayable side-by-side image
|
||||
"""
|
||||
# Prepare individual views
|
||||
raw_view = self._prepare_raw_view(mask_raw)
|
||||
norm_view = self._prepare_normalized_view(silhouette)
|
||||
|
||||
# Convert to grayscale for side-by-side composition
|
||||
if len(raw_view.shape) == 3:
|
||||
raw_gray = cast(ImageArray, cv2.cvtColor(raw_view, cv2.COLOR_BGR2GRAY))
|
||||
else:
|
||||
raw_gray = raw_view
|
||||
|
||||
if len(norm_view.shape) == 3:
|
||||
norm_gray = cast(ImageArray, cv2.cvtColor(norm_view, cv2.COLOR_BGR2GRAY))
|
||||
else:
|
||||
norm_gray = norm_view
|
||||
|
||||
# Stack horizontally
|
||||
combined = np.hstack([raw_gray, norm_gray])
|
||||
|
||||
# Convert back to BGR
|
||||
combined_bgr = cast(ImageArray, cv2.cvtColor(combined, cv2.COLOR_GRAY2BGR))
|
||||
|
||||
# Add mode indicator
|
||||
self._draw_mode_indicator(combined_bgr, "Both: Raw | Normalized")
|
||||
|
||||
return combined_bgr
|
||||
|
||||
def _draw_mode_indicator(
|
||||
self,
|
||||
image: ImageArray,
|
||||
label: str,
|
||||
) -> None:
|
||||
"""Draw mode indicator text on image.
|
||||
|
||||
Args:
|
||||
image: Image to draw on (modified in place)
|
||||
label: Mode label text
|
||||
"""
|
||||
h, w = image.shape[:2]
|
||||
|
||||
# Mode text at bottom
|
||||
mode_text = f"Mode: {MODE_LABELS[self.mask_mode]} ({self.mask_mode}) - {label}"
|
||||
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.5
|
||||
thickness = 1
|
||||
|
||||
# Get text size for background
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
mode_text, font, font_scale, thickness
|
||||
)
|
||||
|
||||
# Draw background at bottom center
|
||||
x_pos = (w - text_width) // 2
|
||||
y_pos = h - 10
|
||||
|
||||
_ = cv2.rectangle(
|
||||
image,
|
||||
(x_pos - 5, y_pos - text_height - 5),
|
||||
(x_pos + text_width + 5, y_pos + 5),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
image,
|
||||
mode_text,
|
||||
(x_pos, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_YELLOW,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def update(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: tuple[int, int, int, int] | None,
|
||||
track_id: int,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
fps: float,
|
||||
) -> bool:
|
||||
"""Update visualization with new frame data.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, C) uint8
|
||||
bbox: Bounding box as (x1, y1, x2, y2) or None
|
||||
track_id: Tracking ID
|
||||
mask_raw: Raw binary mask (H, W) uint8 or None
|
||||
silhouette: Normalized silhouette (64, 44) float32 [0,1] or None
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence [0,1] or None
|
||||
fps: Current FPS
|
||||
|
||||
Returns:
|
||||
False if user requested quit (pressed 'q'), True otherwise
|
||||
"""
|
||||
self._ensure_windows()
|
||||
|
||||
# Prepare and show main window
|
||||
main_display = self._prepare_main_frame(
|
||||
frame, bbox, track_id, fps, label, confidence
|
||||
)
|
||||
cv2.imshow(MAIN_WINDOW, main_display)
|
||||
|
||||
# Prepare and show segmentation window
|
||||
seg_display = self._prepare_segmentation_view(mask_raw, silhouette)
|
||||
cv2.imshow(SEG_WINDOW, seg_display)
|
||||
|
||||
# Handle keyboard input
|
||||
key = cv2.waitKey(1) & 0xFF
|
||||
|
||||
if key == ord("q"):
|
||||
return False
|
||||
elif key == ord("m"):
|
||||
# Cycle through modes: 0 -> 1 -> 2 -> 0
|
||||
self.mask_mode = (self.mask_mode + 1) % 3
|
||||
logger.debug("Switched to mask mode: %s", MODE_LABELS[self.mask_mode])
|
||||
|
||||
return True
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close all OpenCV windows and cleanup."""
|
||||
if self._windows_created:
|
||||
cv2.destroyAllWindows()
|
||||
self._windows_created = False
|
||||
Reference in New Issue
Block a user