feat: extract opengait_studio monorepo module
Move demo implementation into opengait_studio, retire Sports2D runtime integration, and align packaging with root-level monorepo dependency management.
This commit is contained in:
@@ -0,0 +1,767 @@
|
||||
"""OpenCV-based visualizer for demo pipeline.
|
||||
|
||||
Provides real-time visualization of detection, segmentation, and classification results
|
||||
with interactive mode switching for mask display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import cast
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from .preprocess import BBoxXYXY
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Window names
MAIN_WINDOW = "Scoliosis Detection"
SEG_WINDOW = "Normalized Silhouette"
RAW_WINDOW = "Raw Mask"
WINDOW_SEG_INPUT = "Segmentation Input"

# Silhouette dimensions (from preprocess.py)
SIL_HEIGHT = 64
SIL_WIDTH = 44

# Display dimensions for upscaled silhouette
DISPLAY_HEIGHT = 256
DISPLAY_WIDTH = 176
# Vertical padding (px) reserved for the raw-stats overlay / mode label strip
RAW_STATS_PAD = 54
MODE_LABEL_PAD = 26

# Colors (BGR)
COLOR_GREEN = (0, 255, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLACK = (0, 0, 0)
COLOR_DARK_GRAY = (56, 56, 56)
COLOR_RED = (0, 0, 255)
COLOR_YELLOW = (0, 255, 255)
COLOR_CYAN = (255, 255, 0)
COLOR_ORANGE = (0, 165, 255)
COLOR_MAGENTA = (255, 0, 255)

# Type alias for image arrays (NDArray or cv2.Mat)
ImageArray = NDArray[np.uint8]

# COCO-format skeleton connections (17 keypoints)
# Connections are pairs of keypoint indices
SKELETON_CONNECTIONS: list[tuple[int, int]] = [
    (0, 1),   # nose -> left_eye
    (0, 2),   # nose -> right_eye
    (1, 3),   # left_eye -> left_ear
    (2, 4),   # right_eye -> right_ear
    (5, 6),   # left_shoulder -> right_shoulder
    (5, 7),   # left_shoulder -> left_elbow
    (7, 9),   # left_elbow -> left_wrist
    (6, 8),   # right_shoulder -> right_elbow
    (8, 10),  # right_elbow -> right_wrist
    (11, 12), # left_hip -> right_hip
    (5, 11),  # left_shoulder -> left_hip
    (6, 12),  # right_shoulder -> right_hip
    (11, 13), # left_hip -> left_knee
    (13, 15), # left_knee -> left_ankle
    (12, 14), # right_hip -> right_knee
    (14, 16), # right_knee -> right_ankle
]

# Keypoint names for COCO format (17 keypoints)
KEYPOINT_NAMES: list[str] = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"
]

# Joints where angles are typically calculated (for scoliosis / gait analysis)
# Each triple (a, b, c) measures the angle at keypoint b between rays b->a and b->c.
ANGLE_JOINTS: list[tuple[int, int, int]] = [
    (5, 7, 9),    # left_shoulder -> left_elbow -> left_wrist
    (6, 8, 10),   # right_shoulder -> right_elbow -> right_wrist
    (7, 5, 11),   # left_elbow -> left_shoulder -> left_hip
    (8, 6, 12),   # right_elbow -> right_shoulder -> right_hip
    (5, 11, 13),  # left_shoulder -> left_hip -> left_knee
    (6, 12, 14),  # right_shoulder -> right_hip -> right_knee
    (11, 13, 15), # left_hip -> left_knee -> left_ankle
    (12, 14, 16), # right_hip -> right_knee -> right_ankle
]
|
||||
|
||||
|
||||
|
||||
class OpenCVVisualizer:
    """Interactive OpenCV window manager for the demo pipeline.

    Owns up to four windows (main, normalized silhouette, segmentation input,
    and an optional raw-mask window) and tracks which of them have been
    created so each is instantiated lazily, exactly once, and torn down
    cleanly via :meth:`close`.
    """

    def __init__(self) -> None:
        # Whether the optional raw-mask window is currently shown
        # (toggled at runtime with the 'r' key in update()).
        self.show_raw_window: bool = False
        # Whether raw-mask debug stats are overlaid (toggled with 'd').
        self.show_raw_debug: bool = False
        # Lazy-creation flags for the always-on windows and the raw window.
        self._windows_created: bool = False
        self._raw_window_created: bool = False

    def _ensure_windows(self) -> None:
        """Create the three always-on windows on first use."""
        if not self._windows_created:
            cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL)
            cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL)
            cv2.namedWindow(WINDOW_SEG_INPUT, cv2.WINDOW_NORMAL)
            self._windows_created = True

    def _ensure_raw_window(self) -> None:
        """Create the optional raw-mask window on first use."""
        if not self._raw_window_created:
            cv2.namedWindow(RAW_WINDOW, cv2.WINDOW_NORMAL)
            self._raw_window_created = True

    def _hide_raw_window(self) -> None:
        """Destroy the raw-mask window if it exists; safe to call repeatedly."""
        if self._raw_window_created:
            cv2.destroyWindow(RAW_WINDOW)
            self._raw_window_created = False
|
||||
|
||||
def _draw_bbox(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: BBoxXYXY | None,
|
||||
) -> None:
|
||||
"""Draw bounding box on frame if present.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
bbox: Bounding box in XYXY format as (x1, y1, x2, y2) or None
|
||||
"""
|
||||
if bbox is None:
|
||||
return
|
||||
|
||||
x1, y1, x2, y2 = bbox
|
||||
# Draw rectangle with green color, thickness 2
|
||||
_ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2)
|
||||
|
||||
def _draw_text_overlay(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
) -> None:
|
||||
"""Draw text overlay with track info, FPS, label, and confidence.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
"""
|
||||
# Prepare text lines
|
||||
lines: list[str] = []
|
||||
lines.append(f"ID: {track_id}")
|
||||
lines.append(f"FPS: {fps:.1f}")
|
||||
|
||||
if label is not None:
|
||||
if confidence is not None:
|
||||
lines.append(f"{label}: {confidence:.2%}")
|
||||
else:
|
||||
lines.append(label)
|
||||
|
||||
# Draw text with background for readability
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.6
|
||||
thickness = 1
|
||||
line_height = 25
|
||||
margin = 10
|
||||
|
||||
for i, text in enumerate(lines):
|
||||
y_pos = margin + (i + 1) * line_height
|
||||
|
||||
# Draw background rectangle
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
text, font, font_scale, thickness
|
||||
)
|
||||
_ = cv2.rectangle(
|
||||
frame,
|
||||
(margin, y_pos - text_height - 5),
|
||||
(margin + text_width + 10, y_pos + 5),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
frame,
|
||||
text,
|
||||
(margin + 5, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_WHITE,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def _draw_pose_skeleton(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
pose_data: dict[str, object] | None,
|
||||
) -> None:
|
||||
"""Draw pose skeleton on frame.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
pose_data: Pose data dictionary from Sports2D or similar
|
||||
Expected format: {'keypoints': [[x1, y1], [x2, y2], ...],
|
||||
'confidence': [c1, c2, ...],
|
||||
'angles': {'joint_name': angle, ...}}
|
||||
"""
|
||||
if pose_data is None:
|
||||
return
|
||||
|
||||
keypoints_obj = pose_data.get('keypoints')
|
||||
if keypoints_obj is None:
|
||||
return
|
||||
|
||||
# Convert keypoints to numpy array
|
||||
keypoints = np.asarray(keypoints_obj, dtype=np.float32)
|
||||
if keypoints.size == 0:
|
||||
return
|
||||
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# Get confidence scores if available
|
||||
confidence_obj = pose_data.get('confidence')
|
||||
confidences = (
|
||||
np.asarray(confidence_obj, dtype=np.float32)
|
||||
if confidence_obj is not None
|
||||
else np.ones(len(keypoints), dtype=np.float32)
|
||||
)
|
||||
|
||||
# Draw skeleton connections
|
||||
for connection in SKELETON_CONNECTIONS:
|
||||
idx1, idx2 = connection
|
||||
if idx1 < len(keypoints) and idx2 < len(keypoints):
|
||||
# Check confidence threshold (0.3)
|
||||
if confidences[idx1] > 0.3 and confidences[idx2] > 0.3:
|
||||
pt1 = (int(keypoints[idx1][0]), int(keypoints[idx1][1]))
|
||||
pt2 = (int(keypoints[idx2][0]), int(keypoints[idx2][1]))
|
||||
# Clip to frame bounds
|
||||
pt1 = (max(0, min(w - 1, pt1[0])), max(0, min(h - 1, pt1[1])))
|
||||
pt2 = (max(0, min(w - 1, pt2[0])), max(0, min(h - 1, pt2[1])))
|
||||
_ = cv2.line(frame, pt1, pt2, COLOR_CYAN, 2)
|
||||
|
||||
# Draw keypoints
|
||||
for i, (kp, conf) in enumerate(zip(keypoints, confidences)):
|
||||
if conf > 0.3 and i < len(keypoints):
|
||||
x, y = int(kp[0]), int(kp[1])
|
||||
# Clip to frame bounds
|
||||
x = max(0, min(w - 1, x))
|
||||
y = max(0, min(h - 1, y))
|
||||
# Draw keypoint as circle
|
||||
_ = cv2.circle(frame, (x, y), 4, COLOR_MAGENTA, -1)
|
||||
_ = cv2.circle(frame, (x, y), 4, COLOR_WHITE, 1)
|
||||
|
||||
def _draw_pose_angles(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
pose_data: dict[str, object] | None,
|
||||
) -> None:
|
||||
"""Draw pose angles as text overlay.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
pose_data: Pose data dictionary with 'angles' key
|
||||
"""
|
||||
if pose_data is None:
|
||||
return
|
||||
|
||||
angles_obj = pose_data.get('angles')
|
||||
if angles_obj is None:
|
||||
return
|
||||
|
||||
angles = cast(dict[str, float], angles_obj)
|
||||
if not angles:
|
||||
return
|
||||
|
||||
# Draw angles in top-right corner
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.45
|
||||
thickness = 1
|
||||
line_height = 20
|
||||
margin = 10
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# Filter and format angles
|
||||
angle_texts: list[tuple[str, float]] = []
|
||||
for name, angle in angles.items():
|
||||
# Only show angles that are reasonable (0-180 degrees)
|
||||
if 0 <= angle <= 180:
|
||||
angle_texts.append((str(name), float(angle)))
|
||||
|
||||
# Sort by name for consistent display
|
||||
angle_texts.sort(key=lambda x: x[0])
|
||||
|
||||
# Draw from top-right
|
||||
for i, (name, angle) in enumerate(angle_texts[:8]): # Limit to 8 angles
|
||||
text = f"{name}: {angle:.1f}"
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
text, font, font_scale, thickness
|
||||
)
|
||||
x_pos = w - margin - text_width - 10
|
||||
y_pos = margin + (i + 1) * line_height
|
||||
|
||||
# Draw background rectangle
|
||||
_ = cv2.rectangle(
|
||||
frame,
|
||||
(x_pos - 4, y_pos - text_height - 4),
|
||||
(x_pos + text_width + 4, y_pos + 4),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
# Draw text in orange
|
||||
_ = cv2.putText(
|
||||
frame,
|
||||
text,
|
||||
(x_pos, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_ORANGE,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def _prepare_main_frame(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: BBoxXYXY | None,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
pose_data: dict[str, object] | None = None,
|
||||
) -> ImageArray:
|
||||
"""Prepare main display frame with bbox and text overlay.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, C) uint8
|
||||
bbox: Bounding box in XYXY format (x1, y1, x2, y2) or None
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
pose_data: Pose data dictionary or None
|
||||
|
||||
Returns:
|
||||
Processed frame ready for display
|
||||
"""
|
||||
# Ensure BGR format (convert grayscale if needed)
|
||||
if len(frame.shape) == 2:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 1:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 3:
|
||||
display_frame = frame.copy()
|
||||
elif frame.shape[2] == 4:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))
|
||||
else:
|
||||
display_frame = frame.copy()
|
||||
|
||||
# Draw bbox and text (modifies in place)
|
||||
self._draw_bbox(display_frame, bbox)
|
||||
self._draw_text_overlay(display_frame, track_id, fps, label, confidence)
|
||||
|
||||
# Draw pose skeleton and angles if available
|
||||
self._draw_pose_skeleton(display_frame, pose_data)
|
||||
self._draw_pose_angles(display_frame, pose_data)
|
||||
|
||||
return display_frame
|
||||
|
||||
def _upscale_silhouette(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | NDArray[np.uint8],
|
||||
) -> ImageArray:
|
||||
"""Upscale silhouette to display size.
|
||||
|
||||
Args:
|
||||
silhouette: Input silhouette (64, 44) float32 [0,1] or uint8 [0,255]
|
||||
|
||||
Returns:
|
||||
Upscaled silhouette (256, 176) uint8
|
||||
"""
|
||||
# Normalize to uint8 if needed
|
||||
if silhouette.dtype == np.float32 or silhouette.dtype == np.float64:
|
||||
sil_u8 = (silhouette * 255).astype(np.uint8)
|
||||
else:
|
||||
sil_u8 = silhouette.astype(np.uint8)
|
||||
|
||||
# Upscale using nearest neighbor to preserve pixelation
|
||||
upscaled = cast(
|
||||
ImageArray,
|
||||
cv2.resize(
|
||||
sil_u8,
|
||||
(DISPLAY_WIDTH, DISPLAY_HEIGHT),
|
||||
interpolation=cv2.INTER_NEAREST,
|
||||
),
|
||||
)
|
||||
|
||||
return upscaled
|
||||
|
||||
def _normalize_mask_for_display(self, mask: NDArray[np.generic]) -> ImageArray:
|
||||
mask_array = np.asarray(mask)
|
||||
if mask_array.dtype == np.bool_:
|
||||
bool_scaled = np.where(mask_array, np.uint8(255), np.uint8(0)).astype(
|
||||
np.uint8
|
||||
)
|
||||
return cast(ImageArray, bool_scaled)
|
||||
|
||||
if mask_array.dtype == np.uint8:
|
||||
mask_array = cast(ImageArray, mask_array)
|
||||
max_u8 = int(np.max(mask_array)) if mask_array.size > 0 else 0
|
||||
if max_u8 <= 1:
|
||||
scaled_u8 = np.where(mask_array > 0, np.uint8(255), np.uint8(0)).astype(
|
||||
np.uint8
|
||||
)
|
||||
return cast(ImageArray, scaled_u8)
|
||||
return cast(ImageArray, mask_array)
|
||||
|
||||
if np.issubdtype(mask_array.dtype, np.integer):
|
||||
max_int = float(np.max(mask_array)) if mask_array.size > 0 else 0.0
|
||||
if max_int <= 1.0:
|
||||
return cast(
|
||||
ImageArray, (mask_array.astype(np.float32) * 255.0).astype(np.uint8)
|
||||
)
|
||||
clipped = np.clip(mask_array, 0, 255).astype(np.uint8)
|
||||
return cast(ImageArray, clipped)
|
||||
|
||||
mask_float = np.asarray(mask_array, dtype=np.float32)
|
||||
max_val = float(np.max(mask_float)) if mask_float.size > 0 else 0.0
|
||||
if max_val <= 0.0:
|
||||
return np.zeros(mask_float.shape, dtype=np.uint8)
|
||||
|
||||
normalized = np.clip((mask_float / max_val) * 255.0, 0.0, 255.0).astype(
|
||||
np.uint8
|
||||
)
|
||||
return cast(ImageArray, normalized)
|
||||
|
||||
def _draw_raw_stats(self, image: ImageArray, mask_raw: ImageArray | None) -> None:
|
||||
if mask_raw is None:
|
||||
return
|
||||
|
||||
mask = np.asarray(mask_raw)
|
||||
if mask.size == 0:
|
||||
return
|
||||
|
||||
stats = [
|
||||
f"raw: {mask.dtype}",
|
||||
f"min/max: {float(mask.min()):.3f}/{float(mask.max()):.3f}",
|
||||
f"nnz: {int(np.count_nonzero(mask))}",
|
||||
]
|
||||
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.45
|
||||
thickness = 1
|
||||
line_h = 18
|
||||
x0 = 8
|
||||
y0 = 20
|
||||
|
||||
for i, txt in enumerate(stats):
|
||||
y = y0 + i * line_h
|
||||
(tw, th), _ = cv2.getTextSize(txt, font, font_scale, thickness)
|
||||
_ = cv2.rectangle(
|
||||
image, (x0 - 4, y - th - 4), (x0 + tw + 4, y + 4), COLOR_BLACK, -1
|
||||
)
|
||||
_ = cv2.putText(
|
||||
image, txt, (x0, y), font, font_scale, COLOR_YELLOW, thickness
|
||||
)
|
||||
|
||||
def _prepare_segmentation_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
bbox: BBoxXYXY | None,
|
||||
) -> ImageArray:
|
||||
_ = mask_raw
|
||||
_ = bbox
|
||||
return self._prepare_normalized_view(silhouette)
|
||||
|
||||
def _fit_gray_to_display(
|
||||
self,
|
||||
gray: ImageArray,
|
||||
out_h: int = DISPLAY_HEIGHT,
|
||||
out_w: int = DISPLAY_WIDTH,
|
||||
) -> ImageArray:
|
||||
src_h, src_w = gray.shape[:2]
|
||||
if src_h <= 0 or src_w <= 0:
|
||||
return np.zeros((out_h, out_w), dtype=np.uint8)
|
||||
|
||||
scale = min(out_w / src_w, out_h / src_h)
|
||||
new_w = max(1, int(round(src_w * scale)))
|
||||
new_h = max(1, int(round(src_h * scale)))
|
||||
|
||||
resized = cast(
|
||||
ImageArray,
|
||||
cv2.resize(gray, (new_w, new_h), interpolation=cv2.INTER_NEAREST),
|
||||
)
|
||||
canvas = np.zeros((out_h, out_w), dtype=np.uint8)
|
||||
x0 = (out_w - new_w) // 2
|
||||
y0 = (out_h - new_h) // 2
|
||||
canvas[y0 : y0 + new_h, x0 : x0 + new_w] = resized
|
||||
return cast(ImageArray, canvas)
|
||||
|
||||
def _crop_mask_to_bbox(
|
||||
self,
|
||||
mask_gray: ImageArray,
|
||||
bbox: BBoxXYXY | None,
|
||||
) -> ImageArray:
|
||||
if bbox is None:
|
||||
return mask_gray
|
||||
|
||||
h, w = mask_gray.shape[:2]
|
||||
x1, y1, x2, y2 = bbox
|
||||
x1c = max(0, min(w, int(x1)))
|
||||
x2c = max(0, min(w, int(x2)))
|
||||
y1c = max(0, min(h, int(y1)))
|
||||
y2c = max(0, min(h, int(y2)))
|
||||
|
||||
if x2c <= x1c or y2c <= y1c:
|
||||
return mask_gray
|
||||
|
||||
cropped = mask_gray[y1c:y2c, x1c:x2c]
|
||||
if cropped.size == 0:
|
||||
return mask_gray
|
||||
return cast(ImageArray, cropped)
|
||||
|
||||
def _prepare_segmentation_input_view(
|
||||
self,
|
||||
silhouettes: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
if silhouettes is None or silhouettes.size == 0:
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Input Silhouettes (No Data)")
|
||||
return placeholder
|
||||
|
||||
n_frames = int(silhouettes.shape[0])
|
||||
tiles_per_row = int(np.ceil(np.sqrt(n_frames)))
|
||||
rows = int(np.ceil(n_frames / tiles_per_row))
|
||||
|
||||
tile_h = DISPLAY_HEIGHT
|
||||
tile_w = DISPLAY_WIDTH
|
||||
grid = np.zeros((rows * tile_h, tiles_per_row * tile_w), dtype=np.uint8)
|
||||
|
||||
for idx in range(n_frames):
|
||||
sil = silhouettes[idx]
|
||||
tile = self._upscale_silhouette(sil)
|
||||
r = idx // tiles_per_row
|
||||
c = idx % tiles_per_row
|
||||
y0, y1 = r * tile_h, (r + 1) * tile_h
|
||||
x0, x1 = c * tile_w, (c + 1) * tile_w
|
||||
grid[y0:y1, x0:x1] = tile
|
||||
|
||||
grid_bgr = cast(ImageArray, cv2.cvtColor(grid, cv2.COLOR_GRAY2BGR))
|
||||
|
||||
for idx in range(n_frames):
|
||||
r = idx // tiles_per_row
|
||||
c = idx % tiles_per_row
|
||||
y0 = r * tile_h
|
||||
x0 = c * tile_w
|
||||
cv2.putText(
|
||||
grid_bgr,
|
||||
str(idx),
|
||||
(x0 + 8, y0 + 22),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.6,
|
||||
(0, 255, 255),
|
||||
2,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
|
||||
return grid_bgr
|
||||
|
||||
    def _prepare_raw_view(
        self,
        mask_raw: ImageArray | None,
        bbox: BBoxXYXY | None = None,
    ) -> ImageArray:
        """Prepare raw mask view.

        Args:
            mask_raw: Raw binary mask or None
            bbox: Optional (x1, y1, x2, y2) region; when given, the mask is
                cropped to it before being fitted to the display

        Returns:
            Displayable image with mode indicator
        """
        if mask_raw is None:
            # Create placeholder
            placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
            self._draw_mode_indicator(placeholder, "Raw Mask (No Data)")
            return placeholder

        # Ensure single channel
        if len(mask_raw.shape) == 3:
            mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY))
        else:
            mask_gray = cast(ImageArray, mask_raw)

        # Normalize to displayable uint8, then crop to the detection box.
        mask_gray = self._normalize_mask_for_display(mask_gray)
        mask_gray = self._crop_mask_to_bbox(mask_gray, bbox)

        # Reserve space at the top for the debug-stats overlay (when enabled)
        # and at the bottom for the mode-label strip.
        debug_pad = RAW_STATS_PAD if self.show_raw_debug else 0
        content_h = max(1, DISPLAY_HEIGHT - debug_pad - MODE_LABEL_PAD)
        mask_resized = self._fit_gray_to_display(
            mask_gray, out_h=content_h, out_w=DISPLAY_WIDTH
        )
        full_mask = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
        full_mask[debug_pad : debug_pad + content_h, :] = mask_resized

        # Convert to BGR for display
        mask_bgr = cast(ImageArray, cv2.cvtColor(full_mask, cv2.COLOR_GRAY2BGR))
        if self.show_raw_debug:
            self._draw_raw_stats(mask_bgr, mask_raw)
        self._draw_mode_indicator(mask_bgr, "Raw Mask")

        return mask_bgr
|
||||
|
||||
def _prepare_normalized_view(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare normalized silhouette view.
|
||||
|
||||
Args:
|
||||
silhouette: Normalized silhouette (64, 44) or None
|
||||
|
||||
Returns:
|
||||
Displayable image with mode indicator
|
||||
"""
|
||||
if silhouette is None:
|
||||
# Create placeholder
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Normalized (No Data)")
|
||||
return placeholder
|
||||
|
||||
# Upscale and convert
|
||||
upscaled = self._upscale_silhouette(silhouette)
|
||||
content_h = max(1, DISPLAY_HEIGHT - MODE_LABEL_PAD)
|
||||
sil_compact = self._fit_gray_to_display(
|
||||
upscaled, out_h=content_h, out_w=DISPLAY_WIDTH
|
||||
)
|
||||
sil_canvas = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
|
||||
sil_canvas[:content_h, :] = sil_compact
|
||||
sil_bgr = cast(ImageArray, cv2.cvtColor(sil_canvas, cv2.COLOR_GRAY2BGR))
|
||||
self._draw_mode_indicator(sil_bgr, "Normalized")
|
||||
|
||||
return sil_bgr
|
||||
|
||||
def _draw_mode_indicator(self, image: ImageArray, label: str) -> None:
|
||||
h, w = image.shape[:2]
|
||||
|
||||
mode_text = label
|
||||
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.5
|
||||
thickness = 1
|
||||
|
||||
# Get text size for background
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
mode_text, font, font_scale, thickness
|
||||
)
|
||||
|
||||
x_pos = 14
|
||||
y_pos = h - 8
|
||||
y_top = max(0, h - MODE_LABEL_PAD)
|
||||
|
||||
_ = cv2.rectangle(
|
||||
image,
|
||||
(0, y_top),
|
||||
(w, h),
|
||||
COLOR_DARK_GRAY,
|
||||
-1,
|
||||
)
|
||||
_ = cv2.rectangle(
|
||||
image,
|
||||
(x_pos - 6, y_pos - text_height - 6),
|
||||
(x_pos + text_width + 8, y_pos + 6),
|
||||
COLOR_DARK_GRAY,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
image,
|
||||
mode_text,
|
||||
(x_pos, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_YELLOW,
|
||||
thickness,
|
||||
)
|
||||
|
||||
    def update(
        self,
        frame: ImageArray,
        bbox: BBoxXYXY | None,
        bbox_mask: BBoxXYXY | None,
        track_id: int,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
        segmentation_input: NDArray[np.float32] | None,
        label: str | None,
        confidence: float | None,
        fps: float,
        pose_data: dict[str, object] | None = None,
    ) -> bool:
        """Update visualization with new frame data.

        Key bindings handled here: 'q' quits, 'r' toggles the raw-mask
        window, 'd' toggles the raw-mask debug overlay.

        Args:
            frame: Input frame (H, W, C) uint8
            bbox: Bounding box in XYXY format (x1, y1, x2, y2) or None
            bbox_mask: Box used to crop the raw mask in the raw-mask
                window, (x1, y1, x2, y2) or None
            track_id: Tracking ID
            mask_raw: Raw binary mask (H, W) uint8 or None
            silhouette: Normalized silhouette (64, 44) float32 [0,1] or None
            segmentation_input: Silhouette stack shown in the
                segmentation-input window, or None
            label: Classification label or None
            confidence: Classification confidence [0,1] or None
            fps: Current FPS
            pose_data: Pose data dictionary or None

        Returns:
            False if user requested quit (pressed 'q'), True otherwise
        """
        self._ensure_windows()

        # Prepare and show main window
        main_display = self._prepare_main_frame(
            frame, bbox, track_id, fps, label, confidence, pose_data
        )
        cv2.imshow(MAIN_WINDOW, main_display)

        # Prepare and show segmentation window
        seg_display = self._prepare_segmentation_view(mask_raw, silhouette, bbox)
        cv2.imshow(SEG_WINDOW, seg_display)

        # Raw-mask window is optional; created lazily when toggled on.
        if self.show_raw_window:
            self._ensure_raw_window()
            raw_display = self._prepare_raw_view(mask_raw, bbox_mask)
            cv2.imshow(RAW_WINDOW, raw_display)

        seg_input_display = self._prepare_segmentation_input_view(segmentation_input)
        cv2.imshow(WINDOW_SEG_INPUT, seg_input_display)

        # Handle keyboard input
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            return False
        elif key == ord("r"):
            self.show_raw_window = not self.show_raw_window
            if self.show_raw_window:
                self._ensure_raw_window()
                logger.debug("Raw mask window enabled")
            else:
                self._hide_raw_window()
                logger.debug("Raw mask window disabled")
        elif key == ord("d"):
            self.show_raw_debug = not self.show_raw_debug
            logger.debug(
                "Raw mask debug overlay %s",
                "enabled" if self.show_raw_debug else "disabled",
            )

        return True
|
||||
|
||||
def close(self) -> None:
|
||||
if self._windows_created:
|
||||
self._hide_raw_window()
|
||||
cv2.destroyAllWindows()
|
||||
self._windows_created = False
|
||||
self._raw_window_created = False
|
||||
Reference in New Issue
Block a user