# OpenGait/opengait/demo/visualizer.py

"""OpenCV-based visualizer for demo pipeline.

Provides real-time visualization of detection, segmentation, and classification results
with interactive mode switching for mask display.
"""

from __future__ import annotations

import logging
from typing import cast

import cv2
import numpy as np
from numpy.typing import NDArray

# Module-level logger; update() logs a debug message whenever the mask mode changes
logger = logging.getLogger(__name__)

# Window names (passed to cv2.namedWindow / cv2.imshow)
MAIN_WINDOW = "Scoliosis Detection"
SEG_WINDOW = "Segmentation"

# Silhouette dimensions (from preprocess.py)
# NOTE(review): not referenced in the visible part of this file - confirm usage elsewhere
SIL_HEIGHT = 64
SIL_WIDTH = 44

# Display dimensions for upscaled silhouette (4x the silhouette dimensions above)
DISPLAY_HEIGHT = 256
DISPLAY_WIDTH = 176

# Colors (BGR channel order)
COLOR_GREEN = (0, 255, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLACK = (0, 0, 0)
COLOR_RED = (0, 0, 255)
COLOR_YELLOW = (0, 255, 255)

# Mode labels, indexed by OpenCVVisualizer.mask_mode (0: Both, 1: Raw, 2: Normalized)
MODE_LABELS = ["Both", "Raw Mask", "Normalized"]

# Type alias for uint8 image arrays (NDArray or cv2.Mat)
ImageArray = NDArray[np.uint8]


class OpenCVVisualizer:
    """Real-time visualizer for gait analysis demo.

    Displays two windows:
    - Main stream: Original frame with bounding box and metadata overlay
    - Segmentation: Raw mask, normalized silhouette, or side-by-side view

    Supports interactive mode switching via keyboard.
    """

    def __init__(self) -> None:
        """Initialize visualizer with default mask mode."""
        self.mask_mode: int = 0  # 0: Both, 1: Raw, 2: Normalized
        self._windows_created: bool = False

    def _ensure_windows(self) -> None:
        """Create OpenCV windows if not already created."""
        if not self._windows_created:
            cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL)
            cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL)
            self._windows_created = True

    def _draw_bbox(
        self,
        frame: ImageArray,
        bbox: tuple[int, int, int, int] | None,
    ) -> None:
        """Draw bounding box on frame if present.

        Args:
            frame: Input frame (H, W, 3) uint8 - modified in place
            bbox: Bounding box as (x1, y1, x2, y2) or None
        """
        if bbox is None:
            return

        x1, y1, x2, y2 = bbox
        # Draw rectangle with green color, thickness 2
        _ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2)

    def _draw_text_overlay(
        self,
        frame: ImageArray,
        track_id: int,
        fps: float,
        label: str | None,
        confidence: float | None,
    ) -> None:
        """Draw text overlay with track info, FPS, label, and confidence.

        Args:
            frame: Input frame (H, W, 3) uint8 - modified in place
            track_id: Tracking ID
            fps: Current FPS
            label: Classification label or None
            confidence: Classification confidence or None
        """
        # Prepare text lines
        lines: list[str] = []
        lines.append(f"ID: {track_id}")
        lines.append(f"FPS: {fps:.1f}")

        if label is not None:
            if confidence is not None:
                lines.append(f"{label}: {confidence:.2%}")
            else:
                lines.append(label)

        # Draw text with background for readability
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.6
        thickness = 1
        line_height = 25
        margin = 10

        for i, text in enumerate(lines):
            y_pos = margin + (i + 1) * line_height

            # Draw background rectangle
            (text_width, text_height), _ = cv2.getTextSize(
                text, font, font_scale, thickness
            )
            _ = cv2.rectangle(
                frame,
                (margin, y_pos - text_height - 5),
                (margin + text_width + 10, y_pos + 5),
                COLOR_BLACK,
                -1,
            )

            # Draw text
            _ = cv2.putText(
                frame,
                text,
                (margin + 5, y_pos),
                font,
                font_scale,
                COLOR_WHITE,
                thickness,
            )

    def _prepare_main_frame(
        self,
        frame: ImageArray,
        bbox: tuple[int, int, int, int] | None,
        track_id: int,
        fps: float,
        label: str | None,
        confidence: float | None,
    ) -> ImageArray:
        """Prepare main display frame with bbox and text overlay.

        Args:
            frame: Input frame (H, W, C) uint8
            bbox: Bounding box or None
            track_id: Tracking ID
            fps: Current FPS
            label: Classification label or None
            confidence: Classification confidence or None

        Returns:
            Processed frame ready for display
        """
        # Ensure BGR format (convert grayscale if needed)
        if len(frame.shape) == 2:
            display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
        elif frame.shape[2] == 1:
            display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
        elif frame.shape[2] == 3:
            display_frame = frame.copy()
        elif frame.shape[2] == 4:
            display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))
        else:
            display_frame = frame.copy()

        # Draw bbox and text (modifies in place)
        self._draw_bbox(display_frame, bbox)
        self._draw_text_overlay(display_frame, track_id, fps, label, confidence)

        return display_frame

    def _upscale_silhouette(
        self,
        silhouette: NDArray[np.float32] | NDArray[np.uint8],
    ) -> ImageArray:
        """Upscale silhouette to display size.

        Args:
            silhouette: Input silhouette (64, 44) float32 [0,1] or uint8 [0,255]

        Returns:
            Upscaled silhouette (256, 176) uint8
        """
        # Normalize to uint8 if needed
        if silhouette.dtype == np.float32 or silhouette.dtype == np.float64:
            sil_u8 = (silhouette * 255).astype(np.uint8)
        else:
            sil_u8 = silhouette.astype(np.uint8)

        # Upscale using nearest neighbor to preserve pixelation
        upscaled = cast(
            ImageArray,
            cv2.resize(
                sil_u8,
                (DISPLAY_WIDTH, DISPLAY_HEIGHT),
                interpolation=cv2.INTER_NEAREST,
            ),
        )

        return upscaled

    def _prepare_segmentation_view(
        self,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
    ) -> ImageArray:
        """Prepare segmentation window content based on current mode.

        Args:
            mask_raw: Raw binary mask (H, W) uint8 or None
            silhouette: Normalized silhouette (64, 44) float32 or None

        Returns:
            Displayable image (H, W, 3) uint8
        """
        if self.mask_mode == 0:
            # Mode 0: Both (side by side)
            return self._prepare_both_view(mask_raw, silhouette)
        elif self.mask_mode == 1:
            # Mode 1: Raw mask only
            return self._prepare_raw_view(mask_raw)
        else:
            # Mode 2: Normalized silhouette only
            return self._prepare_normalized_view(silhouette)

    def _prepare_raw_view(
        self,
        mask_raw: ImageArray | None,
    ) -> ImageArray:
        """Prepare raw mask view.

        Args:
            mask_raw: Raw binary mask or None

        Returns:
            Displayable image with mode indicator
        """
        if mask_raw is None:
            # Create placeholder
            placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
            self._draw_mode_indicator(placeholder, "Raw Mask (No Data)")
            return placeholder

        # Ensure single channel
        if len(mask_raw.shape) == 3:
            mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY))
        else:
            mask_gray = mask_raw

        # Resize to display size
        mask_resized = cast(
            ImageArray,
            cv2.resize(
                mask_gray,
                (DISPLAY_WIDTH, DISPLAY_HEIGHT),
                interpolation=cv2.INTER_NEAREST,
            ),
        )

        # Convert to BGR for display
        mask_bgr = cast(ImageArray, cv2.cvtColor(mask_resized, cv2.COLOR_GRAY2BGR))
        self._draw_mode_indicator(mask_bgr, "Raw Mask")

        return mask_bgr

    def _prepare_normalized_view(
        self,
        silhouette: NDArray[np.float32] | None,
    ) -> ImageArray:
        """Prepare normalized silhouette view.

        Args:
            silhouette: Normalized silhouette (64, 44) or None

        Returns:
            Displayable image with mode indicator
        """
        if silhouette is None:
            # Create placeholder
            placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
            self._draw_mode_indicator(placeholder, "Normalized (No Data)")
            return placeholder

        # Upscale and convert
        upscaled = self._upscale_silhouette(silhouette)
        sil_bgr = cast(ImageArray, cv2.cvtColor(upscaled, cv2.COLOR_GRAY2BGR))
        self._draw_mode_indicator(sil_bgr, "Normalized")

        return sil_bgr

    def _prepare_both_view(
        self,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
    ) -> ImageArray:
        """Prepare side-by-side view of both masks.

        Args:
            mask_raw: Raw binary mask or None
            silhouette: Normalized silhouette or None

        Returns:
            Displayable side-by-side image
        """
        # Prepare individual views without mode indicators (will be drawn on combined)
        # Raw view preparation (without indicator)
        if mask_raw is None:
            raw_gray = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
        else:
            if len(mask_raw.shape) == 3:
                mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY))
            else:
                mask_gray = mask_raw
            raw_gray = cast(
                ImageArray,
                cv2.resize(
                    mask_gray,
                    (DISPLAY_WIDTH, DISPLAY_HEIGHT),
                    interpolation=cv2.INTER_NEAREST,
                ),
            )

        # Normalized view preparation (without indicator)
        if silhouette is None:
            norm_gray = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
        else:
            upscaled = self._upscale_silhouette(silhouette)
            norm_gray = upscaled

        # Stack horizontally
        combined = np.hstack([raw_gray, norm_gray])

        # Convert back to BGR
        combined_bgr = cast(ImageArray, cv2.cvtColor(combined, cv2.COLOR_GRAY2BGR))

        # Add mode indicator
        self._draw_mode_indicator(combined_bgr, "Both: Raw | Normalized")

        return combined_bgr

    def _draw_mode_indicator(
        self,
        image: ImageArray,
        label: str,
    ) -> None:
        """Draw mode indicator text on image.

        Args:
            image: Image to draw on (modified in place)
            label: Mode label text
        """
        h, w = image.shape[:2]

        # Mode text at bottom
        mode_text = f"Mode: {MODE_LABELS[self.mask_mode]} ({self.mask_mode}) - {label}"

        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.5
        thickness = 1

        # Get text size for background
        (text_width, text_height), _ = cv2.getTextSize(
            mode_text, font, font_scale, thickness
        )

        # Draw background at bottom center
        x_pos = (w - text_width) // 2
        y_pos = h - 10

        _ = cv2.rectangle(
            image,
            (x_pos - 5, y_pos - text_height - 5),
            (x_pos + text_width + 5, y_pos + 5),
            COLOR_BLACK,
            -1,
        )

        # Draw text
        _ = cv2.putText(
            image,
            mode_text,
            (x_pos, y_pos),
            font,
            font_scale,
            COLOR_YELLOW,
            thickness,
        )

    def update(
        self,
        frame: ImageArray,
        bbox: tuple[int, int, int, int] | None,
        track_id: int,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
        label: str | None,
        confidence: float | None,
        fps: float,
    ) -> bool:
        """Update visualization with new frame data.

        Args:
            frame: Input frame (H, W, C) uint8
            bbox: Bounding box as (x1, y1, x2, y2) or None
            track_id: Tracking ID
            mask_raw: Raw binary mask (H, W) uint8 or None
            silhouette: Normalized silhouette (64, 44) float32 [0,1] or None
            label: Classification label or None
            confidence: Classification confidence [0,1] or None
            fps: Current FPS

        Returns:
            False if user requested quit (pressed 'q'), True otherwise
        """
        self._ensure_windows()

        # Prepare and show main window
        main_display = self._prepare_main_frame(
            frame, bbox, track_id, fps, label, confidence
        )
        cv2.imshow(MAIN_WINDOW, main_display)

        # Prepare and show segmentation window
        seg_display = self._prepare_segmentation_view(mask_raw, silhouette)
        cv2.imshow(SEG_WINDOW, seg_display)

        # Handle keyboard input
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            return False
        elif key == ord("m"):
            # Cycle through modes: 0 -> 1 -> 2 -> 0
            self.mask_mode = (self.mask_mode + 1) % 3
            logger.debug("Switched to mask mode: %s", MODE_LABELS[self.mask_mode])

        return True

    def close(self) -> None:
        """Close all OpenCV windows and cleanup."""
        if self._windows_created:
            cv2.destroyAllWindows()
            self._windows_created = False