feat: extract opengait_studio monorepo module
Move demo implementation into opengait_studio, retire Sports2D runtime integration, and align packaging with root-level monorepo dependency management.
This commit is contained in:
@@ -0,0 +1,767 @@
|
||||
"""OpenCV-based visualizer for demo pipeline.
|
||||
|
||||
Provides real-time visualization of detection, segmentation, and classification results
|
||||
with interactive mode switching for mask display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import cast
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from .preprocess import BBoxXYXY
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Window names
MAIN_WINDOW = "Scoliosis Detection"
SEG_WINDOW = "Normalized Silhouette"
RAW_WINDOW = "Raw Mask"
WINDOW_SEG_INPUT = "Segmentation Input"

# Silhouette dimensions (from preprocess.py)
SIL_HEIGHT = 64
SIL_WIDTH = 44

# Display dimensions for upscaled silhouette
DISPLAY_HEIGHT = 256
DISPLAY_WIDTH = 176
# Vertical padding (px) reserved for the raw-stats overlay / mode label strip
RAW_STATS_PAD = 54
MODE_LABEL_PAD = 26

# Colors (BGR)
COLOR_GREEN = (0, 255, 0)
COLOR_WHITE = (255, 255, 255)
COLOR_BLACK = (0, 0, 0)
COLOR_DARK_GRAY = (56, 56, 56)
COLOR_RED = (0, 0, 255)
COLOR_YELLOW = (0, 255, 255)
COLOR_CYAN = (255, 255, 0)
COLOR_ORANGE = (0, 165, 255)
COLOR_MAGENTA = (255, 0, 255)

# Type alias for image arrays (NDArray or cv2.Mat)
ImageArray = NDArray[np.uint8]

# COCO-format skeleton connections (17 keypoints)
# Connections are pairs of keypoint indices
SKELETON_CONNECTIONS: list[tuple[int, int]] = [
    (0, 1),   # nose -> left_eye
    (0, 2),   # nose -> right_eye
    (1, 3),   # left_eye -> left_ear
    (2, 4),   # right_eye -> right_ear
    (5, 6),   # left_shoulder -> right_shoulder
    (5, 7),   # left_shoulder -> left_elbow
    (7, 9),   # left_elbow -> left_wrist
    (6, 8),   # right_shoulder -> right_elbow
    (8, 10),  # right_elbow -> right_wrist
    (11, 12), # left_hip -> right_hip
    (5, 11),  # left_shoulder -> left_hip
    (6, 12),  # right_shoulder -> right_hip
    (11, 13), # left_hip -> left_knee
    (13, 15), # left_knee -> left_ankle
    (12, 14), # right_hip -> right_knee
    (14, 16), # right_knee -> right_ankle
]

# Keypoint names for COCO format (17 keypoints)
KEYPOINT_NAMES: list[str] = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"
]

# Joints where angles are typically calculated (for scoliosis / gait analysis)
# Each triple (a, b, c) measures the angle at keypoint b between rays b->a and b->c.
ANGLE_JOINTS: list[tuple[int, int, int]] = [
    (5, 7, 9),    # left_shoulder -> left_elbow -> left_wrist
    (6, 8, 10),   # right_shoulder -> right_elbow -> right_wrist
    (7, 5, 11),   # left_elbow -> left_shoulder -> left_hip
    (8, 6, 12),   # right_elbow -> right_shoulder -> right_hip
    (5, 11, 13),  # left_shoulder -> left_hip -> left_knee
    (6, 12, 14),  # right_shoulder -> right_hip -> right_knee
    (11, 13, 15), # left_hip -> left_knee -> left_ankle
    (12, 14, 16), # right_hip -> right_knee -> right_ankle
]
|
||||
|
||||
|
||||
|
||||
class OpenCVVisualizer:
    """Interactive OpenCV window manager for the demo pipeline.

    Owns up to four windows (main, normalized silhouette, segmentation input,
    and an optional raw-mask window) and tracks which of them have been
    created so each is instantiated lazily, exactly once, and torn down
    cleanly via :meth:`close`.
    """

    def __init__(self) -> None:
        # Whether the optional raw-mask window is currently shown
        # (toggled at runtime with the 'r' key in update()).
        self.show_raw_window: bool = False
        # Whether raw-mask debug stats are overlaid (toggled with 'd').
        self.show_raw_debug: bool = False
        # Lazy-creation flags for the always-on windows and the raw window.
        self._windows_created: bool = False
        self._raw_window_created: bool = False

    def _ensure_windows(self) -> None:
        """Create the three always-on windows on first use."""
        if not self._windows_created:
            cv2.namedWindow(MAIN_WINDOW, cv2.WINDOW_NORMAL)
            cv2.namedWindow(SEG_WINDOW, cv2.WINDOW_NORMAL)
            cv2.namedWindow(WINDOW_SEG_INPUT, cv2.WINDOW_NORMAL)
            self._windows_created = True

    def _ensure_raw_window(self) -> None:
        """Create the optional raw-mask window on first use."""
        if not self._raw_window_created:
            cv2.namedWindow(RAW_WINDOW, cv2.WINDOW_NORMAL)
            self._raw_window_created = True

    def _hide_raw_window(self) -> None:
        """Destroy the raw-mask window if it exists; safe to call repeatedly."""
        if self._raw_window_created:
            cv2.destroyWindow(RAW_WINDOW)
            self._raw_window_created = False
|
||||
|
||||
def _draw_bbox(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: BBoxXYXY | None,
|
||||
) -> None:
|
||||
"""Draw bounding box on frame if present.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
bbox: Bounding box in XYXY format as (x1, y1, x2, y2) or None
|
||||
"""
|
||||
if bbox is None:
|
||||
return
|
||||
|
||||
x1, y1, x2, y2 = bbox
|
||||
# Draw rectangle with green color, thickness 2
|
||||
_ = cv2.rectangle(frame, (x1, y1), (x2, y2), COLOR_GREEN, 2)
|
||||
|
||||
def _draw_text_overlay(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
) -> None:
|
||||
"""Draw text overlay with track info, FPS, label, and confidence.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
"""
|
||||
# Prepare text lines
|
||||
lines: list[str] = []
|
||||
lines.append(f"ID: {track_id}")
|
||||
lines.append(f"FPS: {fps:.1f}")
|
||||
|
||||
if label is not None:
|
||||
if confidence is not None:
|
||||
lines.append(f"{label}: {confidence:.2%}")
|
||||
else:
|
||||
lines.append(label)
|
||||
|
||||
# Draw text with background for readability
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.6
|
||||
thickness = 1
|
||||
line_height = 25
|
||||
margin = 10
|
||||
|
||||
for i, text in enumerate(lines):
|
||||
y_pos = margin + (i + 1) * line_height
|
||||
|
||||
# Draw background rectangle
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
text, font, font_scale, thickness
|
||||
)
|
||||
_ = cv2.rectangle(
|
||||
frame,
|
||||
(margin, y_pos - text_height - 5),
|
||||
(margin + text_width + 10, y_pos + 5),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
frame,
|
||||
text,
|
||||
(margin + 5, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_WHITE,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def _draw_pose_skeleton(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
pose_data: dict[str, object] | None,
|
||||
) -> None:
|
||||
"""Draw pose skeleton on frame.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
pose_data: Pose data dictionary from Sports2D or similar
|
||||
Expected format: {'keypoints': [[x1, y1], [x2, y2], ...],
|
||||
'confidence': [c1, c2, ...],
|
||||
'angles': {'joint_name': angle, ...}}
|
||||
"""
|
||||
if pose_data is None:
|
||||
return
|
||||
|
||||
keypoints_obj = pose_data.get('keypoints')
|
||||
if keypoints_obj is None:
|
||||
return
|
||||
|
||||
# Convert keypoints to numpy array
|
||||
keypoints = np.asarray(keypoints_obj, dtype=np.float32)
|
||||
if keypoints.size == 0:
|
||||
return
|
||||
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# Get confidence scores if available
|
||||
confidence_obj = pose_data.get('confidence')
|
||||
confidences = (
|
||||
np.asarray(confidence_obj, dtype=np.float32)
|
||||
if confidence_obj is not None
|
||||
else np.ones(len(keypoints), dtype=np.float32)
|
||||
)
|
||||
|
||||
# Draw skeleton connections
|
||||
for connection in SKELETON_CONNECTIONS:
|
||||
idx1, idx2 = connection
|
||||
if idx1 < len(keypoints) and idx2 < len(keypoints):
|
||||
# Check confidence threshold (0.3)
|
||||
if confidences[idx1] > 0.3 and confidences[idx2] > 0.3:
|
||||
pt1 = (int(keypoints[idx1][0]), int(keypoints[idx1][1]))
|
||||
pt2 = (int(keypoints[idx2][0]), int(keypoints[idx2][1]))
|
||||
# Clip to frame bounds
|
||||
pt1 = (max(0, min(w - 1, pt1[0])), max(0, min(h - 1, pt1[1])))
|
||||
pt2 = (max(0, min(w - 1, pt2[0])), max(0, min(h - 1, pt2[1])))
|
||||
_ = cv2.line(frame, pt1, pt2, COLOR_CYAN, 2)
|
||||
|
||||
# Draw keypoints
|
||||
for i, (kp, conf) in enumerate(zip(keypoints, confidences)):
|
||||
if conf > 0.3 and i < len(keypoints):
|
||||
x, y = int(kp[0]), int(kp[1])
|
||||
# Clip to frame bounds
|
||||
x = max(0, min(w - 1, x))
|
||||
y = max(0, min(h - 1, y))
|
||||
# Draw keypoint as circle
|
||||
_ = cv2.circle(frame, (x, y), 4, COLOR_MAGENTA, -1)
|
||||
_ = cv2.circle(frame, (x, y), 4, COLOR_WHITE, 1)
|
||||
|
||||
def _draw_pose_angles(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
pose_data: dict[str, object] | None,
|
||||
) -> None:
|
||||
"""Draw pose angles as text overlay.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, 3) uint8 - modified in place
|
||||
pose_data: Pose data dictionary with 'angles' key
|
||||
"""
|
||||
if pose_data is None:
|
||||
return
|
||||
|
||||
angles_obj = pose_data.get('angles')
|
||||
if angles_obj is None:
|
||||
return
|
||||
|
||||
angles = cast(dict[str, float], angles_obj)
|
||||
if not angles:
|
||||
return
|
||||
|
||||
# Draw angles in top-right corner
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.45
|
||||
thickness = 1
|
||||
line_height = 20
|
||||
margin = 10
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# Filter and format angles
|
||||
angle_texts: list[tuple[str, float]] = []
|
||||
for name, angle in angles.items():
|
||||
# Only show angles that are reasonable (0-180 degrees)
|
||||
if 0 <= angle <= 180:
|
||||
angle_texts.append((str(name), float(angle)))
|
||||
|
||||
# Sort by name for consistent display
|
||||
angle_texts.sort(key=lambda x: x[0])
|
||||
|
||||
# Draw from top-right
|
||||
for i, (name, angle) in enumerate(angle_texts[:8]): # Limit to 8 angles
|
||||
text = f"{name}: {angle:.1f}"
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
text, font, font_scale, thickness
|
||||
)
|
||||
x_pos = w - margin - text_width - 10
|
||||
y_pos = margin + (i + 1) * line_height
|
||||
|
||||
# Draw background rectangle
|
||||
_ = cv2.rectangle(
|
||||
frame,
|
||||
(x_pos - 4, y_pos - text_height - 4),
|
||||
(x_pos + text_width + 4, y_pos + 4),
|
||||
COLOR_BLACK,
|
||||
-1,
|
||||
)
|
||||
# Draw text in orange
|
||||
_ = cv2.putText(
|
||||
frame,
|
||||
text,
|
||||
(x_pos, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_ORANGE,
|
||||
thickness,
|
||||
)
|
||||
|
||||
def _prepare_main_frame(
|
||||
self,
|
||||
frame: ImageArray,
|
||||
bbox: BBoxXYXY | None,
|
||||
track_id: int,
|
||||
fps: float,
|
||||
label: str | None,
|
||||
confidence: float | None,
|
||||
pose_data: dict[str, object] | None = None,
|
||||
) -> ImageArray:
|
||||
"""Prepare main display frame with bbox and text overlay.
|
||||
|
||||
Args:
|
||||
frame: Input frame (H, W, C) uint8
|
||||
bbox: Bounding box in XYXY format (x1, y1, x2, y2) or None
|
||||
track_id: Tracking ID
|
||||
fps: Current FPS
|
||||
label: Classification label or None
|
||||
confidence: Classification confidence or None
|
||||
pose_data: Pose data dictionary or None
|
||||
|
||||
Returns:
|
||||
Processed frame ready for display
|
||||
"""
|
||||
# Ensure BGR format (convert grayscale if needed)
|
||||
if len(frame.shape) == 2:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 1:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))
|
||||
elif frame.shape[2] == 3:
|
||||
display_frame = frame.copy()
|
||||
elif frame.shape[2] == 4:
|
||||
display_frame = cast(ImageArray, cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR))
|
||||
else:
|
||||
display_frame = frame.copy()
|
||||
|
||||
# Draw bbox and text (modifies in place)
|
||||
self._draw_bbox(display_frame, bbox)
|
||||
self._draw_text_overlay(display_frame, track_id, fps, label, confidence)
|
||||
|
||||
# Draw pose skeleton and angles if available
|
||||
self._draw_pose_skeleton(display_frame, pose_data)
|
||||
self._draw_pose_angles(display_frame, pose_data)
|
||||
|
||||
return display_frame
|
||||
|
||||
def _upscale_silhouette(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | NDArray[np.uint8],
|
||||
) -> ImageArray:
|
||||
"""Upscale silhouette to display size.
|
||||
|
||||
Args:
|
||||
silhouette: Input silhouette (64, 44) float32 [0,1] or uint8 [0,255]
|
||||
|
||||
Returns:
|
||||
Upscaled silhouette (256, 176) uint8
|
||||
"""
|
||||
# Normalize to uint8 if needed
|
||||
if silhouette.dtype == np.float32 or silhouette.dtype == np.float64:
|
||||
sil_u8 = (silhouette * 255).astype(np.uint8)
|
||||
else:
|
||||
sil_u8 = silhouette.astype(np.uint8)
|
||||
|
||||
# Upscale using nearest neighbor to preserve pixelation
|
||||
upscaled = cast(
|
||||
ImageArray,
|
||||
cv2.resize(
|
||||
sil_u8,
|
||||
(DISPLAY_WIDTH, DISPLAY_HEIGHT),
|
||||
interpolation=cv2.INTER_NEAREST,
|
||||
),
|
||||
)
|
||||
|
||||
return upscaled
|
||||
|
||||
def _normalize_mask_for_display(self, mask: NDArray[np.generic]) -> ImageArray:
|
||||
mask_array = np.asarray(mask)
|
||||
if mask_array.dtype == np.bool_:
|
||||
bool_scaled = np.where(mask_array, np.uint8(255), np.uint8(0)).astype(
|
||||
np.uint8
|
||||
)
|
||||
return cast(ImageArray, bool_scaled)
|
||||
|
||||
if mask_array.dtype == np.uint8:
|
||||
mask_array = cast(ImageArray, mask_array)
|
||||
max_u8 = int(np.max(mask_array)) if mask_array.size > 0 else 0
|
||||
if max_u8 <= 1:
|
||||
scaled_u8 = np.where(mask_array > 0, np.uint8(255), np.uint8(0)).astype(
|
||||
np.uint8
|
||||
)
|
||||
return cast(ImageArray, scaled_u8)
|
||||
return cast(ImageArray, mask_array)
|
||||
|
||||
if np.issubdtype(mask_array.dtype, np.integer):
|
||||
max_int = float(np.max(mask_array)) if mask_array.size > 0 else 0.0
|
||||
if max_int <= 1.0:
|
||||
return cast(
|
||||
ImageArray, (mask_array.astype(np.float32) * 255.0).astype(np.uint8)
|
||||
)
|
||||
clipped = np.clip(mask_array, 0, 255).astype(np.uint8)
|
||||
return cast(ImageArray, clipped)
|
||||
|
||||
mask_float = np.asarray(mask_array, dtype=np.float32)
|
||||
max_val = float(np.max(mask_float)) if mask_float.size > 0 else 0.0
|
||||
if max_val <= 0.0:
|
||||
return np.zeros(mask_float.shape, dtype=np.uint8)
|
||||
|
||||
normalized = np.clip((mask_float / max_val) * 255.0, 0.0, 255.0).astype(
|
||||
np.uint8
|
||||
)
|
||||
return cast(ImageArray, normalized)
|
||||
|
||||
def _draw_raw_stats(self, image: ImageArray, mask_raw: ImageArray | None) -> None:
|
||||
if mask_raw is None:
|
||||
return
|
||||
|
||||
mask = np.asarray(mask_raw)
|
||||
if mask.size == 0:
|
||||
return
|
||||
|
||||
stats = [
|
||||
f"raw: {mask.dtype}",
|
||||
f"min/max: {float(mask.min()):.3f}/{float(mask.max()):.3f}",
|
||||
f"nnz: {int(np.count_nonzero(mask))}",
|
||||
]
|
||||
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.45
|
||||
thickness = 1
|
||||
line_h = 18
|
||||
x0 = 8
|
||||
y0 = 20
|
||||
|
||||
for i, txt in enumerate(stats):
|
||||
y = y0 + i * line_h
|
||||
(tw, th), _ = cv2.getTextSize(txt, font, font_scale, thickness)
|
||||
_ = cv2.rectangle(
|
||||
image, (x0 - 4, y - th - 4), (x0 + tw + 4, y + 4), COLOR_BLACK, -1
|
||||
)
|
||||
_ = cv2.putText(
|
||||
image, txt, (x0, y), font, font_scale, COLOR_YELLOW, thickness
|
||||
)
|
||||
|
||||
def _prepare_segmentation_view(
|
||||
self,
|
||||
mask_raw: ImageArray | None,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
bbox: BBoxXYXY | None,
|
||||
) -> ImageArray:
|
||||
_ = mask_raw
|
||||
_ = bbox
|
||||
return self._prepare_normalized_view(silhouette)
|
||||
|
||||
def _fit_gray_to_display(
|
||||
self,
|
||||
gray: ImageArray,
|
||||
out_h: int = DISPLAY_HEIGHT,
|
||||
out_w: int = DISPLAY_WIDTH,
|
||||
) -> ImageArray:
|
||||
src_h, src_w = gray.shape[:2]
|
||||
if src_h <= 0 or src_w <= 0:
|
||||
return np.zeros((out_h, out_w), dtype=np.uint8)
|
||||
|
||||
scale = min(out_w / src_w, out_h / src_h)
|
||||
new_w = max(1, int(round(src_w * scale)))
|
||||
new_h = max(1, int(round(src_h * scale)))
|
||||
|
||||
resized = cast(
|
||||
ImageArray,
|
||||
cv2.resize(gray, (new_w, new_h), interpolation=cv2.INTER_NEAREST),
|
||||
)
|
||||
canvas = np.zeros((out_h, out_w), dtype=np.uint8)
|
||||
x0 = (out_w - new_w) // 2
|
||||
y0 = (out_h - new_h) // 2
|
||||
canvas[y0 : y0 + new_h, x0 : x0 + new_w] = resized
|
||||
return cast(ImageArray, canvas)
|
||||
|
||||
def _crop_mask_to_bbox(
|
||||
self,
|
||||
mask_gray: ImageArray,
|
||||
bbox: BBoxXYXY | None,
|
||||
) -> ImageArray:
|
||||
if bbox is None:
|
||||
return mask_gray
|
||||
|
||||
h, w = mask_gray.shape[:2]
|
||||
x1, y1, x2, y2 = bbox
|
||||
x1c = max(0, min(w, int(x1)))
|
||||
x2c = max(0, min(w, int(x2)))
|
||||
y1c = max(0, min(h, int(y1)))
|
||||
y2c = max(0, min(h, int(y2)))
|
||||
|
||||
if x2c <= x1c or y2c <= y1c:
|
||||
return mask_gray
|
||||
|
||||
cropped = mask_gray[y1c:y2c, x1c:x2c]
|
||||
if cropped.size == 0:
|
||||
return mask_gray
|
||||
return cast(ImageArray, cropped)
|
||||
|
||||
def _prepare_segmentation_input_view(
|
||||
self,
|
||||
silhouettes: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
if silhouettes is None or silhouettes.size == 0:
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Input Silhouettes (No Data)")
|
||||
return placeholder
|
||||
|
||||
n_frames = int(silhouettes.shape[0])
|
||||
tiles_per_row = int(np.ceil(np.sqrt(n_frames)))
|
||||
rows = int(np.ceil(n_frames / tiles_per_row))
|
||||
|
||||
tile_h = DISPLAY_HEIGHT
|
||||
tile_w = DISPLAY_WIDTH
|
||||
grid = np.zeros((rows * tile_h, tiles_per_row * tile_w), dtype=np.uint8)
|
||||
|
||||
for idx in range(n_frames):
|
||||
sil = silhouettes[idx]
|
||||
tile = self._upscale_silhouette(sil)
|
||||
r = idx // tiles_per_row
|
||||
c = idx % tiles_per_row
|
||||
y0, y1 = r * tile_h, (r + 1) * tile_h
|
||||
x0, x1 = c * tile_w, (c + 1) * tile_w
|
||||
grid[y0:y1, x0:x1] = tile
|
||||
|
||||
grid_bgr = cast(ImageArray, cv2.cvtColor(grid, cv2.COLOR_GRAY2BGR))
|
||||
|
||||
for idx in range(n_frames):
|
||||
r = idx // tiles_per_row
|
||||
c = idx % tiles_per_row
|
||||
y0 = r * tile_h
|
||||
x0 = c * tile_w
|
||||
cv2.putText(
|
||||
grid_bgr,
|
||||
str(idx),
|
||||
(x0 + 8, y0 + 22),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.6,
|
||||
(0, 255, 255),
|
||||
2,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
|
||||
return grid_bgr
|
||||
|
||||
    def _prepare_raw_view(
        self,
        mask_raw: ImageArray | None,
        bbox: BBoxXYXY | None = None,
    ) -> ImageArray:
        """Prepare raw mask view.

        Args:
            mask_raw: Raw binary mask or None
            bbox: Optional (x1, y1, x2, y2) region; when given, the mask is
                cropped to it before being fitted to the display

        Returns:
            Displayable image with mode indicator
        """
        if mask_raw is None:
            # Create placeholder
            placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
            self._draw_mode_indicator(placeholder, "Raw Mask (No Data)")
            return placeholder

        # Ensure single channel
        if len(mask_raw.shape) == 3:
            mask_gray = cast(ImageArray, cv2.cvtColor(mask_raw, cv2.COLOR_BGR2GRAY))
        else:
            mask_gray = cast(ImageArray, mask_raw)

        # Normalize to displayable uint8, then crop to the detection box.
        mask_gray = self._normalize_mask_for_display(mask_gray)
        mask_gray = self._crop_mask_to_bbox(mask_gray, bbox)

        # Reserve space at the top for the debug-stats overlay (when enabled)
        # and at the bottom for the mode-label strip.
        debug_pad = RAW_STATS_PAD if self.show_raw_debug else 0
        content_h = max(1, DISPLAY_HEIGHT - debug_pad - MODE_LABEL_PAD)
        mask_resized = self._fit_gray_to_display(
            mask_gray, out_h=content_h, out_w=DISPLAY_WIDTH
        )
        full_mask = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
        full_mask[debug_pad : debug_pad + content_h, :] = mask_resized

        # Convert to BGR for display
        mask_bgr = cast(ImageArray, cv2.cvtColor(full_mask, cv2.COLOR_GRAY2BGR))
        if self.show_raw_debug:
            self._draw_raw_stats(mask_bgr, mask_raw)
        self._draw_mode_indicator(mask_bgr, "Raw Mask")

        return mask_bgr
|
||||
|
||||
def _prepare_normalized_view(
|
||||
self,
|
||||
silhouette: NDArray[np.float32] | None,
|
||||
) -> ImageArray:
|
||||
"""Prepare normalized silhouette view.
|
||||
|
||||
Args:
|
||||
silhouette: Normalized silhouette (64, 44) or None
|
||||
|
||||
Returns:
|
||||
Displayable image with mode indicator
|
||||
"""
|
||||
if silhouette is None:
|
||||
# Create placeholder
|
||||
placeholder = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH, 3), dtype=np.uint8)
|
||||
self._draw_mode_indicator(placeholder, "Normalized (No Data)")
|
||||
return placeholder
|
||||
|
||||
# Upscale and convert
|
||||
upscaled = self._upscale_silhouette(silhouette)
|
||||
content_h = max(1, DISPLAY_HEIGHT - MODE_LABEL_PAD)
|
||||
sil_compact = self._fit_gray_to_display(
|
||||
upscaled, out_h=content_h, out_w=DISPLAY_WIDTH
|
||||
)
|
||||
sil_canvas = np.zeros((DISPLAY_HEIGHT, DISPLAY_WIDTH), dtype=np.uint8)
|
||||
sil_canvas[:content_h, :] = sil_compact
|
||||
sil_bgr = cast(ImageArray, cv2.cvtColor(sil_canvas, cv2.COLOR_GRAY2BGR))
|
||||
self._draw_mode_indicator(sil_bgr, "Normalized")
|
||||
|
||||
return sil_bgr
|
||||
|
||||
def _draw_mode_indicator(self, image: ImageArray, label: str) -> None:
|
||||
h, w = image.shape[:2]
|
||||
|
||||
mode_text = label
|
||||
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
font_scale = 0.5
|
||||
thickness = 1
|
||||
|
||||
# Get text size for background
|
||||
(text_width, text_height), _ = cv2.getTextSize(
|
||||
mode_text, font, font_scale, thickness
|
||||
)
|
||||
|
||||
x_pos = 14
|
||||
y_pos = h - 8
|
||||
y_top = max(0, h - MODE_LABEL_PAD)
|
||||
|
||||
_ = cv2.rectangle(
|
||||
image,
|
||||
(0, y_top),
|
||||
(w, h),
|
||||
COLOR_DARK_GRAY,
|
||||
-1,
|
||||
)
|
||||
_ = cv2.rectangle(
|
||||
image,
|
||||
(x_pos - 6, y_pos - text_height - 6),
|
||||
(x_pos + text_width + 8, y_pos + 6),
|
||||
COLOR_DARK_GRAY,
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
_ = cv2.putText(
|
||||
image,
|
||||
mode_text,
|
||||
(x_pos, y_pos),
|
||||
font,
|
||||
font_scale,
|
||||
COLOR_YELLOW,
|
||||
thickness,
|
||||
)
|
||||
|
||||
    def update(
        self,
        frame: ImageArray,
        bbox: BBoxXYXY | None,
        bbox_mask: BBoxXYXY | None,
        track_id: int,
        mask_raw: ImageArray | None,
        silhouette: NDArray[np.float32] | None,
        segmentation_input: NDArray[np.float32] | None,
        label: str | None,
        confidence: float | None,
        fps: float,
        pose_data: dict[str, object] | None = None,
    ) -> bool:
        """Update visualization with new frame data.

        Key bindings handled here: 'q' quits, 'r' toggles the raw-mask
        window, 'd' toggles the raw-mask debug overlay.

        Args:
            frame: Input frame (H, W, C) uint8
            bbox: Bounding box in XYXY format (x1, y1, x2, y2) or None
            bbox_mask: Box used to crop the raw mask in the raw-mask
                window, (x1, y1, x2, y2) or None
            track_id: Tracking ID
            mask_raw: Raw binary mask (H, W) uint8 or None
            silhouette: Normalized silhouette (64, 44) float32 [0,1] or None
            segmentation_input: Silhouette stack shown in the
                segmentation-input window, or None
            label: Classification label or None
            confidence: Classification confidence [0,1] or None
            fps: Current FPS
            pose_data: Pose data dictionary or None

        Returns:
            False if user requested quit (pressed 'q'), True otherwise
        """
        self._ensure_windows()

        # Prepare and show main window
        main_display = self._prepare_main_frame(
            frame, bbox, track_id, fps, label, confidence, pose_data
        )
        cv2.imshow(MAIN_WINDOW, main_display)

        # Prepare and show segmentation window
        seg_display = self._prepare_segmentation_view(mask_raw, silhouette, bbox)
        cv2.imshow(SEG_WINDOW, seg_display)

        # Raw-mask window is optional; created lazily when toggled on.
        if self.show_raw_window:
            self._ensure_raw_window()
            raw_display = self._prepare_raw_view(mask_raw, bbox_mask)
            cv2.imshow(RAW_WINDOW, raw_display)

        seg_input_display = self._prepare_segmentation_input_view(segmentation_input)
        cv2.imshow(WINDOW_SEG_INPUT, seg_input_display)

        # Handle keyboard input
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            return False
        elif key == ord("r"):
            self.show_raw_window = not self.show_raw_window
            if self.show_raw_window:
                self._ensure_raw_window()
                logger.debug("Raw mask window enabled")
            else:
                self._hide_raw_window()
                logger.debug("Raw mask window disabled")
        elif key == ord("d"):
            self.show_raw_debug = not self.show_raw_debug
            logger.debug(
                "Raw mask debug overlay %s",
                "enabled" if self.show_raw_debug else "disabled",
            )

        return True
|
||||
|
||||
def close(self) -> None:
|
||||
if self._windows_created:
|
||||
self._hide_raw_window()
|
||||
cv2.destroyAllWindows()
|
||||
self._windows_created = False
|
||||
self._raw_window_created = False
|
||||
Reference in New Issue
Block a user