fix(demo): stabilize visualizer bbox and mask rendering
Align bbox coordinate handling across primary and fallback paths, normalize Both-mode raw mask rendering, and tighten demo result typing to reduce runtime/display inconsistencies.
This commit is contained in:
+22
-10
@@ -12,10 +12,11 @@ import torch
|
||||
from jaxtyping import Float
|
||||
from numpy import ndarray
|
||||
|
||||
from .preprocess import BBoxXYXY
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from numpy.typing import NDArray
|
||||
|
||||
|
||||
# Silhouette dimensions from preprocess.py
|
||||
SIL_HEIGHT: int = 64
|
||||
SIL_WIDTH: int = 44
|
||||
@@ -239,19 +240,23 @@ def _to_numpy(obj: _ArrayLike) -> ndarray:
|
||||
|
||||
def select_person(
|
||||
results: _DetectionResults,
|
||||
) -> tuple[ndarray, tuple[int, int, int, int], int] | None:
|
||||
) -> tuple[ndarray, BBoxXYXY, BBoxXYXY, int] | None:
|
||||
"""Select the person with the largest bounding box from detection results.
|
||||
|
||||
Args:
|
||||
results: Detection results object with boxes and masks attributes.
|
||||
Expected to have:
|
||||
- boxes.xyxy: array of bounding boxes [N, 4]
|
||||
- masks.data: array of masks [N, H, W]
|
||||
- boxes.xyxy: array of bounding boxes [N, 4] in frame coordinates (XYXY format)
|
||||
- masks.data: array of masks [N, H, W] in mask coordinates
|
||||
- boxes.id: optional track IDs [N]
|
||||
|
||||
Returns:
|
||||
Tuple of (mask, bbox, track_id) for the largest person,
|
||||
Tuple of (mask, bbox_mask, bbox_frame, track_id) for the largest person,
|
||||
or None if there are no valid detections or track IDs are unavailable.
|
||||
- mask: the person's segmentation mask
|
||||
- bbox_mask: bounding box in mask coordinate space (XYXY format: x1, y1, x2, y2)
|
||||
- bbox_frame: bounding box in frame coordinate space (XYXY format: x1, y1, x2, y2)
|
||||
- track_id: the person's track ID
|
||||
"""
|
||||
# Check for track IDs
|
||||
boxes_obj: _Boxes | object = getattr(results, "boxes", None)
|
||||
@@ -329,20 +334,27 @@ def select_person(
|
||||
# Scale bbox from frame space to mask space
|
||||
scale_x = mask_w / frame_w if frame_w > 0 else 1.0
|
||||
scale_y = mask_h / frame_h if frame_h > 0 else 1.0
|
||||
bbox = (
|
||||
bbox_mask = (
|
||||
int(float(bboxes[best_idx][0]) * scale_x),
|
||||
int(float(bboxes[best_idx][1]) * scale_y),
|
||||
int(float(bboxes[best_idx][2]) * scale_x),
|
||||
int(float(bboxes[best_idx][3]) * scale_y),
|
||||
)
|
||||
else:
|
||||
# Fallback: use bbox as-is (assume same coordinate space)
|
||||
bbox = (
|
||||
bbox_frame = (
|
||||
int(float(bboxes[best_idx][0])),
|
||||
int(float(bboxes[best_idx][1])),
|
||||
int(float(bboxes[best_idx][2])),
|
||||
int(float(bboxes[best_idx][3])),
|
||||
)
|
||||
else:
|
||||
# Fallback: use bbox as-is for both (assume same coordinate space)
|
||||
bbox_mask = (
|
||||
int(float(bboxes[best_idx][0])),
|
||||
int(float(bboxes[best_idx][1])),
|
||||
int(float(bboxes[best_idx][2])),
|
||||
int(float(bboxes[best_idx][3])),
|
||||
)
|
||||
bbox_frame = bbox_mask
|
||||
track_id = int(track_ids[best_idx]) if best_idx < len(track_ids) else best_idx
|
||||
|
||||
return mask, bbox, track_id
|
||||
return mask, bbox_mask, bbox_frame, track_id
|
||||
|
||||
Reference in New Issue
Block a user