fix(demo): stabilize visualizer bbox and mask rendering

Align bbox coordinate handling across primary and fallback paths, normalize Both-mode raw mask rendering, and tighten demo result typing to reduce runtime/display inconsistencies.
This commit is contained in:
2026-02-28 18:05:33 +08:00
parent 06a6cd1ccf
commit 7f073179d7
7 changed files with 416 additions and 73 deletions
+77 -27
View File
@@ -17,8 +17,8 @@ from numpy.typing import NDArray
from ultralytics.models.yolo.model import YOLO
from .input import FrameStream, create_source
from .output import ResultPublisher, create_publisher, create_result
from .preprocess import frame_to_person_mask, mask_to_silhouette
from .output import DemoResult, ResultPublisher, create_publisher, create_result
from .preprocess import BBoxXYXY, frame_to_person_mask, mask_to_silhouette
from .sconet_demo import ScoNetDemo
from .window import SilhouetteWindow, select_person
@@ -53,6 +53,7 @@ class _DetectionResultsLike(Protocol):
def masks(self) -> _MasksLike: ...
class _TrackCallable(Protocol):
def __call__(
self,
@@ -80,8 +81,9 @@ class ScoliosisPipeline:
_silhouette_visualize_dir: Path | None
_result_export_path: Path | None
_result_export_format: str
_result_buffer: list[dict[str, object]]
_result_buffer: list[DemoResult]
_visualizer: OpenCVVisualizer | None
_last_viz_payload: dict[str, object] | None
def __init__(
self,
@@ -135,6 +137,7 @@ class ScoliosisPipeline:
self._visualizer = OpenCVVisualizer()
else:
self._visualizer = None
self._last_viz_payload = None
@staticmethod
def _extract_int(meta: dict[str, object], key: str, fallback: int) -> int:
@@ -171,37 +174,59 @@ class ScoliosisPipeline:
tuple[
Float[ndarray, "64 44"],
UInt8[ndarray, "h w"],
tuple[int, int, int, int],
BBoxXYXY,
int,
]
| None
):
selected = select_person(result)
if selected is not None:
mask_raw, bbox, track_id = selected
mask_raw, bbox_mask, bbox_frame, track_id = selected
silhouette = cast(
Float[ndarray, "64 44"] | None,
mask_to_silhouette(self._to_mask_u8(mask_raw), bbox),
mask_to_silhouette(self._to_mask_u8(mask_raw), bbox_mask),
)
if silhouette is not None:
return silhouette, mask_raw, bbox, int(track_id)
return silhouette, mask_raw, bbox_frame, int(track_id)
fallback = cast(
tuple[UInt8[ndarray, "h w"], tuple[int, int, int, int]] | None,
tuple[UInt8[ndarray, "h w"], BBoxXYXY] | None,
frame_to_person_mask(result),
)
if fallback is None:
return None
mask_u8, bbox = fallback
mask_u8, bbox_mask = fallback
silhouette = cast(
Float[ndarray, "64 44"] | None,
mask_to_silhouette(mask_u8, bbox),
mask_to_silhouette(mask_u8, bbox_mask),
)
if silhouette is None:
return None
# Convert mask-space bbox to frame-space for visualization
# Use result.orig_shape to get frame dimensions safely
orig_shape = getattr(result, "orig_shape", None)
if orig_shape is not None and isinstance(orig_shape, (tuple, list)) and len(orig_shape) >= 2:
frame_h, frame_w = int(orig_shape[0]), int(orig_shape[1])
mask_h, mask_w = mask_u8.shape[0], mask_u8.shape[1]
if mask_w > 0 and mask_h > 0 and frame_w > 0 and frame_h > 0:
scale_x = frame_w / mask_w
scale_y = frame_h / mask_h
bbox_frame = (
int(bbox_mask[0] * scale_x),
int(bbox_mask[1] * scale_y),
int(bbox_mask[2] * scale_x),
int(bbox_mask[3] * scale_y),
)
else:
# Fallback: use mask-space bbox if dimensions invalid
bbox_frame = bbox_mask
else:
# Fallback: use mask-space bbox if orig_shape unavailable
bbox_frame = bbox_mask
# For fallback case, mask_raw is the same as mask_u8
return silhouette, mask_u8, bbox, 0
return silhouette, mask_u8, bbox_frame, 0
@jaxtyped(typechecker=beartype)
def process_frame(
@@ -342,23 +367,48 @@ class ScoliosisPipeline:
)
# Update visualizer if enabled
if self._visualizer is not None and viz_payload is not None:
# Cast viz_payload to dict for type checking
viz_dict = cast(dict[str, object], viz_payload)
mask_raw_obj = viz_dict.get("mask_raw")
bbox_obj = viz_dict.get("bbox")
silhouette_obj = viz_dict.get("silhouette")
track_id_val = viz_dict.get("track_id", 0)
track_id = track_id_val if isinstance(track_id_val, int) else 0
label_obj = viz_dict.get("label")
confidence_obj = viz_dict.get("confidence")
if self._visualizer is not None:
# Cache valid payload for no-detection frames
if viz_payload is not None:
# Cache a copy to prevent mutation of original data
viz_payload_dict = cast(dict[str, object], viz_payload)
cached: dict[str, object] = {}
for k, v in viz_payload_dict.items():
copy_method = cast(Callable[[], object] | None, getattr(v, "copy", None))
if copy_method is not None:
cached[k] = copy_method()
else:
cached[k] = v
self._last_viz_payload = cached
# Use cached payload if current is None
viz_data = viz_payload if viz_payload is not None else self._last_viz_payload
if viz_data is not None:
# Cast viz_data (current or cached payload) to dict for type checking
viz_dict = cast(dict[str, object], viz_data)
mask_raw_obj = viz_dict.get("mask_raw")
bbox_obj = viz_dict.get("bbox")
silhouette_obj = viz_dict.get("silhouette")
track_id_val = viz_dict.get("track_id", 0)
track_id = track_id_val if isinstance(track_id_val, int) else 0
label_obj = viz_dict.get("label")
confidence_obj = viz_dict.get("confidence")
# Cast extracted values to expected types
mask_raw = cast(NDArray[np.uint8] | None, mask_raw_obj)
bbox = cast(tuple[int, int, int, int] | None, bbox_obj)
silhouette = cast(NDArray[np.float32] | None, silhouette_obj)
label = cast(str | None, label_obj)
confidence = cast(float | None, confidence_obj)
# Cast extracted values to expected types
mask_raw = cast(NDArray[np.uint8] | None, mask_raw_obj)
bbox = cast(BBoxXYXY | None, bbox_obj)
silhouette = cast(NDArray[np.float32] | None, silhouette_obj)
label = cast(str | None, label_obj)
confidence = cast(float | None, confidence_obj)
else:
# No detection and no cache - use default values
mask_raw = None
bbox = None
track_id = 0
silhouette = None
label = None
confidence = None
keep_running = self._visualizer.update(
frame_u8,