fix(demo): stabilize visualizer bbox and mask rendering
Align bbox coordinate handling across primary and fallback paths, normalize Both-mode raw mask rendering, and tighten demo result typing to reduce runtime/display inconsistencies.
This commit is contained in:
+77
-27
@@ -17,8 +17,8 @@ from numpy.typing import NDArray
|
||||
from ultralytics.models.yolo.model import YOLO
|
||||
|
||||
from .input import FrameStream, create_source
|
||||
from .output import ResultPublisher, create_publisher, create_result
|
||||
from .preprocess import frame_to_person_mask, mask_to_silhouette
|
||||
from .output import DemoResult, ResultPublisher, create_publisher, create_result
|
||||
from .preprocess import BBoxXYXY, frame_to_person_mask, mask_to_silhouette
|
||||
from .sconet_demo import ScoNetDemo
|
||||
from .window import SilhouetteWindow, select_person
|
||||
|
||||
@@ -53,6 +53,7 @@ class _DetectionResultsLike(Protocol):
|
||||
def masks(self) -> _MasksLike: ...
|
||||
|
||||
|
||||
|
||||
class _TrackCallable(Protocol):
|
||||
def __call__(
|
||||
self,
|
||||
@@ -80,8 +81,9 @@ class ScoliosisPipeline:
|
||||
_silhouette_visualize_dir: Path | None
|
||||
_result_export_path: Path | None
|
||||
_result_export_format: str
|
||||
_result_buffer: list[dict[str, object]]
|
||||
_result_buffer: list[DemoResult]
|
||||
_visualizer: OpenCVVisualizer | None
|
||||
_last_viz_payload: dict[str, object] | None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -135,6 +137,7 @@ class ScoliosisPipeline:
|
||||
self._visualizer = OpenCVVisualizer()
|
||||
else:
|
||||
self._visualizer = None
|
||||
self._last_viz_payload = None
|
||||
|
||||
@staticmethod
|
||||
def _extract_int(meta: dict[str, object], key: str, fallback: int) -> int:
|
||||
@@ -171,37 +174,59 @@ class ScoliosisPipeline:
|
||||
tuple[
|
||||
Float[ndarray, "64 44"],
|
||||
UInt8[ndarray, "h w"],
|
||||
tuple[int, int, int, int],
|
||||
BBoxXYXY,
|
||||
int,
|
||||
]
|
||||
| None
|
||||
):
|
||||
selected = select_person(result)
|
||||
if selected is not None:
|
||||
mask_raw, bbox, track_id = selected
|
||||
mask_raw, bbox_mask, bbox_frame, track_id = selected
|
||||
silhouette = cast(
|
||||
Float[ndarray, "64 44"] | None,
|
||||
mask_to_silhouette(self._to_mask_u8(mask_raw), bbox),
|
||||
mask_to_silhouette(self._to_mask_u8(mask_raw), bbox_mask),
|
||||
)
|
||||
if silhouette is not None:
|
||||
return silhouette, mask_raw, bbox, int(track_id)
|
||||
return silhouette, mask_raw, bbox_frame, int(track_id)
|
||||
|
||||
fallback = cast(
|
||||
tuple[UInt8[ndarray, "h w"], tuple[int, int, int, int]] | None,
|
||||
tuple[UInt8[ndarray, "h w"], BBoxXYXY] | None,
|
||||
frame_to_person_mask(result),
|
||||
)
|
||||
if fallback is None:
|
||||
return None
|
||||
|
||||
mask_u8, bbox = fallback
|
||||
mask_u8, bbox_mask = fallback
|
||||
silhouette = cast(
|
||||
Float[ndarray, "64 44"] | None,
|
||||
mask_to_silhouette(mask_u8, bbox),
|
||||
mask_to_silhouette(mask_u8, bbox_mask),
|
||||
)
|
||||
if silhouette is None:
|
||||
return None
|
||||
# Convert mask-space bbox to frame-space for visualization
|
||||
# Use result.orig_shape to get frame dimensions safely
|
||||
orig_shape = getattr(result, "orig_shape", None)
|
||||
if orig_shape is not None and isinstance(orig_shape, (tuple, list)) and len(orig_shape) >= 2:
|
||||
frame_h, frame_w = int(orig_shape[0]), int(orig_shape[1])
|
||||
mask_h, mask_w = mask_u8.shape[0], mask_u8.shape[1]
|
||||
if mask_w > 0 and mask_h > 0 and frame_w > 0 and frame_h > 0:
|
||||
scale_x = frame_w / mask_w
|
||||
scale_y = frame_h / mask_h
|
||||
bbox_frame = (
|
||||
int(bbox_mask[0] * scale_x),
|
||||
int(bbox_mask[1] * scale_y),
|
||||
int(bbox_mask[2] * scale_x),
|
||||
int(bbox_mask[3] * scale_y),
|
||||
)
|
||||
else:
|
||||
# Fallback: use mask-space bbox if dimensions invalid
|
||||
bbox_frame = bbox_mask
|
||||
else:
|
||||
# Fallback: use mask-space bbox if orig_shape unavailable
|
||||
bbox_frame = bbox_mask
|
||||
# For fallback case, mask_raw is the same as mask_u8
|
||||
return silhouette, mask_u8, bbox, 0
|
||||
return silhouette, mask_u8, bbox_frame, 0
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
|
||||
def process_frame(
|
||||
@@ -342,23 +367,48 @@ class ScoliosisPipeline:
|
||||
)
|
||||
|
||||
# Update visualizer if enabled
|
||||
if self._visualizer is not None and viz_payload is not None:
|
||||
# Cast viz_payload to dict for type checking
|
||||
viz_dict = cast(dict[str, object], viz_payload)
|
||||
mask_raw_obj = viz_dict.get("mask_raw")
|
||||
bbox_obj = viz_dict.get("bbox")
|
||||
silhouette_obj = viz_dict.get("silhouette")
|
||||
track_id_val = viz_dict.get("track_id", 0)
|
||||
track_id = track_id_val if isinstance(track_id_val, int) else 0
|
||||
label_obj = viz_dict.get("label")
|
||||
confidence_obj = viz_dict.get("confidence")
|
||||
if self._visualizer is not None:
|
||||
# Cache valid payload for no-detection frames
|
||||
if viz_payload is not None:
|
||||
# Cache a copy to prevent mutation of original data
|
||||
viz_payload_dict = cast(dict[str, object], viz_payload)
|
||||
cached: dict[str, object] = {}
|
||||
for k, v in viz_payload_dict.items():
|
||||
copy_method = cast(Callable[[], object] | None, getattr(v, "copy", None))
|
||||
if copy_method is not None:
|
||||
cached[k] = copy_method()
|
||||
else:
|
||||
cached[k] = v
|
||||
self._last_viz_payload = cached
|
||||
|
||||
# Use cached payload if current is None
|
||||
viz_data = viz_payload if viz_payload is not None else self._last_viz_payload
|
||||
|
||||
if viz_data is not None:
|
||||
# Cast viz_data (current or cached payload) to dict for type checking
|
||||
viz_dict = cast(dict[str, object], viz_data)
|
||||
mask_raw_obj = viz_dict.get("mask_raw")
|
||||
bbox_obj = viz_dict.get("bbox")
|
||||
silhouette_obj = viz_dict.get("silhouette")
|
||||
track_id_val = viz_dict.get("track_id", 0)
|
||||
track_id = track_id_val if isinstance(track_id_val, int) else 0
|
||||
label_obj = viz_dict.get("label")
|
||||
confidence_obj = viz_dict.get("confidence")
|
||||
|
||||
# Cast extracted values to expected types
|
||||
mask_raw = cast(NDArray[np.uint8] | None, mask_raw_obj)
|
||||
bbox = cast(tuple[int, int, int, int] | None, bbox_obj)
|
||||
silhouette = cast(NDArray[np.float32] | None, silhouette_obj)
|
||||
label = cast(str | None, label_obj)
|
||||
confidence = cast(float | None, confidence_obj)
|
||||
# Cast extracted values to expected types
|
||||
mask_raw = cast(NDArray[np.uint8] | None, mask_raw_obj)
|
||||
bbox = cast(BBoxXYXY | None, bbox_obj)
|
||||
silhouette = cast(NDArray[np.float32] | None, silhouette_obj)
|
||||
label = cast(str | None, label_obj)
|
||||
confidence = cast(float | None, confidence_obj)
|
||||
else:
|
||||
# No detection and no cache - use default values
|
||||
mask_raw = None
|
||||
bbox = None
|
||||
track_id = 0
|
||||
silhouette = None
|
||||
label = None
|
||||
confidence = None
|
||||
|
||||
keep_running = self._visualizer.update(
|
||||
frame_u8,
|
||||
|
||||
Reference in New Issue
Block a user