feat(demo): implement ScoNet real-time pipeline runtime
Add the full demo runtime stack for single-person scoliosis inference, including input adapters, silhouette preprocessing, temporal windowing, ScoNet wrapper, result publishing, and click-based CLI orchestration. This commit captures the executable pipeline behavior independently from tests and planning artifacts for clearer review and rollback.
This commit is contained in:
@@ -0,0 +1,7 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .pipeline import main
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: delegate to the click-based CLI defined in .pipeline.
if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,203 @@
|
|||||||
|
"""
|
||||||
|
Input adapters for OpenGait demo.
|
||||||
|
|
||||||
|
Provides generator-based interfaces for video sources:
|
||||||
|
- OpenCV (video files, cameras)
|
||||||
|
- cv-mmap (shared memory streams)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from collections.abc import AsyncIterator, Generator, Iterable
|
||||||
|
from typing import TYPE_CHECKING, Protocol, cast
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Type alias for a frame stream: an iterable of (frame_array, metadata_dict)
# pairs. Both sources in this module populate the metadata keys
# "frame_count", "timestamp_ns", and "source".
FrameStream = Iterable[tuple[np.ndarray, dict[str, object]]]
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
    # Structural types for cv-mmap objects, declared only for type checking so
    # this module imports cleanly when cv-mmap is not installed.

    # Protocol for cv-mmap metadata to avoid direct import
    class _FrameMetadata(Protocol):
        # Frame index — presumably assigned by the cv-mmap producer; confirm.
        frame_count: int
        # Capture timestamp in nanoseconds, as provided by cv-mmap.
        timestamp_ns: int

    # Protocol for cv-mmap client: an async iterable of (frame, metadata).
    class _CvMmapClient(Protocol):
        def __aiter__(self) -> AsyncIterator[tuple[np.ndarray, _FrameMetadata]]: ...
|
||||||
|
|
||||||
|
|
||||||
|
def opencv_source(
|
||||||
|
path: str | int, max_frames: int | None = None
|
||||||
|
) -> Generator[tuple[np.ndarray, dict[str, object]], None, None]:
|
||||||
|
"""
|
||||||
|
Generator that yields frames from an OpenCV video source.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path : str | int
|
||||||
|
Video file path or camera index (e.g., 0 for default camera)
|
||||||
|
max_frames : int | None, optional
|
||||||
|
Maximum number of frames to yield. None means unlimited.
|
||||||
|
|
||||||
|
Yields
|
||||||
|
------
|
||||||
|
tuple[np.ndarray, dict[str, object]]
|
||||||
|
(frame_array, metadata_dict) where metadata includes:
|
||||||
|
- frame_count: frame index (0-based)
|
||||||
|
- timestamp_ns: monotonic timestamp in nanoseconds (if available)
|
||||||
|
- source: the path/int provided
|
||||||
|
"""
|
||||||
|
import time
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
cap = cv2.VideoCapture(path)
|
||||||
|
if not cap.isOpened():
|
||||||
|
raise RuntimeError(f"Failed to open video source: {path}")
|
||||||
|
|
||||||
|
frame_idx = 0
|
||||||
|
try:
|
||||||
|
while max_frames is None or frame_idx < max_frames:
|
||||||
|
ret, frame = cap.read()
|
||||||
|
if not ret:
|
||||||
|
# End of stream
|
||||||
|
break
|
||||||
|
|
||||||
|
# Get timestamp if available (some backends support this)
|
||||||
|
timestamp_ns = time.monotonic_ns()
|
||||||
|
|
||||||
|
metadata: dict[str, object] = {
|
||||||
|
"frame_count": frame_idx,
|
||||||
|
"timestamp_ns": timestamp_ns,
|
||||||
|
"source": path,
|
||||||
|
}
|
||||||
|
|
||||||
|
yield frame, metadata
|
||||||
|
frame_idx += 1
|
||||||
|
|
||||||
|
finally:
|
||||||
|
cap.release()
|
||||||
|
logger.debug(f"OpenCV source closed: {path}")
|
||||||
|
|
||||||
|
|
||||||
|
def cvmmap_source(
|
||||||
|
name: str, max_frames: int | None = None
|
||||||
|
) -> Generator[tuple[np.ndarray, dict[str, object]], None, None]:
|
||||||
|
"""
|
||||||
|
Generator that yields frames from a cv-mmap shared memory stream.
|
||||||
|
|
||||||
|
Bridges async cv-mmap client to synchronous generator using asyncio.run().
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name : str
|
||||||
|
Base name of the cv-mmap source (e.g., "default")
|
||||||
|
max_frames : int | None, optional
|
||||||
|
Maximum number of frames to yield. None means unlimited.
|
||||||
|
|
||||||
|
Yields
|
||||||
|
------
|
||||||
|
tuple[np.ndarray, dict[str, object]]
|
||||||
|
(frame_array, metadata_dict) where metadata includes:
|
||||||
|
- frame_count: frame index from cv-mmap
|
||||||
|
- timestamp_ns: timestamp in nanoseconds from cv-mmap
|
||||||
|
- source: the cv-mmap name
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ImportError
|
||||||
|
If cvmmap package is not available
|
||||||
|
RuntimeError
|
||||||
|
If cv-mmap stream disconnects or errors
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
# Import cvmmap only when function is called
|
||||||
|
# Use try/except for runtime import check
|
||||||
|
try:
|
||||||
|
|
||||||
|
from cvmmap import CvMmapClient as _CvMmapClientReal # pyright: ignore[reportMissingTypeStubs]
|
||||||
|
except ImportError as e:
|
||||||
|
raise ImportError(
|
||||||
|
"cvmmap package is required for cv-mmap sources. "
|
||||||
|
+ "Install from: https://github.com/crosstyan/cv-mmap"
|
||||||
|
) from e
|
||||||
|
|
||||||
|
# Cast to protocol type for type checking
|
||||||
|
client: _CvMmapClient = cast("_CvMmapClient", _CvMmapClientReal(name))
|
||||||
|
frame_count = 0
|
||||||
|
|
||||||
|
async def _async_generator() -> AsyncIterator[tuple[np.ndarray, _FrameMetadata]]:
|
||||||
|
"""Async generator wrapper."""
|
||||||
|
async for frame, meta in client:
|
||||||
|
yield frame, meta
|
||||||
|
|
||||||
|
# Bridge async to sync using asyncio.run()
|
||||||
|
# We process frames one at a time to keep it simple and robust
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
|
||||||
|
try:
|
||||||
|
agen = _async_generator().__aiter__()
|
||||||
|
|
||||||
|
while max_frames is None or frame_count < max_frames:
|
||||||
|
try:
|
||||||
|
frame, meta = loop.run_until_complete(agen.__anext__())
|
||||||
|
except StopAsyncIteration:
|
||||||
|
break
|
||||||
|
|
||||||
|
metadata: dict[str, object] = {
|
||||||
|
"frame_count": meta.frame_count,
|
||||||
|
"timestamp_ns": meta.timestamp_ns,
|
||||||
|
"source": f"cvmmap://{name}",
|
||||||
|
}
|
||||||
|
|
||||||
|
yield frame, metadata
|
||||||
|
frame_count += 1
|
||||||
|
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
logger.debug(f"cv-mmap source closed: {name}")
|
||||||
|
|
||||||
|
|
||||||
|
def create_source(source: str, max_frames: int | None = None) -> FrameStream:
    """
    Build a (frame, metadata) stream from a textual source specification.

    Parameters
    ----------
    source : str
        Source specification, one of:
        - a decimal string such as '0' or '1' -> camera index (OpenCV)
        - 'cvmmap://<name>' -> cv-mmap shared memory stream
        - any other string -> video file path (OpenCV)
    max_frames : int | None, optional
        Maximum number of frames to yield. None means unlimited.

    Returns
    -------
    FrameStream
        Generator yielding (frame, metadata) tuples.
    """
    cvmmap_prefix = "cvmmap://"
    # Shared-memory streams are routed to the cv-mmap adapter.
    if source.startswith(cvmmap_prefix):
        return cvmmap_source(source.removeprefix(cvmmap_prefix), max_frames)
    # A purely numeric string selects a camera index.
    if source.isdigit():
        return opencv_source(int(source), max_frames)
    # Everything else is treated as a video file path.
    return opencv_source(source, max_frames)
|
||||||
@@ -0,0 +1,368 @@
|
|||||||
|
"""
|
||||||
|
Output publishers for OpenGait demo results.
|
||||||
|
|
||||||
|
Provides pluggable result publishing:
|
||||||
|
- ConsolePublisher: JSONL to stdout
|
||||||
|
- NatsPublisher: NATS message broker integration
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import TYPE_CHECKING, Protocol, TextIO, cast, runtime_checkable
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
class ResultPublisher(Protocol):
    """Protocol for result publishers.

    Runtime-checkable so publishers can be validated with isinstance();
    satisfied structurally by ConsolePublisher and NatsPublisher below.
    """

    def publish(self, result: dict[str, object]) -> None:
        """
        Publish a result dictionary.

        Parameters
        ----------
        result : dict[str, object]
            Result data with keys: frame, track_id, label, confidence, window, timestamp_ns
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class ConsolePublisher:
    """Publisher that writes each result as one JSON line (JSONL) to a text stream."""

    # Destination stream; bound once at construction time.
    _output: TextIO

    def __init__(self, output: TextIO = sys.stdout) -> None:
        """
        Initialize console publisher.

        Parameters
        ----------
        output : TextIO
            File-like object to write to (default: sys.stdout)
        """
        self._output = output

    def publish(self, result: dict[str, object]) -> None:
        """
        Serialize *result* to a single JSON line and flush it.

        Non-JSON-serializable values are stringified via ``default=str``.
        Failures are logged, never raised, so publishing stays best-effort.

        Parameters
        ----------
        result : dict[str, object]
            Result data with keys: frame, track_id, label, confidence, window, timestamp_ns
        """
        try:
            encoded = json.dumps(result, ensure_ascii=False, default=str)
            _ = self._output.write(encoded + "\n")
            self._output.flush()
        except Exception as e:
            logger.warning(f"Failed to publish to console: {e}")

    def close(self) -> None:
        """Close the publisher (nothing to release for a console sink)."""

    def __enter__(self) -> ConsolePublisher:
        """Enter the context manager, returning self."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Leave the context manager, closing the publisher."""
        self.close()
|
||||||
|
|
||||||
|
|
||||||
|
class _NatsClient(Protocol):
    """Protocol for connected NATS client.

    Mirrors the subset of the nats-py client API used by NatsPublisher
    (publish/flush/close) so this module type-checks without importing nats-py.
    """

    async def publish(self, subject: str, payload: bytes) -> object: ...

    async def close(self) -> object: ...

    async def flush(self) -> object: ...
|
||||||
|
|
||||||
|
|
||||||
|
class NatsPublisher:
    """
    Publisher that sends results to NATS message broker.

    This is a sync-friendly wrapper around the async nats-py client.
    Uses a background thread with dedicated event loop to bridge sync
    publish calls to async NATS operations, making it safe to use in
    both sync and async contexts. All failures degrade gracefully to
    warnings so the pipeline never crashes on broker trouble.
    """

    # NATS server URL, e.g. "nats://localhost:4222".
    _nats_url: str
    # Subject every result is published to.
    _subject: str
    # Connected client, or None before the first successful connect.
    _nc: _NatsClient | None
    # True while the cached client is believed to be live.
    _connected: bool
    # Private loop driven by _thread; all async calls are marshalled onto it.
    _loop: asyncio.AbstractEventLoop | None
    _thread: threading.Thread | None
    # BUGFIX: re-entrant lock. _ensure_connected() and close() hold the lock
    # while calling _start_background_loop()/_stop_background_loop(), which
    # acquire it again; a plain threading.Lock deadlocks on that nested
    # acquire the first time publish() runs.
    _lock: threading.RLock

    def __init__(self, nats_url: str, subject: str = "scoliosis.result") -> None:
        """
        Initialize NATS publisher.

        Parameters
        ----------
        nats_url : str
            NATS server URL (e.g., "nats://localhost:4222")
        subject : str
            NATS subject to publish to (default: "scoliosis.result")
        """
        self._nats_url = nats_url
        self._subject = subject
        self._nc = None
        self._connected = False
        self._loop = None
        self._thread = None
        self._lock = threading.RLock()

    def _start_background_loop(self) -> bool:
        """
        Start background thread with event loop for async operations.

        Idempotent: returns immediately when the loop is already running.

        Returns
        -------
        bool
            True if loop is running, False otherwise
        """
        with self._lock:
            if self._loop is not None and self._loop.is_running():
                return True

            try:
                loop = asyncio.new_event_loop()
                self._loop = loop
                started = threading.Event()

                def run_loop() -> None:
                    asyncio.set_event_loop(loop)
                    # Signal readiness from inside the running loop so callers
                    # can rely on loop.is_running() afterwards.
                    loop.call_soon(started.set)
                    loop.run_forever()

                self._thread = threading.Thread(target=run_loop, daemon=True)
                self._thread.start()
                # BUGFIX: wait until run_forever() is actually executing.
                # Previously this returned before the loop started, so a
                # racing second call observed is_running() == False and
                # spawned a duplicate loop/thread.
                return started.wait(timeout=5.0)
            except Exception as e:
                logger.warning(f"Failed to start background event loop: {e}")
                return False

    def _stop_background_loop(self) -> None:
        """Stop the background event loop, join its thread, and release it."""
        with self._lock:
            if self._loop is not None and self._loop.is_running():
                _ = self._loop.call_soon_threadsafe(self._loop.stop)
            if self._thread is not None and self._thread.is_alive():
                self._thread.join(timeout=2.0)
            # BUGFIX: close the loop once its thread has exited so selector
            # resources are released (previously the loop object leaked).
            if self._loop is not None and not self._loop.is_running():
                self._loop.close()
            self._loop = None
            self._thread = None

    def _ensure_connected(self) -> bool:
        """
        Ensure connection to NATS server, connecting lazily on first use.

        Returns
        -------
        bool
            True if connected, False otherwise
        """
        with self._lock:
            if self._connected and self._nc is not None:
                return True

            # Safe under the (re-entrant) lock — see _lock comment above.
            if not self._start_background_loop():
                return False

            try:
                import nats

                async def _connect() -> _NatsClient:
                    nc = await nats.connect(self._nats_url)  # pyright: ignore[reportUnknownMemberType]
                    return cast("_NatsClient", nc)

                # Run connection in background loop
                future = asyncio.run_coroutine_threadsafe(
                    _connect(),
                    self._loop,  # pyright: ignore[reportArgumentType]
                )
                self._nc = future.result(timeout=10.0)
                self._connected = True
                logger.info(f"Connected to NATS at {self._nats_url}")
                return True
            except ImportError:
                logger.warning(
                    "nats-py package not installed. Install with: pip install nats-py"
                )
                return False
            except Exception as e:
                logger.warning(f"Failed to connect to NATS at {self._nats_url}: {e}")
                return False

    def publish(self, result: dict[str, object]) -> None:
        """
        Publish result to NATS subject as UTF-8 encoded JSON.

        Drops the result (with a debug log) when NATS is unreachable, and
        marks the connection for lazy reconnection after any publish error.

        Parameters
        ----------
        result : dict[str, object]
            Result data with keys: frame, track_id, label, confidence, window, timestamp_ns
        """
        if not self._ensure_connected():
            # Graceful degradation: log but don't crash the pipeline.
            logger.debug(
                f"NATS unavailable, dropping result: {result.get('track_id', 'unknown')}"
            )
            return

        try:

            async def _publish() -> None:
                if self._nc is not None:
                    payload = json.dumps(
                        result, ensure_ascii=False, default=str
                    ).encode("utf-8")
                    _ = await self._nc.publish(self._subject, payload)
                    _ = await self._nc.flush()

            # Run publish in background loop and wait for completion.
            future = asyncio.run_coroutine_threadsafe(
                _publish(),
                self._loop,  # pyright: ignore[reportArgumentType]
            )
            future.result(timeout=5.0)
        except Exception as e:
            logger.warning(f"Failed to publish to NATS: {e}")
            self._connected = False  # Mark for reconnection on next publish

    def close(self) -> None:
        """Close the NATS connection (if any) and stop the background loop."""
        with self._lock:
            if self._nc is not None and self._connected and self._loop is not None:
                try:

                    async def _close() -> None:
                        if self._nc is not None:
                            _ = await self._nc.close()

                    future = asyncio.run_coroutine_threadsafe(
                        _close(),
                        self._loop,
                    )
                    future.result(timeout=5.0)
                except Exception as e:
                    logger.debug(f"Error closing NATS connection: {e}")
                finally:
                    self._nc = None
                    self._connected = False

            # Safe under the re-entrant lock — see _lock comment above.
            self._stop_background_loop()

    def __enter__(self) -> NatsPublisher:
        """Context manager entry."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Context manager exit; closes the connection."""
        self.close()
|
||||||
|
|
||||||
|
|
||||||
|
def create_publisher(
    nats_url: str | None,
    subject: str = "scoliosis.result",
) -> ResultPublisher:
    """
    Factory that selects the appropriate result publisher.

    Parameters
    ----------
    nats_url : str | None
        NATS server URL. If None or empty, a ConsolePublisher is returned.
    subject : str
        NATS subject to publish to (default: "scoliosis.result")

    Returns
    -------
    ResultPublisher
        NatsPublisher when a NATS URL is given, otherwise ConsolePublisher.

    Examples
    --------
    >>> pub = create_publisher(None)  # JSONL to stdout
    >>> pub = create_publisher("nats://localhost:4222")  # NATS broker
    >>> with create_publisher("nats://localhost:4222") as pub:
    ...     pub.publish(result)
    """
    # Falsy URL (None or "") selects the console fallback.
    if not nats_url:
        return ConsolePublisher()
    return NatsPublisher(nats_url, subject)
|
||||||
|
|
||||||
|
|
||||||
|
def create_result(
|
||||||
|
frame: int,
|
||||||
|
track_id: int,
|
||||||
|
label: str,
|
||||||
|
confidence: float,
|
||||||
|
window: int | tuple[int, int],
|
||||||
|
timestamp_ns: int | None = None,
|
||||||
|
) -> dict[str, object]:
|
||||||
|
"""
|
||||||
|
Create a standardized result dictionary.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
frame : int
|
||||||
|
Frame number
|
||||||
|
track_id : int
|
||||||
|
Track/person identifier
|
||||||
|
label : str
|
||||||
|
Classification label (e.g., "normal", "scoliosis")
|
||||||
|
confidence : float
|
||||||
|
Confidence score (0.0 to 1.0)
|
||||||
|
window : int | tuple[int, int]
|
||||||
|
Frame window as int (end frame) or tuple [start, end] that produced this result
|
||||||
|
Frame window [start, end] that produced this result
|
||||||
|
timestamp_ns : int | None
|
||||||
|
Timestamp in nanoseconds. If None, uses current time.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dict[str, object]
|
||||||
|
Standardized result dictionary
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"frame": frame,
|
||||||
|
"track_id": track_id,
|
||||||
|
"label": label,
|
||||||
|
"confidence": confidence,
|
||||||
|
"window": window if isinstance(window, int) else window[1],
|
||||||
|
"timestamp_ns": timestamp_ns
|
||||||
|
if timestamp_ns is not None
|
||||||
|
else time.monotonic_ns(),
|
||||||
|
}
|
||||||
@@ -0,0 +1,325 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
from contextlib import suppress
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
import time
|
||||||
|
from typing import Protocol, cast
|
||||||
|
|
||||||
|
from beartype import beartype
|
||||||
|
import click
|
||||||
|
import jaxtyping
|
||||||
|
from jaxtyping import Float, UInt8
|
||||||
|
import numpy as np
|
||||||
|
from numpy import ndarray
|
||||||
|
from numpy.typing import NDArray
|
||||||
|
from ultralytics.models.yolo.model import YOLO
|
||||||
|
|
||||||
|
from .input import FrameStream, create_source
|
||||||
|
from .output import ResultPublisher, create_publisher, create_result
|
||||||
|
from .preprocess import frame_to_person_mask, mask_to_silhouette
|
||||||
|
from .sconet_demo import ScoNetDemo
|
||||||
|
from .window import SilhouetteWindow, select_person
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# jaxtyping.jaxtyped is opaque to the type checker; cast it to an explicit
# decorator-factory alias so @jaxtyped(typechecker=beartype) type-checks.
JaxtypedDecorator = Callable[[Callable[..., object]], Callable[..., object]]
JaxtypedFactory = Callable[..., JaxtypedDecorator]
jaxtyped = cast(JaxtypedFactory, jaxtyping.jaxtyped)
|
||||||
|
|
||||||
|
|
||||||
|
class _BoxesLike(Protocol):
    """Structural type for the ``boxes`` attribute of a YOLO result.

    ``xyxy`` holds per-detection corner coordinates; ``id`` holds tracker IDs
    and may be None when tracking has not assigned any.
    """

    @property
    def xyxy(self) -> NDArray[np.float32] | object: ...

    @property
    def id(self) -> NDArray[np.int64] | object | None: ...
|
||||||
|
|
||||||
|
|
||||||
|
class _MasksLike(Protocol):
    """Structural type for the ``masks`` attribute of a YOLO result."""

    @property
    def data(self) -> NDArray[np.float32] | object: ...
|
||||||
|
|
||||||
|
|
||||||
|
class _DetectionResultsLike(Protocol):
    """Structural type for one ultralytics detection result.

    Declared here so the pipeline avoids depending on ultralytics' untyped
    result classes directly.
    """

    @property
    def boxes(self) -> _BoxesLike: ...

    @property
    def masks(self) -> _MasksLike: ...
|
||||||
|
|
||||||
|
|
||||||
|
class _TrackCallable(Protocol):
    """Structural type for YOLO.track() as invoked by this pipeline."""

    def __call__(
        self,
        source: object,
        *,
        persist: bool = True,
        verbose: bool = False,
        device: str | None = None,
        classes: list[int] | None = None,
    ) -> object: ...
|
||||||
|
|
||||||
|
|
||||||
|
class ScoliosisPipeline:
    """End-to-end single-person scoliosis pipeline.

    Per frame: YOLO segmentation + tracking -> person silhouette extraction ->
    fixed-size temporal window -> ScoNet classification -> result publishing.
    Construction is eager: the YOLO model and ScoNet checkpoint are loaded in
    ``__init__``.
    """

    # YOLO segmentation model; kept as object because ultralytics is untyped.
    _detector: object
    # Iterable of (frame, metadata) pairs from input.create_source().
    _source: FrameStream
    # Temporal buffer of silhouettes that decides when to classify.
    _window: SilhouetteWindow
    # Result sink (console JSONL or NATS).
    _publisher: ResultPublisher
    # ScoNet wrapper performing the actual classification.
    _classifier: ScoNetDemo
    # Torch/ultralytics device string, e.g. "cuda:0" or "cpu".
    _device: str
    # Guards close() against double invocation.
    _closed: bool

    def __init__(
        self,
        *,
        source: str,
        checkpoint: str,
        config: str,
        device: str,
        yolo_model: str,
        window: int,
        stride: int,
        nats_url: str | None,
        nats_subject: str,
        max_frames: int | None,
    ) -> None:
        """Build all pipeline stages; see module CLI options for parameters."""
        self._detector = YOLO(yolo_model)
        self._source = create_source(source, max_frames=max_frames)
        self._window = SilhouetteWindow(window_size=window, stride=stride)
        self._publisher = create_publisher(nats_url=nats_url, subject=nats_subject)
        self._classifier = ScoNetDemo(
            cfg_path=config,
            checkpoint_path=checkpoint,
            device=device,
        )
        self._device = device
        self._closed = False

    @staticmethod
    def _extract_int(meta: dict[str, object], key: str, fallback: int) -> int:
        """Return meta[key] when it is an int, else *fallback*."""
        value = meta.get(key)
        if isinstance(value, int):
            return value
        return fallback

    @staticmethod
    def _extract_timestamp(meta: dict[str, object]) -> int:
        """Return the source timestamp, falling back to monotonic host time."""
        value = meta.get("timestamp_ns")
        if isinstance(value, int):
            return value
        return time.monotonic_ns()

    @staticmethod
    def _to_mask_u8(mask: ndarray) -> UInt8[ndarray, "h w"]:
        """Binarize a float mask at 0.5 into a 0/255 uint8 mask."""
        binary = np.where(np.asarray(mask) > 0.5, np.uint8(255), np.uint8(0)).astype(
            np.uint8
        )
        return cast(UInt8[ndarray, "h w"], binary)

    def _first_result(self, detections: object) -> _DetectionResultsLike | None:
        """Unwrap YOLO's list/tuple return shape to its first result (or None)."""
        if isinstance(detections, list):
            return cast(_DetectionResultsLike, detections[0]) if detections else None
        if isinstance(detections, tuple):
            return cast(_DetectionResultsLike, detections[0]) if detections else None
        return cast(_DetectionResultsLike, detections)

    def _select_silhouette(
        self,
        result: _DetectionResultsLike,
    ) -> tuple[Float[ndarray, "64 44"], int] | None:
        """Extract a normalized 64x44 silhouette and a track id from *result*.

        Tries the tracked person from select_person() first; falls back to
        frame_to_person_mask() with track id 0 when tracking yields nothing
        usable. Returns None when no silhouette can be produced.
        """
        selected = select_person(result)
        if selected is not None:
            # select_person() yields (raw mask, bbox, track id) per this unpack.
            mask_raw, bbox, track_id = selected
            silhouette = cast(
                Float[ndarray, "64 44"] | None,
                mask_to_silhouette(self._to_mask_u8(mask_raw), bbox),
            )
            if silhouette is not None:
                return silhouette, int(track_id)

        # Tracking produced nothing usable — fall back to any person mask.
        fallback = cast(
            tuple[UInt8[ndarray, "h w"], tuple[int, int, int, int]] | None,
            frame_to_person_mask(result),
        )
        if fallback is None:
            return None

        mask_u8, bbox = fallback
        silhouette = cast(
            Float[ndarray, "64 44"] | None,
            mask_to_silhouette(mask_u8, bbox),
        )
        if silhouette is None:
            return None
        # Track id 0 marks the untracked fallback path.
        return silhouette, 0

    @jaxtyped(typechecker=beartype)
    def process_frame(
        self,
        frame: UInt8[ndarray, "h w c"],
        metadata: dict[str, object],
    ) -> dict[str, object] | None:
        """Run detection on one frame; classify and publish when the window fills.

        Returns the published result dict, or None when the frame did not
        trigger a classification (no person, or window not yet due).
        """
        frame_idx = self._extract_int(metadata, "frame_count", fallback=0)
        timestamp_ns = self._extract_timestamp(metadata)

        # ultralytics is untyped; verify track() exists before casting.
        track_fn_obj = getattr(self._detector, "track", None)
        if not callable(track_fn_obj):
            raise RuntimeError("YOLO detector does not expose a callable track()")

        track_fn = cast(_TrackCallable, track_fn_obj)
        # classes=[0] restricts detection to the COCO "person" class.
        detections = track_fn(
            frame,
            persist=True,
            verbose=False,
            device=self._device,
            classes=[0],
        )
        first = self._first_result(detections)
        if first is None:
            return None

        selected = self._select_silhouette(first)
        if selected is None:
            return None

        silhouette, track_id = selected
        self._window.push(silhouette, frame_idx=frame_idx, track_id=track_id)

        # The window decides (via its stride) when a classification is due.
        if not self._window.should_classify():
            return None

        window_tensor = self._window.get_tensor(device=self._device)
        label, confidence = cast(
            tuple[str, float],
            self._classifier.predict(window_tensor),
        )
        self._window.mark_classified()

        # Inclusive window start, clamped at frame 0 for the warm-up period.
        window_start = frame_idx - self._window.window_size + 1
        result = create_result(
            frame=frame_idx,
            track_id=track_id,
            label=label,
            confidence=float(confidence),
            window=(max(0, window_start), frame_idx),
            timestamp_ns=timestamp_ns,
        )
        self._publisher.publish(result)
        return result

    def run(self) -> int:
        """Drain the source, processing every frame; returns a process exit code.

        Per-frame errors are logged and skipped; Ctrl-C returns 130 (SIGINT
        convention). The publisher is always closed on the way out.
        """
        frame_count = 0
        start_time = time.perf_counter()
        try:
            for item in self._source:
                frame, metadata = item
                # Normalize whatever the source yields to uint8 for YOLO.
                frame_u8 = np.asarray(frame, dtype=np.uint8)
                frame_idx = self._extract_int(metadata, "frame_count", fallback=0)
                frame_count += 1
                try:
                    _ = self.process_frame(frame_u8, metadata)
                except Exception as frame_error:
                    # A bad frame must not kill the stream; log and continue.
                    logger.warning(
                        "Skipping frame %d due to processing error: %s",
                        frame_idx,
                        frame_error,
                    )
                # Periodic throughput report.
                if frame_count % 100 == 0:
                    elapsed = time.perf_counter() - start_time
                    fps = frame_count / elapsed if elapsed > 0 else 0.0
                    logger.info("Processed %d frames (%.2f FPS)", frame_count, fps)
            return 0
        except KeyboardInterrupt:
            logger.info("Interrupted by user, shutting down cleanly.")
            return 130
        finally:
            self.close()

    def close(self) -> None:
        """Release the publisher; idempotent and exception-safe."""
        if self._closed:
            return
        close_fn = getattr(self._publisher, "close", None)
        if callable(close_fn):
            with suppress(Exception):
                _ = close_fn()
        self._closed = True
|
||||||
|
|
||||||
|
|
||||||
|
def validate_runtime_inputs(source: str, checkpoint: str, config: str) -> None:
    """Validate CLI inputs before constructing the pipeline.

    Parameters
    ----------
    source : str
        Video source spec. File paths must exist on disk; camera indices
        (pure digits) and ``cvmmap://`` streams have no path to check.
    checkpoint : str
        Path to the ScoNet checkpoint file; must exist.
    config : str
        Path to the ScoNet YAML config; must exist.

    Raises
    ------
    ValueError
        If any file-backed input does not exist.
    """
    # Replaces the previous `if cond: pass / else: ...` anti-pattern with a
    # direct guard; behavior is unchanged.
    is_non_file_source = source.startswith("cvmmap://") or source.isdigit()
    if not is_non_file_source and not Path(source).is_file():
        raise ValueError(f"Video source not found: {source}")

    if not Path(checkpoint).is_file():
        raise ValueError(f"Checkpoint not found: {checkpoint}")

    if not Path(config).is_file():
        raise ValueError(f"Config not found: {config}")
|
||||||
|
|
||||||
|
|
||||||
|
# CLI entry point. NOTE: no docstring on main() on purpose — click would
# surface it as the command help text, which would change program output.
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.option("--source", type=str, required=True)
@click.option("--checkpoint", type=str, required=True)
@click.option(
    "--config",
    type=str,
    default="configs/sconet/sconet_scoliosis1k.yaml",
    show_default=True,
)
@click.option("--device", type=str, default="cuda:0", show_default=True)
@click.option("--yolo-model", type=str, default="yolo11n-seg.pt", show_default=True)
@click.option("--window", type=click.IntRange(min=1), default=30, show_default=True)
@click.option("--stride", type=click.IntRange(min=1), default=30, show_default=True)
@click.option("--nats-url", type=str, default=None)
@click.option(
    "--nats-subject",
    type=str,
    default="scoliosis.result",
    show_default=True,
)
@click.option("--max-frames", type=click.IntRange(min=1), default=None)
def main(
    source: str,
    checkpoint: str,
    config: str,
    device: str,
    yolo_model: str,
    window: int,
    stride: int,
    nats_url: str | None,
    nats_subject: str,
    max_frames: int | None,
) -> None:
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    try:
        # Fail fast on missing files before loading any models.
        validate_runtime_inputs(source=source, checkpoint=checkpoint, config=config)
        pipeline = ScoliosisPipeline(
            source=source,
            checkpoint=checkpoint,
            config=config,
            device=device,
            yolo_model=yolo_model,
            window=window,
            stride=stride,
            nats_url=nats_url,
            nats_subject=nats_subject,
            max_frames=max_frames,
        )
        # SystemExit is a BaseException, so it bypasses the handlers below;
        # run()'s return value becomes the process exit code.
        raise SystemExit(pipeline.run())
    except ValueError as err:
        # Input validation failures -> exit code 2 (usage-style error).
        click.echo(f"Error: {err}", err=True)
        raise SystemExit(2) from err
    except RuntimeError as err:
        # Pipeline-level failures (e.g. unopenable source) -> exit code 1.
        click.echo(f"Runtime error: {err}", err=True)
        raise SystemExit(1) from err
|
||||||
@@ -0,0 +1,270 @@
|
|||||||
|
from collections.abc import Callable
|
||||||
|
import math
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
from beartype import beartype
|
||||||
|
import jaxtyping
|
||||||
|
from jaxtyping import Float, UInt8
|
||||||
|
import numpy as np
|
||||||
|
from numpy import ndarray
|
||||||
|
from numpy.typing import NDArray
|
||||||
|
|
||||||
|
# Output silhouette geometry: a mask crop is resized to height 64, normalized
# to a 64-column canvas, then SIDE_CUT columns are trimmed from each side,
# leaving the 64x44 model input (64 - 2*10 = 44).
SIL_HEIGHT = 64
SIL_WIDTH = 44
SIL_FULL_WIDTH = 64
SIDE_CUT = 10
# Minimum number of nonzero mask pixels for a detection to be usable.
MIN_MASK_AREA = 500


# jaxtyping.jaxtyped is untyped from the checker's perspective; cast it to a
# decorator-factory alias so `@jaxtyped(typechecker=beartype)` type-checks.
JaxtypedDecorator = Callable[[Callable[..., object]], Callable[..., object]]
JaxtypedFactory = Callable[..., JaxtypedDecorator]
jaxtyped = cast(JaxtypedFactory, jaxtyping.jaxtyped)

# Convenience dtype-specific ndarray aliases.
UInt8Array = NDArray[np.uint8]
Float32Array = NDArray[np.float32]
|
||||||
|
|
||||||
|
|
||||||
|
def _read_attr(container: object, key: str) -> object | None:
|
||||||
|
if isinstance(container, dict):
|
||||||
|
dict_obj = cast(dict[object, object], container)
|
||||||
|
return dict_obj.get(key)
|
||||||
|
try:
|
||||||
|
return cast(object, object.__getattribute__(container, key))
|
||||||
|
except AttributeError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _to_numpy_array(value: object) -> NDArray[np.generic]:
    """Coerce *value* (possibly a torch-like tensor) into a numpy array.

    Walks the conventional tensor conversion chain — ``detach()``, then
    ``cpu()``, then ``numpy()`` — invoking each hop only when the current
    object exposes a callable of that name, and finally falls back to
    ``np.asarray``.
    """
    obj: object = value
    if isinstance(obj, np.ndarray):
        return obj

    # Detach from any autograd graph and move off-device, when supported.
    for hop in ("detach", "cpu"):
        candidate = _read_attr(obj, hop)
        if callable(candidate):
            obj = cast(Callable[[], object], candidate)()

    to_numpy = _read_attr(obj, "numpy")
    if callable(to_numpy):
        converted = cast(Callable[[], object], to_numpy)()
        if isinstance(converted, np.ndarray):
            return converted

    return cast(NDArray[np.generic], np.asarray(obj))
|
||||||
|
|
||||||
|
|
||||||
|
def _bbox_from_mask(mask: UInt8[ndarray, "h w"]) -> tuple[int, int, int, int] | None:
    """Return the tight (x1, y1, x2, y2) box around nonzero mask pixels.

    The right/bottom edges are exclusive. Returns None for an all-zero mask
    or a degenerate box.
    """
    binary = np.asarray(mask, dtype=np.uint8) > 0
    rows, cols = np.nonzero(binary)
    if rows.size == 0:
        return None

    y1, y2 = int(rows.min()), int(rows.max()) + 1
    x1, x2 = int(cols.min()), int(cols.max()) + 1
    if x2 <= x1 or y2 <= y1:
        return None
    return (x1, y1, x2, y2)
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_bbox(
|
||||||
|
bbox: tuple[int, int, int, int], height: int, width: int
|
||||||
|
) -> tuple[int, int, int, int] | None:
|
||||||
|
x1, y1, x2, y2 = bbox
|
||||||
|
x1c = max(0, min(int(x1), width - 1))
|
||||||
|
y1c = max(0, min(int(y1), height - 1))
|
||||||
|
x2c = max(0, min(int(x2), width))
|
||||||
|
y2c = max(0, min(int(y2), height))
|
||||||
|
if x2c <= x1c or y2c <= y1c:
|
||||||
|
return None
|
||||||
|
return (x1c, y1c, x2c, y2c)
|
||||||
|
|
||||||
|
|
||||||
|
@jaxtyped(typechecker=beartype)
def frame_to_person_mask(
    result: object, min_area: int = MIN_MASK_AREA
) -> tuple[UInt8[ndarray, "h w"], tuple[int, int, int, int]] | None:
    """Pick the largest-area person mask from a detection result.

    Args:
        result: Detection output exposing ``masks.data`` (N, H, W floats) and
            optionally ``boxes.xyxy``; everything is accessed duck-typed via
            ``_read_attr``, so both attribute-style results (e.g. Ultralytics)
            and plain dicts work.
        min_area: Minimum nonzero-pixel count for a mask to be considered.

    Returns:
        ``(mask, bbox)`` where ``mask`` is a binarized uint8 (0/255) array and
        ``bbox`` is an integer ``(x1, y1, x2, y2)`` box, or ``None`` when no
        usable detection is found.
    """
    masks_obj = _read_attr(result, "masks")
    if masks_obj is None:
        return None

    masks_data_obj = _read_attr(masks_obj, "data")
    if masks_data_obj is None:
        return None

    # Normalize mask data to a float32 stack of shape (N, H, W).
    masks_raw = _to_numpy_array(masks_data_obj)
    masks_float = np.asarray(masks_raw, dtype=np.float32)
    if masks_float.ndim == 2:
        masks_float = masks_float[np.newaxis, ...]
    if masks_float.ndim != 3:
        return None
    mask_count = int(cast(tuple[int, int, int], masks_float.shape)[0])
    if mask_count <= 0:
        return None

    # Collect per-detection xyxy boxes, if present, as float 4-tuples.
    # A 1-D array is treated as a single box; a 2-D array as one box per row.
    box_values: list[tuple[float, float, float, float]] | None = None
    boxes_obj = _read_attr(result, "boxes")
    if boxes_obj is not None:
        xyxy_obj = _read_attr(boxes_obj, "xyxy")
        if xyxy_obj is not None:
            xyxy_raw = np.asarray(_to_numpy_array(xyxy_obj), dtype=np.float32)
            if xyxy_raw.ndim == 1 and int(xyxy_raw.size) >= 4:
                xyxy_2d = np.asarray(xyxy_raw[:4].reshape(1, 4), dtype=np.float64)
                x1f = cast(np.float64, xyxy_2d[0, 0])
                y1f = cast(np.float64, xyxy_2d[0, 1])
                x2f = cast(np.float64, xyxy_2d[0, 2])
                y2f = cast(np.float64, xyxy_2d[0, 3])
                box_values = [
                    (
                        float(x1f),
                        float(y1f),
                        float(x2f),
                        float(y2f),
                    )
                ]
            elif xyxy_raw.ndim == 2:
                shape_2d = cast(tuple[int, int], xyxy_raw.shape)
                if int(shape_2d[1]) >= 4:
                    xyxy_2d = np.asarray(xyxy_raw[:, :4], dtype=np.float64)
                    box_values = []
                    for row_idx in range(int(cast(tuple[int, int], xyxy_2d.shape)[0])):
                        x1f = cast(np.float64, xyxy_2d[row_idx, 0])
                        y1f = cast(np.float64, xyxy_2d[row_idx, 1])
                        x2f = cast(np.float64, xyxy_2d[row_idx, 2])
                        y2f = cast(np.float64, xyxy_2d[row_idx, 3])
                        box_values.append(
                            (
                                float(x1f),
                                float(y1f),
                                float(x2f),
                                float(y2f),
                            )
                        )

    # Scan all candidate masks and keep the one with the largest pixel area.
    best_area = -1
    best_mask: UInt8[ndarray, "h w"] | None = None
    best_bbox: tuple[int, int, int, int] | None = None

    for idx in range(mask_count):
        mask_float = np.asarray(masks_float[idx], dtype=np.float32)
        if mask_float.ndim != 2:
            continue
        # Binarize at 0.5 to a 0/255 uint8 mask.
        mask_binary = np.where(mask_float > 0.5, np.uint8(255), np.uint8(0)).astype(
            np.uint8
        )
        mask_u8 = cast(UInt8[ndarray, "h w"], mask_binary)

        area = int(np.count_nonzero(mask_u8))
        if area < min_area:
            continue

        bbox: tuple[int, int, int, int] | None = None
        shape_2d = cast(tuple[int, int], mask_binary.shape)
        h = int(shape_2d[0])
        w = int(shape_2d[1])
        if box_values is not None:
            box_count = len(box_values)
            # NOTE(review): when boxes exist but fewer boxes than masks were
            # provided, the extra masks are skipped rather than falling back
            # to the mask-derived bbox — confirm this is intended.
            if idx >= box_count:
                continue
            row0, row1, row2, row3 = box_values[idx]
            # Expand the float box outward (floor mins / ceil maxes) before
            # clamping it to the mask bounds.
            bbox_candidate = (
                int(math.floor(row0)),
                int(math.floor(row1)),
                int(math.ceil(row2)),
                int(math.ceil(row3)),
            )
            bbox = _sanitize_bbox(bbox_candidate, h, w)

        # Fall back to the tight bbox derived from the mask itself.
        if bbox is None:
            bbox = _bbox_from_mask(mask_u8)

        if bbox is None:
            continue

        if area > best_area:
            best_area = area
            best_mask = mask_u8
            best_bbox = bbox

    if best_mask is None or best_bbox is None:
        return None

    return best_mask, best_bbox
|
||||||
|
|
||||||
|
|
||||||
|
@jaxtyped(typechecker=beartype)
def mask_to_silhouette(
    mask: UInt8[ndarray, "h w"],
    bbox: tuple[int, int, int, int],
) -> Float[ndarray, "64 44"] | None:
    """Convert a binary person mask and its bbox into a normalized silhouette.

    Pipeline: binarize -> crop to the sanitized bbox -> drop empty top/bottom
    rows -> resize to height SIL_HEIGHT preserving aspect ratio -> center-crop
    or zero-pad to width SIL_FULL_WIDTH -> cut SIDE_CUT columns from each side
    -> scale to [0, 1] float32 of shape (SIL_HEIGHT, SIL_WIDTH).

    Args:
        mask: Grayscale mask; any nonzero pixel counts as foreground.
        bbox: Candidate ``(x1, y1, x2, y2)`` crop region (exclusive right/bottom).

    Returns:
        A (64, 44) float32 silhouette in [0, 1], or ``None`` whenever any
        stage degenerates (mask too small, empty crop, unexpected final shape).
    """
    mask_u8 = np.where(mask > 0, np.uint8(255), np.uint8(0)).astype(np.uint8)
    if int(np.count_nonzero(mask_u8)) < MIN_MASK_AREA:
        return None

    mask_shape = cast(tuple[int, int], mask_u8.shape)
    h = int(mask_shape[0])
    w = int(mask_shape[1])
    bbox_sanitized = _sanitize_bbox(bbox, h, w)
    if bbox_sanitized is None:
        return None

    x1, y1, x2, y2 = bbox_sanitized
    cropped = mask_u8[y1:y2, x1:x2]
    if cropped.size == 0:
        return None

    # Tighten vertically: keep only the span of rows that contain foreground.
    cropped_u8 = np.asarray(cropped, dtype=np.uint8)
    row_sums = np.sum(cropped_u8, axis=1, dtype=np.int64)
    row_nonzero = np.nonzero(row_sums > 0)[0].astype(np.int64)
    if int(row_nonzero.size) == 0:
        return None
    top = int(cast(np.int64, row_nonzero[0]))
    bottom = int(cast(np.int64, row_nonzero[-1])) + 1
    tightened = cropped[top:bottom, :]
    if tightened.size == 0:
        return None

    tight_shape = cast(tuple[int, int], tightened.shape)
    tight_h = int(tight_shape[0])
    tight_w = int(tight_shape[1])
    if tight_h <= 0 or tight_w <= 0:
        return None

    # Resize to the canonical height while preserving the aspect ratio;
    # cv2.resize takes (width, height).
    resized_w = max(1, int(SIL_HEIGHT * (tight_w / tight_h)))
    resized = np.asarray(
        cv2.resize(tightened, (resized_w, SIL_HEIGHT), interpolation=cv2.INTER_CUBIC),
        dtype=np.uint8,
    )

    # Normalize the width to SIL_FULL_WIDTH: center-crop when wider,
    # symmetric zero-pad when narrower.
    if resized_w >= SIL_FULL_WIDTH:
        start = (resized_w - SIL_FULL_WIDTH) // 2
        normalized_64 = resized[:, start : start + SIL_FULL_WIDTH]
    else:
        pad_left = (SIL_FULL_WIDTH - resized_w) // 2
        pad_right = SIL_FULL_WIDTH - resized_w - pad_left
        normalized_64 = np.pad(
            resized,
            ((0, 0), (pad_left, pad_right)),
            mode="constant",
            constant_values=0,
        )

    # Trim SIDE_CUT columns from each side to reach the model's 64x44 input.
    silhouette = np.asarray(
        normalized_64[:, SIDE_CUT : SIL_FULL_WIDTH - SIDE_CUT], dtype=np.float32
    )
    if silhouette.shape != (SIL_HEIGHT, SIL_WIDTH):
        return None

    # Rescale 0/255 values into [0, 1].
    silhouette_norm = np.clip(silhouette / np.float32(255.0), 0.0, 1.0).astype(
        np.float32
    )
    return cast(Float[ndarray, "64 44"], silhouette_norm)
|
||||||
@@ -0,0 +1,317 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
from typing import ClassVar, Protocol, cast, override
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from beartype import beartype
|
||||||
|
from einops import rearrange
|
||||||
|
from jaxtyping import Float
|
||||||
|
import jaxtyping
|
||||||
|
from torch import Tensor
|
||||||
|
|
||||||
|
# Make the opengait package importable regardless of the working directory:
# prepend the package root (one directory above this file) to sys.path once.
_OPENGAIT_PACKAGE_ROOT = Path(__file__).resolve().parents[1]
if str(_OPENGAIT_PACKAGE_ROOT) not in sys.path:
    sys.path.insert(0, str(_OPENGAIT_PACKAGE_ROOT))
|
||||||
|
|
||||||
|
from opengait.modeling.backbones.resnet import ResNet9
|
||||||
|
from opengait.modeling.modules import (
|
||||||
|
HorizontalPoolingPyramid,
|
||||||
|
PackSequenceWrapper as TemporalPool,
|
||||||
|
SeparateBNNecks,
|
||||||
|
SeparateFCs,
|
||||||
|
)
|
||||||
|
from opengait.utils import common as common_utils
|
||||||
|
|
||||||
|
|
||||||
|
# jaxtyping.jaxtyped and opengait's config_loader are untyped; cast them to
# precise callable aliases so decorated/called sites type-check cleanly.
JaxtypedDecorator = Callable[[Callable[..., object]], Callable[..., object]]
JaxtypedFactory = Callable[..., JaxtypedDecorator]
jaxtyped = cast(JaxtypedFactory, jaxtyping.jaxtyped)
ConfigLoader = Callable[[str], dict[str, object]]
config_loader = cast(ConfigLoader, common_utils.config_loader)
|
||||||
|
|
||||||
|
|
||||||
|
class TemporalPoolLike(Protocol):
    """Structural type for the temporal pooling wrapper (PackSequenceWrapper).

    ``seqs`` is pooled along ``dim``; ``seqL`` carries per-sequence lengths
    (None for fixed-length batches). The return type is ``object`` because
    the wrapped reducer decides the exact container.
    """

    def __call__(
        self,
        seqs: Tensor,
        seqL: object,
        dim: int = 2,
        options: dict[str, int] | None = None,
    ) -> object: ...
|
||||||
|
|
||||||
|
|
||||||
|
class HppLike(Protocol):
    """Structural type for HorizontalPoolingPyramid: Tensor in, Tensor out."""

    def __call__(self, x: Tensor) -> Tensor: ...
|
||||||
|
|
||||||
|
|
||||||
|
class FCsLike(Protocol):
    """Structural type for SeparateFCs: Tensor in, Tensor out."""

    def __call__(self, x: Tensor) -> Tensor: ...
|
||||||
|
|
||||||
|
|
||||||
|
class BNNecksLike(Protocol):
    """Structural type for SeparateBNNecks.

    Returns a pair of tensors — presumably (feature embedding, class logits);
    confirm against opengait.modeling.modules.SeparateBNNecks.
    """

    def __call__(self, x: Tensor) -> tuple[Tensor, Tensor]: ...
|
||||||
|
|
||||||
|
|
||||||
|
class ScoNetDemo(nn.Module):
    """Inference-only wrapper around the ScoNet architecture for the demo.

    Rebuilds the network (ResNet9 backbone -> temporal max pooling ->
    horizontal pooling pyramid -> SeparateFCs -> SeparateBNNecks) from an
    OpenGait YAML config, optionally loads and key-normalizes a training
    checkpoint, and exposes ``forward``/``predict`` for classification.
    """

    # Class-id -> human-readable label for predict().
    LABEL_MAP: ClassVar[dict[int, str]] = {0: "negative", 1: "neutral", 2: "positive"}
    cfg_path: str  # resolved config path actually loaded
    cfg: dict[str, object]  # parsed YAML config
    backbone: ResNet9
    temporal_pool: TemporalPoolLike
    hpp: HppLike
    fcs: FCsLike
    bn_necks: BNNecksLike
    device: torch.device

    @jaxtyped(typechecker=beartype)
    def __init__(
        self,
        cfg_path: str | Path = "configs/sconet/sconet_scoliosis1k.yaml",
        checkpoint_path: str | Path | None = None,
        device: str | torch.device | None = None,
    ) -> None:
        """Build the model from *cfg_path*, optionally loading a checkpoint.

        Args:
            cfg_path: OpenGait YAML config; relative paths are resolved
                against the repository root.
            checkpoint_path: Optional checkpoint to load after construction.
            device: Target device; defaults to CPU when None.

        Raises:
            ValueError: If the config's backbone type is not ResNet9.
            TypeError: If a required config entry is missing or mistyped.
        """
        super().__init__()
        resolved_cfg = self._resolve_path(cfg_path)
        self.cfg_path = str(resolved_cfg)

        self.cfg = config_loader(self.cfg_path)

        model_cfg = self._extract_model_cfg(self.cfg)
        backbone_cfg = self._extract_dict(model_cfg, "backbone_cfg")

        if backbone_cfg.get("type") != "ResNet9":
            raise ValueError(
                "ScoNetDemo currently supports backbone type ResNet9 only."
            )

        self.backbone = ResNet9(
            block=self._extract_str(backbone_cfg, "block"),
            channels=self._extract_int_list(backbone_cfg, "channels"),
            in_channel=self._extract_int(backbone_cfg, "in_channel", default=1),
            layers=self._extract_int_list(backbone_cfg, "layers"),
            strides=self._extract_int_list(backbone_cfg, "strides"),
            maxpool=self._extract_bool(backbone_cfg, "maxpool", default=True),
        )

        fcs_cfg = self._extract_dict(model_cfg, "SeparateFCs")
        bn_cfg = self._extract_dict(model_cfg, "SeparateBNNecks")
        bin_num = self._extract_int_list(model_cfg, "bin_num")

        # Pool over the temporal axis with max; HPP pools spatially into bins.
        self.temporal_pool = cast(TemporalPoolLike, TemporalPool(torch.max))
        self.hpp = cast(HppLike, HorizontalPoolingPyramid(bin_num=bin_num))
        self.fcs = cast(
            FCsLike,
            SeparateFCs(
                parts_num=self._extract_int(fcs_cfg, "parts_num"),
                in_channels=self._extract_int(fcs_cfg, "in_channels"),
                out_channels=self._extract_int(fcs_cfg, "out_channels"),
                norm=self._extract_bool(fcs_cfg, "norm", default=False),
            ),
        )
        self.bn_necks = cast(
            BNNecksLike,
            SeparateBNNecks(
                parts_num=self._extract_int(bn_cfg, "parts_num"),
                in_channels=self._extract_int(bn_cfg, "in_channels"),
                class_num=self._extract_int(bn_cfg, "class_num"),
                norm=self._extract_bool(bn_cfg, "norm", default=True),
                parallel_BN1d=self._extract_bool(bn_cfg, "parallel_BN1d", default=True),
            ),
        )

        self.device = (
            torch.device(device) if device is not None else torch.device("cpu")
        )
        _ = self.to(self.device)

        if checkpoint_path is not None:
            _ = self.load_checkpoint(checkpoint_path)

        # Demo model is inference-only; lock eval mode.
        _ = self.eval()

    @staticmethod
    def _resolve_path(path: str | Path) -> Path:
        """Resolve a path: existing files and absolute paths pass through;
        other relative paths are anchored at the repo root (two levels up)."""
        candidate = Path(path)
        if candidate.is_file():
            return candidate
        if candidate.is_absolute():
            return candidate
        repo_root = Path(__file__).resolve().parents[2]
        return repo_root / candidate

    @staticmethod
    def _extract_model_cfg(cfg: dict[str, object]) -> dict[str, object]:
        """Return cfg['model_cfg'], insisting it is a dict."""
        model_cfg_obj = cfg.get("model_cfg")
        if not isinstance(model_cfg_obj, dict):
            raise TypeError("model_cfg must be a dictionary.")
        return cast(dict[str, object], model_cfg_obj)

    @staticmethod
    def _extract_dict(container: dict[str, object], key: str) -> dict[str, object]:
        """Return container[key] as a dict, raising TypeError otherwise."""
        value = container.get(key)
        if not isinstance(value, dict):
            raise TypeError(f"{key} must be a dictionary.")
        return cast(dict[str, object], value)

    @staticmethod
    def _extract_str(container: dict[str, object], key: str) -> str:
        """Return container[key] as a str, raising TypeError otherwise."""
        value = container.get(key)
        if not isinstance(value, str):
            raise TypeError(f"{key} must be a string.")
        return value

    @staticmethod
    def _extract_int(
        container: dict[str, object], key: str, default: int | None = None
    ) -> int:
        """Return container.get(key, default) as an int, raising otherwise.

        NOTE(review): isinstance(True, int) is True, so a bool value would
        pass this check — confirm configs never use booleans here.
        """
        value = container.get(key, default)
        if not isinstance(value, int):
            raise TypeError(f"{key} must be an int.")
        return value

    @staticmethod
    def _extract_bool(
        container: dict[str, object], key: str, default: bool | None = None
    ) -> bool:
        """Return container.get(key, default) as a bool, raising otherwise."""
        value = container.get(key, default)
        if not isinstance(value, bool):
            raise TypeError(f"{key} must be a bool.")
        return value

    @staticmethod
    def _extract_int_list(container: dict[str, object], key: str) -> list[int]:
        """Return container[key] as a list of ints, raising otherwise."""
        value = container.get(key)
        if not isinstance(value, list):
            raise TypeError(f"{key} must be a list[int].")
        values = cast(list[object], value)
        if not all(isinstance(v, int) for v in values):
            raise TypeError(f"{key} must be a list[int].")
        return cast(list[int], values)

    @staticmethod
    def _normalize_state_dict(
        state_dict_obj: dict[object, object],
    ) -> dict[str, Tensor]:
        """Rename checkpoint keys to this module's attribute layout.

        Strips a leading ``module.`` (DataParallel artifact) and remaps the
        OpenGait training prefixes to the demo attribute names. Raises on
        non-str keys, non-Tensor values, or a post-remap key collision.
        """
        prefix_remap: tuple[tuple[str, str], ...] = (
            ("Backbone.forward_block.", "backbone."),
            ("FCs.", "fcs."),
            ("BNNecks.", "bn_necks."),
        )
        cleaned_state_dict: dict[str, Tensor] = {}
        for key_obj, value_obj in state_dict_obj.items():
            if not isinstance(key_obj, str):
                raise TypeError("Checkpoint state_dict keys must be strings.")
            if not isinstance(value_obj, Tensor):
                raise TypeError("Checkpoint state_dict values must be torch.Tensor.")
            key = key_obj[7:] if key_obj.startswith("module.") else key_obj
            for source_prefix, target_prefix in prefix_remap:
                if key.startswith(source_prefix):
                    key = f"{target_prefix}{key[len(source_prefix) :]}"
                    break
            if key in cleaned_state_dict:
                raise RuntimeError(
                    f"Checkpoint key normalization collision detected for key '{key}'."
                )
            cleaned_state_dict[key] = value_obj
        return cleaned_state_dict

    @jaxtyped(typechecker=beartype)
    def load_checkpoint(
        self,
        checkpoint_path: str | Path,
        map_location: str | torch.device | None = None,
        strict: bool = True,
    ) -> None:
        """Load a (possibly wrapped) checkpoint into this module.

        Accepts either a raw state dict or a dict with a ``model`` entry;
        keys are normalized via ``_normalize_state_dict`` before loading.

        Args:
            checkpoint_path: Checkpoint file; resolved like config paths.
            map_location: Device override for torch.load (defaults to
                ``self.device``).
            strict: Passed to ``load_state_dict``.

        Raises:
            TypeError: On an unrecognized checkpoint structure.
            RuntimeError: When loading fails even after key normalization.
        """
        resolved_ckpt = self._resolve_path(checkpoint_path)
        checkpoint_obj = cast(
            object,
            torch.load(
                str(resolved_ckpt),
                map_location=map_location if map_location is not None else self.device,
            ),
        )

        # Unwrap OpenGait-style checkpoints that nest the weights under "model".
        state_dict_obj: object = checkpoint_obj
        if isinstance(checkpoint_obj, dict) and "model" in checkpoint_obj:
            state_dict_obj = cast(dict[str, object], checkpoint_obj)["model"]

        if not isinstance(state_dict_obj, dict):
            raise TypeError("Unsupported checkpoint format.")

        cleaned_state_dict = self._normalize_state_dict(
            cast(dict[object, object], state_dict_obj)
        )
        try:
            _ = self.load_state_dict(cleaned_state_dict, strict=strict)
        except RuntimeError as exc:
            raise RuntimeError(
                f"Failed to load ScoNetDemo checkpoint after key normalization from '{resolved_ckpt}'."
            ) from exc
        _ = self.eval()

    def _prepare_sils(self, sils: Tensor) -> Tensor:
        """Normalize input silhouettes to float [B, 1, S, H, W] on self.device.

        Accepts [B, S, H, W] (channel inserted) or [B, S, 1, H, W]
        (channel moved in front of the sequence axis); anything else raises.
        """
        if sils.ndim == 4:
            sils = sils.unsqueeze(1)
        elif sils.ndim == 5 and sils.shape[1] != 1 and sils.shape[2] == 1:
            sils = rearrange(sils, "b s c h w -> b c s h w")

        if sils.ndim != 5 or sils.shape[1] != 1:
            raise ValueError("Expected sils shape [B, 1, S, H, W] or [B, S, H, W].")

        return sils.float().to(self.device)

    def _forward_backbone(self, sils: Tensor) -> Tensor:
        """Run the backbone per frame and restore the sequence layout.

        Folds the sequence axis into the batch so the 2-D backbone sees
        [B*S, C, H, W], then unfolds back to [B, C', S, H', W'].
        """
        batch, channels, seq, height, width = sils.shape
        framewise = sils.transpose(1, 2).reshape(batch * seq, channels, height, width)
        frame_feats = cast(Tensor, self.backbone(framewise))
        _, out_channels, out_h, out_w = frame_feats.shape
        return (
            frame_feats.reshape(batch, seq, out_channels, out_h, out_w)
            .transpose(1, 2)
            .contiguous()
        )

    @override
    @jaxtyped(typechecker=beartype)
    def forward(self, sils: Float[Tensor, "batch 1 seq 64 44"]) -> dict[str, Tensor]:
        """Classify a batch of silhouette sequences.

        Returns a dict with per-part ``logits``, argmax ``label`` ids, and
        the softmax ``confidence`` of the chosen label. Runs entirely under
        ``torch.inference_mode``.
        """
        with torch.inference_mode():
            prepared_sils = self._prepare_sils(sils)
            outs = self._forward_backbone(prepared_sils)

            # TemporalPool (max over the sequence axis) returns a tuple whose
            # first element is the pooled tensor; validate before unpacking.
            pooled_obj = self.temporal_pool(outs, None, options={"dim": 2})
            if (
                not isinstance(pooled_obj, tuple)
                or not pooled_obj
                or not isinstance(pooled_obj[0], Tensor)
            ):
                raise TypeError("TemporalPool output is invalid.")
            pooled = pooled_obj[0]

            feat = self.hpp(pooled)
            embed_1 = self.fcs(feat)
            _, logits = self.bn_necks(embed_1)

            # Average part-wise logits (last axis), then pick the best class
            # and its softmax probability as the confidence.
            mean_logits = logits.mean(dim=-1)
            pred_ids = torch.argmax(mean_logits, dim=-1)
            probs = torch.softmax(mean_logits, dim=-1)
            confidence = torch.gather(
                probs, dim=-1, index=pred_ids.unsqueeze(-1)
            ).squeeze(-1)

            return {"logits": logits, "label": pred_ids, "confidence": confidence}

    @jaxtyped(typechecker=beartype)
    def predict(self, sils: Float[Tensor, "batch 1 seq 64 44"]) -> tuple[str, float]:
        """Classify a single sequence and return (label_name, confidence).

        Raises:
            ValueError: If the batch does not contain exactly one sequence.
        """
        outputs = cast(dict[str, Tensor], self.forward(sils))
        labels = outputs["label"]
        confidence = outputs["confidence"]

        if labels.numel() != 1:
            raise ValueError("predict expects batch size 1.")

        label_id = int(labels.item())
        return self.LABEL_MAP[label_id], float(confidence.item())
|
||||||
@@ -0,0 +1,295 @@
|
|||||||
|
"""Sliding window / ring buffer manager for real-time gait analysis.
|
||||||
|
|
||||||
|
This module provides bounded buffer management for silhouette sequences
|
||||||
|
with track ID tracking and gap detection.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from collections import deque
|
||||||
|
from typing import TYPE_CHECKING, Protocol, final
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from jaxtyping import Float
|
||||||
|
from numpy import ndarray
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from numpy.typing import NDArray
|
||||||
|
|
||||||
|
|
||||||
|
# Silhouette dimensions from preprocess.py
SIL_HEIGHT: int = 64  # rows of a normalized silhouette
SIL_WIDTH: int = 44  # columns after side trimming
|
||||||
|
|
||||||
|
|
||||||
|
class _Boxes(Protocol):
    """Protocol for boxes with xyxy and id attributes.

    Mirrors an Ultralytics-style ``Boxes`` container: ``xyxy`` holds [N, 4]
    corner coordinates and ``id`` holds optional per-box track IDs.
    """

    @property
    def xyxy(self) -> "NDArray[np.float32] | object": ...
    @property
    def id(self) -> "NDArray[np.int64] | object | None": ...
|
||||||
|
|
||||||
|
|
||||||
|
class _Masks(Protocol):
    """Protocol for masks with data attribute.

    ``data`` is expected to be an [N, H, W] stack of per-detection masks.
    """

    @property
    def data(self) -> "NDArray[np.float32] | object": ...
|
||||||
|
|
||||||
|
|
||||||
|
class _DetectionResults(Protocol):
    """Protocol for detection results from Ultralytics-style objects.

    Bundles the ``boxes`` and ``masks`` containers consumed by
    ``select_person``.
    """

    @property
    def boxes(self) -> _Boxes: ...
    @property
    def masks(self) -> _Masks: ...
|
||||||
|
|
||||||
|
|
||||||
|
@final
class SilhouetteWindow:
    """Fixed-capacity sliding window of per-frame silhouettes.

    Holds up to ``window_size`` of the most recent silhouettes for a single
    tracked person. The buffer is cleared automatically whenever the track ID
    changes or the incoming frame index jumps by more than ``gap_threshold``,
    and ``stride`` throttles how often classification re-runs.

    Attributes:
        window_size: Capacity of the buffer in frames.
        stride: Minimum frame distance between classifications.
        gap_threshold: Largest tolerated frame-index jump before a reset.
    """

    window_size: int
    stride: int
    gap_threshold: int
    _buffer: deque[Float[ndarray, "64 44"]]
    _frame_indices: deque[int]
    _track_id: int | None
    _last_classified_frame: int
    _frame_count: int

    def __init__(
        self,
        window_size: int = 30,
        stride: int = 1,
        gap_threshold: int = 15,
    ) -> None:
        """Create an empty window.

        Args:
            window_size: Buffer capacity in frames (default 30).
            stride: Frames between classifications (default 1).
            gap_threshold: Largest tolerated frame gap (default 15).
        """
        self.window_size = window_size
        self.stride = stride
        self.gap_threshold = gap_threshold

        # deque(maxlen=...) silently evicts the oldest entry once full.
        self._buffer = deque(maxlen=window_size)
        self._frame_indices = deque(maxlen=window_size)
        self._track_id = None
        self._last_classified_frame = -1
        self._frame_count = 0

    def push(self, sil: np.ndarray, frame_idx: int, track_id: int) -> None:
        """Append one silhouette for ``track_id`` observed at ``frame_idx``.

        The buffer is reset before appending when the track ID differs from
        the buffered one, or when the gap since the previous frame exceeds
        ``gap_threshold``.

        Args:
            sil: float32 silhouette of shape (64, 44).
            frame_idx: Monotonic frame index used for gap detection.
            track_id: Identity of the tracked person.

        Raises:
            ValueError: If ``sil`` does not have shape (64, 44).
        """
        if self._track_id is not None and track_id != self._track_id:
            # A different person took over: discard the stale sequence.
            self.reset()

        if self._frame_indices and frame_idx - self._frame_indices[-1] > self.gap_threshold:
            # Too many frames missed; the sequence is no longer contiguous.
            self.reset()

        self._track_id = track_id

        silhouette = np.asarray(sil, dtype=np.float32)
        if silhouette.shape != (SIL_HEIGHT, SIL_WIDTH):
            raise ValueError(
                f"Expected silhouette shape ({SIL_HEIGHT}, {SIL_WIDTH}), got {silhouette.shape}"
            )

        self._buffer.append(silhouette)
        self._frame_indices.append(frame_idx)
        self._frame_count += 1

    def is_ready(self) -> bool:
        """Return True once the buffer holds ``window_size`` frames."""
        return len(self._buffer) >= self.window_size

    def should_classify(self) -> bool:
        """Return True when a (re-)classification is due.

        The first classification fires as soon as the window is full; later
        ones wait until at least ``stride`` frames have elapsed since the
        last ``mark_classified`` call.
        """
        if not self.is_ready():
            return False

        if self._last_classified_frame < 0:
            return True

        return self._frame_indices[-1] - self._last_classified_frame >= self.stride

    def get_tensor(self, device: str = "cpu") -> torch.Tensor:
        """Materialize the window as a model-ready tensor.

        Args:
            device: Target torch device (default 'cpu').

        Returns:
            float32 tensor of shape [1, 1, window_size, 64, 44].

        Raises:
            ValueError: If the window is not yet full.
        """
        if not self.is_ready():
            raise ValueError(
                f"Window not ready: {len(self._buffer)}/{self.window_size} frames"
            )

        # [window_size, 64, 44] -> [1, 1, window_size, 64, 44]
        stacked_frames = np.stack(list(self._buffer), axis=0).astype(np.float32)
        return torch.from_numpy(stacked_frames)[None, None, ...].to(device)

    def reset(self) -> None:
        """Drop all buffered frames and tracking state."""
        self._buffer.clear()
        self._frame_indices.clear()
        self._track_id = None
        self._last_classified_frame = -1
        self._frame_count = 0

    def mark_classified(self) -> None:
        """Record the newest buffered frame as the last classified one."""
        if self._frame_indices:
            self._last_classified_frame = self._frame_indices[-1]

    @property
    def current_track_id(self) -> int | None:
        """Track ID currently buffered, or None after a reset."""
        return self._track_id

    @property
    def frame_count(self) -> int:
        """Number of frames pushed since the last reset."""
        return self._frame_count

    @property
    def fill_level(self) -> float:
        """Buffer occupancy as a ratio in [0.0, 1.0]."""
        return len(self._buffer) / self.window_size
|
||||||
|
|
||||||
|
|
||||||
|
def select_person(
    results: _DetectionResults,
) -> tuple[ndarray, tuple[int, int, int, int], int] | None:
    """Pick the person with the largest bounding box from detection results.

    Args:
        results: Detection results object with boxes and masks attributes.
            Expected to have:
            - boxes.xyxy: array of bounding boxes [N, 4]
            - masks.data: array of masks [N, H, W]
            - boxes.id: optional track IDs [N]

    Returns:
        Tuple of (mask, bbox, track_id) for the largest detection, or None
        when boxes, masks, or track IDs are missing or inconsistent.
    """
    # Without track IDs there is nothing stable to follow across frames.
    boxes_obj = getattr(results, "boxes", None)
    if boxes_obj is None:
        return None

    raw_ids = getattr(boxes_obj, "id", None)
    if raw_ids is None:
        return None
    track_ids = np.asarray(raw_ids)
    if track_ids.size == 0:
        return None

    # Bounding boxes, normalized to 2-D [N, 4].
    raw_xyxy = getattr(boxes_obj, "xyxy", None)
    if raw_xyxy is None:
        return None
    bboxes = np.asarray(raw_xyxy)
    if bboxes.ndim == 1:
        bboxes = bboxes.reshape(1, -1)
    if bboxes.shape[0] == 0:
        return None

    # Segmentation masks, normalized to 3-D [N, H, W]; must pair 1:1 with boxes.
    masks_obj = getattr(results, "masks", None)
    if masks_obj is None:
        return None
    raw_masks = getattr(masks_obj, "data", None)
    if raw_masks is None:
        return None
    masks = np.asarray(raw_masks)
    if masks.ndim == 2:
        masks = masks[np.newaxis, ...]
    if masks.shape[0] != bboxes.shape[0]:
        return None

    # Scan for the detection whose box covers the largest area.
    best_idx = -1
    best_area = -1.0
    for idx in range(int(bboxes.shape[0])):
        x1, y1, x2, y2 = (float(v) for v in bboxes[idx][:4])
        area = (x2 - x1) * (y2 - y1)
        if area > best_area:
            best_area, best_idx = area, idx

    if best_idx < 0:
        return None

    # Integer pixel bbox for the winner.
    x1i, y1i, x2i, y2i = (int(float(v)) for v in bboxes[best_idx][:4])
    bbox = (x1i, y1i, x2i, y2i)
    # Fall back to the row index when the ID array is shorter than the boxes.
    track_id = int(track_ids[best_idx]) if best_idx < len(track_ids) else best_idx

    return masks[best_idx], bbox, track_id
Reference in New Issue
Block a user