chore: update demo runtime, tests, and agent docs

This commit is contained in:
2026-03-02 12:33:17 +08:00
parent 1f8f959ad7
commit cbb3284c13
14 changed files with 1491 additions and 236 deletions
+66 -10
View File
@@ -7,7 +7,7 @@ from pathlib import Path
import subprocess
import sys
import time
from typing import Final, cast
from typing import Final, Literal, cast
from unittest import mock
import numpy as np
@@ -693,9 +693,11 @@ class MockVisualizer:
self,
frame: NDArray[np.uint8],
bbox: tuple[int, int, int, int] | None,
bbox_mask: tuple[int, int, int, int] | None,
track_id: int,
mask_raw: NDArray[np.uint8] | None,
silhouette: NDArray[np.float32] | None,
segmentation_input: NDArray[np.float32] | None,
label: str | None,
confidence: float | None,
fps: float,
@@ -704,9 +706,11 @@ class MockVisualizer:
{
"frame": frame,
"bbox": bbox,
"bbox_mask": bbox_mask,
"track_id": track_id,
"mask_raw": mask_raw,
"silhouette": silhouette,
"segmentation_input": segmentation_input,
"label": label,
"confidence": confidence,
"fps": fps,
@@ -761,9 +765,8 @@ def test_pipeline_visualizer_updates_on_no_detection() -> None:
visualize=True,
)
# Replace the visualizer with our mock
mock_viz = MockVisualizer()
pipeline._visualizer = mock_viz # type: ignore[assignment]
setattr(pipeline, "_visualizer", mock_viz)
# Run pipeline
_ = pipeline.run()
@@ -779,13 +782,14 @@ def test_pipeline_visualizer_updates_on_no_detection() -> None:
for call in mock_viz.update_calls:
assert call["track_id"] == 0 # Default track_id when no detection
assert call["bbox"] is None # No bbox when no detection
assert call["bbox_mask"] is None
assert call["mask_raw"] is None # No mask when no detection
assert call["silhouette"] is None # No silhouette when no detection
assert call["segmentation_input"] is None
assert call["label"] is None # No label when no detection
assert call["confidence"] is None # No confidence when no detection
def test_pipeline_visualizer_uses_cached_detection_on_no_detection() -> None:
"""Test that visualizer reuses last valid detection when current frame has no detection.
@@ -818,8 +822,8 @@ def test_pipeline_visualizer_uses_cached_detection_on_no_detection() -> None:
mock_detector.track.side_effect = [
[mock_result], # Frame 0: valid detection
[mock_result], # Frame 1: valid detection
[], # Frame 2: no detection
[], # Frame 3: no detection
[], # Frame 2: no detection
[], # Frame 3: no detection
]
mock_yolo.return_value = mock_detector
@@ -835,7 +839,12 @@ def test_pipeline_visualizer_uses_cached_detection_on_no_detection() -> None:
dummy_mask = np.random.randint(0, 256, (480, 640), dtype=np.uint8)
dummy_bbox_mask = (100, 100, 200, 300)
dummy_bbox_frame = (100, 100, 200, 300)
mock_select_person.return_value = (dummy_mask, dummy_bbox_mask, dummy_bbox_frame, 1)
mock_select_person.return_value = (
dummy_mask,
dummy_bbox_mask,
dummy_bbox_frame,
1,
)
# Setup mock mask_to_silhouette to return valid silhouette
dummy_silhouette = np.random.rand(64, 44).astype(np.float32)
@@ -856,9 +865,8 @@ def test_pipeline_visualizer_uses_cached_detection_on_no_detection() -> None:
visualize=True,
)
# Replace the visualizer with our mock
mock_viz = MockVisualizer()
pipeline._visualizer = mock_viz # type: ignore[assignment]
setattr(pipeline, "_visualizer", mock_viz)
# Run pipeline
_ = pipeline.run()
@@ -886,9 +894,57 @@ def test_pipeline_visualizer_uses_cached_detection_on_no_detection() -> None:
"not None/blank"
)
# The cached masks should be copies (different objects) to prevent mutation issues
segmentation_inputs = [
call["segmentation_input"] for call in mock_viz.update_calls
]
bbox_mask_calls = [call["bbox_mask"] for call in mock_viz.update_calls]
assert segmentation_inputs[0] is not None
assert segmentation_inputs[1] is not None
assert segmentation_inputs[2] is not None
assert segmentation_inputs[3] is not None
assert bbox_mask_calls[0] == dummy_bbox_mask
assert bbox_mask_calls[1] == dummy_bbox_mask
assert bbox_mask_calls[2] == dummy_bbox_mask
assert bbox_mask_calls[3] == dummy_bbox_mask
if mask_raw_calls[1] is not None and mask_raw_calls[2] is not None:
assert mask_raw_calls[1] is not mask_raw_calls[2], (
"Cached mask should be a copy, not the same object reference"
)
def test_frame_pacer_emission_count_24_to_15() -> None:
    """Pacing a 24 fps timestamp stream down to 15 fps emits ~62 of 100 frames."""
    from opengait.demo.pipeline import _FramePacer

    source_interval_ns = int(1_000_000_000 / 24)  # 24 fps source clock
    frame_pacer = _FramePacer(15.0)

    emitted_total = 0
    for frame_index in range(100):
        if frame_pacer.should_emit(frame_index * source_interval_ns):
            emitted_total += 1

    # 100 frames at 24 fps re-paced to 15 fps -> roughly 100 * 15/24 = 62.5.
    assert 60 <= emitted_total <= 65
def test_frame_pacer_requires_positive_target_fps() -> None:
    """Constructing a pacer with a non-positive target fps raises ValueError."""
    from opengait.demo.pipeline import _FramePacer

    with pytest.raises(ValueError, match="target_fps must be positive"):
        _ = _FramePacer(0.0)
@pytest.mark.parametrize(
    ("window", "stride", "mode", "expected"),
    [
        # "manual": the expected stride equals the configured stride.
        (30, 30, "manual", 30),
        (30, 7, "manual", 7),
        # "sliding": expected stride is 1 in the sampled case.
        (30, 30, "sliding", 1),
        # "chunked": expected stride equals the window size.
        (30, 1, "chunked", 30),
        (15, 3, "chunked", 15),
    ],
)
def test_resolve_stride_modes(
    window: int,
    stride: int,
    mode: Literal["manual", "sliding", "chunked"],
    expected: int,
) -> None:
    """resolve_stride maps (window, stride, mode) onto the effective stride."""
    from opengait.demo.pipeline import resolve_stride

    assert resolve_stride(window, stride, mode) == expected
+171
View File
@@ -0,0 +1,171 @@
from __future__ import annotations
from pathlib import Path
from typing import cast
from unittest import mock
import numpy as np
import pytest
from opengait.demo.input import create_source
from opengait.demo.visualizer import (
DISPLAY_HEIGHT,
DISPLAY_WIDTH,
ImageArray,
OpenCVVisualizer,
)
from opengait.demo.window import select_person
REPO_ROOT = Path(__file__).resolve().parents[2]
SAMPLE_VIDEO_PATH = REPO_ROOT / "assets" / "sample.mp4"
YOLO_MODEL_PATH = REPO_ROOT / "ckpt" / "yolo11n-seg.pt"
def test_prepare_raw_view_float_mask_has_visible_signal() -> None:
    """A float mask in [0, 1] renders as uint8 at display size with visible pixels.

    Visibility is checked by comparing against a rendering of an all-zero
    mask, restricted to the region above the bottom 40-pixel strip
    (presumably a status/label bar drawn by the visualizer -- confirm
    against OpenCVVisualizer).
    """
    viz = OpenCVVisualizer()

    mask_float = np.zeros((64, 64), dtype=np.float32)
    mask_float[16:48, 16:48] = 1.0
    rendered = viz._prepare_raw_view(cast(ImageArray, mask_float))

    assert rendered.dtype == np.uint8
    # Use the shared display constants instead of hardcoded 256x176 so this
    # assertion stays consistent with the other visualizer tests in this file.
    assert rendered.shape == (DISPLAY_HEIGHT, DISPLAY_WIDTH, 3)

    mask_zero = np.zeros((64, 64), dtype=np.float32)
    rendered_zero = viz._prepare_raw_view(cast(ImageArray, mask_zero))

    # int16 arithmetic avoids uint8 wraparound when differencing the renders.
    roi = slice(0, DISPLAY_HEIGHT - 40)
    diff = np.abs(rendered[roi].astype(np.int16) - rendered_zero[roi].astype(np.int16))
    assert int(np.count_nonzero(diff)) > 0
def test_prepare_raw_view_handles_values_slightly_above_one() -> None:
    """Float masks marginally exceeding 1.0 still render visible pixels."""
    visualizer = OpenCVVisualizer()

    overshoot_mask = np.zeros((64, 64), dtype=np.float32)
    overshoot_mask[20:40, 20:40] = 1.0001  # just past the nominal [0, 1] range

    rendered = visualizer._prepare_raw_view(cast(ImageArray, overshoot_mask))

    # Inspect one channel above the bottom 40-pixel strip.
    visible_region = rendered[: DISPLAY_HEIGHT - 40, :, 0]
    assert int(np.count_nonzero(visible_region)) > 0
def test_segmentation_view_is_normalized_only_shape() -> None:
    """The segmentation view is always emitted at the fixed display size."""
    visualizer = OpenCVVisualizer()

    empty_mask = np.zeros((480, 640), dtype=np.uint8)
    silhouette = np.random.rand(64, 44).astype(np.float32)
    bbox = (0, 0, 100, 100)

    view = visualizer._prepare_segmentation_view(
        cast(ImageArray, empty_mask), silhouette, bbox
    )

    assert view.shape == (DISPLAY_HEIGHT, DISPLAY_WIDTH, 3)
def test_update_toggles_raw_window_with_r_key() -> None:
    """Pressing 'r' toggles the raw-mask window; 'q' makes update return False.

    All cv2 window calls are mocked, so no GUI is needed. The mocked
    cv2.waitKey feeds the key sequence: 'r' (open raw window), 'r' (close
    it), 'q' (quit).
    """
    viz = OpenCVVisualizer()

    frame = np.zeros((240, 320, 3), dtype=np.uint8)
    mask = np.zeros((240, 320), dtype=np.uint8)
    mask[20:100, 30:120] = 255
    sil = np.random.rand(64, 44).astype(np.float32)
    seg_input = np.random.rand(4, 64, 44).astype(np.float32)

    def call_update() -> bool:
        # All three update calls use identical arguments; only the mocked
        # keypress consumed by cv2.waitKey differs between them.
        return viz.update(
            frame,
            (10, 10, 120, 150),
            (10, 10, 120, 150),
            1,
            cast(ImageArray, mask),
            sil,
            seg_input,
            None,
            None,
            15.0,
        )

    with (
        mock.patch("cv2.namedWindow") as named_window,
        mock.patch("cv2.imshow"),
        mock.patch("cv2.destroyWindow") as destroy_window,
        mock.patch("cv2.waitKey", side_effect=[ord("r"), ord("r"), ord("q")]),
    ):
        # First 'r': raw window is switched on and created.
        assert call_update()
        assert viz.show_raw_window is True
        assert viz._raw_window_created is True

        # Second 'r': raw window is switched off and destroyed.
        assert call_update()
        assert viz.show_raw_window is False
        assert viz._raw_window_created is False
        assert destroy_window.called

        # 'q': update signals that the display loop should stop.
        assert call_update() is False

    assert named_window.called
def test_sample_video_raw_mask_shape_range_and_render_signal() -> None:
    """End-to-end smoke test: run YOLO segmentation on the bundled sample
    video and validate each raw person mask plus its rendered raw view.

    Skips when the sample video, the YOLO checkpoint, or the ultralytics
    package is unavailable, so it only runs in a fully provisioned checkout.
    """
    if not SAMPLE_VIDEO_PATH.is_file():
        pytest.skip(f"Missing sample video: {SAMPLE_VIDEO_PATH}")
    if not YOLO_MODEL_PATH.is_file():
        pytest.skip(f"Missing YOLO model file: {YOLO_MODEL_PATH}")
    ultralytics = pytest.importorskip("ultralytics")
    yolo_cls = getattr(ultralytics, "YOLO")
    viz = OpenCVVisualizer()
    detector = yolo_cls(str(YOLO_MODEL_PATH))
    masks_seen = 0
    rendered_nonzero: list[int] = []
    # max_frames=30 keeps this integration test reasonably fast.
    for frame, _meta in create_source(str(SAMPLE_VIDEO_PATH), max_frames=30):
        # classes=[0] restricts detection to class id 0 (presumably the COCO
        # "person" class -- confirm against the model's label map);
        # persist=True carries tracker state across frames.
        detections = detector.track(
            frame,
            persist=True,
            verbose=False,
            classes=[0],
            device="cpu",
        )
        if not isinstance(detections, list) or not detections:
            continue
        selected = select_person(detections[0])
        if selected is None:
            continue
        mask_raw, _, _, _ = selected
        masks_seen += 1
        arr = np.asarray(mask_raw)
        # Raw mask contract checked here: 2-D, non-empty, numeric dtype,
        # values within [0, 255], and at least one foreground pixel.
        assert arr.ndim == 2
        assert arr.shape[0] > 0 and arr.shape[1] > 0
        assert np.issubdtype(arr.dtype, np.number)
        assert float(arr.min()) >= 0.0
        assert float(arr.max()) <= 255.0
        assert int(np.count_nonzero(arr)) > 0
        # Rendering must leave visible pixels above the bottom 40-pixel strip
        # (presumably a label bar -- confirm against OpenCVVisualizer).
        rendered = viz._prepare_raw_view(arr)
        roi = rendered[: DISPLAY_HEIGHT - 40, :, 0]
        rendered_nonzero.append(int(np.count_nonzero(roi)))
    assert masks_seen > 0
    assert min(rendered_nonzero) > 0