feat!: reorganize detection and tracking pipeline
Refactor the package into common, schema, detection, and tracking namespaces and move dataset-specific ActualTest utilities into tests/support. Add a pluggable detection stack with typed protocols, pydantic-settings config, loguru-based runner logging, cvmmap and headless video sources, NATS and parquet sinks, and a structured coco-wholebody133 payload path. Teach tracking replay loading to consume parquet detection directories directly, preserve empty frames, and keep the video-to-parquet-to-tracking workflow usable for offline E2E runs. Vendor the local mmcv and xtcocotools wheels under Git LFS, update uv sources/lock state, and refresh the mmcv build so mmcv.ops loads successfully with the current torch+cu130 environment.
This commit is contained in:
@@ -0,0 +1 @@
|
||||
"""Test package for support helpers and test-local utilities."""
|
||||
@@ -0,0 +1 @@
|
||||
"""Test-only support helpers."""
|
||||
@@ -0,0 +1,186 @@
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pyarrow.parquet as pq
|
||||
from beartype import beartype
|
||||
from loguru import logger
|
||||
|
||||
from pose_tracking_exp.common.normalization import infer_bbox_from_keypoints, normalize_rtmpose_body20
|
||||
from pose_tracking_exp.schema import CameraCalibration, CameraFrame, FrameBundle, PoseDetection, SceneConfig, TrackerConfig
|
||||
from pose_tracking_exp.tracking import PoseTracker
|
||||
|
||||
_NOMINAL_FRAME_PERIOD_NS = 33_333_333
|
||||
|
||||
|
||||
@beartype
def load_actual_test_scene(root: Path) -> SceneConfig:
    """Build a :class:`SceneConfig` from the ActualTest ``camera_params`` parquet table.

    The parquet rows come from the ChArUco/OpenCV calibration side, so the
    stored ``rvec`` / ``tvec`` are world->camera extrinsics. The RPT-facing
    camera pose is derived later from this canonical OpenCV form.
    """

    def _calibration_from_row(row: dict) -> CameraCalibration:
        # Rodrigues turns the compact 3-vector rotation into a full 3x3 matrix.
        rvec = np.asarray(row["extrinsic"]["rvec"], dtype=np.float64)
        rotation, _ = cv2.Rodrigues(rvec.reshape(3, 1))
        return CameraCalibration.from_opencv_extrinsics(
            name=str(row["port"]),
            width=int(row["resolution"]["width"]),
            height=int(row["resolution"]["height"]),
            K=np.asarray(row["intrinsic"]["camera_matrix"], dtype=np.float64),
            DC=np.asarray(row["intrinsic"]["distortion_coefficients"], dtype=np.float64).reshape(-1),
            R=np.asarray(rotation, dtype=np.float64),
            T=np.asarray(row["extrinsic"]["tvec"], dtype=np.float64).reshape(3),
            rvec=rvec.reshape(3),
        )

    table_path = root / "camera_params" / "camera_params.parquet"
    cameras = [_calibration_from_row(row) for row in pq.read_table(table_path).to_pylist()]
    return SceneConfig(
        room_size=np.asarray([20.0, 20.0, 8.0], dtype=np.float64),
        room_center=np.asarray([0.0, 0.0, 2.0], dtype=np.float64),
        cameras=tuple(sorted(cameras, key=lambda camera: camera.name)),
    )
|
||||
|
||||
|
||||
@beartype
def load_actual_test_segment_bundles(
    root: Path,
    segment_name: str,
    *,
    frame_start: int = 690,
    frame_stop: int | None = None,
    max_frames: int | None = None,
    min_cameras_with_rows: int = 1,
    min_visible_joints: int = 6,
) -> list[FrameBundle]:
    """Load per-camera ``*_detected.parquet`` files of a segment into FrameBundles.

    Rows before ``frame_start`` (or at/after ``frame_stop``) are skipped.
    Detections with fewer than ``min_visible_joints`` confident keypoints are
    dropped, but a frame row whose detections are all dropped is still kept so
    that empty frames survive. Bundles receive synthetic timestamps at a
    nominal ~30 fps cadence.
    """
    segment_root = root / segment_name
    by_camera: dict[str, dict[int, tuple[PoseDetection, ...]]] = {}

    for parquet_path in sorted(segment_root.glob("*_detected.parquet")):
        camera_name = parquet_path.name.removesuffix("_detected.parquet")
        frames: dict[int, tuple[PoseDetection, ...]] = {}
        for row in pq.read_table(parquet_path).to_pylist():
            frame_index = int(row["frame_index"])
            if frame_index < frame_start:
                continue
            if frame_stop is not None and frame_index >= frame_stop:
                continue

            boxes = row["boxes"]
            keypoints_batch = row["kps"]
            confidence_batch = row["kps_scores"]
            if not (len(boxes) == len(keypoints_batch) == len(confidence_batch)):
                raise ValueError(
                    f"Mismatched detection arrays for camera {camera_name} frame {frame_index}: "
                    f"{len(boxes)=}, {len(keypoints_batch)=}, {len(confidence_batch)=}."
                )

            kept: list[PoseDetection] = []
            for box, keypoints_xy, confidences in zip(boxes, keypoints_batch, confidence_batch, strict=True):
                pose = normalize_rtmpose_body20(
                    np.asarray(keypoints_xy, dtype=np.float64),
                    np.asarray(confidences, dtype=np.float64),
                )
                # Skip detections with too few confidently-seen joints.
                if np.count_nonzero(pose[:, 2] > 0.15) < min_visible_joints:
                    continue
                if len(box) == 4:
                    bbox = np.asarray(box, dtype=np.float64)
                else:
                    # Fall back to a keypoint-derived box when none is stored.
                    bbox = infer_bbox_from_keypoints(pose)
                visible_confidences = pose[pose[:, 2] > 0.0, 2]
                kept.append(
                    PoseDetection(
                        bbox=bbox,
                        bbox_confidence=float(np.mean(visible_confidences)) if visible_confidences.size else 0.0,
                        keypoints=pose,
                    )
                )
            # Keep the frame even when every detection was filtered out.
            frames[frame_index] = tuple(kept)
        by_camera[camera_name] = frames

    if not by_camera:
        return []

    candidate_frames = sorted(set().union(*(set(frames) for frames in by_camera.values())))
    if min_cameras_with_rows > 1:
        candidate_frames = [
            frame_index
            for frame_index in candidate_frames
            if sum(frame_index in frames for frames in by_camera.values()) >= min_cameras_with_rows
        ]
    if max_frames is not None:
        candidate_frames = candidate_frames[:max_frames]

    scene = load_actual_test_scene(root)
    camera_by_name = {camera.name: camera for camera in scene.cameras}
    ordered_camera_names = [camera.name for camera in scene.cameras]
    bundles: list[FrameBundle] = []
    for bundle_index, frame_index in enumerate(candidate_frames):
        timestamp_unix_ns = bundle_index * _NOMINAL_FRAME_PERIOD_NS
        views = tuple(
            CameraFrame(
                camera_name=camera_name,
                frame_index=frame_index,
                timestamp_unix_ns=timestamp_unix_ns,
                # Cameras with no row for this frame get an empty tuple.
                detections=by_camera.get(camera_name, {}).get(frame_index, ()),
                source_size=(camera_by_name[camera_name].width, camera_by_name[camera_name].height),
            )
            for camera_name in ordered_camera_names
        )
        bundles.append(
            FrameBundle(
                bundle_index=bundle_index,
                timestamp_unix_ns=timestamp_unix_ns,
                views=views,
            )
        )
    return bundles
|
||||
|
||||
|
||||
@click.command()
@click.argument("root_path", type=click.Path(path_type=Path, exists=True, file_okay=False))
@click.option("--segment", "segment_name", default="Segment_1", show_default=True)
@click.option("--frame-start", default=690, type=int, show_default=True)
@click.option("--frame-stop", type=int)
@click.option("--max-frames", type=click.IntRange(min=1))
@click.option("--min-camera-rows", default=1, type=click.IntRange(min=1), show_default=True)
@click.option("--max-active-tracks", default=1, type=click.IntRange(min=1), show_default=True)
def main(
    root_path: Path,
    segment_name: str,
    frame_start: int,
    frame_stop: int | None,
    max_frames: int | None,
    min_camera_rows: int,
    max_active_tracks: int,
) -> None:
    """Run the pose tracker over one ActualTest segment and log summary counts."""
    # Route loguru output to stderr so stdout stays usable for shell pipelines.
    logger.remove()
    logger.add(
        click.get_text_stream("stderr"),
        level="INFO",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    )
    scene = load_actual_test_scene(root_path)
    bundles = load_actual_test_segment_bundles(
        root_path,
        segment_name,
        frame_start=frame_start,
        frame_stop=frame_stop,
        max_frames=max_frames,
        min_cameras_with_rows=min_camera_rows,
    )
    tracker = PoseTracker(scene, TrackerConfig(max_active_tracks=max_active_tracks))
    results = tracker.run(bundles)
    active_frames = sum(1 for result in results if result.active_tracks)
    proposal_frames = sum(1 for result in results if result.proposals)
    logger.info(
        "actual_test bundles={} active_frames={} proposal_frames={}",
        len(results),
        active_frames,
        proposal_frames,
    )


if __name__ == "__main__":
    main()
|
||||
@@ -4,8 +4,8 @@ import numpy as np
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from pose_tracking_exp.actualtest import load_actualtest_scene, load_actualtest_segment_bundles
|
||||
from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
|
||||
from tests.support.actual_test import load_actual_test_scene, load_actual_test_segment_bundles
|
||||
|
||||
|
||||
def _write_parquet(path: Path, rows: list[dict[str, object]]) -> None:
|
||||
@@ -25,7 +25,7 @@ def _sample_rtmpose_detection() -> tuple[list[float], list[list[float]], list[fl
|
||||
return [8.0, 4.0, 32.0, 64.0], keypoints_xy.tolist(), scores.tolist()
|
||||
|
||||
|
||||
def test_load_actualtest_parquet_scene_and_segment(tmp_path: Path) -> None:
|
||||
def test_load_actual_test_parquet_scene_and_segment(tmp_path: Path) -> None:
|
||||
root = tmp_path / "ActualTest_WeiHua"
|
||||
_write_parquet(
|
||||
root / "camera_params" / "camera_params.parquet",
|
||||
@@ -62,8 +62,8 @@ def test_load_actualtest_parquet_scene_and_segment(tmp_path: Path) -> None:
|
||||
],
|
||||
)
|
||||
|
||||
scene = load_actualtest_scene(root)
|
||||
bundles = load_actualtest_segment_bundles(root, "Segment_1", frame_start=690, max_frames=1)
|
||||
scene = load_actual_test_scene(root)
|
||||
bundles = load_actual_test_segment_bundles(root, "Segment_1", frame_start=690, max_frames=1)
|
||||
|
||||
assert [camera.name for camera in scene.cameras] == ["5602", "5603"]
|
||||
np.testing.assert_allclose(scene.cameras[0].pose_T, [0.0, 0.0, 0.0])
|
||||
@@ -75,3 +75,53 @@ def test_load_actualtest_parquet_scene_and_segment(tmp_path: Path) -> None:
|
||||
bundles[0].views[0].detections[0].keypoints[BODY20_INDEX_BY_NAME["hip_middle"], :2],
|
||||
[20.0, 60.0],
|
||||
)
|
||||
|
||||
|
||||
def test_load_actual_test_keeps_partial_camera_frames(tmp_path: Path) -> None:
    """A frame missing from one camera yields an empty detections tuple, not a dropped view."""
    root = tmp_path / "ActualTest_WeiHua"

    def camera_row(name: str, port: int, tvec_x: float) -> dict[str, object]:
        # Identity-rotation pinhole camera; only name/port/translation differ.
        return {
            "name": name,
            "port": port,
            "intrinsic": {
                "camera_matrix": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
                "distortion_coefficients": [0.0, 0.0, 0.0, 0.0, 0.0],
            },
            "extrinsic": {"rvec": [0.0, 0.0, 0.0], "tvec": [tvec_x, 0.0, 0.0]},
            "resolution": {"width": 640, "height": 480},
        }

    _write_parquet(
        root / "camera_params" / "camera_params.parquet",
        [camera_row("AF_02", 5602, 0.0), camera_row("AF_03", 5603, 1.0)],
    )
    box, keypoints_xy, scores = _sample_rtmpose_detection()

    def detection_row(frame_index: int) -> dict[str, object]:
        return {"frame_index": frame_index, "boxes": [box], "kps": [keypoints_xy], "kps_scores": [scores]}

    # Camera 5602 covers frames 690 and 691; camera 5603 only frame 690.
    _write_parquet(root / "Segment_1" / "5602_detected.parquet", [detection_row(690), detection_row(691)])
    _write_parquet(root / "Segment_1" / "5603_detected.parquet", [detection_row(690)])

    bundles = load_actual_test_segment_bundles(root, "Segment_1", frame_start=690)

    assert [bundle.views[0].frame_index for bundle in bundles] == [690, 691]
    assert [view.camera_name for view in bundles[1].views] == ["5602", "5603"]
    assert len(bundles[1].views[0].detections) == 1
    assert bundles[1].views[1].detections == ()
|
||||
@@ -8,9 +8,9 @@ import pytest
|
||||
|
||||
pytest.importorskip("rpt")
|
||||
|
||||
from pose_tracking_exp.models import CameraCalibration, SceneConfig
|
||||
from pose_tracking_exp.replay import load_scene_file
|
||||
from pose_tracking_exp.rpt_adapter import build_rpt_config
|
||||
from pose_tracking_exp.schema import CameraCalibration, CameraModel, SceneConfig, parse_camera_model
|
||||
from pose_tracking_exp.tracking.replay_io import load_scene_file
|
||||
from pose_tracking_exp.tracking.rpt_adapter import build_rpt_config
|
||||
|
||||
|
||||
class _CameraArgs(NamedTuple):
|
||||
@@ -19,7 +19,7 @@ class _CameraArgs(NamedTuple):
|
||||
height: int
|
||||
K: np.ndarray
|
||||
DC: np.ndarray
|
||||
model: str
|
||||
model: CameraModel
|
||||
|
||||
|
||||
def _camera_args() -> _CameraArgs:
|
||||
@@ -29,7 +29,7 @@ def _camera_args() -> _CameraArgs:
|
||||
height=480,
|
||||
K=np.asarray([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]], dtype=np.float64),
|
||||
DC=np.zeros(5, dtype=np.float64),
|
||||
model="pinhole",
|
||||
model=parse_camera_model("pinhole"),
|
||||
)
|
||||
|
||||
|
||||
@@ -139,7 +139,7 @@ def test_build_rpt_config_uses_pose_convention(monkeypatch: pytest.MonkeyPatch)
|
||||
captured["min_group_size"] = min_group_size
|
||||
return captured
|
||||
|
||||
monkeypatch.setattr("pose_tracking_exp.rpt_adapter.rpt.make_triangulation_config", fake_make_triangulation_config)
|
||||
monkeypatch.setattr("pose_tracking_exp.tracking.rpt_adapter.rpt.make_triangulation_config", fake_make_triangulation_config)
|
||||
|
||||
build_rpt_config(scene, min_match_score=0.5, min_group_size=2)
|
||||
|
||||
|
||||
@@ -0,0 +1,223 @@
|
||||
from collections.abc import AsyncIterator, Sequence
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pose_tracking_exp.detection.config import (
|
||||
DetectionRunnerConfig,
|
||||
load_detection_runner_config,
|
||||
resolve_instances,
|
||||
)
|
||||
from pose_tracking_exp.detection.runner import (
|
||||
PendingFrame,
|
||||
SourceSlot,
|
||||
run_detection_runner,
|
||||
store_latest_frame,
|
||||
take_pending_batch,
|
||||
)
|
||||
from pose_tracking_exp.schema.detection import PoseDetections, SourceFrame
|
||||
|
||||
|
||||
def test_load_detection_runner_config_from_toml_and_env(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Environment variables override values loaded from the TOML config file."""
    config_path = tmp_path / "runner.toml"
    config_path.write_text(
        'instances = ["front_left", "front_right"]\n'
        'device = "cuda:1"\n'
        'nats_host = "nats://localhost:4222"\n'
        'yolo_checkpoint = "checkpoint/yolo/yolo11_mix_epoch10.pt"\n'
        'pose_checkpoint = "checkpoint/dwpose/best_coco-wholebody_AP_epoch_50.pth"\n'
        "bbox_area_threshold = 2500\n"
        "max_batch_frames = 6\n"
        "max_batch_wait_ms = 3",
        encoding="utf-8",
    )

    # The env var must win over the file's "cuda:1".
    monkeypatch.setenv("POSE_TRACKING_EXP_DETECTION_DEVICE", "cpu")
    config = load_detection_runner_config(config_path)

    assert config.instances == ("front_left", "front_right")
    assert config.device == "cpu"
    assert config.nats_host == "nats://localhost:4222"
    assert config.bbox_area_threshold == 2500
    assert config.max_batch_frames == 6
    assert config.max_batch_wait_ms == 3
|
||||
|
||||
|
||||
def test_resolve_instances_prefers_cli_values() -> None:
    """Explicit CLI instances take precedence over config-file instances."""
    resolved = resolve_instances(("cli_a", "cli_b"), ("cfg_a",))
    assert resolved == ("cli_a", "cli_b")
|
||||
|
||||
|
||||
def test_resolve_instances_falls_back_to_config_values() -> None:
    """With no CLI instances, the config-file values are used."""
    resolved = resolve_instances((), ("cfg_a", "cfg_b"))
    assert resolved == ("cfg_a", "cfg_b")
|
||||
|
||||
|
||||
def test_store_latest_frame_overwrites_pending_frame() -> None:
    """Storing a second frame before dispatch drops the first and keeps the newest."""
    slot = SourceSlot(source_name="front_left")

    def make_frame(frame_index: int, timestamp_unix_ns: int, fill: int) -> SourceFrame:
        return SourceFrame(
            source_name="front_left",
            image_bgr=np.full((1, 1, 3), fill, dtype=np.uint8),
            frame_index=frame_index,
            timestamp_unix_ns=timestamp_unix_ns,
        )

    first = make_frame(1, 100, 0)
    second = make_frame(2, 200, 1)

    store_latest_frame(slot, first)
    store_latest_frame(slot, second)

    # The overwritten frame counts as received AND dropped.
    assert slot.received_frames == 2
    assert slot.dropped_frames == 1
    assert slot.pending_frame is not None
    assert slot.pending_frame.frame is second
|
||||
|
||||
|
||||
def _pending_slot(source_name: str, frame_index: int, timestamp_unix_ns: int) -> SourceSlot:
    """Build a SourceSlot that already holds one pending 1x1 frame."""
    return SourceSlot(
        source_name=source_name,
        pending_frame=PendingFrame(
            source_name=source_name,
            frame=SourceFrame(
                source_name=source_name,
                image_bgr=np.zeros((1, 1, 3), dtype=np.uint8),
                frame_index=frame_index,
                timestamp_unix_ns=timestamp_unix_ns,
            ),
        ),
    )


def test_take_pending_batch_collects_at_most_one_frame_per_source() -> None:
    """The batch takes one pending frame per source, stopping at max_batch_frames."""
    slots = {
        "front_left": _pending_slot("front_left", 11, 110),
        "front_right": _pending_slot("front_right", 22, 220),
        "rear": _pending_slot("rear", 33, 330),
    }

    batch = take_pending_batch(slots, max_batch_frames=2)

    # Only the first two sources (dict order) fit into the batch; their slots
    # are cleared while the third keeps its pending frame for the next batch.
    assert [frame.source_name for frame in batch] == ["front_left", "front_right"]
    assert slots["front_left"].pending_frame is None
    assert slots["front_right"].pending_frame is None
    assert slots["rear"].pending_frame is not None
|
||||
|
||||
|
||||
class StubSource:
    """Frame-source double that replays a fixed, finite tuple of frames."""

    def __init__(self, source_name: str, frames: tuple[SourceFrame, ...]) -> None:
        self.source_name = source_name
        self._canned = frames

    async def frames(self) -> AsyncIterator[SourceFrame]:
        # Yield the canned frames in order, then stop (simulates source EOF).
        for frame in self._canned:
            yield frame
|
||||
|
||||
|
||||
class StubPoseShim:
    """Pose-model double that emits one fixed wholebody detection per input frame."""

    def process_many(self, frames: Sequence[SourceFrame]) -> list[PoseDetections]:
        # Echo each frame's identity fields back so the runner's routing is testable.
        return [
            PoseDetections(
                source_name=frame.source_name,
                frame_index=frame.frame_index,
                source_size=(frame.image_bgr.shape[1], frame.image_bgr.shape[0]),
                boxes_xyxy=np.asarray([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32),
                box_scores=np.asarray([1.0], dtype=np.float32),
                keypoints_xy=np.zeros((1, 133, 2), dtype=np.float32),
                keypoint_scores=np.ones((1, 133), dtype=np.float32),
                timestamp_unix_ns=frame.timestamp_unix_ns,
                keypoint_schema="coco_wholebody133",
            )
            for frame in frames
        ]
|
||||
|
||||
|
||||
class StubSink:
    """Sink double that records every published detection and tracks closure."""

    def __init__(self) -> None:
        # Published payloads, in publication order.
        self.messages: list[PoseDetections] = []
        # Set once aclose() has been awaited.
        self.closed = False

    async def publish_pose(self, detections: PoseDetections) -> None:
        self.messages.append(detections)

    async def aclose(self) -> None:
        self.closed = True
|
||||
|
||||
|
||||
def test_run_detection_runner_publishes_payloads() -> None:
    """The runner drains each stub source and publishes one payload per frame."""

    def single_frame_source(source_name: str, frame_index: int, timestamp_unix_ns: int) -> StubSource:
        frame = SourceFrame(
            source_name=source_name,
            image_bgr=np.zeros((2, 3, 3), dtype=np.uint8),
            frame_index=frame_index,
            timestamp_unix_ns=timestamp_unix_ns,
        )
        return StubSource(source_name, (frame,))

    sink = StubSink()
    sources = (
        single_frame_source("cam0", 1, 100),
        single_frame_source("cam1", 2, 200),
    )
    # Point the checkpoint paths at this test file: they only need to exist.
    config = DetectionRunnerConfig(
        instances=("cam0", "cam1"),
        pose_config_path=Path(__file__),
        yolo_checkpoint=Path(__file__),
        pose_checkpoint=Path(__file__),
        max_batch_frames=2,
    )

    anyio.run(
        run_detection_runner,
        sources,
        StubPoseShim(),
        sink,
        config,
    )

    assert sink.closed is True
    assert [(item.source_name, item.frame_index, item.timestamp_unix_ns) for item in sink.messages] == [
        ("cam0", 1, 100),
        ("cam1", 2, 200),
    ]
|
||||
@@ -0,0 +1,137 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.detection.sinks import ParquetPoseSink
|
||||
from pose_tracking_exp.detection.sources import VideoFrameSource
|
||||
from pose_tracking_exp.schema.detection import PoseDetections
|
||||
from pose_tracking_exp.tracking import load_replay_file
|
||||
|
||||
|
||||
def _write_synthetic_video(path: Path) -> None:
    """Write a tiny 3-frame 8x6 MJPG clip for the video-source test."""
    writer = cv2.VideoWriter(
        str(path),
        cv2.VideoWriter.fourcc(*"MJPG"),
        10.0,
        (8, 6),
    )
    if not writer.isOpened():
        raise RuntimeError("Could not open synthetic video writer.")
    try:
        # Each frame is a solid gray level (0, 32, 64) so frames stay distinct.
        for index in range(3):
            writer.write(np.full((6, 8, 3), index * 32, dtype=np.uint8))
    finally:
        writer.release()
|
||||
|
||||
|
||||
def _sample_wholebody_detection(*, source_name: str, frame_index: int) -> PoseDetections:
    """Build a single-person coco_wholebody133 detection with five visible joints."""
    keypoints_xy = np.zeros((1, 133, 2), dtype=np.float32)
    keypoint_scores = np.zeros((1, 133), dtype=np.float32)
    # nose, shoulders, and hips — just enough to derive the BODY20 midpoints.
    visible = {
        0: (20.0, 8.0),
        5: (10.0, 20.0),
        6: (30.0, 20.0),
        11: (12.0, 60.0),
        12: (28.0, 60.0),
    }
    for joint_index, (x, y) in visible.items():
        keypoints_xy[0, joint_index] = [x, y]
        keypoint_scores[0, joint_index] = 1.0
    return PoseDetections(
        source_name=source_name,
        frame_index=frame_index,
        source_size=(640, 480),
        boxes_xyxy=np.asarray([[8.0, 4.0, 32.0, 64.0]], dtype=np.float32),
        box_scores=np.asarray([0.9], dtype=np.float32),
        keypoints_xy=keypoints_xy,
        keypoint_scores=keypoint_scores,
        timestamp_unix_ns=frame_index * 100_000_000,
        keypoint_schema="coco_wholebody133",
    )
|
||||
|
||||
|
||||
def test_video_frame_source_reads_frames(tmp_path: Path) -> None:
    """The video source yields every frame in order with monotonic timestamps."""
    video_path = tmp_path / "cam0.avi"
    _write_synthetic_video(video_path)
    source = VideoFrameSource(video_path, source_name="cam0")

    async def gather() -> list[tuple[str, int, int, tuple[int, int, int]]]:
        collected: list[tuple[str, int, int, tuple[int, int, int]]] = []
        async for frame in source.frames():
            collected.append(
                (frame.source_name, frame.frame_index, frame.timestamp_unix_ns, frame.image_bgr.shape)
            )
        return collected

    frames = anyio.run(gather)

    assert [item[0] for item in frames] == ["cam0"] * 3
    assert [item[1] for item in frames] == [0, 1, 2]
    assert [item[3] for item in frames] == [(6, 8, 3)] * 3
    # Timestamps are synthetic but must never run backwards.
    assert frames[0][2] <= frames[1][2] <= frames[2][2]
|
||||
|
||||
|
||||
def test_parquet_sink_round_trips_into_tracking_replay(tmp_path: Path) -> None:
    """Detections written by the parquet sink load back through the tracking replay path."""
    output_dir = tmp_path / "detections"
    sink = ParquetPoseSink(output_dir, flush_rows=1)

    empty_detection = PoseDetections(
        source_name="cam0",
        frame_index=1,
        source_size=(640, 480),
        boxes_xyxy=np.empty((0, 4), dtype=np.float32),
        box_scores=np.empty((0,), dtype=np.float32),
        keypoints_xy=np.empty((0, 133, 2), dtype=np.float32),
        keypoint_scores=np.empty((0, 133), dtype=np.float32),
        timestamp_unix_ns=100_000_000,
        keypoint_schema="coco_wholebody133",
    )

    async def write_rows() -> None:
        # Frame 0 carries one person; frame 1 is intentionally empty.
        await sink.publish_pose(_sample_wholebody_detection(source_name="cam0", frame_index=0))
        await sink.publish_pose(empty_detection)
        await sink.aclose()

    anyio.run(write_rows)

    parquet_path = output_dir / "cam0_detected.parquet"
    assert parquet_path.exists()
    assert pq.read_table(parquet_path).num_rows == 2

    scene = {
        "room_size": [6.0, 4.0, 3.0],
        "room_center": [0.0, 0.0, 1.0],
        "cameras": [
            {
                "name": "cam0",
                "width": 640,
                "height": 480,
                "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
                "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
                "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
                "T": [[0.0], [0.0], [0.0]],
            }
        ],
    }
    scene_path = tmp_path / "scene.json"
    scene_path.write_text(json.dumps(scene), encoding="utf-8")

    replay = load_replay_file(scene_path, output_dir)
    frames = replay.frames_by_camera["cam0"]
    assert [frame.frame_index for frame in frames] == [0, 1]
    # The empty frame must be preserved, not dropped, by the replay loader.
    assert frames[1].detections == ()
    np.testing.assert_allclose(
        frames[0].detections[0].keypoints[BODY20_INDEX_BY_NAME["hip_middle"], :2],
        [20.0, 60.0],
    )
|
||||
@@ -1,7 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.kinematics import seed_state_from_pose3d
|
||||
from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.tracking.kinematics import seed_state_from_pose3d
|
||||
|
||||
|
||||
def _sample_pose3d() -> np.ndarray:
|
||||
@@ -38,7 +38,7 @@ def test_seed_state_from_pose3d_does_not_call_least_squares(monkeypatch) -> None
|
||||
def fail_least_squares(*args: object, **kwargs: object) -> object:
|
||||
raise AssertionError("seed_state_from_pose3d should not call scipy.optimize.least_squares")
|
||||
|
||||
monkeypatch.setattr("pose_tracking_exp.kinematics.least_squares", fail_least_squares)
|
||||
monkeypatch.setattr("pose_tracking_exp.tracking.kinematics.least_squares", fail_least_squares)
|
||||
state = seed_state_from_pose3d(_sample_pose3d())
|
||||
|
||||
assert state.parameters.shape == (31,)
|
||||
|
||||
@@ -4,11 +4,17 @@ from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.normalization import normalize_rtmpose_body20
|
||||
from pose_tracking_exp.parajumping import PROTOCOL_HEADER, convert_payload_record, decode_pose_payload
|
||||
from pose_tracking_exp.replay import load_replay_file, load_scene_file
|
||||
from pose_tracking_exp.sync import synchronize_frames
|
||||
from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.common.normalization import normalize_coco_body20, normalize_rtmpose_body20
|
||||
from pose_tracking_exp.detection.cvmmap_payload import (
|
||||
COCO_WHOLEBODY_KEYPOINT_COUNT,
|
||||
PROTOCOL_HEADER,
|
||||
CvmmapPosePayloadCodec,
|
||||
convert_payload_record,
|
||||
decode_pose_payload,
|
||||
)
|
||||
from pose_tracking_exp.schema.detection import PoseDetections
|
||||
from pose_tracking_exp.tracking import load_replay_file, load_scene_file, synchronize_frames
|
||||
|
||||
|
||||
def _encode_payload(
|
||||
@@ -31,7 +37,7 @@ def _encode_payload(
|
||||
+ np.asarray(box_scores, dtype=np.uint8).tobytes()
|
||||
+ int(keypoints_xy.shape[0]).to_bytes(1, "little")
|
||||
+ np.asarray(keypoints_xy, dtype="<u2").tobytes()
|
||||
+ int(keypoint_scores.size).to_bytes(1, "little")
|
||||
+ int(keypoint_scores.shape[0]).to_bytes(1, "little")
|
||||
+ np.asarray(keypoint_scores, dtype=np.uint8).reshape(-1).tobytes()
|
||||
+ int(timestamp_unix_ns).to_bytes(8, "little")
|
||||
)
|
||||
@@ -54,6 +60,23 @@ def test_normalize_rtmpose_body20_derives_midpoints_and_head():
|
||||
np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["head"], :2], [20.0, 8.0])
|
||||
|
||||
|
||||
def test_normalize_coco17_body20_derives_midpoints_and_head():
    """coco17 input yields BODY20 hip/shoulder midpoints and head at the nose."""
    keypoints = np.zeros((17, 2), dtype=np.float64)
    scores = np.zeros((17,), dtype=np.float64)
    # nose, both shoulders, both hips
    visible = {0: [20.0, 8.0], 5: [10.0, 20.0], 6: [30.0, 20.0], 11: [12.0, 60.0], 12: [28.0, 60.0]}
    for index, point in visible.items():
        keypoints[index] = point
        scores[index] = 1.0

    normalized = normalize_coco_body20(keypoints, scores, keypoint_schema="coco17")

    np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["hip_middle"], :2], [20.0, 60.0])
    np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["shoulder_middle"], :2], [20.0, 20.0])
    np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["head"], :2], [20.0, 8.0])
|
||||
|
||||
|
||||
def test_decode_payload_and_convert_record():
|
||||
keypoints_xy = np.zeros((1, 133, 2), dtype=np.uint16)
|
||||
keypoint_scores = np.zeros((1, 133), dtype=np.uint8)
|
||||
@@ -87,6 +110,26 @@ def test_decode_payload_and_convert_record():
|
||||
assert converted["frame_index"] == 7
|
||||
|
||||
|
||||
def test_encode_pose_payload_requires_coco_wholebody133():
    """A wholebody133 payload survives an encode/decode round trip through the codec."""
    codec = CvmmapPosePayloadCodec()
    detections = PoseDetections(
        source_name="cam0",
        frame_index=1,
        source_size=(640, 480),
        boxes_xyxy=np.zeros((1, 4), dtype=np.float32),
        box_scores=np.ones((1,), dtype=np.float32),
        keypoints_xy=np.zeros((1, COCO_WHOLEBODY_KEYPOINT_COUNT, 2), dtype=np.float32),
        keypoint_scores=np.ones((1, COCO_WHOLEBODY_KEYPOINT_COUNT), dtype=np.float32),
        timestamp_unix_ns=123,
        keypoint_schema="coco_wholebody133",
    )

    round_tripped = decode_pose_payload(codec.encode(detections))
    assert round_tripped.frame_index == 1
    assert round_tripped.reference_size == (640, 480)
|
||||
|
||||
|
||||
def test_load_replay_and_synchronize(tmp_path: Path):
|
||||
scene_path = tmp_path / "scene.json"
|
||||
replay_path = tmp_path / "replay.jsonl"
|
||||
@@ -153,4 +196,3 @@ def test_load_replay_and_synchronize(tmp_path: Path):
|
||||
bundles = synchronize_frames(replay, max_skew_ns=20, min_views=2)
|
||||
assert len(bundles) == 1
|
||||
assert {frame.camera_name for frame in bundles[0].views} == {"cam0", "cam1"}
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import pytest
|
||||
|
||||
pytest.importorskip("rpt")
|
||||
|
||||
from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.models import CameraCalibration, CameraFrame, FrameBundle, ProposalCluster, SceneConfig, TrackerConfig
|
||||
from pose_tracking_exp.tracker import PoseTracker
|
||||
from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.schema import CameraCalibration, CameraFrame, FrameBundle, ProposalCluster, SceneConfig, TrackerConfig
|
||||
from pose_tracking_exp.tracking import PoseTracker
|
||||
|
||||
|
||||
def _make_scene() -> SceneConfig:
|
||||
@@ -96,7 +96,7 @@ def test_single_person_mode_caps_active_tracks(monkeypatch) -> None:
|
||||
tracker = PoseTracker(
|
||||
_make_scene(),
|
||||
TrackerConfig(
|
||||
mode="single_person",
|
||||
max_active_tracks=1,
|
||||
tentative_min_age=1,
|
||||
tentative_hits_required=1,
|
||||
tentative_promote_score=0.0,
|
||||
@@ -127,7 +127,7 @@ def test_single_person_mode_reuses_lost_track_id(monkeypatch) -> None:
|
||||
tracker = PoseTracker(
|
||||
_make_scene(),
|
||||
TrackerConfig(
|
||||
mode="single_person",
|
||||
max_active_tracks=1,
|
||||
tentative_min_age=1,
|
||||
tentative_hits_required=1,
|
||||
tentative_promote_score=0.0,
|
||||
|
||||
@@ -6,9 +6,9 @@ import pytest
|
||||
|
||||
pytest.importorskip("rpt")
|
||||
|
||||
from pose_tracking_exp.models import CameraFrame, FrameBundle, PoseDetection, TrackerConfig
|
||||
from pose_tracking_exp.replay import load_scene_file
|
||||
from pose_tracking_exp.tracker import PoseTracker
|
||||
from pose_tracking_exp.schema import CameraFrame, FrameBundle, PoseDetection, TrackerConfig
|
||||
from pose_tracking_exp.tracking import PoseTracker
|
||||
from pose_tracking_exp.tracking.replay_io import load_scene_file
|
||||
|
||||
RPT_ROOT = Path("/home/crosstyan/Code/RapidPoseTriangulation")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user