feat!: reorganize detection and tracking pipeline

Refactor the package into common, schema, detection, and tracking namespaces and move dataset-specific ActualTest utilities into tests/support.

Add a pluggable detection stack with typed protocols, pydantic-settings config, loguru-based runner logging, cvmmap and headless video sources, NATS and parquet sinks, and a structured coco-wholebody133 payload path.

Teach tracking replay loading to consume parquet detection directories directly, preserve empty frames, and keep the video-to-parquet-to-tracking workflow usable for offline E2E runs.

Vendor the local mmcv and xtcocotools wheels under Git LFS, update uv sources/lock state, and refresh the mmcv build so mmcv.ops loads successfully with the current torch+cu130 environment.
This commit is contained in:
2026-03-26 16:24:27 +08:00
parent f1a2372b3c
commit 2c0d51ab31
56 changed files with 5179 additions and 889 deletions
+127
View File
@@ -0,0 +1,127 @@
from pathlib import Path
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
from tests.support.actual_test import load_actual_test_scene, load_actual_test_segment_bundles
def _write_parquet(path: Path, rows: list[dict[str, object]]) -> None:
    """Write ``rows`` to a parquet file at ``path``.

    Missing parent directories of ``path`` are created first, so callers can
    point at a nested location under a fresh temporary directory.

    Args:
        path: Destination parquet file.
        rows: One dict per table row, converted with ``pa.Table.from_pylist``.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    table = pa.Table.from_pylist(rows)
    pq.write_table(table, path)
def _sample_rtmpose_detection() -> tuple[list[float], list[list[float]], list[float]]:
keypoints_xy = np.zeros((133, 2), dtype=np.float64)
scores = np.zeros((133,), dtype=np.float64)
keypoints_xy[5] = [10.0, 20.0]
keypoints_xy[6] = [30.0, 20.0]
keypoints_xy[11] = [12.0, 60.0]
keypoints_xy[12] = [28.0, 60.0]
keypoints_xy[0] = [20.0, 8.0]
scores[[0, 5, 6, 11, 12]] = 1.0
return [8.0, 4.0, 32.0, 64.0], keypoints_xy.tolist(), scores.tolist()
def test_load_actual_test_parquet_scene_and_segment(tmp_path: Path) -> None:
    """Load a two-camera parquet dataset and check the scene and one bundle.

    The fixture writes camera parameters for ports 5602 and 5603 plus, for each
    port, a detection file holding an empty frame 689 and a single-detection
    frame 690. The loaders are then asked for one frame starting at 690.
    """
    dataset_root = tmp_path / "ActualTest_WeiHua"

    def camera_row(name: str, port: int, tvec: list[float]) -> dict[str, object]:
        # The two cameras differ only in name, port and translation.
        return {
            "name": name,
            "port": port,
            "intrinsic": {
                "camera_matrix": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
                "distortion_coefficients": [0.0, 0.0, 0.0, 0.0, 0.0],
            },
            "extrinsic": {"rvec": [0.0, 0.0, 0.0], "tvec": tvec},
            "resolution": {"width": 640, "height": 480},
        }

    _write_parquet(
        dataset_root / "camera_params" / "camera_params.parquet",
        [
            camera_row("AF_02", 5602, [0.0, 0.0, 0.0]),
            camera_row("AF_03", 5603, [1.0, 0.0, 0.0]),
        ],
    )

    box, keypoints_xy, scores = _sample_rtmpose_detection()
    # Both cameras get the same content: frame 689 has no detections, frame 690 has one.
    detection_rows = [
        {"frame_index": 689, "boxes": [], "kps": [], "kps_scores": []},
        {"frame_index": 690, "boxes": [box], "kps": [keypoints_xy], "kps_scores": [scores]},
    ]
    expected_camera_names = ["5602", "5603"]
    for camera_name in expected_camera_names:
        _write_parquet(
            dataset_root / "Segment_1" / f"{camera_name}_detected.parquet",
            detection_rows,
        )

    scene = load_actual_test_scene(dataset_root)
    bundles = load_actual_test_segment_bundles(
        dataset_root,
        "Segment_1",
        frame_start=690,
        max_frames=1,
    )

    # Cameras are expected to be named after their port, not the "AF_*" name.
    assert [camera.name for camera in scene.cameras] == expected_camera_names
    # NOTE(review): with zero rotation, tvec [1, 0, 0] mapping to pose_T [-1, 0, 0]
    # is consistent with pose_T being the camera position - confirm in the loader.
    np.testing.assert_allclose(scene.cameras[0].pose_T, [0.0, 0.0, 0.0])
    np.testing.assert_allclose(scene.cameras[1].pose_T, [-1.0, 0.0, 0.0])

    assert len(bundles) == 1
    bundle = bundles[0]
    assert [view.camera_name for view in bundle.views] == expected_camera_names
    first_view = bundle.views[0]
    assert first_view.frame_index == 690

    # [20, 60] is the midpoint of sample keypoints 11 and 12.
    hip_middle_xy = first_view.detections[0].keypoints[BODY20_INDEX_BY_NAME["hip_middle"], :2]
    np.testing.assert_allclose(hip_middle_xy, [20.0, 60.0])
def test_load_actual_test_keeps_partial_camera_frames(tmp_path: Path) -> None:
    """Check that a frame seen by only one camera still yields a full bundle.

    Camera 5602 has detections for frames 690 and 691, while camera 5603 only
    has frame 690. The bundle for frame 691 must still list both cameras, with
    an empty detection tuple for 5603.
    """
    dataset_root = tmp_path / "ActualTest_WeiHua"
    segment_dir = dataset_root / "Segment_1"

    # The two cameras differ only in name, port and the x component of tvec.
    camera_rows = [
        {
            "name": name,
            "port": port,
            "intrinsic": {
                "camera_matrix": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
                "distortion_coefficients": [0.0, 0.0, 0.0, 0.0, 0.0],
            },
            "extrinsic": {"rvec": [0.0, 0.0, 0.0], "tvec": [tvec_x, 0.0, 0.0]},
            "resolution": {"width": 640, "height": 480},
        }
        for name, port, tvec_x in (("AF_02", 5602, 0.0), ("AF_03", 5603, 1.0))
    ]
    _write_parquet(dataset_root / "camera_params" / "camera_params.parquet", camera_rows)

    box, keypoints_xy, scores = _sample_rtmpose_detection()

    def single_detection_rows(frame_indices: tuple[int, ...]) -> list[dict[str, object]]:
        # One row per frame, each carrying the same single sample detection.
        return [
            {"frame_index": frame_index, "boxes": [box], "kps": [keypoints_xy], "kps_scores": [scores]}
            for frame_index in frame_indices
        ]

    _write_parquet(segment_dir / "5602_detected.parquet", single_detection_rows((690, 691)))
    _write_parquet(segment_dir / "5603_detected.parquet", single_detection_rows((690,)))

    bundles = load_actual_test_segment_bundles(dataset_root, "Segment_1", frame_start=690)

    bundle_frame_indices = [bundle.views[0].frame_index for bundle in bundles]
    assert bundle_frame_indices == [690, 691]

    # Frame 691 exists only for camera 5602; 5603 must appear with no detections.
    partial_views = bundles[1].views
    assert [view.camera_name for view in partial_views] == ["5602", "5603"]
    assert len(partial_views[0].detections) == 1
    assert partial_views[1].detections == ()