feat!: reorganize detection and tracking pipeline

Refactor the package into common, schema, detection, and tracking namespaces and move dataset-specific ActualTest utilities into tests/support.

Add a pluggable detection stack with typed protocols, pydantic-settings config, loguru-based runner logging, cvmmap and headless video sources, NATS and parquet sinks, and a structured coco-wholebody133 payload path.

Teach tracking replay loading to consume parquet detection directories directly, preserve empty frames, and keep the video-to-parquet-to-tracking workflow usable for offline E2E runs.

Vendor the local mmcv and xtcocotools wheels under Git LFS, update uv sources/lock state, and refresh the mmcv build so mmcv.ops loads successfully with the current torch+cu130 environment.
2026-03-26 16:24:27 +08:00
parent f1a2372b3c
commit 2c0d51ab31
56 changed files with 5179 additions and 889 deletions
+1
@@ -0,0 +1 @@
vendor/wheels/*.whl filter=lfs diff=lfs merge=lfs -text
+50 -5
@@ -9,13 +9,13 @@ Offline multiview body tracking experiments built around:
## Install
```bash
-uv sync --extra dev
uv sync --group dev
```
## Run
```bash
-uv run pose-tracking-exp run data/scene.json data/replay.jsonl
uv run pose-tracking-exp run_tracking data/scene.json data/replay.jsonl
```
`scene.json` may declare camera extrinsics in either format:
@@ -26,13 +26,58 @@ uv run pose-tracking-exp run data/scene.json data/replay.jsonl
The loader normalizes both to OpenCV extrinsics for reprojection and converts to RPT pose only when building the triangulation config.
If you already have an older hand-authored scene file that stored RPT camera pose directly, set `extrinsic_format` explicitly to `rpt_camera_pose`.
-## Convert ParaJumping Payload Records
## Convert cvmmap Pose Payload Records
```bash
-uv run pose-tracking-exp convert-parajumping input.jsonl output.jsonl
uv run pose-tracking-exp convert-cvmmap-pose input.jsonl output.jsonl
```
-## ActualTest Calibration Caveat
The current cvmmap `.pose` wire format is fixed to `COCO-WholeBody-133` keypoints.
That is a transport compatibility constraint, not a tracker limitation: the tracker-side normalizer accepts both `coco17` and `coco_wholebody133`, because the first 17 body joints share the standard COCO ordering.
References:
- https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html
- https://github.com/jin-s13/COCO-WholeBody
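Because the first 17 body joints line up, a consumer can infer the schema from the joint count alone and slice the body joints out of a whole-body array. A minimal sketch of that idea (these function names are illustrative, not the package's actual API):

```python
import numpy as np

def detect_coco_schema(keypoints_xy: np.ndarray) -> str:
    """Infer the keypoint schema purely from the joint count."""
    if keypoints_xy.shape == (17, 2):
        return "coco17"
    if keypoints_xy.shape == (133, 2):
        return "coco_wholebody133"
    raise ValueError(f"Not a COCO-compatible layout: {keypoints_xy.shape}")

def body17_view(keypoints_xy: np.ndarray) -> np.ndarray:
    # The first 17 rows of a wholebody-133 array are the standard COCO body
    # joints, so body-only consumers can slice instead of remapping indices.
    return keypoints_xy[:17]

wholebody = np.zeros((133, 2))
assert detect_coco_schema(wholebody) == "coco_wholebody133"
assert body17_view(wholebody).shape == (17, 2)
```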
## Run Detection
```bash
uv sync --group dev --group detection
uv run pose-tracking-exp run_detection --config detection.toml camera0 camera1
uv run pose-tracking-exp run_detection --source video --output-dir data/detections --config detection.toml cam0=/data/cam0.mp4 cam1=/data/cam1.mp4
```
The embedded 2D detection module is organized as a stack of swappable shims:
- `FrameSource`: where images come from
- `PoseShim`: object detection + pose estimation backend
- `PoseSink`: where structured detections are published or stored
The default backend is `yolo_rtmpose`, and the heavy runtime dependencies live in the optional `detection` dependency group.
Checkpoint paths are explicit config fields; the code does not hardcode local checkpoint locations.
The only inferred path is the MMPose config path, which is resolved relative to the installed `mmpose` package when `pose_config_path` is omitted.
For offline video runs, the default sink is parquet and writes one `*_detected.parquet` file per source. `run_tracking` can consume that directory directly as replay input.
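The three seams behave like structural interfaces: any object with matching methods can be swapped in, no inheritance required. A stripped-down sketch of the idea using `typing.Protocol`, with simplified signatures that are not the package's exact definitions:

```python
from dataclasses import dataclass, field
from typing import Iterator, Protocol

@dataclass
class Frame:
    source_name: str
    frame_index: int
    image: object  # decoded image array in the real code

class FrameSource(Protocol):
    def frames(self) -> Iterator[Frame]: ...

class PoseShim(Protocol):
    def infer(self, frames: list[Frame]) -> list[dict]: ...

class PoseSink(Protocol):
    def publish(self, detections: dict) -> None: ...

# Any class with a matching `publish` satisfies PoseSink structurally,
# which is what makes the source/shim/sink stack swappable.
@dataclass
class ListSink:
    rows: list[dict] = field(default_factory=list)
    def publish(self, detections: dict) -> None:
        self.rows.append(detections)

sink: PoseSink = ListSink()
sink.publish({"frame_index": 0, "boxes": []})
```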
Example `detection.toml`:
```toml
instances = ["camera0", "camera1"]
device = "cuda"
yolo_checkpoint = "/path/to/yolo_checkpoint.pt"
pose_checkpoint = "/path/to/coco_wholebody_pose_checkpoint.pth"
```
## Actual Test Helper
```bash
uv run --group dev --group detection python -m tests.support.actual_test /mnt/hddl/data/ActualTest_WeiHua --segment Segment_2 --frame-start 1100 --max-frames 120
```
`actual_test` is a test/support helper, not part of the public installed CLI surface.
It keeps the union of per-camera frame indices and fills missing camera rows with empty detections, so later 2-camera stretches are still usable instead of being dropped by a 4-camera intersection.
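The union-with-empty-fill behaviour can be sketched with plain dicts (toy data structures, not the helper's real types):

```python
def union_bundles(per_camera: dict[str, dict[int, list]]) -> dict[int, dict[str, list]]:
    """Keep every frame index seen by any camera; absent cameras get empty detections."""
    all_frames = sorted(set().union(*(frames.keys() for frames in per_camera.values())))
    return {
        index: {cam: frames.get(index, []) for cam, frames in per_camera.items()}
        for index in all_frames
    }

per_camera = {
    "cam0": {100: ["det"], 101: ["det"]},
    "cam1": {101: ["det"], 102: ["det"]},
}
bundles = union_bundles(per_camera)
assert sorted(bundles) == [100, 101, 102]  # union, not intersection
assert bundles[100]["cam1"] == []          # missing camera row filled with empties
```

With an intersection, frame 100 and 102 would be dropped entirely; the union keeps stretches where only a subset of cameras saw the subject.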
## Actual Test Calibration Caveat
`ActualTest_WeiHua/camera_params.parquet` appears to store raw OpenCV extrinsics from the ChArUco pipeline, not camera poses. The tracker now converts those values before calling `RapidPoseTriangulation`, because RPT expects camera centers and camera-to-world rotation.
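The underlying conversion is the standard extrinsics inversion: for OpenCV extrinsics `x_cam = R @ x_world + t`, the camera center is `C = -R.T @ t` and the camera-to-world rotation is `R.T`. A small numpy sketch of that math (the tracker's actual helper names differ):

```python
import numpy as np

def extrinsics_to_camera_pose(R_w2c: np.ndarray, t_w2c: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Invert OpenCV extrinsics x_cam = R @ x_world + t into a camera pose."""
    R_c2w = R_w2c.T            # rotation is orthonormal, so the transpose inverts it
    center = -R_w2c.T @ t_w2c  # camera center expressed in world coordinates
    return R_c2w, center

# Sanity check: the camera center must map to the origin of camera coordinates.
R = np.array([[0.0, -1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
t = np.array([1.0, 2.0, 3.0])
R_c2w, C = extrinsics_to_camera_pose(R, t)
assert np.allclose(R @ C + t, np.zeros(3))
```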
+22 -2
@@ -7,13 +7,14 @@ name = "pose-tracking-exp"
version = "0.1.0"
description = "Offline multiview pose tracking experiment with RPT-backed proposal births"
readme = "README.md"
-requires-python = ">=3.12"
requires-python = ">=3.12,<3.13"
dependencies = [
"anyio>=4.11.0",
"beartype>=0.19.0",
"click>=8.2.1",
"jaxtyping>=0.3.2",
"numpy>=2.1.0",
-"opencv-python>=4.12.0.88",
"opencv-python-headless>=4.12.0.88",
"pyarrow>=21.0.0",
"rapid-pose-triangulation",
"scipy>=1.15.0",
@@ -22,8 +23,24 @@ dependencies = [
[dependency-groups]
dev = [
"basedpyright>=1.31.0",
"jupyterlab>=4.5.6",
"pytest>=8.4.0",
]
detection = [
"cvmmap-client",
"loguru>=0.7.3",
"mmcv",
"mmdet>=3.3.0",
"mmengine>=0.10.7",
"mmpose>=1.3.2",
"nats-py>=2.11.0",
"pydantic>=2.11.7",
"pydantic-settings>=2.0.0",
"torch>=2.7.0",
"torchvision>=0.22.0",
"ultralytics>=8.3.166",
"xtcocotools",
]
[project.scripts]
pose-tracking-exp = "pose_tracking_exp.cli:main"
@@ -33,6 +50,9 @@ packages = ["src/pose_tracking_exp"]
[tool.uv.sources]
rapid-pose-triangulation = { path = "../RapidPoseTriangulation", editable = true }
cvmmap-client = { path = "../cvmmap-python-client", editable = true }
mmcv = { path = "vendor/wheels/mmcv-2.2.0-cp312-cp312-linux_x86_64.whl" }
xtcocotools = { path = "vendor/wheels/xtcocotools-1.14.3-cp312-cp312-linux_x86_64.whl" }
[tool.pytest.ini_options]
testpaths = ["tests"]
+12 -15
@@ -1,40 +1,37 @@
-from pose_tracking_exp.joints import BODY20_JOINT_NAMES, BODY20_OBSERVATION_COUNT
from pose_tracking_exp.common.joints import BODY20_JOINT_NAMES, BODY20_OBSERVATION_COUNT
-from pose_tracking_exp.models import (
from pose_tracking_exp.detection.cvmmap_payload import CvmmapPosePayloadCodec, decode_pose_payload
-ActiveTrackState,
from pose_tracking_exp.schema import (
CameraCalibration,
CameraFrame,
FrameBundle,
PoseDetection,
-ProposalCluster,
ReplaySequence,
SceneConfig,
-TentativeTrackState,
TrackerConfig,
-TrackedFrameResult,
)
-from pose_tracking_exp.parajumping import decode_pose_payload
from pose_tracking_exp.tracking import (
-from pose_tracking_exp.replay import load_replay_file, load_scene_file
PoseTracker,
-from pose_tracking_exp.sync import synchronize_frames
load_parquet_replay_dir,
-from pose_tracking_exp.tracker import PoseTracker
load_replay_file,
load_scene_file,
synchronize_frames,
)
__all__ = [
"BODY20_JOINT_NAMES",
"BODY20_OBSERVATION_COUNT",
-"ActiveTrackState",
"CameraCalibration",
"CameraFrame",
"CvmmapPosePayloadCodec",
"FrameBundle",
"PoseDetection",
"PoseTracker",
-"ProposalCluster",
"load_parquet_replay_dir",
"ReplaySequence",
"SceneConfig",
-"TentativeTrackState",
-"TrackedFrameResult",
"TrackerConfig",
"decode_pose_payload",
"load_replay_file",
"load_scene_file",
"synchronize_frames",
]
+110 -88
@@ -1,15 +1,12 @@
import json
import sys
from pathlib import Path
-from typing import Literal, cast
import click
-from pose_tracking_exp.actualtest import load_actualtest_scene, load_actualtest_segment_bundles
from pose_tracking_exp.detection.cvmmap_payload import convert_payload_jsonl_lines
-from pose_tracking_exp.models import TrackerConfig
from pose_tracking_exp.schema import TrackerConfig
-from pose_tracking_exp.parajumping import convert_payload_jsonl_lines
from pose_tracking_exp.tracking import PoseTracker, load_replay_file, load_scene_file, synchronize_frames
-from pose_tracking_exp.replay import load_replay_file, load_scene_file
-from pose_tracking_exp.sync import synchronize_frames
-from pose_tracking_exp.tracker import PoseTracker
@click.group()
@@ -17,19 +14,120 @@ def main() -> None:
"""Offline multiview pose tracking experiment CLI."""
-@main.command("convert-parajumping")
@main.command("convert-cvmmap-pose")
@click.argument("input_path", type=click.Path(path_type=Path, exists=True, dir_okay=False))
@click.argument("output_path", type=click.Path(path_type=Path, dir_okay=False))
-def convert_parajumping(input_path: Path, output_path: Path) -> None:
def convert_cvmmap_pose(input_path: Path, output_path: Path) -> None:
lines = input_path.read_text(encoding="utf-8").splitlines()
converted = convert_payload_jsonl_lines(lines)
output_path.write_text("\n".join(converted) + ("\n" if converted else ""), encoding="utf-8")
-@main.command("run")
@main.command("run_detection")
@click.argument("inputs", nargs=-1, type=str, required=False)
@click.option(
"--config",
"config_path",
type=click.Path(dir_okay=False, path_type=Path),
default=None,
help="Optional TOML detection runner config file.",
)
@click.option(
"--source",
"source_kind",
type=click.Choice(("cvmmap", "video")),
default="cvmmap",
show_default=True,
help="Frame source implementation to use.",
)
@click.option(
"--sink",
"sink_kind",
type=click.Choice(("auto", "nats", "parquet")),
default="auto",
show_default=True,
help="Output sink. `auto` picks nats for cvmmap and parquet for video.",
)
@click.option(
"--output-dir",
type=click.Path(file_okay=False, path_type=Path),
default=None,
help="Required for parquet sink output.",
)
@click.option(
"--log-level",
default="INFO",
show_default=True,
type=click.Choice(("DEBUG", "INFO", "WARNING", "ERROR")),
)
def run_detection(
inputs: tuple[str, ...],
config_path: Path | None,
source_kind: str,
sink_kind: str,
output_dir: Path | None,
log_level: str,
) -> None:
import anyio
from loguru import logger
from pose_tracking_exp.detection import (
CvmmapFrameSource,
NatsPoseSink,
ParquetPoseSink,
VideoFrameSource,
build_pose_shim,
load_detection_runner_config,
parse_video_input_specs,
resolve_instances,
run_detection_runner,
)
logger.remove()
logger.add(
sys.stderr,
level=log_level,
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}",
)
config = load_detection_runner_config(config_path)
config.validate_runtime_paths()
if source_kind == "cvmmap":
resolved_instances = resolve_instances(inputs, config.instances)
config = config.model_copy(update={"instances": resolved_instances})
sources = tuple(CvmmapFrameSource(instance) for instance in resolved_instances)
else:
video_inputs = parse_video_input_specs(inputs)
sources = tuple(
VideoFrameSource(video_path, source_name=source_name)
for source_name, video_path in video_inputs
)
pose_shim = build_pose_shim(config)
resolved_sink_kind = sink_kind
if resolved_sink_kind == "auto":
resolved_sink_kind = "nats" if source_kind == "cvmmap" else "parquet"
if resolved_sink_kind == "nats":
pose_sink = NatsPoseSink(config.nats_host)
else:
if output_dir is None:
raise click.ClickException("--output-dir is required for parquet sink output.")
pose_sink = ParquetPoseSink(output_dir)
anyio.run(
run_detection_runner,
sources,
pose_shim,
pose_sink,
config,
)
@main.command("run_tracking")
@click.argument("scene_path", type=click.Path(path_type=Path, exists=True, dir_okay=False))
-@click.argument("replay_path", type=click.Path(path_type=Path, exists=True, dir_okay=False))
@click.argument("replay_path", type=click.Path(path_type=Path, exists=True))
-def run(scene_path: Path, replay_path: Path) -> None:
def run_tracking(scene_path: Path, replay_path: Path) -> None:
scene = load_scene_file(scene_path)
replay = load_replay_file(scene_path, replay_path)
config = TrackerConfig()
@@ -52,79 +150,3 @@ def run(scene_path: Path, replay_path: Path) -> None:
for result in results
]
click.echo(json.dumps(payload, indent=2))
@main.command("run-actualtest")
@click.argument("root_path", type=click.Path(path_type=Path, exists=True, file_okay=False))
@click.option("--segment", "segment_name", default="Segment_1", show_default=True)
@click.option("--frame-start", default=690, type=int, show_default=True)
@click.option("--frame-stop", type=int)
@click.option("--max-frames", type=int)
@click.option("--mode", type=click.Choice(["single_person", "general"]), default="single_person", show_default=True)
@click.option("--proposal-min-score", default=0.5, type=float, show_default=True)
@click.option("--tentative-min-age", default=2, type=int, show_default=True)
@click.option("--tentative-hits-required", default=2, type=int, show_default=True)
@click.option("--tentative-promote-score", default=1.2, type=float, show_default=True)
def run_actualtest(
root_path: Path,
segment_name: str,
frame_start: int,
frame_stop: int | None,
max_frames: int | None,
mode: str,
proposal_min_score: float,
tentative_min_age: int,
tentative_hits_required: int,
tentative_promote_score: float,
) -> None:
tracker_mode = cast(Literal["general", "single_person"], mode)
scene = load_actualtest_scene(root_path)
bundles = load_actualtest_segment_bundles(
root_path,
segment_name,
frame_start=frame_start,
frame_stop=frame_stop,
max_frames=max_frames,
)
config = TrackerConfig(
mode=tracker_mode,
proposal_min_score=proposal_min_score,
tentative_min_age=tentative_min_age,
tentative_hits_required=tentative_hits_required,
tentative_promote_score=tentative_promote_score,
)
tracker = PoseTracker(scene, config)
results = tracker.run(bundles)
diagnostics = tracker.diagnostics_snapshot()
payload = {
"segment": segment_name,
"mode": tracker_mode,
"bundle_count": len(results),
"active_track_frames": sum(1 for result in results if result.active_tracks),
"proposal_frames": sum(1 for result in results if result.proposals),
"max_active_tracks": max((len(result.active_tracks) for result in results), default=0),
"diagnostics": {
"match_existing_calls": diagnostics.match_existing_calls,
"match_existing_seconds": diagnostics.match_existing_seconds,
"proposal_build_calls": diagnostics.proposal_build_calls,
"proposal_build_seconds": diagnostics.proposal_build_seconds,
"promotions": diagnostics.promotions,
"reacquisitions": diagnostics.reacquisitions,
"active_updates": diagnostics.active_updates,
"seed_initializations": diagnostics.seed_initializations,
"nonlinear_refinements": diagnostics.nonlinear_refinements,
},
"results": [
{
"bundle_index": result.bundle_index,
"source_frame_index": bundle.views[0].frame_index if bundle.views else -1,
"timestamp_unix_ns": result.timestamp_unix_ns,
"tentative_track_ids": [track.track_id for track in result.tentative_tracks],
"active_track_ids": [track.track_id for track in result.active_tracks],
"lost_track_ids": [track.track_id for track in result.lost_tracks],
"proposal_count": len(result.proposals),
}
for bundle, result in zip(bundles, results, strict=True)
],
}
click.echo(json.dumps(payload, indent=2))
+33
@@ -0,0 +1,33 @@
from pose_tracking_exp.common.camera_math import project_pose
from pose_tracking_exp.common.joints import (
BODY20_INDEX_BY_NAME,
BODY20_JOINT_NAMES,
BODY20_OBSERVATION_COUNT,
COCO_BODY17_INDEX_BY_NAME,
COCO_BODY17_NAMES,
CORE_JOINT_INDICES,
CORE_JOINT_NAMES,
)
from pose_tracking_exp.common.normalization import (
core_reprojection_distance,
infer_bbox_from_keypoints,
normalize_coco_body20,
normalize_openpose25_body20,
normalize_rtmpose_body20,
)
__all__ = [
"BODY20_INDEX_BY_NAME",
"BODY20_JOINT_NAMES",
"BODY20_OBSERVATION_COUNT",
"COCO_BODY17_INDEX_BY_NAME",
"COCO_BODY17_NAMES",
"CORE_JOINT_INDICES",
"CORE_JOINT_NAMES",
"core_reprojection_distance",
"infer_bbox_from_keypoints",
"normalize_coco_body20",
"normalize_openpose25_body20",
"normalize_rtmpose_body20",
"project_pose",
]
@@ -1,8 +1,8 @@
import cv2
import numpy as np
-from pose_tracking_exp.models import CameraCalibration
from pose_tracking_exp.common.tensor_types import Pose3D
-from pose_tracking_exp.tensor_types import Pose3D
from pose_tracking_exp.schema.camera import CameraCalibration
def project_pose(camera: CameraCalibration, pose3d: Pose3D) -> np.ndarray:
@@ -0,0 +1,43 @@
from pathlib import Path
import pyarrow as pa
from pose_tracking_exp.schema.detection import PoseDetections
DETECTED_PARQUET_SUFFIX = "_detected.parquet"
DETECTION_PARQUET_SCHEMA = pa.schema(
[
pa.field("frame_index", pa.int64()),
pa.field("timestamp_unix_ns", pa.int64()),
pa.field("source_width", pa.int32()),
pa.field("source_height", pa.int32()),
pa.field("boxes", pa.list_(pa.list_(pa.float32()))),
pa.field("box_scores", pa.list_(pa.float32())),
pa.field("kps", pa.list_(pa.list_(pa.list_(pa.float32())))),
pa.field("kps_scores", pa.list_(pa.list_(pa.float32()))),
pa.field("keypoint_schema", pa.string()),
]
)
def detection_parquet_path(output_dir: Path, source_name: str) -> Path:
return output_dir / f"{source_name}{DETECTED_PARQUET_SUFFIX}"
def pose_detections_to_row(detections: PoseDetections) -> dict[str, object]:
if detections.box_scores is None:
raise ValueError("Parquet sink requires box_scores to be present.")
if detections.keypoint_scores is None:
raise ValueError("Parquet sink requires keypoint_scores to be present.")
return {
"frame_index": int(detections.frame_index),
"timestamp_unix_ns": int(detections.timestamp_unix_ns),
"source_width": int(detections.source_size[0]),
"source_height": int(detections.source_size[1]),
"boxes": detections.boxes_xyxy.astype("float32", copy=False).tolist(),
"box_scores": detections.box_scores.astype("float32", copy=False).tolist(),
"kps": detections.keypoints_xy.astype("float32", copy=False).tolist(),
"kps_scores": detections.keypoint_scores.astype("float32", copy=False).tolist(),
"keypoint_schema": detections.keypoint_schema,
}
@@ -39,7 +39,7 @@ CORE_JOINT_NAMES: tuple[str, ...] = (
CORE_JOINT_INDICES: tuple[int, ...] = tuple(BODY20_INDEX_BY_NAME[name] for name in CORE_JOINT_NAMES)
-RTMPOSE_BODY17_INDEX_BY_NAME = {
COCO_BODY17_INDEX_BY_NAME = {
"nose": 0,
"eye_left": 1,
"eye_right": 2,
@@ -59,5 +59,8 @@ RTMPOSE_BODY17_INDEX_BY_NAME = {
"ankle_right": 16,
}
-RTMPOSE_BODY17_NAMES = tuple(RTMPOSE_BODY17_INDEX_BY_NAME.keys())
COCO_BODY17_NAMES = tuple(COCO_BODY17_INDEX_BY_NAME.keys())
# RTMPose whole-body uses the standard COCO body-17 ordering for the first 17 joints.
RTMPOSE_BODY17_INDEX_BY_NAME = COCO_BODY17_INDEX_BY_NAME
RTMPOSE_BODY17_NAMES = COCO_BODY17_NAMES
@@ -1,12 +1,51 @@
import math
from collections.abc import Mapping
from typing import Literal
import numpy as np
from beartype import beartype
from jaxtyping import jaxtyped
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME, BODY20_OBSERVATION_COUNT, RTMPOSE_BODY17_INDEX_BY_NAME
from pose_tracking_exp.common.joints import (
-from pose_tracking_exp.tensor_types import FloatArray, JointXY, Pose2D
BODY20_INDEX_BY_NAME,
BODY20_OBSERVATION_COUNT,
COCO_BODY17_INDEX_BY_NAME,
)
from pose_tracking_exp.common.tensor_types import FloatArray, JointXY, Pose2D
def _validate_coco_shape(
keypoints_xy: FloatArray,
confidences: FloatArray,
*,
keypoint_schema: Literal["coco17", "coco_wholebody133"] | None,
) -> Literal["coco17", "coco_wholebody133"]:
if keypoints_xy.ndim != 2 or keypoints_xy.shape[1] != 2:
raise ValueError(
f"Expected keypoints with shape (N, 2), got {keypoints_xy.shape}."
)
if confidences.ndim != 1 or confidences.shape[0] != keypoints_xy.shape[0]:
raise ValueError(
"Expected confidences with shape matching keypoint count. "
f"Got {confidences.shape} for {keypoints_xy.shape}."
)
detected_schema: Literal["coco17", "coco_wholebody133"]
if keypoints_xy.shape[0] == 17:
detected_schema = "coco17"
elif keypoints_xy.shape[0] == 133:
detected_schema = "coco_wholebody133"
else:
raise ValueError(
"Expected COCO-compatible keypoints with 17 or 133 joints, "
f"got {keypoints_xy.shape[0]}."
)
if keypoint_schema is not None and keypoint_schema != detected_schema:
raise ValueError(
f"Expected {keypoint_schema} keypoints, got shape {keypoints_xy.shape}."
)
return detected_schema
def _visible_mean(points: list[tuple[np.ndarray, float]], fallback_xy: np.ndarray) -> tuple[np.ndarray, float]:
@@ -68,18 +107,20 @@ def _normalize_named_keypoints(
@jaxtyped(typechecker=beartype)
-def normalize_rtmpose_body20(
def normalize_coco_body20(
keypoints_xy: FloatArray,
confidences: FloatArray,
*,
keypoint_schema: Literal["coco17", "coco_wholebody133"] | None = None,
) -> Pose2D:
-if keypoints_xy.shape != (133, 2):
_validate_coco_shape(
-raise ValueError(f"Expected RTMPose keypoints with shape (133, 2), got {keypoints_xy.shape}.")
keypoints_xy,
-if confidences.shape != (133,):
confidences,
-raise ValueError(f"Expected RTMPose confidences with shape (133,), got {confidences.shape}.")
keypoint_schema=keypoint_schema,
)
keypoint_map = {
name: (keypoints_xy[source_index], float(confidences[source_index]))
-for name, source_index in RTMPOSE_BODY17_INDEX_BY_NAME.items()
for name, source_index in COCO_BODY17_INDEX_BY_NAME.items()
}
return _normalize_named_keypoints(
keypoint_map,
@@ -89,6 +130,18 @@ def normalize_rtmpose_body20(
)
@jaxtyped(typechecker=beartype)
def normalize_rtmpose_body20(
keypoints_xy: FloatArray,
confidences: FloatArray,
) -> Pose2D:
return normalize_coco_body20(
keypoints_xy,
confidences,
keypoint_schema="coco_wholebody133",
)
@jaxtyped(typechecker=beartype)
def normalize_openpose25_body20(keypoints: FloatArray) -> Pose2D:
if keypoints.shape != (25, 3):
@@ -0,0 +1,58 @@
from pose_tracking_exp.detection.config import (
DEFAULT_BACKEND,
DetectionRunnerConfig,
load_detection_runner_config,
resolve_default_pose_config,
resolve_instances,
)
from pose_tracking_exp.detection.factory import build_pose_shim
from pose_tracking_exp.detection.runner import (
SimpleMovingAverage,
SourceSlot,
run_detection_runner,
store_latest_frame,
take_pending_batch,
)
from pose_tracking_exp.detection.sinks import NatsPoseSink, ParquetPoseSink
from pose_tracking_exp.detection.sources import (
CvmmapFrameSource,
IteratorFrameSource,
VideoFrameSource,
parse_video_input_specs,
)
from pose_tracking_exp.schema.detection import BoxDetections, CocoKeypointSchema, PoseBatchRequest, PoseDetections, SourceFrame
from pose_tracking_exp.detection.yolo_rtmpose import (
WholeBodyPoseEstimator,
YoloRtmposeShim,
build_yolo_rtmpose_shim,
legacy_torch_checkpoint_loading,
)
__all__ = [
"BoxDetections",
"CocoKeypointSchema",
"CvmmapFrameSource",
"DEFAULT_BACKEND",
"DetectionRunnerConfig",
"IteratorFrameSource",
"NatsPoseSink",
"ParquetPoseSink",
"PoseBatchRequest",
"PoseDetections",
"SimpleMovingAverage",
"SourceFrame",
"SourceSlot",
"VideoFrameSource",
"WholeBodyPoseEstimator",
"YoloRtmposeShim",
"build_pose_shim",
"build_yolo_rtmpose_shim",
"legacy_torch_checkpoint_loading",
"load_detection_runner_config",
"parse_video_input_specs",
"resolve_default_pose_config",
"resolve_instances",
"run_detection_runner",
"store_latest_frame",
"take_pending_batch",
]
+147
@@ -0,0 +1,147 @@
import tomllib
from pathlib import Path
from typing import Any, Literal, cast
import click
from pydantic import (
PositiveFloat,
PositiveInt,
ValidationError,
field_validator,
model_validator,
)
from pydantic_settings import (
BaseSettings,
PydanticBaseSettingsSource,
SettingsConfigDict,
)
DEFAULT_BACKEND = "yolo_rtmpose"
ENV_PREFIX = "POSE_TRACKING_EXP_DETECTION_"
POSE_CONFIG_RELATIVE_PATH = Path(
"wholebody_2d_keypoint/rtmpose/ubody/rtmpose-l_8xb64-270e_coco-ubody-wholebody-256x192.py"
)
def resolve_default_pose_config() -> Path:
import mmpose
module_file = getattr(mmpose, "__file__", None)
if module_file is None:
raise FileNotFoundError("Could not locate the installed mmpose package.")
config_path = (
Path(module_file).resolve().parent
/ ".mim"
/ "configs"
/ POSE_CONFIG_RELATIVE_PATH
)
if not config_path.exists():
raise FileNotFoundError(f"Default pose config is missing: {config_path}")
return config_path
class DetectionRunnerConfig(BaseSettings):
model_config = SettingsConfigDict(
env_prefix=ENV_PREFIX,
extra="forbid",
)
instances: tuple[str, ...] = ()
backend: Literal["yolo_rtmpose"] = DEFAULT_BACKEND
device: str = "cuda"
nats_host: str = "nats://localhost:4222"
yolo_checkpoint: Path
yolo_conf_threshold: float = 0.6
pose_checkpoint: Path
pose_config_path: Path | None = None
bbox_area_threshold: PositiveInt = 50 * 50
max_batch_frames: PositiveInt = 8
max_batch_wait_ms: int = 4
slow_frame_budget_seconds: PositiveFloat = 1 / 22
@classmethod
def settings_customise_sources(
cls,
settings_cls: type[BaseSettings],
init_settings: PydanticBaseSettingsSource,
env_settings: PydanticBaseSettingsSource,
dotenv_settings: PydanticBaseSettingsSource,
file_secret_settings: PydanticBaseSettingsSource,
) -> tuple[PydanticBaseSettingsSource, ...]:
return (
env_settings,
init_settings,
dotenv_settings,
file_secret_settings,
)
@field_validator("instances", mode="before")
@classmethod
def _parse_instances(cls, value: object) -> object:
if isinstance(value, str):
return tuple(item.strip() for item in value.split(",") if item.strip())
return value
@field_validator("max_batch_wait_ms")
@classmethod
def _validate_wait_ms(cls, value: int) -> int:
if value < 0:
raise ValueError("max_batch_wait_ms must be non-negative.")
return value
@model_validator(mode="after")
def _resolve_pose_config(self) -> "DetectionRunnerConfig":
if self.pose_config_path is None:
self.pose_config_path = resolve_default_pose_config()
return self
def validate_runtime_paths(self) -> None:
missing: list[Path] = []
for candidate in (
self.yolo_checkpoint,
self.pose_checkpoint,
self.pose_config_path,
):
if candidate is None:
raise FileNotFoundError
if not candidate.exists():
missing.append(candidate)
if missing:
formatted = ", ".join(str(path) for path in missing)
raise click.ClickException(f"Missing runtime assets: {formatted}")
def load_detection_runner_config(config_path: Path | None) -> DetectionRunnerConfig:
config_data: dict[str, object] = {}
if config_path is not None:
with config_path.open("rb") as handle:
parsed = tomllib.load(handle)
if not isinstance(parsed, dict):
raise click.ClickException("Detection runner config must be a TOML table.")
config_data = parsed
try:
# TOML/env values are validated by Pydantic at construction.
return DetectionRunnerConfig(**cast(dict[str, Any], config_data))
except (ValidationError, ValueError, FileNotFoundError) as exc:
raise click.ClickException(str(exc)) from exc
def resolve_instances(
cli_instances: tuple[str, ...],
configured_instances: tuple[str, ...],
) -> tuple[str, ...]:
selected = cli_instances or configured_instances
if not selected:
raise click.ClickException(
"Provide at least one instance on the command line or via config `instances = [...]`."
)
unique_instances: list[str] = []
seen: set[str] = set()
for instance in selected:
if instance in seen:
raise click.ClickException(f"Duplicate instance requested: {instance}")
unique_instances.append(instance)
seen.add(instance)
return tuple(unique_instances)
@@ -0,0 +1,237 @@
"""cvmmap pose payload helpers.
The current `.pose` wire format is fixed-width for COCO-WholeBody-133 keypoints.
That is a protocol compatibility choice, not a tracker limitation: the tracker
normalizer accepts either `coco17` or `coco_wholebody133` because the first
17 body joints share the standard COCO ordering.
References:
- https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html
- https://github.com/jin-s13/COCO-WholeBody
"""
import base64
import json
from dataclasses import dataclass
import numpy as np
from beartype import beartype
from pose_tracking_exp.common.normalization import normalize_coco_body20
from pose_tracking_exp.schema import CameraFrame, PoseDetection
from pose_tracking_exp.schema.detection import PoseDetections
PROTOCOL_HEADER = bytes([0x80]) + b"POSE"
COCO_WHOLEBODY_KEYPOINT_COUNT = 133
@dataclass(slots=True)
class DecodedPosePayload:
frame_index: int
reference_size: tuple[int, int]
timestamp_unix_ns: int
detections: tuple[PoseDetection, ...]
class CvmmapPosePayloadCodec:
def encode(self, detections: PoseDetections) -> bytes:
return encode_pose_payload(detections)
def _read_u8(payload: memoryview, offset: int) -> tuple[int, int]:
return int(payload[offset]), offset + 1
def _read_u16_array(payload: memoryview, offset: int, count: int) -> tuple[np.ndarray, int]:
size = count * 2
array = np.frombuffer(payload[offset : offset + size], dtype="<u2", count=count).astype(np.float64)
return array, offset + size
@beartype
def decode_pose_payload(payload: bytes) -> DecodedPosePayload:
if not payload.startswith(PROTOCOL_HEADER):
raise ValueError("Invalid cvmmap pose payload header.")
view = memoryview(payload)
offset = len(PROTOCOL_HEADER)
frame_index = int.from_bytes(view[offset : offset + 4], "little")
offset += 4
reference_size = tuple(int(x) for x in np.frombuffer(view[offset : offset + 4], dtype="<u2", count=2))
offset += 4
num_bbox = int(view[offset])
offset += 1
bbox_raw, offset = _read_u16_array(view, offset, num_bbox * 4)
bboxes = bbox_raw.reshape(num_bbox, 4) if num_bbox > 0 else np.zeros((0, 4), dtype=np.float64)
num_bbox_conf = int(view[offset])
offset += 1
bbox_confidence = np.frombuffer(view[offset : offset + num_bbox_conf], dtype=np.uint8, count=num_bbox_conf)
offset += num_bbox_conf
num_keypoints = int(view[offset])
offset += 1
keypoints_raw, offset = _read_u16_array(
view,
offset,
num_keypoints * COCO_WHOLEBODY_KEYPOINT_COUNT * 2,
)
keypoints_xy = (
keypoints_raw.reshape(num_keypoints, COCO_WHOLEBODY_KEYPOINT_COUNT, 2)
if num_keypoints > 0
else np.zeros((0, COCO_WHOLEBODY_KEYPOINT_COUNT, 2), dtype=np.float64)
)
num_keypoint_conf = int(view[offset])
offset += 1
keypoint_confidence_count = num_keypoint_conf * COCO_WHOLEBODY_KEYPOINT_COUNT
keypoint_confidence = (
np.frombuffer(
view[offset : offset + keypoint_confidence_count],
dtype=np.uint8,
count=keypoint_confidence_count,
).astype(np.float64)
/ 255.0
)
offset += keypoint_confidence_count
timestamp_unix_ns = int.from_bytes(view[offset : offset + 8], "little")
if num_keypoint_conf > 0 and num_keypoint_conf != num_keypoints:
raise ValueError("Unexpected keypoint confidence set count.")
detection_items: list[PoseDetection] = []
confidences = (
keypoint_confidence.reshape(num_keypoints, COCO_WHOLEBODY_KEYPOINT_COUNT)
if num_keypoints > 0
else np.zeros((0, COCO_WHOLEBODY_KEYPOINT_COUNT), dtype=np.float64)
)
for index in range(num_keypoints):
normalized = normalize_coco_body20(
keypoints_xy[index],
confidences[index],
keypoint_schema="coco_wholebody133",
)
bbox_score = float(bbox_confidence[index] / 255.0) if index < bbox_confidence.shape[0] else 0.0
bbox = bboxes[index] if index < bboxes.shape[0] else np.zeros(4, dtype=np.float64)
detection_items.append(
PoseDetection(
bbox=np.asarray(bbox, dtype=np.float64),
bbox_confidence=bbox_score,
keypoints=np.asarray(normalized, dtype=np.float64),
)
)
return DecodedPosePayload(
frame_index=frame_index,
reference_size=(reference_size[0], reference_size[1]),
timestamp_unix_ns=timestamp_unix_ns,
detections=tuple(detection_items),
)
@beartype
def encode_pose_payload(detections: PoseDetections) -> bytes:
detections.validate()
if detections.keypoint_schema != "coco_wholebody133":
raise ValueError(
"The cvmmap `.pose` payload currently requires `coco_wholebody133` keypoints."
)
frame_index_bytes = int(detections.frame_index).to_bytes(4, "little")
reference_size_bytes = np.asarray(detections.source_size, dtype=np.dtype("<u2")).tobytes()
num_bbox = int(detections.boxes_xyxy.shape[0])
num_bbox_bytes = num_bbox.to_bytes(1, "little")
bbox_bytes = np.ascontiguousarray(
detections.boxes_xyxy.astype(np.uint16),
dtype=np.dtype("<u2"),
).tobytes()
num_bbox_confidence_bytes = bytes([0])
bbox_confidence_bytes = bytes()
if detections.box_scores is not None:
num_bbox_confidence_bytes = int(detections.box_scores.shape[0]).to_bytes(1, "little")
bbox_confidence_bytes = np.ascontiguousarray(
np.clip(detections.box_scores * np.iinfo(np.uint8).max, 0, 255).astype(np.uint8),
dtype=np.dtype("<u1"),
).tobytes()
num_keypoints = int(detections.keypoints_xy.shape[0])
num_keypoints_bytes = num_keypoints.to_bytes(1, "little")
keypoints_bytes = np.ascontiguousarray(
detections.keypoints_xy.astype(np.uint16),
dtype=np.dtype("<u2"),
).tobytes()
num_keypoint_confidence_bytes = bytes([0])
keypoint_confidence_bytes = bytes()
if detections.keypoint_scores is not None:
num_keypoint_confidence_bytes = int(detections.keypoint_scores.shape[0]).to_bytes(1, "little")
keypoint_confidence_bytes = np.ascontiguousarray(
np.clip(detections.keypoint_scores * np.iinfo(np.uint8).max, 0, 255).astype(np.uint8),
dtype=np.dtype("<u1"),
).tobytes()
timestamp_unix_ns_bytes = int(detections.timestamp_unix_ns).to_bytes(8, "little")
return (
PROTOCOL_HEADER
+ frame_index_bytes
+ reference_size_bytes
+ num_bbox_bytes
+ bbox_bytes
+ num_bbox_confidence_bytes
+ bbox_confidence_bytes
+ num_keypoints_bytes
+ keypoints_bytes
+ num_keypoint_confidence_bytes
+ keypoint_confidence_bytes
+ timestamp_unix_ns_bytes
)
@beartype
def frame_from_payload(camera_name: str, payload: bytes) -> CameraFrame:
decoded = decode_pose_payload(payload)
return CameraFrame(
camera_name=camera_name,
frame_index=decoded.frame_index,
timestamp_unix_ns=decoded.timestamp_unix_ns,
detections=decoded.detections,
source_size=decoded.reference_size,
)
@beartype
def convert_payload_record(record: dict[str, object]) -> dict[str, object]:
camera_name = str(record["camera"])
payload_b64 = str(record["payload_b64"])
frame = frame_from_payload(camera_name, base64.b64decode(payload_b64))
return {
"camera": frame.camera_name,
"frame_index": frame.frame_index,
"timestamp_unix_ns": frame.timestamp_unix_ns,
"source_size": list(frame.source_size),
"detections": [
{
"bbox": detection.bbox.tolist(),
"bbox_confidence": detection.bbox_confidence,
"keypoints": detection.keypoints.tolist(),
}
for detection in frame.detections
],
}
@beartype
def convert_payload_jsonl_lines(lines: list[str]) -> list[str]:
output_lines: list[str] = []
for line in lines:
if not line.strip():
continue
record = json.loads(line)
converted = convert_payload_record(record)
output_lines.append(json.dumps(converted))
return output_lines
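# Illustrative round trip, assuming `pose_detections` is a valid
# `coco_wholebody133` PoseDetections instance:
#
#   payload = encode_pose_payload(pose_detections)
#   decoded = decode_pose_payload(payload)
#   assert decoded.frame_index == pose_detections.frame_index
#   assert len(decoded.detections) == pose_detections.keypoints_xy.shape[0]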
@@ -0,0 +1,3 @@
from pose_tracking_exp.detection.sources.cvmmap import CvmmapFrameSource
__all__ = ["CvmmapFrameSource"]
@@ -0,0 +1,21 @@
import click
from pose_tracking_exp.detection.config import DEFAULT_BACKEND, DetectionRunnerConfig
from pose_tracking_exp.detection.protocols import PoseShim
from pose_tracking_exp.detection.yolo_rtmpose import build_yolo_rtmpose_shim
def build_pose_shim(config: DetectionRunnerConfig) -> PoseShim:
if config.backend == DEFAULT_BACKEND:
if config.pose_config_path is None:
raise click.ClickException("pose_config_path must be resolved before building the backend.")
return build_yolo_rtmpose_shim(
yolo_checkpoint=config.yolo_checkpoint,
yolo_conf_threshold=config.yolo_conf_threshold,
pose_checkpoint=config.pose_checkpoint,
pose_config_path=config.pose_config_path,
device=config.device,
max_batch_frames=config.max_batch_frames,
bbox_area_threshold=config.bbox_area_threshold,
)
raise click.ClickException(f"Unsupported detection backend: {config.backend}")
@@ -0,0 +1,3 @@
from pose_tracking_exp.detection.sinks.nats import NatsPoseSink
__all__ = ["NatsPoseSink"]
@@ -0,0 +1,49 @@
from collections.abc import AsyncIterator, Sequence
from typing import Protocol
import numpy as np
from pose_tracking_exp.schema.detection import BoxDetections, PoseBatchRequest, PoseDetections, SourceFrame
class FrameSource(Protocol):
source_name: str
def frames(self) -> AsyncIterator[SourceFrame]:
...
class ObjectDetector(Protocol):
def detect_many(
self,
frames_rgb: Sequence[np.ndarray],
*,
classes: Sequence[int] | None = None,
) -> list[BoxDetections]:
...
class PoseEstimator(Protocol):
def estimate_batch(
self,
requests: Sequence[PoseBatchRequest],
) -> list[tuple[np.ndarray, np.ndarray]]:
...
class PoseShim(Protocol):
def process_many(self, frames: Sequence[SourceFrame]) -> list[PoseDetections]:
...
class PosePayloadCodec(Protocol):
def encode(self, detections: PoseDetections) -> bytes:
...
class PoseSink(Protocol):
async def publish_pose(self, detections: PoseDetections) -> None:
...
async def aclose(self) -> None:
...
@@ -0,0 +1,238 @@
from dataclasses import dataclass
from time import perf_counter
import anyio
from anyio.to_thread import run_sync as to_thread_run_sync
from loguru import logger
from pose_tracking_exp.detection.config import DetectionRunnerConfig
from pose_tracking_exp.detection.protocols import FrameSource, PoseShim, PoseSink
from pose_tracking_exp.schema.detection import SourceFrame
PERFORMANCE_WINDOW = 60
@dataclass(slots=True)
class PendingFrame:
source_name: str
frame: SourceFrame
@dataclass(slots=True)
class SourceSlot:
source_name: str
pending_frame: PendingFrame | None = None
last_seen_frame_index: int | None = None
received_frames: int = 0
dropped_frames: int = 0
processed_frames: int = 0
published_frames: int = 0
closed: bool = False
def store_latest_frame(slot: SourceSlot, frame: SourceFrame) -> None:
slot.received_frames += 1
# Latest-wins: overwrite any unprocessed frame and count it as dropped.
if slot.pending_frame is not None:
slot.dropped_frames += 1
slot.pending_frame = PendingFrame(source_name=slot.source_name, frame=frame)
def pending_source_count(slots: dict[str, SourceSlot]) -> int:
return sum(slot.pending_frame is not None for slot in slots.values())
def take_pending_batch(
slots: dict[str, SourceSlot],
max_batch_frames: int,
) -> list[PendingFrame]:
batch: list[PendingFrame] = []
for slot in slots.values():
if slot.pending_frame is None:
continue
batch.append(slot.pending_frame)
slot.pending_frame = None
if len(batch) >= max_batch_frames:
break
return batch
def all_sources_closed_and_idle(slots: dict[str, SourceSlot]) -> bool:
return all(slot.closed and slot.pending_frame is None for slot in slots.values())
class SimpleMovingAverage:
"""Cumulative mean for the first `window` samples, then a running (exponential-style) approximation of a windowed mean that avoids buffering samples."""
def __init__(self, window: int) -> None:
self._window = window
self._sum = 0.0
self._size = 0
self._value: float | None = None
def next(self, value: float) -> float:
if self._size < self._window:
self._sum += value
self._size += 1
self._value = self._sum / self._size
else:
self._sum -= self._sum / self._window
self._sum += value
self._value = self._sum / self._window
return float(self._value)
def get(self) -> float | None:
return self._value
async def run_detection_runner(
sources: tuple[FrameSource, ...],
pose_shim: PoseShim,
pose_sink: PoseSink,
config: DetectionRunnerConfig,
) -> None:
performance_sma = SimpleMovingAverage(PERFORMANCE_WINDOW)
batch_size_sma = SimpleMovingAverage(PERFORMANCE_WINDOW)
scheduler_condition = anyio.Condition()
slots = {
source.source_name: SourceSlot(source_name=source.source_name) for source in sources
}
inference_limiter = anyio.CapacityLimiter(1)
async def ingest_loop(source: FrameSource) -> None:
logger.info(
"[{}] source initialized; waiting for first frame metadata",
source.source_name,
)
try:
async for frame in source.frames():
should_log_init = False
previous_frame_index: int | None = None
async with scheduler_condition:
slot = slots[source.source_name]
previous_frame_index = slot.last_seen_frame_index
should_log_init = previous_frame_index is None
slot.last_seen_frame_index = frame.frame_index
store_latest_frame(slot, frame)
scheduler_condition.notify_all()
if should_log_init:
logger.info(
"[{}] initialized with frame shape={}x{} frame_index={}",
source.source_name,
frame.image_bgr.shape[1],
frame.image_bgr.shape[0],
frame.frame_index,
)
elif previous_frame_index is not None and frame.frame_index != previous_frame_index + 1:
logger.warning(
"[{}] skip frame detected: {} -> {}",
source.source_name,
previous_frame_index,
frame.frame_index,
)
finally:
async with scheduler_condition:
slots[source.source_name].closed = True
scheduler_condition.notify_all()
logger.info("[{}] source closed", source.source_name)
async def scheduler_loop() -> None:
while True:
async with scheduler_condition:
while pending_source_count(slots) == 0:
if all_sources_closed_and_idle(slots):
return
await scheduler_condition.wait()
if (
pending_source_count(slots) < config.max_batch_frames
and config.max_batch_wait_ms > 0
and not all_sources_closed_and_idle(slots)
):
with anyio.move_on_after(config.max_batch_wait_ms / 1000):
while (
pending_source_count(slots) < config.max_batch_frames
and not all_sources_closed_and_idle(slots)
):
await scheduler_condition.wait()
batch = take_pending_batch(slots, config.max_batch_frames)
start = perf_counter()
pose_infos = await to_thread_run_sync(
pose_shim.process_many,
[item.frame for item in batch],
limiter=inference_limiter,
)
elapsed = perf_counter() - start
average_elapsed = elapsed / len(batch)
performance_sma.next(average_elapsed)
batch_size_sma.next(float(len(batch)))
if average_elapsed > config.slow_frame_budget_seconds:
logger.warning(
"slow batch: size={} total={:.2f}ms avg={:.2f}ms",
len(batch),
elapsed * 1000,
average_elapsed * 1000,
)
for pending_frame, pose_info in zip(batch, pose_infos, strict=True):
slot = slots[pending_frame.source_name]
slot.processed_frames += 1
await pose_sink.publish_pose(pose_info)
slot.published_frames += 1
if pose_info.boxes_xyxy.shape[0] == 0:
logger.debug(
"[{}:{}] no detections",
pending_frame.source_name,
pending_frame.frame.frame_index,
)
async def log_performance() -> None:
while True:
await anyio.sleep(5)
async with scheduler_condition:
if all_sources_closed_and_idle(slots):
return
slot_snapshot = {
source_name: (
slot.received_frames,
slot.dropped_frames,
slot.processed_frames,
slot.published_frames,
)
for source_name, slot in slots.items()
}
per_source = " ".join(
(
f"[{source_name}]"
f" recv={received}"
f" drop={dropped}"
f" proc={processed}"
f" pub={published}"
)
for source_name, (received, dropped, processed, published) in slot_snapshot.items()
)
if value := performance_sma.get():
batch_size = batch_size_sma.get() or 1.0
logger.info(
"{:.2f}it/s ({:.2f}ms/frame) batch={:.2f} {}",
1 / value,
value * 1000,
batch_size,
per_source,
)
else:
logger.info("warming up {}", per_source)
try:
async with anyio.create_task_group() as task_group:
for source in sources:
task_group.start_soon(ingest_loop, source)
task_group.start_soon(log_performance)
await scheduler_loop()
task_group.cancel_scope.cancel()
finally:
await pose_sink.aclose()
@@ -0,0 +1,7 @@
from pose_tracking_exp.detection.sinks.nats import NatsPoseSink
from pose_tracking_exp.detection.sinks.parquet import ParquetPoseSink
__all__ = [
"NatsPoseSink",
"ParquetPoseSink",
]
@@ -0,0 +1,29 @@
from pose_tracking_exp.detection.cvmmap_payload import CvmmapPosePayloadCodec
from pose_tracking_exp.schema.detection import PoseDetections
class NatsPoseSink:
def __init__(self, nats_host: str) -> None:
self._nats_host = nats_host
self._client = None
self._codec = CvmmapPosePayloadCodec()
async def _client_or_connect(self):
if self._client is None:
from nats.aio.client import Client as NatsClient
client = NatsClient()
await client.connect(servers=[self._nats_host])
self._client = client
return self._client
async def publish_pose(self, detections: PoseDetections) -> None:
client = await self._client_or_connect()
payload = self._codec.encode(detections)
await client.publish(f"{detections.source_name}.pose", payload)
async def aclose(self) -> None:
if self._client is None:
return
await self._client.drain()
self._client = None
@@ -0,0 +1,51 @@
from pathlib import Path
import pyarrow as pa
import pyarrow.parquet as pq
from pose_tracking_exp.common.detection_parquet import (
DETECTION_PARQUET_SCHEMA,
detection_parquet_path,
pose_detections_to_row,
)
from pose_tracking_exp.schema.detection import PoseDetections
class ParquetPoseSink:
def __init__(self, output_dir: Path, *, flush_rows: int = 64) -> None:
self._output_dir = output_dir
self._flush_rows = flush_rows
self._buffers: dict[str, list[dict[str, object]]] = {}
self._writers: dict[str, pq.ParquetWriter] = {}
self._output_dir.mkdir(parents=True, exist_ok=True)
def _writer_for(self, source_name: str) -> pq.ParquetWriter:
writer = self._writers.get(source_name)
if writer is not None:
return writer
path = detection_parquet_path(self._output_dir, source_name)
writer = pq.ParquetWriter(path, DETECTION_PARQUET_SCHEMA)
self._writers[source_name] = writer
return writer
def _flush_source(self, source_name: str) -> None:
rows = self._buffers.get(source_name)
if not rows:
return
table = pa.Table.from_pylist(rows, schema=DETECTION_PARQUET_SCHEMA)
self._writer_for(source_name).write_table(table)
rows.clear()
async def publish_pose(self, detections: PoseDetections) -> None:
rows = self._buffers.setdefault(detections.source_name, [])
rows.append(pose_detections_to_row(detections))
if len(rows) >= self._flush_rows:
self._flush_source(detections.source_name)
async def aclose(self) -> None:
for source_name in tuple(self._buffers):
self._flush_source(source_name)
for writer in self._writers.values():
writer.close()
self._writers.clear()
@@ -0,0 +1,10 @@
from pose_tracking_exp.detection.sources.adapters import IteratorFrameSource
from pose_tracking_exp.detection.sources.cvmmap import CvmmapFrameSource
from pose_tracking_exp.detection.sources.video import VideoFrameSource, parse_video_input_specs
__all__ = [
"CvmmapFrameSource",
"IteratorFrameSource",
"VideoFrameSource",
"parse_video_input_specs",
]
@@ -0,0 +1,47 @@
from collections.abc import AsyncIterator, Callable, Iterator
from typing import Protocol
from anyio.to_thread import run_sync as to_thread_run_sync
from pose_tracking_exp.schema.detection import SourceFrame
class BlockingFrameProducer(Protocol):
source_name: str
def iter_frames(self) -> Iterator[SourceFrame]:
...
def _next_or_none(iterator: Iterator[SourceFrame]) -> SourceFrame | None:
return next(iterator, None)
class IteratorFrameSource:
def __init__(
self,
source_name: str,
iterator_factory: Callable[[], Iterator[SourceFrame]],
) -> None:
self.source_name = source_name
self._iterator_factory = iterator_factory
async def frames(self) -> AsyncIterator[SourceFrame]:
iterator = self._iterator_factory()
try:
while True:
frame = await to_thread_run_sync(_next_or_none, iterator)
if frame is None:
return
yield frame
finally:
close = getattr(iterator, "close", None)
if callable(close):
await to_thread_run_sync(close)
def wrap_blocking_source(producer: BlockingFrameProducer) -> IteratorFrameSource:
return IteratorFrameSource(
source_name=producer.source_name,
iterator_factory=producer.iter_frames,
)
@@ -0,0 +1,22 @@
from collections.abc import AsyncIterator
import numpy as np
from pose_tracking_exp.schema.detection import SourceFrame
class CvmmapFrameSource:
def __init__(self, source_name: str) -> None:
self.source_name = source_name
async def frames(self) -> AsyncIterator[SourceFrame]:
from cvmmap import CvMmapClient
client = CvMmapClient(self.source_name)
async for frame, meta in client:
yield SourceFrame(
source_name=self.source_name,
image_bgr=np.array(frame, copy=True),
frame_index=meta.frame_count,
timestamp_unix_ns=meta.timestamp_ns,
)
@@ -0,0 +1,83 @@
from collections.abc import AsyncIterator, Iterator, Sequence
from pathlib import Path
import click
import cv2
import numpy as np
from pose_tracking_exp.detection.sources.adapters import wrap_blocking_source
from pose_tracking_exp.schema.detection import SourceFrame
_DEFAULT_VIDEO_FPS = 30.0
def parse_video_input_specs(specs: Sequence[str]) -> tuple[tuple[str, Path], ...]:
inputs: list[tuple[str, Path]] = []
seen: set[str] = set()
for spec in specs:
source_name, separator, raw_path = spec.partition("=")
if separator == "" or not source_name or not raw_path:
raise click.ClickException(
f"Video input must be in source=path form, got: {spec!r}"
)
if source_name in seen:
raise click.ClickException(f"Duplicate video source requested: {source_name}")
path = Path(raw_path).expanduser().resolve()
if not path.exists():
raise click.ClickException(f"Missing video input: {path}")
inputs.append((source_name, path))
seen.add(source_name)
if not inputs:
raise click.ClickException("Provide at least one --input source=path entry.")
return tuple(inputs)
class VideoFrameSource:
def __init__(
self,
video_path: Path,
*,
source_name: str | None = None,
default_fps: float = _DEFAULT_VIDEO_FPS,
) -> None:
self.video_path = video_path
self.source_name = source_name or video_path.stem
self._default_fps = default_fps
self._adapter = wrap_blocking_source(self)
async def frames(self) -> AsyncIterator[SourceFrame]:
async for frame in self._adapter.frames():
yield frame
def iter_frames(self) -> Iterator[SourceFrame]:
capture = cv2.VideoCapture(str(self.video_path))
if not capture.isOpened():
capture.release()
raise click.ClickException(f"Could not open video input: {self.video_path}")
fps = float(capture.get(cv2.CAP_PROP_FPS))
if not np.isfinite(fps) or fps <= 0:
fps = self._default_fps
frame_index = 0
try:
while True:
success, frame = capture.read()
if not success or frame is None:
return
pos_msec = float(capture.get(cv2.CAP_PROP_POS_MSEC))
if np.isfinite(pos_msec) and (pos_msec > 0.0 or frame_index == 0):
timestamp_unix_ns = int(round(pos_msec * 1_000_000.0))
else:
timestamp_unix_ns = int(round((frame_index / fps) * 1_000_000_000.0))
yield SourceFrame(
source_name=self.source_name,
image_bgr=np.ascontiguousarray(frame),
frame_index=frame_index,
timestamp_unix_ns=timestamp_unix_ns,
)
frame_index += 1
finally:
capture.release()
@@ -0,0 +1,263 @@
from contextlib import contextmanager
from collections.abc import Sequence
from pathlib import Path
from typing import Any, cast
import cv2
import numpy as np
from pose_tracking_exp.detection.protocols import ObjectDetector, PoseEstimator
from pose_tracking_exp.schema.detection import BoxDetections, PoseBatchRequest, PoseDetections, SourceFrame
COCO_PERSON_CLASS_ID = 0
class YoloObjectDetector:
def __init__(
self,
checkpoint: Path,
*,
device: str,
conf_threshold: float,
max_batch_frames: int,
) -> None:
import ultralytics
self._model: Any = ultralytics.YOLO(str(checkpoint))
self._device = device
self._conf_threshold = conf_threshold
self._max_batch_frames = max_batch_frames
def detect_many(
self,
frames_rgb: Sequence[np.ndarray],
*,
classes: Sequence[int] | None = None,
) -> list[BoxDetections]:
if not frames_rgb:
return []
frames_list = list(frames_rgb)
results = self._model(
frames_list,
conf=self._conf_threshold,
device=self._device,
classes=classes,
batch=min(self._max_batch_frames, len(frames_list)),
verbose=False,
)
detections: list[BoxDetections] = []
for frame_rgb, result in zip(frames_list, results, strict=True):
boxes = result.boxes
if boxes is None:
detections.append(
BoxDetections(
boxes_xyxy=np.empty((0, 4), dtype=np.float32),
scores=np.empty((0,), dtype=np.float32),
reference_frame_shape=(frame_rgb.shape[0], frame_rgb.shape[1]),
)
)
continue
detections.append(
BoxDetections(
boxes_xyxy=boxes.xyxy.cpu().numpy(),
scores=boxes.conf.cpu().numpy(),
reference_frame_shape=(frame_rgb.shape[0], frame_rgb.shape[1]),
)
)
return detections
@contextmanager
def legacy_torch_checkpoint_loading():
"""Temporarily make torch.load default to weights_only=False so legacy pickled checkpoints still load under newer torch defaults."""
import torch
original_torch_load = torch.load
def patched_torch_load(*args, **kwargs):
kwargs.setdefault("weights_only", False)
return original_torch_load(*args, **kwargs)
torch.load = patched_torch_load
try:
yield
finally:
torch.load = original_torch_load
class WholeBodyPoseEstimator:
def __init__(self, config_path: Path, checkpoint_path: Path, *, device: str) -> None:
from mmengine.dataset import Compose, pseudo_collate
from mmengine.registry import init_default_scope
from mmpose.apis import init_model
self._compose = Compose
self._pseudo_collate = pseudo_collate
self._init_default_scope = init_default_scope
with legacy_torch_checkpoint_loading():
self._model: Any = init_model(str(config_path), str(checkpoint_path), device=device)
model_cfg = cast(Any, self._model.cfg)
self._scope = cast(str | None, model_cfg.get("default_scope", "mmpose"))
self._pipeline = self._compose(cast(Any, model_cfg.test_dataloader.dataset.pipeline))
def estimate_batch(
self,
requests: Sequence[PoseBatchRequest],
) -> list[tuple[np.ndarray, np.ndarray]]:
import torch
if not requests:
return []
if self._scope is not None:
self._init_default_scope(self._scope)
torch_module = cast(Any, torch)
data_list = []
detection_counts: list[int] = []
for request in requests:
boxes = np.asarray(request.boxes_xyxy, dtype=np.float32)
detections = int(boxes.shape[0])
detection_counts.append(detections)
for bbox in boxes:
data_info = {
"img": request.image_rgb,
"bbox": bbox[None],
"bbox_score": np.ones(1, dtype=np.float32),
}
data_info.update(cast(Any, self._model.dataset_meta))
data_list.append(self._pipeline(data_info))
samples = []
if data_list:
batch = self._pseudo_collate(data_list)
with torch_module.no_grad():
samples = self._model.test_step(batch)
outputs: list[tuple[np.ndarray, np.ndarray]] = []
offset = 0
for detections in detection_counts:
keypoints = np.zeros((detections, 133, 2), dtype=np.float32)
scores = np.zeros((detections, 133), dtype=np.float32)
for index in range(detections):
pred_instances = samples[offset + index].pred_instances
try:
keypoints[index] = np.asarray(pred_instances.keypoints[0], dtype=np.float32)
scores[index] = np.asarray(
pred_instances.keypoint_scores[0],
dtype=np.float32,
)
except IndexError:
continue
outputs.append((keypoints, scores))
offset += detections
return outputs
class YoloRtmposeShim:
def __init__(
self,
object_detector: ObjectDetector,
pose_estimator: PoseEstimator,
*,
bbox_area_threshold: int,
) -> None:
self._object_detector = object_detector
self._pose_estimator = pose_estimator
self._bbox_area_threshold = bbox_area_threshold
def process_many(self, frames: Sequence[SourceFrame]) -> list[PoseDetections]:
if not frames:
return []
frames_rgb = [
cv2.cvtColor(frame.image_bgr, cv2.COLOR_BGR2RGB)
for frame in frames
]
detections = self._object_detector.detect_many(
frames_rgb,
classes=[COCO_PERSON_CLASS_ID],
)
results = [
PoseDetections(
source_name=frame.source_name,
frame_index=frame.frame_index,
source_size=(frame.image_bgr.shape[1], frame.image_bgr.shape[0]),
boxes_xyxy=np.empty((0, 4), dtype=np.float32),
box_scores=np.empty((0,), dtype=np.float32),
keypoints_xy=np.empty((0, 133, 2), dtype=np.float32),
keypoint_scores=np.empty((0, 133), dtype=np.float32),
timestamp_unix_ns=frame.timestamp_unix_ns,
keypoint_schema="coco_wholebody133",
)
for frame in frames
]
pose_requests: list[PoseBatchRequest] = []
detection_mapping: list[tuple[int, BoxDetections]] = []
for index, (frame, frame_rgb, detection_result) in enumerate(
zip(frames, frames_rgb, detections, strict=True)
):
filtered_result = detection_result.filter_by_area(self._bbox_area_threshold)
if filtered_result.boxes_num == 0:
continue
pose_requests.append(
PoseBatchRequest(
image_rgb=frame_rgb,
boxes_xyxy=filtered_result.boxes_xyxy,
)
)
detection_mapping.append((index, filtered_result))
pose_outputs = self._pose_estimator.estimate_batch(pose_requests)
for (frame_index, detection_result), (keypoints, keypoint_scores) in zip(
detection_mapping,
pose_outputs,
strict=True,
):
source_frame = frames[frame_index]
results[frame_index] = PoseDetections(
source_name=source_frame.source_name,
frame_index=source_frame.frame_index,
source_size=detection_result.reference_size,
boxes_xyxy=detection_result.boxes_xyxy,
box_scores=detection_result.scores,
keypoints_xy=keypoints,
keypoint_scores=keypoint_scores,
timestamp_unix_ns=source_frame.timestamp_unix_ns,
keypoint_schema="coco_wholebody133",
)
return results
def build_yolo_rtmpose_shim(
*,
yolo_checkpoint: Path,
yolo_conf_threshold: float,
pose_checkpoint: Path,
pose_config_path: Path,
device: str,
max_batch_frames: int,
bbox_area_threshold: int,
) -> YoloRtmposeShim:
object_detector = YoloObjectDetector(
yolo_checkpoint,
device=device,
conf_threshold=yolo_conf_threshold,
max_batch_frames=max_batch_frames,
)
pose_estimator = WholeBodyPoseEstimator(
pose_config_path,
pose_checkpoint,
device=device,
)
return YoloRtmposeShim(
object_detector,
pose_estimator,
bbox_area_threshold=bbox_area_threshold,
)
@@ -1,224 +0,0 @@
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal
import cv2
import numpy as np
from pose_tracking_exp.tensor_types import Matrix3, Pose2D, Pose3D, Vector3
@dataclass(slots=True)
class CameraCalibration:
name: str
width: int
height: int
K: Matrix3
DC: np.ndarray
# Canonical in-repo convention: OpenCV world->camera extrinsics.
R: Matrix3
T: Vector3
model: str = "pinhole"
rvec: np.ndarray | None = None
pose_R: Matrix3 = field(init=False)
pose_T: Vector3 = field(init=False)
def __post_init__(self) -> None:
self.K = np.asarray(self.K, dtype=np.float64).reshape(3, 3)
self.DC = np.asarray(self.DC, dtype=np.float64).reshape(-1)
self.R = np.asarray(self.R, dtype=np.float64).reshape(3, 3)
self.T = np.asarray(self.T, dtype=np.float64).reshape(3)
if self.rvec is None:
rvec, _ = cv2.Rodrigues(self.R)
self.rvec = np.asarray(rvec, dtype=np.float64).reshape(3)
else:
self.rvec = np.asarray(self.rvec, dtype=np.float64).reshape(3)
self.pose_R = self.R.T
self.pose_T = -(self.pose_R @ self.T)
@classmethod
def from_opencv_extrinsics(
cls,
*,
name: str,
width: int,
height: int,
K: Matrix3,
DC: np.ndarray,
R: Matrix3,
T: Vector3,
model: str = "pinhole",
rvec: np.ndarray | None = None,
) -> "CameraCalibration":
return cls(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=R,
T=T,
model=model,
rvec=rvec,
)
@classmethod
def from_rpt_pose(
cls,
*,
name: str,
width: int,
height: int,
K: Matrix3,
DC: np.ndarray,
R: Matrix3,
T: Vector3,
model: str = "pinhole",
) -> "CameraCalibration":
pose_R = np.asarray(R, dtype=np.float64).reshape(3, 3)
pose_T = np.asarray(T, dtype=np.float64).reshape(3)
rotation = pose_R.T
translation = -(rotation @ pose_T)
rvec, _ = cv2.Rodrigues(rotation)
return cls(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=rotation,
T=translation,
model=model,
rvec=np.asarray(rvec, dtype=np.float64).reshape(3),
)
@dataclass(slots=True)
class SceneConfig:
room_size: Vector3
room_center: Vector3
cameras: tuple[CameraCalibration, ...]
@dataclass(slots=True)
class PoseDetection:
bbox: np.ndarray
bbox_confidence: float
keypoints: Pose2D
@dataclass(slots=True)
class CameraFrame:
camera_name: str
frame_index: int
timestamp_unix_ns: int
detections: tuple[PoseDetection, ...]
source_size: tuple[int, int]
@dataclass(slots=True)
class FrameBundle:
bundle_index: int
timestamp_unix_ns: int
views: tuple[CameraFrame, ...]
@dataclass(slots=True)
class ReplaySequence:
scene_path: Path
replay_path: Path
frames_by_camera: dict[str, list[CameraFrame]]
@dataclass(slots=True)
class ProposalCluster:
pose3d: Pose3D
root: Vector3
source_views: frozenset[str]
support_size: int
mean_score: float
@dataclass(slots=True)
class SkeletonState:
parameters: np.ndarray
beta: np.ndarray
pose3d: Pose3D
@dataclass(slots=True)
class TentativeTrackState:
track_id: int
state: Literal["tentative"] = "tentative"
age: int = 0
misses: int = 0
score: float = 0.0
last_bundle_index: int = -1
root: Vector3 = field(default_factory=lambda: np.zeros(3, dtype=np.float64))
pose3d: Pose3D = field(default_factory=lambda: np.zeros((20, 4), dtype=np.float64))
evidence_buffer: list[Pose3D] = field(default_factory=list)
@dataclass(slots=True)
class ActiveTrackState:
track_id: int
status: Literal["active", "lost"] = "active"
misses: int = 0
lost_age: int = 0
score: float = 0.0
last_bundle_index: int = -1
skeleton: SkeletonState = field(
default_factory=lambda: SkeletonState(
parameters=np.zeros(31, dtype=np.float64),
beta=np.ones(8, dtype=np.float64),
pose3d=np.zeros((20, 4), dtype=np.float64),
)
)
noise_scale: np.ndarray = field(default_factory=lambda: np.full((20,), 9.0, dtype=np.float64))
TrackState = TentativeTrackState | ActiveTrackState
@dataclass(slots=True)
class TrackedFrameResult:
bundle_index: int
timestamp_unix_ns: int
tentative_tracks: tuple[TentativeTrackState, ...]
active_tracks: tuple[ActiveTrackState, ...]
lost_tracks: tuple[ActiveTrackState, ...]
proposals: tuple[ProposalCluster, ...]
@dataclass(slots=True)
class TrackerDiagnostics:
match_existing_calls: int = 0
match_existing_seconds: float = 0.0
proposal_build_calls: int = 0
proposal_build_seconds: float = 0.0
promotions: int = 0
reacquisitions: int = 0
active_updates: int = 0
seed_initializations: int = 0
nonlinear_refinements: int = 0
@dataclass(slots=True)
class TrackerConfig:
mode: Literal["general", "single_person"] = "general"
min_bundle_views: int = 2
max_sync_skew_ns: int = 12_000_000
tentative_buffer_size: int = 5
tentative_min_age: int = 3
tentative_hits_required: int = 3
tentative_promote_score: float = 3.0
tentative_max_misses: int = 2
active_min_views: int = 2
active_core_gate_px: float = 80.0
active_joint_gate_px: float = 120.0
active_miss_to_lost: int = 3
lost_delete_age: int = 15
proposal_match_distance_m: float = 0.45
noise_ema: float = 0.85
proposal_min_score: float = 0.9
proposal_min_group_size: int = 1
@@ -1,147 +0,0 @@
import base64
import json
from dataclasses import dataclass
import numpy as np
from beartype import beartype
from pose_tracking_exp.models import CameraFrame, PoseDetection
from pose_tracking_exp.normalization import normalize_rtmpose_body20
PROTOCOL_HEADER = bytes([0x80]) + b"POSE"
POSE_JOINT_COUNT = 133
@dataclass(slots=True)
class DecodedPosePayload:
frame_index: int
reference_size: tuple[int, int]
timestamp_unix_ns: int
detections: tuple[PoseDetection, ...]
def _read_u8(payload: memoryview, offset: int) -> tuple[int, int]:
return int(payload[offset]), offset + 1
def _read_u16_array(payload: memoryview, offset: int, count: int) -> tuple[np.ndarray, int]:
size = count * 2
array = np.frombuffer(payload[offset : offset + size], dtype="<u2", count=count).astype(np.float64)
return array, offset + size
@beartype
def decode_pose_payload(payload: bytes) -> DecodedPosePayload:
if not payload.startswith(PROTOCOL_HEADER):
raise ValueError("Invalid ParaJumping pose payload header.")
view = memoryview(payload)
offset = len(PROTOCOL_HEADER)
frame_index = int.from_bytes(view[offset : offset + 4], "little")
offset += 4
reference_size = tuple(int(x) for x in np.frombuffer(view[offset : offset + 4], dtype="<u2", count=2))
offset += 4
num_bbox = int(view[offset])
offset += 1
bbox_raw, offset = _read_u16_array(view, offset, num_bbox * 4)
bboxes = bbox_raw.reshape(num_bbox, 4) if num_bbox > 0 else np.zeros((0, 4), dtype=np.float64)
num_bbox_conf = int(view[offset])
offset += 1
bbox_confidence = np.frombuffer(view[offset : offset + num_bbox_conf], dtype=np.uint8, count=num_bbox_conf)
offset += num_bbox_conf
num_keypoints = int(view[offset])
offset += 1
keypoints_raw, offset = _read_u16_array(view, offset, num_keypoints * POSE_JOINT_COUNT * 2)
keypoints_xy = (
keypoints_raw.reshape(num_keypoints, POSE_JOINT_COUNT, 2)
if num_keypoints > 0
else np.zeros((0, POSE_JOINT_COUNT, 2), dtype=np.float64)
)
num_keypoint_conf = int(view[offset])
offset += 1
keypoint_confidence = (
np.frombuffer(view[offset : offset + num_keypoint_conf], dtype=np.uint8, count=num_keypoint_conf).astype(np.float64)
/ 255.0
)
offset += num_keypoint_conf
timestamp_unix_ns = int.from_bytes(view[offset : offset + 8], "little")
if num_keypoint_conf > 0 and num_keypoint_conf != num_keypoints * POSE_JOINT_COUNT:
raise ValueError("Unexpected keypoint confidence payload length.")
detection_items: list[PoseDetection] = []
confidences = (
keypoint_confidence.reshape(num_keypoints, POSE_JOINT_COUNT)
if num_keypoints > 0
else np.zeros((0, POSE_JOINT_COUNT), dtype=np.float64)
)
for index in range(num_keypoints):
normalized = normalize_rtmpose_body20(keypoints_xy[index], confidences[index])
bbox_score = float(bbox_confidence[index] / 255.0) if index < bbox_confidence.shape[0] else 0.0
bbox = bboxes[index] if index < bboxes.shape[0] else np.zeros(4, dtype=np.float64)
detection_items.append(
PoseDetection(
bbox=np.asarray(bbox, dtype=np.float64),
bbox_confidence=bbox_score,
keypoints=np.asarray(normalized, dtype=np.float64),
)
)
return DecodedPosePayload(
frame_index=frame_index,
reference_size=(reference_size[0], reference_size[1]),
timestamp_unix_ns=timestamp_unix_ns,
detections=tuple(detection_items),
)
@beartype
def frame_from_payload(camera_name: str, payload: bytes) -> CameraFrame:
decoded = decode_pose_payload(payload)
return CameraFrame(
camera_name=camera_name,
frame_index=decoded.frame_index,
timestamp_unix_ns=decoded.timestamp_unix_ns,
detections=decoded.detections,
source_size=decoded.reference_size,
)
@beartype
def convert_payload_record(record: dict[str, object]) -> dict[str, object]:
camera_name = str(record["camera"])
payload_b64 = str(record["payload_b64"])
frame = frame_from_payload(camera_name, base64.b64decode(payload_b64))
return {
"camera": frame.camera_name,
"frame_index": frame.frame_index,
"timestamp_unix_ns": frame.timestamp_unix_ns,
"source_size": list(frame.source_size),
"detections": [
{
"bbox": detection.bbox.tolist(),
"bbox_confidence": detection.bbox_confidence,
"keypoints": detection.keypoints.tolist(),
}
for detection in frame.detections
],
}
@beartype
def convert_payload_jsonl_lines(lines: list[str]) -> list[str]:
output_lines: list[str] = []
for line in lines:
if not line.strip():
continue
record = json.loads(line)
converted = convert_payload_record(record)
output_lines.append(json.dumps(converted))
return output_lines
-108
View File
@@ -1,108 +0,0 @@
import json
from pathlib import Path
import numpy as np
from beartype import beartype
from pose_tracking_exp.models import CameraCalibration, CameraFrame, PoseDetection, ReplaySequence, SceneConfig
_OPENCV_EXTRINSICS = "opencv_world_to_camera"
_RPT_POSE = "rpt_camera_pose"
def _as_float_array(values: object, shape: tuple[int, ...]) -> np.ndarray:
array = np.asarray(values, dtype=np.float64)
if array.shape != shape:
raise ValueError(f"Expected shape {shape}, got {array.shape}.")
return array
@beartype
def load_scene_file(path: Path) -> SceneConfig:
payload = json.loads(path.read_text(encoding="utf-8"))
default_extrinsic_format = str(payload.get("extrinsic_format", _OPENCV_EXTRINSICS))
cameras: list[CameraCalibration] = []
for camera_payload in payload["cameras"]:
extrinsic_format = str(camera_payload.get("extrinsic_format", default_extrinsic_format))
name = str(camera_payload["name"])
width = int(camera_payload["width"])
height = int(camera_payload["height"])
K = _as_float_array(camera_payload["K"], (3, 3))
DC = np.asarray(camera_payload.get("DC", [0.0, 0.0, 0.0, 0.0, 0.0]), dtype=np.float64)
R = _as_float_array(camera_payload["R"], (3, 3))
T = _as_float_array(camera_payload["T"], (3, 1)).reshape(3)
model = str(camera_payload.get("model", "pinhole"))
if extrinsic_format == _OPENCV_EXTRINSICS:
cameras.append(
CameraCalibration.from_opencv_extrinsics(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=R,
T=T,
model=model,
rvec=np.asarray(camera_payload["rvec"], dtype=np.float64).reshape(3)
if "rvec" in camera_payload
else None,
)
)
elif extrinsic_format == _RPT_POSE:
cameras.append(
CameraCalibration.from_rpt_pose(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=R,
T=T,
model=model,
)
)
else:
raise ValueError(
f"Unsupported extrinsic format {extrinsic_format!r}. "
f"Expected {_OPENCV_EXTRINSICS!r} or {_RPT_POSE!r}."
)
return SceneConfig(
room_size=_as_float_array(payload["room_size"], (3,)),
room_center=_as_float_array(payload["room_center"], (3,)),
cameras=tuple(cameras),
)
@beartype
def load_replay_file(scene_path: Path, replay_path: Path) -> ReplaySequence:
frames_by_camera: dict[str, list[CameraFrame]] = {}
for raw_line in replay_path.read_text(encoding="utf-8").splitlines():
if not raw_line.strip():
continue
payload = json.loads(raw_line)
camera_name = str(payload["camera"])
detections: list[PoseDetection] = []
for detection_payload in payload["detections"]:
detections.append(
PoseDetection(
bbox=np.asarray(detection_payload["bbox"], dtype=np.float64),
bbox_confidence=float(detection_payload["bbox_confidence"]),
keypoints=np.asarray(detection_payload["keypoints"], dtype=np.float64),
)
)
frames_by_camera.setdefault(camera_name, []).append(
CameraFrame(
camera_name=camera_name,
frame_index=int(payload["frame_index"]),
timestamp_unix_ns=int(payload["timestamp_unix_ns"]),
detections=tuple(detections),
source_size=(
int(payload["source_size"][0]),
int(payload["source_size"][1]),
),
)
)
for frames in frames_by_camera.values():
frames.sort(key=lambda item: (item.timestamp_unix_ns, item.frame_index))
return ReplaySequence(scene_path=scene_path, replay_path=replay_path, frames_by_camera=frames_by_camera)
+50
View File
@@ -0,0 +1,50 @@
from pose_tracking_exp.schema.camera import (
CameraCalibration,
CameraModel,
PINHOLE_CAMERA_MODEL,
SceneConfig,
parse_camera_model,
)
from pose_tracking_exp.schema.detection import (
BoxDetections,
CocoKeypointSchema,
PoseBatchRequest,
PoseDetections,
SourceFrame,
)
from pose_tracking_exp.schema.observation import CameraFrame, FrameBundle, PoseDetection, ReplaySequence
from pose_tracking_exp.schema.tracking import (
ActiveTrackState,
ProposalCluster,
SkeletonState,
TentativeTrackState,
TrackState,
TrackerConfig,
TrackerDiagnostics,
TrackedFrameResult,
)
__all__ = [
"ActiveTrackState",
"BoxDetections",
"CameraCalibration",
"CameraFrame",
"CameraModel",
"CocoKeypointSchema",
"FrameBundle",
"PINHOLE_CAMERA_MODEL",
"PoseBatchRequest",
"PoseDetection",
"PoseDetections",
"ProposalCluster",
"ReplaySequence",
"SceneConfig",
"SkeletonState",
"TentativeTrackState",
"TrackState",
"TrackerConfig",
"TrackerDiagnostics",
"TrackedFrameResult",
"SourceFrame",
"parse_camera_model",
]
+106
View File
@@ -0,0 +1,106 @@
from dataclasses import dataclass, field
from typing import Literal
import cv2
import numpy as np
from pose_tracking_exp.common.tensor_types import Matrix3, Vector3
CameraModel = Literal["pinhole"]
PINHOLE_CAMERA_MODEL: CameraModel = "pinhole"
def parse_camera_model(model: str) -> CameraModel:
if model != PINHOLE_CAMERA_MODEL:
raise ValueError(
f"Unsupported camera model {model!r}. Expected {PINHOLE_CAMERA_MODEL!r}."
)
return PINHOLE_CAMERA_MODEL
@dataclass(slots=True)
class CameraCalibration:
name: str
width: int
height: int
K: Matrix3
DC: np.ndarray
R: Matrix3
T: Vector3
model: CameraModel = PINHOLE_CAMERA_MODEL
rvec: np.ndarray | None = None
pose_R: Matrix3 = field(init=False)
pose_T: Vector3 = field(init=False)
def __post_init__(self) -> None:
self.K = np.asarray(self.K, dtype=np.float64).reshape(3, 3)
self.DC = np.asarray(self.DC, dtype=np.float64).reshape(-1)
self.R = np.asarray(self.R, dtype=np.float64).reshape(3, 3)
self.T = np.asarray(self.T, dtype=np.float64).reshape(3)
self.model = parse_camera_model(self.model)
if self.rvec is None:
rvec, _ = cv2.Rodrigues(self.R)
self.rvec = np.asarray(rvec, dtype=np.float64).reshape(3)
else:
self.rvec = np.asarray(self.rvec, dtype=np.float64).reshape(3)
self.pose_R = self.R.T
self.pose_T = -(self.pose_R @ self.T)
@staticmethod
def from_opencv_extrinsics(
name: str,
width: int,
height: int,
K: Matrix3,
DC: np.ndarray,
R: Matrix3,
T: Vector3,
model: CameraModel = PINHOLE_CAMERA_MODEL,
rvec: np.ndarray | None = None,
) -> "CameraCalibration":
return CameraCalibration(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=R,
T=T,
model=model,
rvec=rvec,
)
@staticmethod
def from_rpt_pose(
name: str,
width: int,
height: int,
K: Matrix3,
DC: np.ndarray,
R: Matrix3,
T: Vector3,
model: CameraModel = PINHOLE_CAMERA_MODEL,
) -> "CameraCalibration":
pose_R = np.asarray(R, dtype=np.float64).reshape(3, 3)
pose_T = np.asarray(T, dtype=np.float64).reshape(3)
rotation = pose_R.T
translation = -(rotation @ pose_T)
rvec, _ = cv2.Rodrigues(rotation)
return CameraCalibration(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=rotation,
T=translation,
model=model,
rvec=np.asarray(rvec, dtype=np.float64).reshape(3),
)
@dataclass(slots=True)
class SceneConfig:
room_size: Vector3
room_center: Vector3
cameras: tuple[CameraCalibration, ...]
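The pose fields computed in `__post_init__` and inverted by `from_rpt_pose` are the standard relationship between world-to-camera extrinsics and the camera pose in world coordinates. A minimal numpy sketch of that round trip (standalone illustration, not package code; the example rotation and translation are arbitrary):

```python
import numpy as np

def opencv_to_pose(R: np.ndarray, T: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    # world-to-camera extrinsics -> camera pose, as in CameraCalibration.__post_init__
    pose_R = R.T
    pose_T = -(pose_R @ T)
    return pose_R, pose_T

def pose_to_opencv(pose_R: np.ndarray, pose_T: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    # camera pose -> world-to-camera extrinsics, as in CameraCalibration.from_rpt_pose
    R = pose_R.T
    T = -(R @ pose_T)
    return R, T

# Round trip with a 90-degree yaw and an offset translation.
R = np.array([[0.0, -1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
T = np.array([1.0, 2.0, 3.0])
pose_R, pose_T = opencv_to_pose(R, T)
R2, T2 = pose_to_opencv(pose_R, pose_T)
assert np.allclose(R2, R) and np.allclose(T2, T)
```

Because the same inversion is applied in both directions, a scene file in either `opencv_world_to_camera` or `rpt_camera_pose` format ends up with identical `R`/`T` and `pose_R`/`pose_T` fields on the calibration.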
+116
View File
@@ -0,0 +1,116 @@
"""Shared 2D detection schema.
`coco_wholebody133` matches the COCO-WholeBody dataset terminology used by
MMPose and the official dataset repo. The first 17 joints follow the standard
COCO body ordering, so it is body-compatible with `coco17`.
References:
- https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html
- https://github.com/jin-s13/COCO-WholeBody
"""
from dataclasses import dataclass
from typing import Literal
import numpy as np
CocoKeypointSchema = Literal["coco17", "coco_wholebody133"]
def expected_keypoint_count(schema: CocoKeypointSchema) -> int:
if schema == "coco17":
return 17
return 133
@dataclass(slots=True)
class SourceFrame:
source_name: str
image_bgr: np.ndarray
frame_index: int
timestamp_unix_ns: int
@dataclass(slots=True)
class BoxDetections:
boxes_xyxy: np.ndarray
scores: np.ndarray
reference_frame_shape: tuple[int, int]
@property
def reference_size(self) -> tuple[int, int]:
return (self.reference_frame_shape[1], self.reference_frame_shape[0])
@property
def boxes_num(self) -> int:
return int(self.boxes_xyxy.shape[0])
def filter_by_area(self, area_threshold: int) -> "BoxDetections":
if area_threshold <= 0:
raise ValueError("Area threshold must be positive.")
areas = np.abs(
(self.boxes_xyxy[:, 2] - self.boxes_xyxy[:, 0])
* (self.boxes_xyxy[:, 3] - self.boxes_xyxy[:, 1])
)
mask = areas >= area_threshold
return BoxDetections(
boxes_xyxy=self.boxes_xyxy[mask],
scores=self.scores[mask],
reference_frame_shape=self.reference_frame_shape,
)
@dataclass(slots=True)
class PoseBatchRequest:
image_rgb: np.ndarray
boxes_xyxy: np.ndarray
@dataclass(slots=True)
class PoseDetections:
source_name: str
frame_index: int
source_size: tuple[int, int]
boxes_xyxy: np.ndarray
box_scores: np.ndarray | None
keypoints_xy: np.ndarray
keypoint_scores: np.ndarray | None
timestamp_unix_ns: int
keypoint_schema: CocoKeypointSchema = "coco_wholebody133"
def validate(self) -> None:
if self.boxes_xyxy.ndim != 2 or self.boxes_xyxy.shape[1] != 4:
raise ValueError(
f"Expected boxes with shape (N, 4), got {self.boxes_xyxy.shape}."
)
if self.keypoints_xy.ndim != 3 or self.keypoints_xy.shape[2] != 2:
raise ValueError(
"Expected keypoints with shape (N, K, 2), "
f"got {self.keypoints_xy.shape}."
)
expected_count = expected_keypoint_count(self.keypoint_schema)
if self.keypoints_xy.shape[1] != expected_count:
raise ValueError(
f"Expected {self.keypoint_schema} keypoints with {expected_count} joints, "
f"got {self.keypoints_xy.shape[1]}."
)
detection_count = int(self.keypoints_xy.shape[0])
if self.boxes_xyxy.shape[0] != detection_count:
raise ValueError(
"Expected box and keypoint detection counts to match, "
f"got {self.boxes_xyxy.shape[0]} and {detection_count}."
)
if self.box_scores is not None and self.box_scores.shape != (detection_count,):
raise ValueError(
f"Expected box scores with shape ({detection_count},), got {self.box_scores.shape}."
)
if self.keypoint_scores is not None and self.keypoint_scores.shape != (
detection_count,
expected_count,
):
raise ValueError(
"Expected keypoint scores with shape "
f"({detection_count}, {expected_count}), got {self.keypoint_scores.shape}."
)
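`BoxDetections.filter_by_area` keeps only boxes whose absolute area meets the threshold. A small standalone numpy sketch of the same masking, with hypothetical box values:

```python
import numpy as np

# Hypothetical detections: (x1, y1, x2, y2) boxes with per-box scores.
boxes_xyxy = np.array([
    [0.0, 0.0, 10.0, 10.0],  # area 100
    [0.0, 0.0, 2.0, 2.0],    # area 4
    [5.0, 5.0, 25.0, 45.0],  # area 800
])
scores = np.array([0.9, 0.4, 0.8])
area_threshold = 50

# Same area computation and mask as BoxDetections.filter_by_area.
areas = np.abs(
    (boxes_xyxy[:, 2] - boxes_xyxy[:, 0]) * (boxes_xyxy[:, 3] - boxes_xyxy[:, 1])
)
mask = areas >= area_threshold
kept_boxes, kept_scores = boxes_xyxy[mask], scores[mask]
assert kept_boxes.shape == (2, 4)
```

The boolean mask is applied to boxes and scores together, so the two arrays stay index-aligned after filtering.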
@@ -0,0 +1,36 @@
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from pose_tracking_exp.common.tensor_types import Pose2D
@dataclass(slots=True)
class PoseDetection:
bbox: np.ndarray
bbox_confidence: float
keypoints: Pose2D
@dataclass(slots=True)
class CameraFrame:
camera_name: str
frame_index: int
timestamp_unix_ns: int
detections: tuple[PoseDetection, ...]
source_size: tuple[int, int]
@dataclass(slots=True)
class FrameBundle:
bundle_index: int
timestamp_unix_ns: int
views: tuple[CameraFrame, ...]
@dataclass(slots=True)
class ReplaySequence:
scene_path: Path
replay_path: Path
frames_by_camera: dict[str, list[CameraFrame]]
+102
View File
@@ -0,0 +1,102 @@
from dataclasses import dataclass, field
from typing import Literal
import numpy as np
from pose_tracking_exp.common.tensor_types import Pose3D, Vector3
@dataclass(slots=True)
class ProposalCluster:
pose3d: Pose3D
root: Vector3
source_views: frozenset[str]
support_size: int
mean_score: float
@dataclass(slots=True)
class SkeletonState:
parameters: np.ndarray
beta: np.ndarray
pose3d: Pose3D
@dataclass(slots=True)
class TentativeTrackState:
track_id: int
state: Literal["tentative"] = "tentative"
age: int = 0
misses: int = 0
score: float = 0.0
last_bundle_index: int = -1
root: Vector3 = field(default_factory=lambda: np.zeros(3, dtype=np.float64))
pose3d: Pose3D = field(default_factory=lambda: np.zeros((20, 4), dtype=np.float64))
evidence_buffer: list[Pose3D] = field(default_factory=list)
@dataclass(slots=True)
class ActiveTrackState:
track_id: int
status: Literal["active", "lost"] = "active"
misses: int = 0
lost_age: int = 0
score: float = 0.0
last_bundle_index: int = -1
skeleton: SkeletonState = field(
default_factory=lambda: SkeletonState(
parameters=np.zeros(31, dtype=np.float64),
beta=np.ones(8, dtype=np.float64),
pose3d=np.zeros((20, 4), dtype=np.float64),
)
)
noise_scale: np.ndarray = field(
default_factory=lambda: np.full((20,), 9.0, dtype=np.float64)
)
TrackState = TentativeTrackState | ActiveTrackState
@dataclass(slots=True)
class TrackedFrameResult:
bundle_index: int
timestamp_unix_ns: int
tentative_tracks: tuple[TentativeTrackState, ...]
active_tracks: tuple[ActiveTrackState, ...]
lost_tracks: tuple[ActiveTrackState, ...]
proposals: tuple[ProposalCluster, ...]
@dataclass(slots=True)
class TrackerDiagnostics:
match_existing_calls: int = 0
match_existing_seconds: float = 0.0
proposal_build_calls: int = 0
proposal_build_seconds: float = 0.0
promotions: int = 0
reacquisitions: int = 0
active_updates: int = 0
seed_initializations: int = 0
nonlinear_refinements: int = 0
@dataclass(slots=True)
class TrackerConfig:
max_active_tracks: int | None = None
min_bundle_views: int = 2
max_sync_skew_ns: int = 12_000_000
tentative_buffer_size: int = 5
tentative_min_age: int = 3
tentative_hits_required: int = 3
tentative_promote_score: float = 3.0
tentative_max_misses: int = 2
active_min_views: int = 2
active_core_gate_px: float = 80.0
active_joint_gate_px: float = 120.0
active_miss_to_lost: int = 3
lost_delete_age: int = 15
proposal_match_distance_m: float = 0.45
noise_ema: float = 0.85
proposal_min_score: float = 0.9
proposal_min_group_size: int = 1
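With `max_active_tracks` replacing the old single-person mode, the tracker prunes each track table down to the N highest-ranked entries rather than a single best one. A standalone sketch of that pruning, using a plain score dict to stand in for the track table and its rank function (ids and scores hypothetical):

```python
def keep_best(tracks: dict[int, float], limit: int) -> dict[int, float]:
    # Keep the `limit` highest-ranked track ids, mirroring the tracker's
    # _keep_best_active_tracks; a score dict stands in for the rank function.
    if len(tracks) <= limit:
        return dict(tracks)
    ranked_ids = sorted(tracks, key=lambda track_id: tracks[track_id], reverse=True)
    keep_ids = set(ranked_ids[:limit])
    return {track_id: score for track_id, score in tracks.items() if track_id in keep_ids}

pruned = keep_best({1: 0.2, 2: 0.9, 3: 0.5}, limit=2)
assert set(pruned) == {2, 3}
```

Setting `max_active_tracks = 1` reduces this to the previous single-person behavior; `None` disables pruning entirely.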
@@ -0,0 +1,15 @@
from pose_tracking_exp.tracking.kinematics import seed_state_from_pose3d, update_noise_scale, update_state_from_multiview
from pose_tracking_exp.tracking.replay_io import load_parquet_replay_dir, load_replay_file, load_scene_file
from pose_tracking_exp.tracking.sync import synchronize_frames
from pose_tracking_exp.tracking.tracker import PoseTracker
__all__ = [
"PoseTracker",
"load_parquet_replay_dir",
"load_replay_file",
"load_scene_file",
"seed_state_from_pose3d",
"synchronize_frames",
"update_noise_scale",
"update_state_from_multiview",
]
@@ -4,10 +4,10 @@ import numpy as np
 from beartype import beartype
 from scipy.optimize import least_squares
-from pose_tracking_exp.camera_math import project_pose
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
-from pose_tracking_exp.models import CameraCalibration, PoseDetection, SkeletonState
-from pose_tracking_exp.tensor_types import Pose3D
+from pose_tracking_exp.common.camera_math import project_pose
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
+from pose_tracking_exp.common.tensor_types import Pose3D
+from pose_tracking_exp.schema import CameraCalibration, PoseDetection, SkeletonState
 PARAMETER_DIMENSION = 31
 SHAPE_DIMENSION = 8
+221
View File
@@ -0,0 +1,221 @@
import json
from pathlib import Path
from typing import cast
import numpy as np
import pyarrow.parquet as pq
from beartype import beartype
from pose_tracking_exp.common.detection_parquet import DETECTED_PARQUET_SUFFIX
from pose_tracking_exp.common.normalization import infer_bbox_from_keypoints, normalize_coco_body20
from pose_tracking_exp.schema import (
CameraCalibration,
CameraFrame,
CocoKeypointSchema,
PoseDetection,
ReplaySequence,
SceneConfig,
parse_camera_model,
)
_OPENCV_EXTRINSICS = "opencv_world_to_camera"
_RPT_POSE = "rpt_camera_pose"
def _as_float_array(values: object, shape: tuple[int, ...]) -> np.ndarray:
array = np.asarray(values, dtype=np.float64)
if array.shape != shape:
raise ValueError(f"Expected shape {shape}, got {array.shape}.")
return array
@beartype
def load_scene_file(path: Path) -> SceneConfig:
payload = json.loads(path.read_text(encoding="utf-8"))
default_extrinsic_format = str(payload.get("extrinsic_format", _OPENCV_EXTRINSICS))
cameras: list[CameraCalibration] = []
for camera_payload in payload["cameras"]:
extrinsic_format = str(
camera_payload.get("extrinsic_format", default_extrinsic_format)
)
name = str(camera_payload["name"])
width = int(camera_payload["width"])
height = int(camera_payload["height"])
K = _as_float_array(camera_payload["K"], (3, 3))
DC = np.asarray(
camera_payload.get("DC", [0.0, 0.0, 0.0, 0.0, 0.0]), dtype=np.float64
)
R = _as_float_array(camera_payload["R"], (3, 3))
T = _as_float_array(camera_payload["T"], (3, 1)).reshape(3)
model = parse_camera_model(camera_payload.get("model", "pinhole"))
if extrinsic_format == _OPENCV_EXTRINSICS:
cameras.append(
CameraCalibration.from_opencv_extrinsics(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=R,
T=T,
model=model,
rvec=np.asarray(camera_payload["rvec"], dtype=np.float64).reshape(3)
if "rvec" in camera_payload
else None,
)
)
elif extrinsic_format == _RPT_POSE:
cameras.append(
CameraCalibration.from_rpt_pose(
name=name,
width=width,
height=height,
K=K,
DC=DC,
R=R,
T=T,
model=model,
)
)
else:
raise ValueError(
f"Unsupported extrinsic format {extrinsic_format!r}. "
f"Expected {_OPENCV_EXTRINSICS!r} or {_RPT_POSE!r}."
)
return SceneConfig(
room_size=_as_float_array(payload["room_size"], (3,)),
room_center=_as_float_array(payload["room_center"], (3,)),
cameras=tuple(cameras),
)
@beartype
def load_replay_file(scene_path: Path, replay_path: Path) -> ReplaySequence:
if replay_path.is_dir():
return load_parquet_replay_dir(scene_path, replay_path)
frames_by_camera: dict[str, list[CameraFrame]] = {}
for raw_line in replay_path.read_text(encoding="utf-8").splitlines():
if not raw_line.strip():
continue
payload = json.loads(raw_line)
camera_name = str(payload["camera"])
detections: list[PoseDetection] = []
for detection_payload in payload["detections"]:
detections.append(
PoseDetection(
bbox=np.asarray(detection_payload["bbox"], dtype=np.float64),
bbox_confidence=float(detection_payload["bbox_confidence"]),
keypoints=np.asarray(
detection_payload["keypoints"], dtype=np.float64
),
)
)
frames_by_camera.setdefault(camera_name, []).append(
CameraFrame(
camera_name=camera_name,
frame_index=int(payload["frame_index"]),
timestamp_unix_ns=int(payload["timestamp_unix_ns"]),
detections=tuple(detections),
source_size=(
int(payload["source_size"][0]),
int(payload["source_size"][1]),
),
)
)
for frames in frames_by_camera.values():
frames.sort(key=lambda item: (item.timestamp_unix_ns, item.frame_index))
return ReplaySequence(
scene_path=scene_path,
replay_path=replay_path,
frames_by_camera=frames_by_camera,
)
def _pose_detections_from_parquet_row(row: dict[str, object]) -> tuple[PoseDetection, ...]:
boxes = np.asarray(row.get("boxes", []), dtype=np.float64)
if boxes.size == 0:
boxes = np.empty((0, 4), dtype=np.float64)
box_scores = np.asarray(row.get("box_scores", []), dtype=np.float64)
keypoints_xy = np.asarray(row.get("kps", []), dtype=np.float64)
if keypoints_xy.size == 0:
keypoints_xy = np.empty((0, 133, 2), dtype=np.float64)
keypoint_scores = np.asarray(row.get("kps_scores", []), dtype=np.float64)
if keypoint_scores.size == 0:
keypoint_scores = np.empty((0, 133), dtype=np.float64)
raw_keypoint_schema = row.get("keypoint_schema", "coco_wholebody133")
if raw_keypoint_schema not in {"coco17", "coco_wholebody133"}:
raise ValueError(f"Unsupported keypoint schema in parquet replay: {raw_keypoint_schema!r}")
keypoint_schema = cast(CocoKeypointSchema, raw_keypoint_schema)
if keypoints_xy.shape[0] != keypoint_scores.shape[0]:
raise ValueError(
"Expected matching keypoint coordinate and score counts in parquet replay row."
)
detections: list[PoseDetection] = []
for detection_index in range(int(keypoints_xy.shape[0])):
normalized = normalize_coco_body20(
keypoints_xy[detection_index],
keypoint_scores[detection_index],
keypoint_schema=keypoint_schema,
)
bbox = (
boxes[detection_index]
if detection_index < boxes.shape[0]
else infer_bbox_from_keypoints(normalized)
)
visible = normalized[:, 2] > 0.0
bbox_confidence = (
float(box_scores[detection_index])
if detection_index < box_scores.shape[0]
else float(np.mean(normalized[visible, 2]))
if np.any(visible)
else 0.0
)
detections.append(
PoseDetection(
bbox=np.asarray(bbox, dtype=np.float64),
bbox_confidence=bbox_confidence,
keypoints=np.asarray(normalized, dtype=np.float64),
)
)
return tuple(detections)
@beartype
def load_parquet_replay_dir(scene_path: Path, replay_root: Path) -> ReplaySequence:
parquet_paths = sorted(replay_root.glob(f"*{DETECTED_PARQUET_SUFFIX}"))
if not parquet_paths:
raise FileNotFoundError(
f"No detection parquet files matching *{DETECTED_PARQUET_SUFFIX} under {replay_root}."
)
frames_by_camera: dict[str, list[CameraFrame]] = {}
for parquet_path in parquet_paths:
camera_name = parquet_path.name.removesuffix(DETECTED_PARQUET_SUFFIX)
frames: list[CameraFrame] = []
for row in pq.read_table(parquet_path).to_pylist():
frames.append(
CameraFrame(
camera_name=camera_name,
frame_index=int(row["frame_index"]),
timestamp_unix_ns=int(row["timestamp_unix_ns"]),
detections=_pose_detections_from_parquet_row(row),
source_size=(
int(row.get("source_width", 0)),
int(row.get("source_height", 0)),
),
)
)
frames.sort(key=lambda item: (item.timestamp_unix_ns, item.frame_index))
frames_by_camera[camera_name] = frames
return ReplaySequence(
scene_path=scene_path,
replay_path=replay_root,
frames_by_camera=frames_by_camera,
)
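When a parquet row carries no box scores, `_pose_detections_from_parquet_row` falls back to the mean confidence of the visible joints. A standalone sketch of that fallback, with hypothetical keypoint scores:

```python
import numpy as np

def fallback_bbox_confidence(normalized_keypoints: np.ndarray) -> float:
    # Mean confidence of visible joints (score > 0), or 0.0 when none are
    # visible, mirroring the box-score fallback in _pose_detections_from_parquet_row.
    visible = normalized_keypoints[:, 2] > 0.0
    if not np.any(visible):
        return 0.0
    return float(np.mean(normalized_keypoints[visible, 2]))

keypoints = np.zeros((20, 3))               # (x, y, score) per joint
keypoints[:4, 2] = [0.8, 0.6, 0.0, 0.2]     # three visible joints
confidence = fallback_bbox_confidence(keypoints)
```

The same guard against empty visibility is what lets fully empty frames survive the parquet round trip with zero-confidence detections instead of NaNs.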
@@ -1,16 +1,19 @@
-from typing import Any
 import numpy as np
 import rpt
 from beartype import beartype
+from rpt._core import TriangulationConfig, TriangulationTrace  # type: ignore[reportMissingModuleSource]
-from pose_tracking_exp.joints import BODY20_JOINT_NAMES, BODY20_OBSERVATION_COUNT, BODY20_INDEX_BY_NAME
-from pose_tracking_exp.models import CameraFrame, ProposalCluster, SceneConfig
-from pose_tracking_exp.tensor_types import Pose2D
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME, BODY20_JOINT_NAMES, BODY20_OBSERVATION_COUNT
+from pose_tracking_exp.common.tensor_types import Pose2D
+from pose_tracking_exp.schema import CameraFrame, ProposalCluster, SceneConfig
-@beartype
-def build_rpt_config(scene: SceneConfig, *, min_match_score: float, min_group_size: int) -> Any:
+def build_rpt_config(
+    scene: SceneConfig,
+    *,
+    min_match_score: float,
+    min_group_size: int,
+) -> TriangulationConfig:
     cameras = [
         {
             "name": camera.name,
@@ -50,7 +53,7 @@ def pack_view_detections(frames: tuple[CameraFrame, ...], unmatched_indices: dic
 @beartype
 def extract_clusters(
-    trace: Any,
+    trace: TriangulationTrace,
     camera_names: tuple[str, ...],
 ) -> tuple[ProposalCluster, ...]:
     clusters: list[ProposalCluster] = []
@@ -2,7 +2,7 @@ from collections.abc import Iterable
 from beartype import beartype
-from pose_tracking_exp.models import CameraFrame, FrameBundle, ReplaySequence
+from pose_tracking_exp.schema import CameraFrame, FrameBundle, ReplaySequence
 @beartype
@@ -50,4 +50,3 @@ def synchronize_frames(
         )
     )
     return bundles
@@ -5,10 +5,10 @@ import numpy as np
 from beartype import beartype
 from scipy.optimize import linear_sum_assignment
-from pose_tracking_exp.camera_math import project_pose
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME, CORE_JOINT_INDICES
-from pose_tracking_exp.kinematics import seed_state_from_pose3d, update_noise_scale, update_state_from_multiview
-from pose_tracking_exp.models import (
+from pose_tracking_exp.common.camera_math import project_pose
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME, CORE_JOINT_INDICES
+from pose_tracking_exp.common.normalization import core_reprojection_distance
+from pose_tracking_exp.schema import (
     ActiveTrackState,
     FrameBundle,
     PoseDetection,
@@ -20,8 +20,8 @@ from pose_tracking_exp.models import (
     TrackerConfig,
     TrackerDiagnostics,
 )
-from pose_tracking_exp.normalization import core_reprojection_distance
-from pose_tracking_exp.rpt_adapter import build_rpt_config, extract_clusters, pack_view_detections
+from pose_tracking_exp.tracking.kinematics import seed_state_from_pose3d, update_noise_scale, update_state_from_multiview
+from pose_tracking_exp.tracking.rpt_adapter import build_rpt_config, extract_clusters, pack_view_detections
 CORE_JOINT_MASK = np.zeros((20,), dtype=bool)
 CORE_JOINT_MASK[list(CORE_JOINT_INDICES)] = True
@@ -78,20 +78,24 @@ class PoseTracker:
         return replace(self._diagnostics)
     def run(self, bundles: list[FrameBundle]) -> list[TrackedFrameResult]:
+        self._tentative.clear()
+        self._active.clear()
+        self._lost.clear()
+        self._next_track_id = 1
         self._diagnostics = TrackerDiagnostics()
         return [self.step(bundle) for bundle in bundles]
     def step(self, bundle: FrameBundle) -> TrackedFrameResult:
-        self._enforce_single_person_constraints()
+        self._enforce_track_limits()
         matches, unmatched = self._match_existing_tracks(bundle)
         self._update_active_tracks(bundle, matches)
         self._update_lost_tracks(bundle, matches)
-        proposals = self._refresh_single_person_track_from_proposals(bundle, self._build_proposals(bundle, unmatched))
+        proposals = self._refresh_capped_single_track_from_proposals(bundle, self._build_proposals(bundle, unmatched))
         self._update_tentative_tracks(bundle, self._birth_candidate_proposals(proposals))
         self._promote_tentative_tracks(bundle)
         self._reacquire_lost_tracks(bundle, proposals)
         self._delete_expired_tracks()
-        self._enforce_single_person_constraints()
+        self._enforce_track_limits()
         return TrackedFrameResult(
             bundle_index=bundle.bundle_index,
             timestamp_unix_ns=bundle.timestamp_unix_ns,
@@ -101,46 +105,58 @@ class PoseTracker:
             proposals=proposals,
         )
-    def _single_person_mode(self) -> bool:
-        return self._config.mode == "single_person"
+    def _track_limit(self) -> int | None:
+        return self._config.max_active_tracks
+    def _single_track_cap_enabled(self) -> bool:
+        return self._config.max_active_tracks == 1
-    def _keep_best_active_track(self) -> None:
-        if len(self._active) <= 1:
+    def _keep_best_active_tracks(self, limit: int) -> None:
+        if len(self._active) <= limit:
             return
-        best_id = max(self._active, key=lambda track_id: _active_track_rank(self._active[track_id]))
+        ranked_ids = sorted(self._active, key=lambda track_id: _active_track_rank(self._active[track_id]), reverse=True)
+        keep_ids = set(ranked_ids[:limit])
         for track_id in list(self._active):
-            if track_id != best_id:
+            if track_id not in keep_ids:
                 self._active.pop(track_id, None)
-    def _keep_best_lost_track(self) -> None:
-        if len(self._lost) <= 1:
+    def _keep_best_lost_tracks(self, limit: int) -> None:
+        if len(self._lost) <= limit:
             return
-        best_id = max(self._lost, key=lambda track_id: _lost_track_rank(self._lost[track_id]))
+        ranked_ids = sorted(self._lost, key=lambda track_id: _lost_track_rank(self._lost[track_id]), reverse=True)
+        keep_ids = set(ranked_ids[:limit])
         for track_id in list(self._lost):
-            if track_id != best_id:
+            if track_id not in keep_ids:
                 self._lost.pop(track_id, None)
-    def _keep_best_tentative_track(self) -> None:
-        if len(self._tentative) <= 1:
+    def _keep_best_tentative_tracks(self, limit: int) -> None:
+        if len(self._tentative) <= limit:
             return
-        best_id = max(self._tentative, key=lambda track_id: _tentative_track_rank(self._tentative[track_id]))
+        ranked_ids = sorted(
+            self._tentative,
+            key=lambda track_id: _tentative_track_rank(self._tentative[track_id]),
+            reverse=True,
+        )
+        keep_ids = set(ranked_ids[:limit])
         for track_id in list(self._tentative):
-            if track_id != best_id:
+            if track_id not in keep_ids:
                 self._tentative.pop(track_id, None)
-    def _enforce_single_person_constraints(self) -> None:
-        if not self._single_person_mode():
+    def _enforce_track_limits(self) -> None:
+        limit = self._track_limit()
+        if limit is None:
+            return
+        self._keep_best_active_tracks(limit)
+        self._keep_best_lost_tracks(limit)
+        self._keep_best_tentative_tracks(limit)
+        if not self._single_track_cap_enabled():
             return
-        self._keep_best_active_track()
         if self._active:
             self._lost.clear()
             self._tentative.clear()
             return
-        self._keep_best_lost_track()
         if self._lost:
             self._tentative.clear()
-            return
-        self._keep_best_tentative_track()
     def _predicted_pose_by_track(self) -> dict[int, np.ndarray]:
         result: dict[int, np.ndarray] = {}
@@ -278,7 +294,7 @@ class PoseTracker:
self._diagnostics.proposal_build_seconds += perf_counter() - started_at self._diagnostics.proposal_build_seconds += perf_counter() - started_at
def _birth_candidate_proposals(self, proposals: tuple[ProposalCluster, ...]) -> tuple[ProposalCluster, ...]: def _birth_candidate_proposals(self, proposals: tuple[ProposalCluster, ...]) -> tuple[ProposalCluster, ...]:
if not self._single_person_mode(): if not self._single_track_cap_enabled():
return proposals return proposals
if self._active or self._lost: if self._active or self._lost:
return () return ()
@@ -286,12 +302,12 @@ class PoseTracker:
return () return ()
return (max(proposals, key=_proposal_rank),) return (max(proposals, key=_proposal_rank),)
def _refresh_single_person_track_from_proposals( def _refresh_capped_single_track_from_proposals(
self, self,
bundle: FrameBundle, bundle: FrameBundle,
proposals: tuple[ProposalCluster, ...], proposals: tuple[ProposalCluster, ...],
) -> tuple[ProposalCluster, ...]: ) -> tuple[ProposalCluster, ...]:
if not self._single_person_mode() or not proposals: if not self._single_track_cap_enabled() or not proposals:
return proposals return proposals
remaining = list(proposals) remaining = list(proposals)
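The refactor above collapses three near-identical keep-exactly-one helpers into rank-then-truncate helpers parameterized by a limit. A minimal standalone sketch of that generalized cap (the `rank` scores and the `keep_best` name are illustrative, not the tracker's API):

```python
# Hedged sketch of the keep-the-N-best-ranked-tracks cap: `tracks` maps
# track id -> rank score, higher rank wins, mirroring the
# sorted(..., reverse=True)[:limit] pattern in _keep_best_*_tracks.
def keep_best(tracks: dict[int, float], limit: int) -> dict[int, float]:
    if len(tracks) <= limit:
        return dict(tracks)
    ranked_ids = sorted(tracks, key=lambda track_id: tracks[track_id], reverse=True)
    keep_ids = set(ranked_ids[:limit])
    return {track_id: rank for track_id, rank in tracks.items() if track_id in keep_ids}


tracks = {7: 0.9, 3: 0.4, 5: 0.7}
print(sorted(keep_best(tracks, 2)))  # the two highest-ranked ids survive: [5, 7]
```

With `limit=1` this degenerates to the old single-person behavior, which is why `max_active_tracks=1` can replace `mode="single_person"` without a separate code path.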
+1
```diff
@@ -0,0 +1 @@
+"""Test package for support helpers and test-local utilities."""
```
+1
```diff
@@ -0,0 +1 @@
+"""Test-only support helpers."""
```
```diff
@@ -1,18 +1,21 @@
 from pathlib import Path
 
+import click
 import cv2
 import numpy as np
 import pyarrow.parquet as pq
 from beartype import beartype
+from loguru import logger
 
-from pose_tracking_exp.models import CameraCalibration, CameraFrame, FrameBundle, PoseDetection, SceneConfig
-from pose_tracking_exp.normalization import infer_bbox_from_keypoints, normalize_rtmpose_body20
+from pose_tracking_exp.common.normalization import infer_bbox_from_keypoints, normalize_rtmpose_body20
+from pose_tracking_exp.schema import CameraCalibration, CameraFrame, FrameBundle, PoseDetection, SceneConfig, TrackerConfig
+from pose_tracking_exp.tracking import PoseTracker
 
 _NOMINAL_FRAME_PERIOD_NS = 33_333_333
 
 
 @beartype
-def load_actualtest_scene(root: Path) -> SceneConfig:
+def load_actual_test_scene(root: Path) -> SceneConfig:
     # ActualTest parquet comes from the ChArUco/OpenCV side, so `rvec` / `tvec`
     # are world->camera extrinsics. The RPT-facing camera pose is derived later
     # from this canonical OpenCV form.
@@ -40,13 +43,14 @@ def load_actualtest_segment_bundles(
 @beartype
-def load_actualtest_segment_bundles(
+def load_actual_test_segment_bundles(
     root: Path,
     segment_name: str,
     *,
     frame_start: int = 690,
     frame_stop: int | None = None,
     max_frames: int | None = None,
+    min_cameras_with_rows: int = 1,
     min_visible_joints: int = 6,
 ) -> list[FrameBundle]:
     segment_root = root / segment_name
@@ -98,24 +102,31 @@ def load_actualtest_segment_bundles(
     if not by_camera:
         return []
 
-    common_frames = sorted(set.intersection(*(set(frames) for frames in by_camera.values())))
+    candidate_frames = sorted(set().union(*(set(frames) for frames in by_camera.values())))
+    if min_cameras_with_rows > 1:
+        candidate_frames = [
+            frame_index
+            for frame_index in candidate_frames
+            if sum(frame_index in frames for frames in by_camera.values()) >= min_cameras_with_rows
+        ]
     if max_frames is not None:
-        common_frames = common_frames[:max_frames]
+        candidate_frames = candidate_frames[:max_frames]
 
-    scene = load_actualtest_scene(root)
+    scene = load_actual_test_scene(root)
     camera_by_name = {camera.name: camera for camera in scene.cameras}
     bundles: list[FrameBundle] = []
-    for bundle_index, frame_index in enumerate(common_frames):
+    ordered_camera_names = [camera.name for camera in scene.cameras]
+    for bundle_index, frame_index in enumerate(candidate_frames):
         timestamp_unix_ns = bundle_index * _NOMINAL_FRAME_PERIOD_NS
         views: list[CameraFrame] = []
-        for camera_name in sorted(by_camera):
+        for camera_name in ordered_camera_names:
             camera = camera_by_name[camera_name]
             views.append(
                 CameraFrame(
                     camera_name=camera_name,
                     frame_index=frame_index,
                     timestamp_unix_ns=timestamp_unix_ns,
-                    detections=by_camera[camera_name][frame_index],
+                    detections=by_camera.get(camera_name, {}).get(frame_index, ()),
                     source_size=(camera.width, camera.height),
                 )
             )
@@ -127,3 +138,49 @@ def load_actualtest_segment_bundles(
         )
     )
     return bundles
+
+
+@click.command()
+@click.argument("root_path", type=click.Path(path_type=Path, exists=True, file_okay=False))
+@click.option("--segment", "segment_name", default="Segment_1", show_default=True)
+@click.option("--frame-start", default=690, type=int, show_default=True)
+@click.option("--frame-stop", type=int)
+@click.option("--max-frames", type=click.IntRange(min=1))
+@click.option("--min-camera-rows", default=1, type=click.IntRange(min=1), show_default=True)
+@click.option("--max-active-tracks", default=1, type=click.IntRange(min=1), show_default=True)
+def main(
+    root_path: Path,
+    segment_name: str,
+    frame_start: int,
+    frame_stop: int | None,
+    max_frames: int | None,
+    min_camera_rows: int,
+    max_active_tracks: int,
+) -> None:
+    logger.remove()
+    logger.add(
+        click.get_text_stream("stderr"),
+        level="INFO",
+        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
+    )
+    scene = load_actual_test_scene(root_path)
+    bundles = load_actual_test_segment_bundles(
+        root_path,
+        segment_name,
+        frame_start=frame_start,
+        frame_stop=frame_stop,
+        max_frames=max_frames,
+        min_cameras_with_rows=min_camera_rows,
+    )
+    tracker = PoseTracker(scene, TrackerConfig(max_active_tracks=max_active_tracks))
+    results = tracker.run(bundles)
+    logger.info(
+        "actual_test bundles={} active_frames={} proposal_frames={}",
+        len(results),
+        sum(1 for result in results if result.active_tracks),
+        sum(1 for result in results if result.proposals),
+    )
+
+
+if __name__ == "__main__":
+    main()
```
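The frame-selection change in `load_actual_test_segment_bundles` is the heart of this file: a per-camera intersection becomes a union plus an optional camera-count threshold, so partial frames survive. The selection logic can be exercised standalone; the `by_camera` table here is made-up illustration, not the parquet loader:

```python
# Sketch of the new frame selection: union of frame indices across cameras,
# then optionally keep only frames with rows from at least
# `min_cameras_with_rows` cameras (the old behavior was a hard intersection).
def select_frames(by_camera: dict[str, set[int]], min_cameras_with_rows: int) -> list[int]:
    candidate_frames = sorted(set().union(*(frames for frames in by_camera.values())))
    if min_cameras_with_rows > 1:
        candidate_frames = [
            frame_index
            for frame_index in candidate_frames
            if sum(frame_index in frames for frames in by_camera.values()) >= min_cameras_with_rows
        ]
    return candidate_frames


by_camera = {"5602": {690, 691}, "5603": {690}}
print(select_frames(by_camera, 1))  # union keeps the partial frame: [690, 691]
print(select_frames(by_camera, 2))  # threshold drops it: [690]
```

Missing per-camera rows then surface downstream as `detections == ()` via the `by_camera.get(...).get(frame_index, ())` lookup rather than raising a `KeyError`.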
```diff
@@ -4,8 +4,8 @@ import numpy as np
 import pyarrow as pa
 import pyarrow.parquet as pq
 
-from pose_tracking_exp.actualtest import load_actualtest_scene, load_actualtest_segment_bundles
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
+from tests.support.actual_test import load_actual_test_scene, load_actual_test_segment_bundles
 
 
 def _write_parquet(path: Path, rows: list[dict[str, object]]) -> None:
@@ -25,7 +25,7 @@ def _sample_rtmpose_detection() -> tuple[list[float], list[list[float]], list[fl
     return [8.0, 4.0, 32.0, 64.0], keypoints_xy.tolist(), scores.tolist()
 
 
-def test_load_actualtest_parquet_scene_and_segment(tmp_path: Path) -> None:
+def test_load_actual_test_parquet_scene_and_segment(tmp_path: Path) -> None:
     root = tmp_path / "ActualTest_WeiHua"
     _write_parquet(
         root / "camera_params" / "camera_params.parquet",
@@ -62,8 +62,8 @@ def test_load_actualtest_parquet_scene_and_segment(tmp_path: Path) -> None:
         ],
     )
 
-    scene = load_actualtest_scene(root)
-    bundles = load_actualtest_segment_bundles(root, "Segment_1", frame_start=690, max_frames=1)
+    scene = load_actual_test_scene(root)
+    bundles = load_actual_test_segment_bundles(root, "Segment_1", frame_start=690, max_frames=1)
 
     assert [camera.name for camera in scene.cameras] == ["5602", "5603"]
     np.testing.assert_allclose(scene.cameras[0].pose_T, [0.0, 0.0, 0.0])
@@ -75,3 +75,53 @@ def test_load_actualtest_parquet_scene_and_segment(tmp_path: Path) -> None:
         bundles[0].views[0].detections[0].keypoints[BODY20_INDEX_BY_NAME["hip_middle"], :2],
         [20.0, 60.0],
     )
+
+
+def test_load_actual_test_keeps_partial_camera_frames(tmp_path: Path) -> None:
+    root = tmp_path / "ActualTest_WeiHua"
+    _write_parquet(
+        root / "camera_params" / "camera_params.parquet",
+        [
+            {
+                "name": "AF_02",
+                "port": 5602,
+                "intrinsic": {
+                    "camera_matrix": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
+                    "distortion_coefficients": [0.0, 0.0, 0.0, 0.0, 0.0],
+                },
+                "extrinsic": {"rvec": [0.0, 0.0, 0.0], "tvec": [0.0, 0.0, 0.0]},
+                "resolution": {"width": 640, "height": 480},
+            },
+            {
+                "name": "AF_03",
+                "port": 5603,
+                "intrinsic": {
+                    "camera_matrix": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
+                    "distortion_coefficients": [0.0, 0.0, 0.0, 0.0, 0.0],
+                },
+                "extrinsic": {"rvec": [0.0, 0.0, 0.0], "tvec": [1.0, 0.0, 0.0]},
+                "resolution": {"width": 640, "height": 480},
+            },
+        ],
+    )
+    box, keypoints_xy, scores = _sample_rtmpose_detection()
+    _write_parquet(
+        root / "Segment_1" / "5602_detected.parquet",
+        [
+            {"frame_index": 690, "boxes": [box], "kps": [keypoints_xy], "kps_scores": [scores]},
+            {"frame_index": 691, "boxes": [box], "kps": [keypoints_xy], "kps_scores": [scores]},
+        ],
+    )
+    _write_parquet(
+        root / "Segment_1" / "5603_detected.parquet",
+        [
+            {"frame_index": 690, "boxes": [box], "kps": [keypoints_xy], "kps_scores": [scores]},
+        ],
+    )
+
+    bundles = load_actual_test_segment_bundles(root, "Segment_1", frame_start=690)
+
+    assert [bundle.views[0].frame_index for bundle in bundles] == [690, 691]
+    assert [view.camera_name for view in bundles[1].views] == ["5602", "5603"]
+    assert len(bundles[1].views[0].detections) == 1
+    assert bundles[1].views[1].detections == ()
```
+6 -6
```diff
@@ -8,9 +8,9 @@ import pytest
 
 pytest.importorskip("rpt")
 
-from pose_tracking_exp.models import CameraCalibration, SceneConfig
-from pose_tracking_exp.replay import load_scene_file
-from pose_tracking_exp.rpt_adapter import build_rpt_config
+from pose_tracking_exp.schema import CameraCalibration, CameraModel, SceneConfig, parse_camera_model
+from pose_tracking_exp.tracking.replay_io import load_scene_file
+from pose_tracking_exp.tracking.rpt_adapter import build_rpt_config
 
 
 class _CameraArgs(NamedTuple):
@@ -19,7 +19,7 @@ class _CameraArgs(NamedTuple):
    height: int
    K: np.ndarray
    DC: np.ndarray
-    model: str
+    model: CameraModel
 
 
 def _camera_args() -> _CameraArgs:
@@ -29,7 +29,7 @@ def _camera_args() -> _CameraArgs:
         height=480,
         K=np.asarray([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]], dtype=np.float64),
         DC=np.zeros(5, dtype=np.float64),
-        model="pinhole",
+        model=parse_camera_model("pinhole"),
     )
@@ -139,7 +139,7 @@ def test_build_rpt_config_uses_pose_convention(monkeypatch: pytest.MonkeyPatch)
         captured["min_group_size"] = min_group_size
         return captured
 
-    monkeypatch.setattr("pose_tracking_exp.rpt_adapter.rpt.make_triangulation_config", fake_make_triangulation_config)
+    monkeypatch.setattr("pose_tracking_exp.tracking.rpt_adapter.rpt.make_triangulation_config", fake_make_triangulation_config)
 
     build_rpt_config(scene, min_match_score=0.5, min_group_size=2)
```
+223
```diff
@@ -0,0 +1,223 @@
+from collections.abc import AsyncIterator, Sequence
+from pathlib import Path
+
+import anyio
+import numpy as np
+import pytest
+
+from pose_tracking_exp.detection.config import (
+    DetectionRunnerConfig,
+    load_detection_runner_config,
+    resolve_instances,
+)
+from pose_tracking_exp.detection.runner import (
+    PendingFrame,
+    SourceSlot,
+    run_detection_runner,
+    store_latest_frame,
+    take_pending_batch,
+)
+from pose_tracking_exp.schema.detection import PoseDetections, SourceFrame
+
+
+def test_load_detection_runner_config_from_toml_and_env(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    config_path = tmp_path / "runner.toml"
+    config_path.write_text(
+        "\n".join(
+            [
+                'instances = ["front_left", "front_right"]',
+                'device = "cuda:1"',
+                'nats_host = "nats://localhost:4222"',
+                'yolo_checkpoint = "checkpoint/yolo/yolo11_mix_epoch10.pt"',
+                'pose_checkpoint = "checkpoint/dwpose/best_coco-wholebody_AP_epoch_50.pth"',
+                "bbox_area_threshold = 2500",
+                "max_batch_frames = 6",
+                "max_batch_wait_ms = 3",
+            ]
+        ),
+        encoding="utf-8",
+    )
+    monkeypatch.setenv("POSE_TRACKING_EXP_DETECTION_DEVICE", "cpu")
+
+    config = load_detection_runner_config(config_path)
+
+    assert config.instances == ("front_left", "front_right")
+    assert config.device == "cpu"
+    assert config.nats_host == "nats://localhost:4222"
+    assert config.bbox_area_threshold == 2500
+    assert config.max_batch_frames == 6
+    assert config.max_batch_wait_ms == 3
+
+
+def test_resolve_instances_prefers_cli_values() -> None:
+    assert resolve_instances(("cli_a", "cli_b"), ("cfg_a",)) == ("cli_a", "cli_b")
+
+
+def test_resolve_instances_falls_back_to_config_values() -> None:
+    assert resolve_instances((), ("cfg_a", "cfg_b")) == ("cfg_a", "cfg_b")
+
+
+def test_store_latest_frame_overwrites_pending_frame() -> None:
+    slot = SourceSlot(source_name="front_left")
+    first = SourceFrame(
+        source_name="front_left",
+        image_bgr=np.zeros((1, 1, 3), dtype=np.uint8),
+        frame_index=1,
+        timestamp_unix_ns=100,
+    )
+    second = SourceFrame(
+        source_name="front_left",
+        image_bgr=np.ones((1, 1, 3), dtype=np.uint8),
+        frame_index=2,
+        timestamp_unix_ns=200,
+    )
+
+    store_latest_frame(slot, first)
+    store_latest_frame(slot, second)
+
+    assert slot.received_frames == 2
+    assert slot.dropped_frames == 1
+    assert slot.pending_frame is not None
+    assert slot.pending_frame.frame is second
+
+
+def test_take_pending_batch_collects_at_most_one_frame_per_source() -> None:
+    slots = {
+        "front_left": SourceSlot(
+            source_name="front_left",
+            pending_frame=PendingFrame(
+                source_name="front_left",
+                frame=SourceFrame(
+                    source_name="front_left",
+                    image_bgr=np.zeros((1, 1, 3), dtype=np.uint8),
+                    frame_index=11,
+                    timestamp_unix_ns=110,
+                ),
+            ),
+        ),
+        "front_right": SourceSlot(
+            source_name="front_right",
+            pending_frame=PendingFrame(
+                source_name="front_right",
+                frame=SourceFrame(
+                    source_name="front_right",
+                    image_bgr=np.zeros((1, 1, 3), dtype=np.uint8),
+                    frame_index=22,
+                    timestamp_unix_ns=220,
+                ),
+            ),
+        ),
+        "rear": SourceSlot(
+            source_name="rear",
+            pending_frame=PendingFrame(
+                source_name="rear",
+                frame=SourceFrame(
+                    source_name="rear",
+                    image_bgr=np.zeros((1, 1, 3), dtype=np.uint8),
+                    frame_index=33,
+                    timestamp_unix_ns=330,
+                ),
+            ),
+        ),
+    }
+
+    batch = take_pending_batch(slots, max_batch_frames=2)
+
+    assert [frame.source_name for frame in batch] == ["front_left", "front_right"]
+    assert slots["front_left"].pending_frame is None
+    assert slots["front_right"].pending_frame is None
+    assert slots["rear"].pending_frame is not None
+
+
+class StubSource:
+    def __init__(self, source_name: str, frames: tuple[SourceFrame, ...]) -> None:
+        self.source_name = source_name
+        self._frames = frames
+
+    async def frames(self) -> AsyncIterator[SourceFrame]:
+        for frame in self._frames:
+            yield frame
+
+
+class StubPoseShim:
+    def process_many(self, frames: Sequence[SourceFrame]) -> list[PoseDetections]:
+        detections: list[PoseDetections] = []
+        for frame in frames:
+            detections.append(
+                PoseDetections(
+                    source_name=frame.source_name,
+                    frame_index=frame.frame_index,
+                    source_size=(frame.image_bgr.shape[1], frame.image_bgr.shape[0]),
+                    boxes_xyxy=np.asarray([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32),
+                    box_scores=np.asarray([1.0], dtype=np.float32),
+                    keypoints_xy=np.zeros((1, 133, 2), dtype=np.float32),
+                    keypoint_scores=np.ones((1, 133), dtype=np.float32),
+                    timestamp_unix_ns=frame.timestamp_unix_ns,
+                    keypoint_schema="coco_wholebody133",
+                )
+            )
+        return detections
+
+
+class StubSink:
+    def __init__(self) -> None:
+        self.messages: list[PoseDetections] = []
+        self.closed = False
+
+    async def publish_pose(self, detections: PoseDetections) -> None:
+        self.messages.append(detections)
+
+    async def aclose(self) -> None:
+        self.closed = True
+
+
+def test_run_detection_runner_publishes_payloads() -> None:
+    sink = StubSink()
+    sources = (
+        StubSource(
+            "cam0",
+            (
+                SourceFrame(
+                    source_name="cam0",
+                    image_bgr=np.zeros((2, 3, 3), dtype=np.uint8),
+                    frame_index=1,
+                    timestamp_unix_ns=100,
+                ),
+            ),
+        ),
+        StubSource(
+            "cam1",
+            (
+                SourceFrame(
+                    source_name="cam1",
+                    image_bgr=np.zeros((2, 3, 3), dtype=np.uint8),
+                    frame_index=2,
+                    timestamp_unix_ns=200,
+                ),
+            ),
+        ),
+    )
+    config = DetectionRunnerConfig(
+        instances=("cam0", "cam1"),
+        pose_config_path=Path(__file__),
+        yolo_checkpoint=Path(__file__),
+        pose_checkpoint=Path(__file__),
+        max_batch_frames=2,
+    )
+
+    anyio.run(
+        run_detection_runner,
+        sources,
+        StubPoseShim(),
+        sink,
+        config,
+    )
+
+    assert sink.closed is True
+    assert [(item.source_name, item.frame_index, item.timestamp_unix_ns) for item in sink.messages] == [
+        ("cam0", 1, 100),
+        ("cam1", 2, 200),
+    ]
```
+137
```diff
@@ -0,0 +1,137 @@
+import json
+from pathlib import Path
+
+import anyio
+import cv2
+import numpy as np
+import pyarrow.parquet as pq
+
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
+from pose_tracking_exp.detection.sinks import ParquetPoseSink
+from pose_tracking_exp.detection.sources import VideoFrameSource
+from pose_tracking_exp.schema.detection import PoseDetections
+from pose_tracking_exp.tracking import load_replay_file
+
+
+def _write_synthetic_video(path: Path) -> None:
+    writer = cv2.VideoWriter(
+        str(path),
+        cv2.VideoWriter.fourcc(*"MJPG"),
+        10.0,
+        (8, 6),
+    )
+    if not writer.isOpened():
+        raise RuntimeError("Could not open synthetic video writer.")
+    try:
+        for frame_index in range(3):
+            frame = np.full((6, 8, 3), frame_index * 32, dtype=np.uint8)
+            writer.write(frame)
+    finally:
+        writer.release()
+
+
+def _sample_wholebody_detection(*, source_name: str, frame_index: int) -> PoseDetections:
+    keypoints_xy = np.zeros((1, 133, 2), dtype=np.float32)
+    keypoint_scores = np.zeros((1, 133), dtype=np.float32)
+    keypoints_xy[0, 5] = [10.0, 20.0]
+    keypoints_xy[0, 6] = [30.0, 20.0]
+    keypoints_xy[0, 11] = [12.0, 60.0]
+    keypoints_xy[0, 12] = [28.0, 60.0]
+    keypoints_xy[0, 0] = [20.0, 8.0]
+    keypoint_scores[0, [0, 5, 6, 11, 12]] = 1.0
+    return PoseDetections(
+        source_name=source_name,
+        frame_index=frame_index,
+        source_size=(640, 480),
+        boxes_xyxy=np.asarray([[8.0, 4.0, 32.0, 64.0]], dtype=np.float32),
+        box_scores=np.asarray([0.9], dtype=np.float32),
+        keypoints_xy=keypoints_xy,
+        keypoint_scores=keypoint_scores,
+        timestamp_unix_ns=frame_index * 100_000_000,
+        keypoint_schema="coco_wholebody133",
+    )
+
+
+def test_video_frame_source_reads_frames(tmp_path: Path) -> None:
+    video_path = tmp_path / "cam0.avi"
+    _write_synthetic_video(video_path)
+    source = VideoFrameSource(video_path, source_name="cam0")
+
+    async def collect() -> list[tuple[str, int, int, tuple[int, int, int]]]:
+        frames: list[tuple[str, int, int, tuple[int, int, int]]] = []
+        async for frame in source.frames():
+            frames.append(
+                (
+                    frame.source_name,
+                    frame.frame_index,
+                    frame.timestamp_unix_ns,
+                    frame.image_bgr.shape,
+                )
+            )
+        return frames
+
+    frames = anyio.run(collect)
+
+    assert [item[0] for item in frames] == ["cam0", "cam0", "cam0"]
+    assert [item[1] for item in frames] == [0, 1, 2]
+    assert [item[3] for item in frames] == [(6, 8, 3), (6, 8, 3), (6, 8, 3)]
+    assert frames[0][2] <= frames[1][2] <= frames[2][2]
+
+
+def test_parquet_sink_round_trips_into_tracking_replay(tmp_path: Path) -> None:
+    output_dir = tmp_path / "detections"
+    sink = ParquetPoseSink(output_dir, flush_rows=1)
+
+    async def write_rows() -> None:
+        await sink.publish_pose(_sample_wholebody_detection(source_name="cam0", frame_index=0))
+        await sink.publish_pose(
+            PoseDetections(
+                source_name="cam0",
+                frame_index=1,
+                source_size=(640, 480),
+                boxes_xyxy=np.empty((0, 4), dtype=np.float32),
+                box_scores=np.empty((0,), dtype=np.float32),
+                keypoints_xy=np.empty((0, 133, 2), dtype=np.float32),
+                keypoint_scores=np.empty((0, 133), dtype=np.float32),
+                timestamp_unix_ns=100_000_000,
+                keypoint_schema="coco_wholebody133",
+            )
+        )
+        await sink.aclose()
+
+    anyio.run(write_rows)
+
+    parquet_path = output_dir / "cam0_detected.parquet"
+    assert parquet_path.exists()
+    assert pq.read_table(parquet_path).num_rows == 2
+
+    scene_path = tmp_path / "scene.json"
+    scene_path.write_text(
+        json.dumps(
+            {
+                "room_size": [6.0, 4.0, 3.0],
+                "room_center": [0.0, 0.0, 1.0],
+                "cameras": [
+                    {
+                        "name": "cam0",
+                        "width": 640,
+                        "height": 480,
+                        "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
+                        "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
+                        "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
+                        "T": [[0.0], [0.0], [0.0]],
+                    }
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    replay = load_replay_file(scene_path, output_dir)
+    frames = replay.frames_by_camera["cam0"]
+
+    assert [frame.frame_index for frame in frames] == [0, 1]
+    assert frames[1].detections == ()
+    np.testing.assert_allclose(
+        frames[0].detections[0].keypoints[BODY20_INDEX_BY_NAME["hip_middle"], :2],
+        [20.0, 60.0],
+    )
```
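The round trip above hinges on one invariant of the parquet path: a frame with no detections is still written as a row with empty list columns, so replay loading reproduces it as a frame with `detections == ()` instead of a gap in the index. A plain-dict sketch of that invariant (the row shape is illustrative, not the sink's actual parquet schema):

```python
# Each frame becomes one row even when nothing was detected; empty list
# columns round-trip to an empty detections tuple rather than a missing frame,
# which keeps downstream frame synchronization aligned.
rows = [
    {"frame_index": 0, "boxes": [[8.0, 4.0, 32.0, 64.0]]},
    {"frame_index": 1, "boxes": []},  # empty frame is kept as a row
]
frames = {row["frame_index"]: tuple(map(tuple, row["boxes"])) for row in rows}
print(sorted(frames))  # both frame indices survive: [0, 1]
print(frames[1])       # ()
```

Dropping empty rows instead would silently renumber frames, and the video-to-parquet-to-tracking workflow would lose the correspondence between detection frames and video frames.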
+3 -3
```diff
@@ -1,7 +1,7 @@
 import numpy as np
 
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
-from pose_tracking_exp.kinematics import seed_state_from_pose3d
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
+from pose_tracking_exp.tracking.kinematics import seed_state_from_pose3d
 
 
 def _sample_pose3d() -> np.ndarray:
@@ -38,7 +38,7 @@ def test_seed_state_from_pose3d_does_not_call_least_squares(monkeypatch) -> None
     def fail_least_squares(*args: object, **kwargs: object) -> object:
         raise AssertionError("seed_state_from_pose3d should not call scipy.optimize.least_squares")
 
-    monkeypatch.setattr("pose_tracking_exp.kinematics.least_squares", fail_least_squares)
+    monkeypatch.setattr("pose_tracking_exp.tracking.kinematics.least_squares", fail_least_squares)
 
     state = seed_state_from_pose3d(_sample_pose3d())
     assert state.parameters.shape == (31,)
```
+49 -7
```diff
@@ -4,11 +4,17 @@ from pathlib import Path
 
 import numpy as np
 
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
-from pose_tracking_exp.normalization import normalize_rtmpose_body20
-from pose_tracking_exp.parajumping import PROTOCOL_HEADER, convert_payload_record, decode_pose_payload
-from pose_tracking_exp.replay import load_replay_file, load_scene_file
-from pose_tracking_exp.sync import synchronize_frames
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
+from pose_tracking_exp.common.normalization import normalize_coco_body20, normalize_rtmpose_body20
+from pose_tracking_exp.detection.cvmmap_payload import (
+    COCO_WHOLEBODY_KEYPOINT_COUNT,
+    PROTOCOL_HEADER,
+    CvmmapPosePayloadCodec,
+    convert_payload_record,
+    decode_pose_payload,
+)
+from pose_tracking_exp.schema.detection import PoseDetections
+from pose_tracking_exp.tracking import load_replay_file, load_scene_file, synchronize_frames
 
 
 def _encode_payload(
@@ -31,7 +37,7 @@ def _encode_payload(
         + np.asarray(box_scores, dtype=np.uint8).tobytes()
         + int(keypoints_xy.shape[0]).to_bytes(1, "little")
         + np.asarray(keypoints_xy, dtype="<u2").tobytes()
-        + int(keypoint_scores.size).to_bytes(1, "little")
+        + int(keypoint_scores.shape[0]).to_bytes(1, "little")
         + np.asarray(keypoint_scores, dtype=np.uint8).reshape(-1).tobytes()
         + int(timestamp_unix_ns).to_bytes(8, "little")
     )
@@ -54,6 +60,23 @@ def test_normalize_rtmpose_body20_derives_midpoints_and_head():
     np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["head"], :2], [20.0, 8.0])
 
 
+def test_normalize_coco17_body20_derives_midpoints_and_head():
+    keypoints = np.zeros((17, 2), dtype=np.float64)
+    scores = np.zeros((17,), dtype=np.float64)
+    keypoints[5] = [10.0, 20.0]
+    keypoints[6] = [30.0, 20.0]
+    keypoints[11] = [12.0, 60.0]
+    keypoints[12] = [28.0, 60.0]
+    keypoints[0] = [20.0, 8.0]
+    scores[[0, 5, 6, 11, 12]] = 1.0
+
+    normalized = normalize_coco_body20(keypoints, scores, keypoint_schema="coco17")
+
+    np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["hip_middle"], :2], [20.0, 60.0])
+    np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["shoulder_middle"], :2], [20.0, 20.0])
+    np.testing.assert_allclose(normalized[BODY20_INDEX_BY_NAME["head"], :2], [20.0, 8.0])
+
+
 def test_decode_payload_and_convert_record():
     keypoints_xy = np.zeros((1, 133, 2), dtype=np.uint16)
     keypoint_scores = np.zeros((1, 133), dtype=np.uint8)
@@ -87,6 +110,26 @@ def test_decode_payload_and_convert_record():
     assert converted["frame_index"] == 7
 
 
+def test_encode_pose_payload_requires_coco_wholebody133():
+    codec = CvmmapPosePayloadCodec()
+    detections = PoseDetections(
+        source_name="cam0",
+        frame_index=1,
+        source_size=(640, 480),
+        boxes_xyxy=np.zeros((1, 4), dtype=np.float32),
+        box_scores=np.ones((1,), dtype=np.float32),
+        keypoints_xy=np.zeros((1, COCO_WHOLEBODY_KEYPOINT_COUNT, 2), dtype=np.float32),
+        keypoint_scores=np.ones((1, COCO_WHOLEBODY_KEYPOINT_COUNT), dtype=np.float32),
+        timestamp_unix_ns=123,
+        keypoint_schema="coco_wholebody133",
+    )
+
+    payload = codec.encode(detections)
+    decoded = decode_pose_payload(payload)
+
+    assert decoded.frame_index == 1
+    assert decoded.reference_size == (640, 480)
+
+
 def test_load_replay_and_synchronize(tmp_path: Path):
     scene_path = tmp_path / "scene.json"
     replay_path = tmp_path / "replay.jsonl"
@@ -153,4 +196,3 @@ def test_load_replay_and_synchronize(tmp_path: Path):
     bundles = synchronize_frames(replay, max_skew_ns=20, min_views=2)
     assert len(bundles) == 1
     assert {frame.camera_name for frame in bundles[0].views} == {"cam0", "cam1"}
-
```
+5 -5
```diff
@@ -5,9 +5,9 @@ import pytest
 
 pytest.importorskip("rpt")
 
-from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
-from pose_tracking_exp.models import CameraCalibration, CameraFrame, FrameBundle, ProposalCluster, SceneConfig, TrackerConfig
-from pose_tracking_exp.tracker import PoseTracker
+from pose_tracking_exp.common.joints import BODY20_INDEX_BY_NAME
+from pose_tracking_exp.schema import CameraCalibration, CameraFrame, FrameBundle, ProposalCluster, SceneConfig, TrackerConfig
+from pose_tracking_exp.tracking import PoseTracker
 
 
 def _make_scene() -> SceneConfig:
@@ -96,7 +96,7 @@ def test_single_person_mode_caps_active_tracks(monkeypatch) -> None:
     tracker = PoseTracker(
         _make_scene(),
         TrackerConfig(
-            mode="single_person",
+            max_active_tracks=1,
             tentative_min_age=1,
             tentative_hits_required=1,
             tentative_promote_score=0.0,
@@ -127,7 +127,7 @@ def test_single_person_mode_reuses_lost_track_id(monkeypatch) -> None:
     tracker = PoseTracker(
         _make_scene(),
         TrackerConfig(
-            mode="single_person",
+            max_active_tracks=1,
             tentative_min_age=1,
             tentative_hits_required=1,
             tentative_promote_score=0.0,
```
+3 -3
```diff
@@ -6,9 +6,9 @@ import pytest
 
 pytest.importorskip("rpt")
 
-from pose_tracking_exp.models import CameraFrame, FrameBundle, PoseDetection, TrackerConfig
-from pose_tracking_exp.replay import load_scene_file
-from pose_tracking_exp.tracker import PoseTracker
+from pose_tracking_exp.schema import CameraFrame, FrameBundle, PoseDetection, TrackerConfig
+from pose_tracking_exp.tracking import PoseTracker
+from pose_tracking_exp.tracking.replay_io import load_scene_file
 
 RPT_ROOT = Path("/home/crosstyan/Code/RapidPoseTriangulation")
```
Generated
+2304 -199
(Generated file; diff suppressed because it is too large.)
+1 -3
```diff
@@ -1,3 +1 @@
-[[index]]
-url = "https://pypi.org/simple"
-default = true
+no-build-isolation-package = ["chumpy", "xtcocotools"]
```
Binary file not shown.
Binary file not shown.