f1a2372b3c
Set up pose_tracking_exp as a uv-managed Python package for offline multiview body tracking experiments. This initial commit includes: - the typed package scaffold, CLI entrypoints, and repo-local uv configuration - scene and replay loaders for generic JSON replays and ActualTest parquet inputs - ParaJumping payload conversion and RTMPose-to-body20 normalization - a custom articulated tracker with tentative, active, and lost lifecycle handling - RPT-backed proposal generation, camera convention handling, and multiview reprojection updates - regression tests for normalization, camera conventions, ActualTest ingestion, seeding, and tracker smoke flows - project documentation covering extrinsic formats and the ActualTest calibration caveat
149 lines
4.7 KiB
Python
149 lines
4.7 KiB
Python
import json
|
|
from pathlib import Path
|
|
from typing import NamedTuple, cast
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import pytest
|
|
|
|
pytest.importorskip("rpt")
|
|
|
|
from pose_tracking_exp.models import CameraCalibration, SceneConfig
|
|
from pose_tracking_exp.replay import load_scene_file
|
|
from pose_tracking_exp.rpt_adapter import build_rpt_config
|
|
|
|
|
|
class _CameraArgs(NamedTuple):
|
|
name: str
|
|
width: int
|
|
height: int
|
|
K: np.ndarray
|
|
DC: np.ndarray
|
|
model: str
|
|
|
|
|
|
def _camera_args() -> _CameraArgs:
|
|
return _CameraArgs(
|
|
name="cam0",
|
|
width=640,
|
|
height=480,
|
|
K=np.asarray([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]], dtype=np.float64),
|
|
DC=np.zeros(5, dtype=np.float64),
|
|
model="pinhole",
|
|
)
|
|
|
|
|
|
def test_from_opencv_extrinsics_derives_rpt_pose() -> None:
|
|
args = _camera_args()
|
|
rotation_vec = np.asarray([0.0, 0.2, 0.0], dtype=np.float64).reshape(3, 1)
|
|
rotation, _ = cv2.Rodrigues(rotation_vec)
|
|
translation = np.asarray([0.5, -0.1, 2.0], dtype=np.float64)
|
|
|
|
camera = CameraCalibration.from_opencv_extrinsics(
|
|
name=args.name,
|
|
width=args.width,
|
|
height=args.height,
|
|
K=args.K,
|
|
DC=args.DC,
|
|
model=args.model,
|
|
R=rotation,
|
|
T=translation,
|
|
rvec=rotation_vec.reshape(3),
|
|
)
|
|
|
|
np.testing.assert_allclose(camera.pose_R, rotation.T)
|
|
np.testing.assert_allclose(camera.pose_T, -(rotation.T @ translation))
|
|
|
|
|
|
def test_from_rpt_pose_derives_opencv_extrinsics() -> None:
|
|
args = _camera_args()
|
|
pose_rotation_vec = np.asarray([0.0, -0.3, 0.0], dtype=np.float64).reshape(3, 1)
|
|
pose_rotation, _ = cv2.Rodrigues(pose_rotation_vec)
|
|
pose_translation = np.asarray([1.5, 0.2, -0.4], dtype=np.float64)
|
|
|
|
camera = CameraCalibration.from_rpt_pose(
|
|
name=args.name,
|
|
width=args.width,
|
|
height=args.height,
|
|
K=args.K,
|
|
DC=args.DC,
|
|
model=args.model,
|
|
R=pose_rotation,
|
|
T=pose_translation,
|
|
)
|
|
|
|
np.testing.assert_allclose(camera.pose_R, pose_rotation)
|
|
np.testing.assert_allclose(camera.pose_T, pose_translation)
|
|
np.testing.assert_allclose(camera.R, pose_rotation.T)
|
|
np.testing.assert_allclose(camera.T, -(pose_rotation.T @ pose_translation))
|
|
|
|
|
|
def test_load_scene_file_supports_explicit_rpt_pose(tmp_path: Path) -> None:
|
|
scene_path = tmp_path / "scene.json"
|
|
payload = {
|
|
"extrinsic_format": "rpt_camera_pose",
|
|
"room_size": [6.0, 4.0, 3.0],
|
|
"room_center": [0.0, 0.0, 1.0],
|
|
"cameras": [
|
|
{
|
|
"name": "cam0",
|
|
"width": 640,
|
|
"height": 480,
|
|
"K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
|
|
"DC": [0.0, 0.0, 0.0, 0.0, 0.0],
|
|
"R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
|
|
"T": [[1.0], [2.0], [3.0]],
|
|
}
|
|
],
|
|
}
|
|
scene_path.write_text(json.dumps(payload), encoding="utf-8")
|
|
|
|
scene = load_scene_file(scene_path)
|
|
|
|
np.testing.assert_allclose(scene.cameras[0].pose_T, [1.0, 2.0, 3.0])
|
|
np.testing.assert_allclose(scene.cameras[0].T, [-1.0, -2.0, -3.0])
|
|
|
|
|
|
def test_build_rpt_config_uses_pose_convention(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
args = _camera_args()
|
|
camera = CameraCalibration.from_opencv_extrinsics(
|
|
name=args.name,
|
|
width=args.width,
|
|
height=args.height,
|
|
K=args.K,
|
|
DC=args.DC,
|
|
model=args.model,
|
|
R=np.eye(3, dtype=np.float64),
|
|
T=np.asarray([1.0, 2.0, 3.0], dtype=np.float64),
|
|
rvec=np.zeros(3, dtype=np.float64),
|
|
)
|
|
scene = SceneConfig(
|
|
room_size=np.asarray([6.0, 4.0, 3.0], dtype=np.float64),
|
|
room_center=np.asarray([0.0, 0.0, 1.0], dtype=np.float64),
|
|
cameras=(camera,),
|
|
)
|
|
captured: dict[str, object] = {}
|
|
|
|
def fake_make_triangulation_config(
|
|
cameras: list[dict[str, object]],
|
|
roomparams: np.ndarray,
|
|
joint_names: list[str],
|
|
*,
|
|
min_match_score: float,
|
|
min_group_size: int,
|
|
) -> dict[str, object]:
|
|
captured["cameras"] = cameras
|
|
captured["roomparams"] = roomparams
|
|
captured["joint_names"] = joint_names
|
|
captured["min_match_score"] = min_match_score
|
|
captured["min_group_size"] = min_group_size
|
|
return captured
|
|
|
|
monkeypatch.setattr("pose_tracking_exp.rpt_adapter.rpt.make_triangulation_config", fake_make_triangulation_config)
|
|
|
|
build_rpt_config(scene, min_match_score=0.5, min_group_size=2)
|
|
|
|
camera_payload = cast(list[dict[str, object]], captured["cameras"])[0]
|
|
assert camera_payload["R"] == camera.pose_R.tolist()
|
|
assert camera_payload["T"] == camera.pose_T.reshape(3, 1).tolist()
|