Files
pose_tracking_exp/tests/test_camera_conventions.py
T
crosstyan f1a2372b3c feat: initialize offline multiview pose tracking experiment
Set up pose_tracking_exp as a uv-managed Python package for offline multiview body tracking experiments.

This initial commit includes:

- the typed package scaffold, CLI entrypoints, and repo-local uv configuration

- scene and replay loaders for generic JSON replays and ActualTest parquet inputs

- ParaJumping payload conversion and RTMPose-to-body20 normalization

- a custom articulated tracker with tentative, active, and lost lifecycle handling

- RPT-backed proposal generation, camera convention handling, and multiview reprojection updates

- regression tests for normalization, camera conventions, ActualTest ingestion, seeding, and tracker smoke flows

- project documentation covering extrinsic formats and the ActualTest calibration caveat
2026-03-26 13:03:56 +08:00

149 lines
4.7 KiB
Python

import json
from pathlib import Path
from typing import NamedTuple, cast
import cv2
import numpy as np
import pytest
pytest.importorskip("rpt")
from pose_tracking_exp.models import CameraCalibration, SceneConfig
from pose_tracking_exp.replay import load_scene_file
from pose_tracking_exp.rpt_adapter import build_rpt_config
class _CameraArgs(NamedTuple):
    """Bundle of camera-construction arguments shared by the tests below."""

    name: str  # camera identifier passed through to CameraCalibration
    width: int  # sensor width in pixels
    height: int  # sensor height in pixels
    K: np.ndarray  # 3x3 intrinsic matrix
    DC: np.ndarray  # distortion coefficients (5-vector in these tests)
    model: str  # camera projection model label, e.g. "pinhole"
def _camera_args() -> _CameraArgs:
    """Return the fixed 640x480 pinhole-camera arguments used by every test."""
    fx = fy = 500.0
    cx, cy = 320.0, 240.0
    intrinsics = np.asarray(
        [
            [fx, 0.0, cx],
            [0.0, fy, cy],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )
    return _CameraArgs(
        name="cam0",
        width=640,
        height=480,
        K=intrinsics,
        DC=np.zeros(5, dtype=np.float64),
        model="pinhole",
    )
def test_from_opencv_extrinsics_derives_rpt_pose() -> None:
    """OpenCV world->camera extrinsics must invert into the stored RPT pose."""
    args = _camera_args()
    rvec = np.asarray([0.0, 0.2, 0.0], dtype=np.float64).reshape(3, 1)
    extrinsic_R, _ = cv2.Rodrigues(rvec)
    extrinsic_T = np.asarray([0.5, -0.1, 2.0], dtype=np.float64)
    camera = CameraCalibration.from_opencv_extrinsics(
        name=args.name,
        width=args.width,
        height=args.height,
        K=args.K,
        DC=args.DC,
        model=args.model,
        R=extrinsic_R,
        T=extrinsic_T,
        rvec=rvec.reshape(3),
    )
    # The camera pose is the inverse rigid transform of the OpenCV extrinsics:
    # pose_R = R^T and pose_T = -R^T @ T.
    np.testing.assert_allclose(camera.pose_R, extrinsic_R.T)
    np.testing.assert_allclose(camera.pose_T, -(extrinsic_R.T @ extrinsic_T))
def test_from_rpt_pose_derives_opencv_extrinsics() -> None:
    """An RPT camera pose must round-trip into the matching OpenCV extrinsics."""
    args = _camera_args()
    pose_rvec = np.asarray([0.0, -0.3, 0.0], dtype=np.float64).reshape(3, 1)
    pose_R, _ = cv2.Rodrigues(pose_rvec)
    pose_T = np.asarray([1.5, 0.2, -0.4], dtype=np.float64)
    camera = CameraCalibration.from_rpt_pose(
        name=args.name,
        width=args.width,
        height=args.height,
        K=args.K,
        DC=args.DC,
        model=args.model,
        R=pose_R,
        T=pose_T,
    )
    # Pose fields are stored verbatim...
    np.testing.assert_allclose(camera.pose_R, pose_R)
    np.testing.assert_allclose(camera.pose_T, pose_T)
    # ...while the OpenCV extrinsics are derived as the inverse transform.
    np.testing.assert_allclose(camera.R, pose_R.T)
    np.testing.assert_allclose(camera.T, -(pose_R.T @ pose_T))
def test_load_scene_file_supports_explicit_rpt_pose(tmp_path: Path) -> None:
    """A scene tagged "rpt_camera_pose" stores T as the camera pose, not OpenCV T."""
    camera_entry = {
        "name": "cam0",
        "width": 640,
        "height": 480,
        "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
        "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
        "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
        "T": [[1.0], [2.0], [3.0]],
    }
    payload = {
        "extrinsic_format": "rpt_camera_pose",
        "room_size": [6.0, 4.0, 3.0],
        "room_center": [0.0, 0.0, 1.0],
        "cameras": [camera_entry],
    }
    scene_path = tmp_path / "scene.json"
    scene_path.write_text(json.dumps(payload), encoding="utf-8")

    scene = load_scene_file(scene_path)
    loaded = scene.cameras[0]
    np.testing.assert_allclose(loaded.pose_T, [1.0, 2.0, 3.0])
    # With identity R the derived OpenCV translation is the negated pose.
    np.testing.assert_allclose(loaded.T, [-1.0, -2.0, -3.0])
def test_build_rpt_config_uses_pose_convention(monkeypatch: pytest.MonkeyPatch) -> None:
    """build_rpt_config must hand RPT camera poses (pose_R/pose_T), not OpenCV extrinsics."""
    args = _camera_args()
    camera = CameraCalibration.from_opencv_extrinsics(
        name=args.name,
        width=args.width,
        height=args.height,
        K=args.K,
        DC=args.DC,
        model=args.model,
        R=np.eye(3, dtype=np.float64),
        T=np.asarray([1.0, 2.0, 3.0], dtype=np.float64),
        rvec=np.zeros(3, dtype=np.float64),
    )
    scene = SceneConfig(
        room_size=np.asarray([6.0, 4.0, 3.0], dtype=np.float64),
        room_center=np.asarray([0.0, 0.0, 1.0], dtype=np.float64),
        cameras=(camera,),
    )

    recorded: dict[str, object] = {}

    def _spy_make_triangulation_config(
        cameras: list[dict[str, object]],
        roomparams: np.ndarray,
        joint_names: list[str],
        *,
        min_match_score: float,
        min_group_size: int,
    ) -> dict[str, object]:
        # Record every argument so the assertions below can inspect them.
        recorded["cameras"] = cameras
        recorded["roomparams"] = roomparams
        recorded["joint_names"] = joint_names
        recorded["min_match_score"] = min_match_score
        recorded["min_group_size"] = min_group_size
        return recorded

    monkeypatch.setattr(
        "pose_tracking_exp.rpt_adapter.rpt.make_triangulation_config",
        _spy_make_triangulation_config,
    )
    build_rpt_config(scene, min_match_score=0.5, min_group_size=2)

    first_camera = cast(list[dict[str, object]], recorded["cameras"])[0]
    assert first_camera["R"] == camera.pose_R.tolist()
    assert first_camera["T"] == camera.pose_T.reshape(3, 1).tolist()