pose_tracking_exp/tests/test_camera_conventions.py

import json
from pathlib import Path
from typing import NamedTuple, cast

import cv2
import numpy as np
import pytest

pytest.importorskip("rpt")

from pose_tracking_exp.models import CameraCalibration, SceneConfig
from pose_tracking_exp.replay import load_scene_file
from pose_tracking_exp.rpt_adapter import build_rpt_config


class _CameraArgs(NamedTuple):
    name: str
    width: int
    height: int
    K: np.ndarray
    DC: np.ndarray
    model: str


def _camera_args() -> _CameraArgs:
    return _CameraArgs(
        name="cam0",
        width=640,
        height=480,
        K=np.asarray([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]], dtype=np.float64),
        DC=np.zeros(5, dtype=np.float64),
        model="pinhole",
    )


def test_from_opencv_extrinsics_derives_rpt_pose() -> None:
    args = _camera_args()
    rotation_vec = np.asarray([0.0, 0.2, 0.0], dtype=np.float64).reshape(3, 1)
    rotation, _ = cv2.Rodrigues(rotation_vec)
    translation = np.asarray([0.5, -0.1, 2.0], dtype=np.float64)

    camera = CameraCalibration.from_opencv_extrinsics(
        name=args.name,
        width=args.width,
        height=args.height,
        K=args.K,
        DC=args.DC,
        model=args.model,
        R=rotation,
        T=translation,
        rvec=rotation_vec.reshape(3),
    )

    np.testing.assert_allclose(camera.pose_R, rotation.T)
    np.testing.assert_allclose(camera.pose_T, -(rotation.T @ translation))


def test_from_rpt_pose_derives_opencv_extrinsics() -> None:
    args = _camera_args()
    pose_rotation_vec = np.asarray([0.0, -0.3, 0.0], dtype=np.float64).reshape(3, 1)
    pose_rotation, _ = cv2.Rodrigues(pose_rotation_vec)
    pose_translation = np.asarray([1.5, 0.2, -0.4], dtype=np.float64)

    camera = CameraCalibration.from_rpt_pose(
        name=args.name,
        width=args.width,
        height=args.height,
        K=args.K,
        DC=args.DC,
        model=args.model,
        R=pose_rotation,
        T=pose_translation,
    )

    np.testing.assert_allclose(camera.pose_R, pose_rotation)
    np.testing.assert_allclose(camera.pose_T, pose_translation)
    np.testing.assert_allclose(camera.R, pose_rotation.T)
    np.testing.assert_allclose(camera.T, -(pose_rotation.T @ pose_translation))


def test_load_scene_file_supports_explicit_rpt_pose(tmp_path: Path) -> None:
    scene_path = tmp_path / "scene.json"
    payload = {
        "extrinsic_format": "rpt_camera_pose",
        "room_size": [6.0, 4.0, 3.0],
        "room_center": [0.0, 0.0, 1.0],
        "cameras": [
            {
                "name": "cam0",
                "width": 640,
                "height": 480,
                "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
                "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
                "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
                "T": [[1.0], [2.0], [3.0]],
            }
        ],
    }
    scene_path.write_text(json.dumps(payload), encoding="utf-8")

    scene = load_scene_file(scene_path)

    np.testing.assert_allclose(scene.cameras[0].pose_T, [1.0, 2.0, 3.0])
    np.testing.assert_allclose(scene.cameras[0].T, [-1.0, -2.0, -3.0])


def test_build_rpt_config_uses_pose_convention(monkeypatch: pytest.MonkeyPatch) -> None:
    args = _camera_args()
    camera = CameraCalibration.from_opencv_extrinsics(
        name=args.name,
        width=args.width,
        height=args.height,
        K=args.K,
        DC=args.DC,
        model=args.model,
        R=np.eye(3, dtype=np.float64),
        T=np.asarray([1.0, 2.0, 3.0], dtype=np.float64),
        rvec=np.zeros(3, dtype=np.float64),
    )
    scene = SceneConfig(
        room_size=np.asarray([6.0, 4.0, 3.0], dtype=np.float64),
        room_center=np.asarray([0.0, 0.0, 1.0], dtype=np.float64),
        cameras=(camera,),
    )
    captured: dict[str, object] = {}

    def fake_make_triangulation_config(
        cameras: list[dict[str, object]],
        roomparams: np.ndarray,
        joint_names: list[str],
        *,
        min_match_score: float,
        min_group_size: int,
    ) -> dict[str, object]:
        captured["cameras"] = cameras
        captured["roomparams"] = roomparams
        captured["joint_names"] = joint_names
        captured["min_match_score"] = min_match_score
        captured["min_group_size"] = min_group_size
        return captured

    monkeypatch.setattr("pose_tracking_exp.rpt_adapter.rpt.make_triangulation_config", fake_make_triangulation_config)

    build_rpt_config(scene, min_match_score=0.5, min_group_size=2)

    camera_payload = cast(list[dict[str, object]], captured["cameras"])[0]
    assert camera_payload["R"] == camera.pose_R.tolist()
    assert camera_payload["T"] == camera.pose_T.reshape(3, 1).tolist()