0bfeec77e4
Implement the next tracker tranche around a recursive articulated state rather than per-frame ad hoc updates. Track state now propagates full pose/velocity/shape covariance, uses process noise during prediction, and drives active-to-lost transitions from both miss counts and recursive score thresholds. The multiview update path replaces the generic SciPy least_squares call with a bounded LM/GN loop that returns parameter and beta covariance blocks, accepted-joint counts, mean reprojection error, and iteration diagnostics. Lost-track handling is stricter and safer: proposal-based reacquisition now requires same-frame 2D support and articulated refinement before a track can return to active. Proposal clusters retain contributing detection indices, the tracker searches broadly within contributing views, and proposal-compatible lost frames are surfaced explicitly instead of silently reviving a track. Old scene JSONs with imgpaths now default to the RPT camera-pose convention so proposal reprojection gating works on the sample scenes. Add ActualTest support diagnostics that summarize event counts, accepted support, reprojection quality, and tracker diagnostics, plus focused regressions for camera conventions, score-driven demotion, covariance behavior, proposal-compatible lost handling, and broader proposal-backed matching.
176 lines
5.7 KiB
Python
176 lines
5.7 KiB
Python
import json
|
|
from pathlib import Path
|
|
from typing import NamedTuple, cast
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import pytest
|
|
|
|
pytest.importorskip("rpt")
|
|
import rpt
|
|
|
|
from pose_tracking_exp.schema import CameraCalibration, CameraModel, SceneConfig, parse_camera_model
|
|
from pose_tracking_exp.tracking.replay_io import load_scene_file
|
|
from pose_tracking_exp.tracking.rpt_adapter import build_rpt_config
|
|
|
|
|
|
class _CameraArgs(NamedTuple):
    """Bundle of constructor arguments reused by the calibration tests.

    The field names mirror the keyword arguments that the tests in this
    module forward to the ``CameraCalibration`` factory methods.
    """

    # Camera identifier.
    name: str
    # Image dimensions.
    width: int
    height: int
    # Intrinsic matrix and distortion coefficients, as NumPy arrays.
    K: np.ndarray
    DC: np.ndarray
    # Parsed camera model (result of ``parse_camera_model`` in the fixture).
    model: CameraModel
|
|
|
|
|
|
def _camera_args() -> _CameraArgs:
    """Return the constructor arguments shared by every test in this module.

    Returns:
        A ``_CameraArgs`` for a 640x480 camera named ``"cam0"`` with a fixed
        3x3 ``K`` matrix, five zero distortion coefficients and the model
        parsed from ``"pinhole"``.
    """
    intrinsic_matrix = np.asarray(
        [
            [500.0, 0.0, 320.0],
            [0.0, 500.0, 240.0],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )
    zero_distortion = np.zeros(5, dtype=np.float64)
    camera_model = parse_camera_model("pinhole")
    return _CameraArgs(
        name="cam0",
        width=640,
        height=480,
        K=intrinsic_matrix,
        DC=zero_distortion,
        model=camera_model,
    )
|
|
|
|
|
|
def test_from_opencv_extrinsics_derives_rpt_pose() -> None:
    """Check the pose fields derived from OpenCV-style extrinsics.

    A calibration is built from ``R``/``T``/``rvec`` and is expected to expose
    ``pose_R == R.T`` and ``pose_T == -(R.T @ T)``, i.e. the inverse of the
    rigid transform that was passed in.
    """
    shared = _camera_args()
    rvec = np.asarray([0.0, 0.2, 0.0], dtype=np.float64).reshape(3, 1)
    # cv2.Rodrigues returns (matrix, jacobian); only the matrix is needed.
    extrinsic_R = cv2.Rodrigues(rvec)[0]
    extrinsic_T = np.asarray([0.5, -0.1, 2.0], dtype=np.float64)

    calibration = CameraCalibration.from_opencv_extrinsics(
        **shared._asdict(),
        R=extrinsic_R,
        T=extrinsic_T,
        rvec=rvec.reshape(3),
    )

    expected_pose_R = extrinsic_R.T
    expected_pose_T = -(extrinsic_R.T @ extrinsic_T)
    np.testing.assert_allclose(calibration.pose_R, expected_pose_R)
    np.testing.assert_allclose(calibration.pose_T, expected_pose_T)
|
|
|
|
|
|
def test_from_rpt_pose_derives_opencv_extrinsics() -> None:
    """Check the extrinsic fields derived from an RPT-style camera pose.

    A calibration built via ``from_rpt_pose`` must keep the supplied pose in
    ``pose_R``/``pose_T`` and expose its inverse rigid transform as
    ``R == pose_R.T`` and ``T == -(pose_R.T @ pose_T)``.
    """
    shared = _camera_args()
    pose_rvec = np.asarray([0.0, -0.3, 0.0], dtype=np.float64).reshape(3, 1)
    # cv2.Rodrigues returns (matrix, jacobian); only the matrix is needed.
    pose_R = cv2.Rodrigues(pose_rvec)[0]
    pose_T = np.asarray([1.5, 0.2, -0.4], dtype=np.float64)

    calibration = CameraCalibration.from_rpt_pose(
        **shared._asdict(),
        R=pose_R,
        T=pose_T,
    )

    # The pose itself is stored unchanged ...
    np.testing.assert_allclose(calibration.pose_R, pose_R)
    np.testing.assert_allclose(calibration.pose_T, pose_T)
    # ... and the extrinsics are its inverse.
    expected_R = pose_R.T
    expected_T = -(pose_R.T @ pose_T)
    np.testing.assert_allclose(calibration.R, expected_R)
    np.testing.assert_allclose(calibration.T, expected_T)
|
|
|
|
|
|
def test_load_scene_file_supports_explicit_rpt_pose(tmp_path: Path) -> None:
    """Load a scene whose JSON declares ``extrinsic_format: rpt_camera_pose``.

    With an identity ``R`` and ``T = [1, 2, 3]`` in the file, the loaded camera
    is expected to report ``pose_T == [1, 2, 3]`` and ``T == [-1, -2, -3]``.
    """
    camera_entry = {
        "name": "cam0",
        "width": 640,
        "height": 480,
        "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
        "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
        "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
        "T": [[1.0], [2.0], [3.0]],
    }
    payload = {
        "extrinsic_format": "rpt_camera_pose",
        "room_size": [6.0, 4.0, 3.0],
        "room_center": [0.0, 0.0, 1.0],
        "cameras": [camera_entry],
    }
    scene_path = tmp_path / "scene.json"
    serialized = json.dumps(payload)
    scene_path.write_text(serialized, encoding="utf-8")

    loaded_camera = load_scene_file(scene_path).cameras[0]

    np.testing.assert_allclose(loaded_camera.pose_T, [1.0, 2.0, 3.0])
    np.testing.assert_allclose(loaded_camera.T, [-1.0, -2.0, -3.0])
|
|
|
|
|
|
def test_load_scene_file_defaults_imgpaths_payloads_to_rpt_pose(tmp_path: Path) -> None:
    """Load a scene JSON that has ``imgpaths`` but no ``extrinsic_format``.

    The expected values are the same as for an explicit RPT-pose scene:
    with an identity ``R`` and ``T = [1, 2, 3]`` in the file, the loaded
    camera must report ``pose_T == [1, 2, 3]`` and ``T == [-1, -2, -3]``.
    """
    camera_entry = {
        "name": "cam0",
        "width": 640,
        "height": 480,
        "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
        "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
        "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
        "T": [[1.0], [2.0], [3.0]],
    }
    payload = {
        "imgpaths": ["/tmp/cam0.jpg"],
        "room_size": [6.0, 4.0, 3.0],
        "room_center": [0.0, 0.0, 1.0],
        "cameras": [camera_entry],
    }
    scene_path = tmp_path / "scene.json"
    serialized = json.dumps(payload)
    scene_path.write_text(serialized, encoding="utf-8")

    loaded_camera = load_scene_file(scene_path).cameras[0]

    np.testing.assert_allclose(loaded_camera.pose_T, [1.0, 2.0, 3.0])
    np.testing.assert_allclose(loaded_camera.T, [-1.0, -2.0, -3.0])
|
|
|
|
|
|
def test_build_rpt_config_uses_pose_convention(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that ``build_rpt_config`` passes pose-convention R/T to rpt.

    ``rpt.make_triangulation_config`` is replaced by a recorder so the test can
    inspect the first camera it receives; that camera's ``R``/``T`` must equal
    the calibration's ``pose_R``/``pose_T`` (the latter as a 3x1 column).
    """
    shared = _camera_args()
    calibration = CameraCalibration.from_opencv_extrinsics(
        **shared._asdict(),
        R=np.eye(3, dtype=np.float64),
        T=np.asarray([1.0, 2.0, 3.0], dtype=np.float64),
        rvec=np.zeros(3, dtype=np.float64),
    )
    room_size = np.asarray([6.0, 4.0, 3.0], dtype=np.float64)
    room_center = np.asarray([0.0, 0.0, 1.0], dtype=np.float64)
    scene = SceneConfig(
        room_size=room_size,
        room_center=room_center,
        cameras=(calibration,),
    )

    # Filled in by the fake below with every argument it is called with.
    captured: dict[str, object] = {}

    def fake_make_triangulation_config(
        cameras: list[rpt.Camera],
        roomparams: np.ndarray,
        joint_names: list[str],
        *,
        min_match_score: float,
        min_group_size: int,
    ) -> dict[str, object]:
        """Record the call arguments and return the recording dict."""
        captured.update(
            cameras=cameras,
            roomparams=roomparams,
            joint_names=joint_names,
            min_match_score=min_match_score,
            min_group_size=min_group_size,
        )
        return captured

    monkeypatch.setattr(
        "pose_tracking_exp.tracking.rpt_adapter.rpt.make_triangulation_config",
        fake_make_triangulation_config,
    )

    build_rpt_config(scene, min_match_score=0.5, min_group_size=2)

    recorded_cameras = cast(list[rpt.Camera], captured["cameras"])
    first_camera = recorded_cameras[0]
    expected_R = calibration.pose_R.tolist()
    expected_T = calibration.pose_T.reshape(3, 1).tolist()
    np.testing.assert_allclose(first_camera.R, expected_R)
    np.testing.assert_allclose(first_camera.T, expected_T)
|