feat: initialize offline multiview pose tracking experiment
Set up pose_tracking_exp as a uv-managed Python package for offline multiview body tracking experiments.

This initial commit includes:
- the typed package scaffold, CLI entrypoints, and repo-local uv configuration
- scene and replay loaders for generic JSON replays and ActualTest parquet inputs
- ParaJumping payload conversion and RTMPose-to-body20 normalization
- a custom articulated tracker with tentative, active, and lost lifecycle handling
- RPT-backed proposal generation, camera convention handling, and multiview reprojection updates
- regression tests for normalization, camera conventions, ActualTest ingestion, seeding, and tracker smoke flows
- project documentation covering extrinsic formats and the ActualTest calibration caveat
This commit is contained in:
@@ -0,0 +1,156 @@
|
||||
import base64
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pose_tracking_exp.joints import BODY20_INDEX_BY_NAME
|
||||
from pose_tracking_exp.normalization import normalize_rtmpose_body20
|
||||
from pose_tracking_exp.parajumping import PROTOCOL_HEADER, convert_payload_record, decode_pose_payload
|
||||
from pose_tracking_exp.replay import load_replay_file, load_scene_file
|
||||
from pose_tracking_exp.sync import synchronize_frames
|
||||
|
||||
|
||||
def _encode_payload(
    *,
    frame_index: int,
    reference_size: tuple[int, int],
    boxes: np.ndarray,
    box_scores: np.ndarray,
    keypoints_xy: np.ndarray,
    keypoint_scores: np.ndarray,
    timestamp_unix_ns: int,
) -> bytes:
    """Build a binary pose payload for the decoder tests.

    The segments are emitted in this order:

    1. ``PROTOCOL_HEADER``
    2. ``frame_index`` as 4 little-endian bytes
    3. ``reference_size`` as little-endian uint16 values
    4. one count byte (``boxes.shape[0]``), then ``boxes`` as little-endian uint16
    5. one count byte (``box_scores.shape[0]``), then ``box_scores`` as uint8
    6. one count byte (``keypoints_xy.shape[0]``), then ``keypoints_xy`` as
       little-endian uint16
    7. one count byte (``keypoint_scores.size``), then the flattened
       ``keypoint_scores`` as uint8
    8. ``timestamp_unix_ns`` as 8 little-endian bytes

    Every count is written with ``int.to_bytes(1, ...)``, so a count above 255
    raises ``OverflowError``.
    """
    # NOTE(review): the keypoint-score count uses ``.size`` (total elements) while
    # the other counts use ``shape[0]``; kept as-is — confirm against the decoder.
    segments = [
        PROTOCOL_HEADER,
        int(frame_index).to_bytes(4, "little"),
        np.asarray(reference_size, dtype="<u2").tobytes(),
    ]

    segments.append(int(boxes.shape[0]).to_bytes(1, "little"))
    segments.append(np.asarray(boxes, dtype="<u2").tobytes())

    segments.append(int(box_scores.shape[0]).to_bytes(1, "little"))
    segments.append(np.asarray(box_scores, dtype=np.uint8).tobytes())

    segments.append(int(keypoints_xy.shape[0]).to_bytes(1, "little"))
    segments.append(np.asarray(keypoints_xy, dtype="<u2").tobytes())

    flat_scores = np.asarray(keypoint_scores, dtype=np.uint8).reshape(-1)
    segments.append(int(keypoint_scores.size).to_bytes(1, "little"))
    segments.append(flat_scores.tobytes())

    segments.append(int(timestamp_unix_ns).to_bytes(8, "little"))
    return b"".join(segments)
|
||||
|
||||
|
||||
def test_normalize_rtmpose_body20_derives_midpoints_and_head():
    """Check the hip-middle, shoulder-middle and head outputs of the normalizer.

    Five of the 133 input keypoints get a position and a score of 1.0; all the
    others stay at zero. The expected hip and shoulder values are the midpoints
    of input pairs (11, 12) and (5, 6); the expected head equals input 0.
    """
    placed = {
        0: (20.0, 8.0),
        5: (10.0, 20.0),
        6: (30.0, 20.0),
        11: (12.0, 60.0),
        12: (28.0, 60.0),
    }
    keypoints = np.zeros((133, 2), dtype=np.float64)
    scores = np.zeros((133,), dtype=np.float64)
    for index, xy in placed.items():
        keypoints[index] = xy
    scores[list(placed)] = 1.0

    normalized = normalize_rtmpose_body20(keypoints, scores)

    expectations = (
        ("hip_middle", [20.0, 60.0]),
        ("shoulder_middle", [20.0, 20.0]),
        ("head", [20.0, 8.0]),
    )
    for joint_name, expected_xy in expectations:
        row = BODY20_INDEX_BY_NAME[joint_name]
        np.testing.assert_allclose(normalized[row, :2], expected_xy)
|
||||
|
||||
|
||||
def test_decode_payload_and_convert_record():
    """Encode one detection, then check both the decoder and the record converter.

    The payload carries a single detection with five scored keypoints. The
    decoded frame must report the encoded frame index and reference size, hold
    exactly one detection, and place ``hip_middle`` at (150, 400). The same
    payload, base64-encoded inside a record dict, must convert to a record that
    keeps the camera name and frame index.
    """
    scored_joints = {
        0: (150, 120),
        5: (100, 200),
        6: (200, 200),
        11: (110, 400),
        12: (190, 400),
    }
    keypoints_xy = np.zeros((1, 133, 2), dtype=np.uint16)
    keypoint_scores = np.zeros((1, 133), dtype=np.uint8)
    for joint, xy in scored_joints.items():
        keypoints_xy[0, joint] = xy
        keypoint_scores[0, joint] = 255

    payload = _encode_payload(
        frame_index=7,
        reference_size=(640, 480),
        boxes=np.asarray([[90, 100, 210, 420]], dtype=np.uint16),
        box_scores=np.asarray([200], dtype=np.uint8),
        keypoints_xy=keypoints_xy,
        keypoint_scores=keypoint_scores,
        timestamp_unix_ns=1234,
    )

    decoded = decode_pose_payload(payload)
    assert decoded.frame_index == 7
    assert decoded.reference_size == (640, 480)
    assert len(decoded.detections) == 1
    hip_row = BODY20_INDEX_BY_NAME["hip_middle"]
    np.testing.assert_allclose(decoded.detections[0].keypoints[hip_row, :2], [150.0, 400.0])

    encoded_text = base64.b64encode(payload).decode("ascii")
    record = {"camera": "cam0", "payload_b64": encoded_text}
    converted = convert_payload_record(record)
    assert converted["camera"] == "cam0"
    assert converted["frame_index"] == 7
|
||||
|
||||
|
||||
def test_load_replay_and_synchronize(tmp_path: Path):
    """Load a scene and replay from disk and synchronize them into one bundle.

    A two-camera scene file and a two-line JSONL replay (one frame per camera,
    timestamps 1000 and 1006) are written under ``tmp_path``. The loaded scene
    must expose two cameras, and ``synchronize_frames`` with ``max_skew_ns=20``
    and ``min_views=2`` must return a single bundle whose views cover both
    cameras.
    """

    def camera_entry(name: str, t_x: float) -> dict:
        # The two cameras differ only in name and the first component of T.
        return {
            "name": name,
            "width": 640,
            "height": 480,
            "K": [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]],
            "DC": [0.0, 0.0, 0.0, 0.0, 0.0],
            "R": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
            "T": [[t_x], [0.0], [0.0]],
        }

    zero_pose = np.zeros((20, 3), dtype=np.float64).tolist()

    def replay_line(camera: str, timestamp_unix_ns: int) -> str:
        # One serialized replay record holding a single all-zero detection.
        record = {
            "camera": camera,
            "frame_index": 0,
            "timestamp_unix_ns": timestamp_unix_ns,
            "source_size": [640, 480],
            "detections": [{"bbox": [0, 0, 1, 1], "bbox_confidence": 1.0, "keypoints": zero_pose}],
        }
        return json.dumps(record)

    scene_path = tmp_path / "scene.json"
    replay_path = tmp_path / "replay.jsonl"

    scene_document = {
        "room_size": [6.0, 4.0, 3.0],
        "room_center": [0.0, 0.0, 1.0],
        "cameras": [camera_entry("cam0", 0.0), camera_entry("cam1", 1.0)],
    }
    scene_path.write_text(json.dumps(scene_document), encoding="utf-8")

    lines = [replay_line("cam0", 1000), replay_line("cam1", 1006)]
    replay_path.write_text("".join(f"{line}\n" for line in lines), encoding="utf-8")

    scene = load_scene_file(scene_path)
    assert len(scene.cameras) == 2

    replay = load_replay_file(scene_path, replay_path)
    bundles = synchronize_frames(replay, max_skew_ns=20, min_views=2)
    assert len(bundles) == 1
    camera_names = {view.camera_name for view in bundles[0].views}
    assert camera_names == {"cam0", "cam1"}
|
||||
|
||||
Reference in New Issue
Block a user