93 lines
3.1 KiB
Python
93 lines
3.1 KiB
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Mapping, Sequence
|
|
from typing import Any, TypeAlias
|
|
|
|
import numpy as np
|
|
import numpy.typing as npt
|
|
|
|
from ._core import Camera
|
|
|
|
# Inputs accepted wherever a camera is expected: an existing ``Camera`` object
# or a string-keyed mapping holding the camera's fields.
CameraLike: TypeAlias = Camera | Mapping[str, Any]

# Inputs accepted for one view's pose detections: a NumPy array, a triple-nested
# sequence of floats, or a double-nested sequence of floats.
PoseViewLike: TypeAlias = (
    npt.NDArray[np.generic]
    | Sequence[Sequence[Sequence[float]]]
    | Sequence[Sequence[float]]
)
|
|
|
|
|
|
def convert_cameras(cameras: Sequence[CameraLike]) -> list[Camera]:
    """Turn a mix of ``Camera`` objects and camera mappings into ``Camera`` objects.

    Entries that already are ``Camera`` instances are placed in the result as-is
    (the same object, not a copy). Every other entry is treated as a mapping and
    used to populate a freshly constructed ``Camera``.

    Args:
        cameras: Camera objects and/or mappings. A mapping must provide the keys
            ``"name"``, ``"K"``, ``"DC"``, ``"R"``, ``"T"``, ``"width"`` and
            ``"height"``; ``"type"`` is optional and defaults to ``"pinhole"``.

    Returns:
        A new list with one ``Camera`` per input entry, in input order.

    Raises:
        KeyError: If a mapping lacks one of the required keys (as raised by the
            mapping's own item lookup).
    """

    def _from_mapping(spec: Mapping[str, Any]) -> Camera:
        # Fields are assigned in a fixed order; name, size and type are coerced
        # to plain str/int, the remaining values are handed over untouched.
        built = Camera()
        built.name = str(spec["name"])
        built.K = spec["K"]
        built.DC = spec["DC"]
        built.R = spec["R"]
        built.T = spec["T"]
        built.width = int(spec["width"])
        built.height = int(spec["height"])
        built.type = str(spec.get("type", "pinhole"))
        return built

    return [
        entry if isinstance(entry, Camera) else _from_mapping(entry)
        for entry in cameras
    ]
|
|
|
|
|
|
def pack_poses_2d(
    views: Sequence[PoseViewLike], *, joint_count: int | None = None
) -> tuple[npt.NDArray[np.float32], npt.NDArray[np.uint32]]:
    """Pack ragged per-view pose detections into one zero-padded array.

    Each view is converted to a ``float32`` array and must have shape
    ``[persons, joints, 3]`` or, as a single-person shorthand, ``[joints, 3]``.
    A view with no elements at all counts as zero persons, whatever its shape.

    Args:
        views: One pose container per view.
        joint_count: Expected number of joints per person. When omitted, it is
            taken from the first non-empty view. It must be given explicitly
            when every view is empty (or when there are no views).

    Returns:
        A ``(packed, counts)`` tuple. ``packed`` is a ``float32`` array of shape
        ``[len(views), max_persons, joints, 3]`` in which unused person slots
        are zero; ``counts`` is a ``uint32`` array of shape ``[len(views)]``
        holding the number of persons in each view.

    Raises:
        ValueError: If ``joint_count`` is negative, a non-empty view has an
            unsupported shape, non-empty views disagree on the joint count, or
            the joint count can be neither inferred nor read from
            ``joint_count``.
    """
    if joint_count is not None and joint_count < 0:
        # Fail early with a clear message instead of a confusing downstream
        # mismatch / negative-dimension error.
        raise ValueError("joint_count must be non-negative.")

    # One entry per view: the validated [persons, joints, 3] array, or None for
    # a view without detections.
    per_view: list[npt.NDArray[np.float32] | None] = []
    expected_joints = joint_count

    for view in views:
        array = np.asarray(view, dtype=np.float32)

        if array.size == 0:
            per_view.append(None)
            continue

        if array.ndim == 2:
            if array.shape[-1] != 3:
                raise ValueError("Single-person pose inputs must have shape [joints, 3].")
            # Promote the single-person shorthand to [1, joints, 3].
            array = array[np.newaxis, :, :]
        elif array.ndim != 3 or array.shape[-1] != 3:
            raise ValueError("Each view must have shape [persons, joints, 3] or [joints, 3].")

        if expected_joints is None:
            expected_joints = int(array.shape[1])
        elif array.shape[1] != expected_joints:
            raise ValueError("All views must use the same joint count.")

        per_view.append(array)

    if expected_joints is None:
        raise ValueError("joint_count is required when all views are empty.")

    max_persons = max(
        (int(array.shape[0]) for array in per_view if array is not None),
        default=0,
    )
    packed = np.zeros((len(per_view), max_persons, expected_joints, 3), dtype=np.float32)
    counts = np.zeros((len(per_view),), dtype=np.uint32)

    for view_idx, array in enumerate(per_view):
        if array is None:
            continue  # count stays 0 and the row stays all-zero
        person_count = int(array.shape[0])
        counts[view_idx] = person_count
        packed[view_idx, :person_count, :, :] = array

    return packed, counts
|