forked from HQU-gxy/CVTH3PE
feat: Migrate play notebook to Python script and update dependencies
- Removed the `play.ipynb` notebook and added a `playground.py` script in its place to improve code organization and maintainability.
- Updated `pyproject.toml` to include `jupytext` for Jupyter notebook conversion support.
- Added instructions to `README.md` for converting notebooks with Jupytext.
- Updated `uv.lock` to reflect the new dependency on Jupytext.
This commit is contained in:
655
play.ipynb
655
play.ipynb
@ -1,655 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from copy import deepcopy\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from pathlib import Path\n",
|
||||
"from typing import (Any, Generator, Optional, Sequence, TypeAlias, TypedDict,\n",
|
||||
" cast, overload)\n",
|
||||
"\n",
|
||||
"import awkward as ak\n",
|
||||
"import jax\n",
|
||||
"import jax.numpy as jnp\n",
|
||||
"import numpy as np\n",
|
||||
"import orjson\n",
|
||||
"from beartype import beartype\n",
|
||||
"from cv2 import undistortPoints\n",
|
||||
"from jaxtyping import Array, Float, Num, jaxtyped\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from numpy.typing import ArrayLike\n",
|
||||
"from scipy.spatial.transform import Rotation as R\n",
|
||||
"\n",
|
||||
"from app.camera import Camera, CameraParams, Detection\n",
|
||||
"from app.visualize.whole_body import visualize_whole_body\n",
|
||||
"\n",
|
||||
"NDArray: TypeAlias = np.ndarray"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DATASET_PATH = Path(\"samples\") / \"04_02\" \n",
|
||||
"AK_CAMERA_DATASET: ak.Array = ak.from_parquet(DATASET_PATH / \"camera_params.parquet\")\n",
|
||||
"display(AK_CAMERA_DATASET)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Resolution(TypedDict):\n",
|
||||
" width: int\n",
|
||||
" height: int\n",
|
||||
"\n",
|
||||
"class Intrinsic(TypedDict):\n",
|
||||
" camera_matrix: Num[Array, \"3 3\"]\n",
|
||||
" \"\"\"\n",
|
||||
" K\n",
|
||||
" \"\"\"\n",
|
||||
" distortion_coefficients: Num[Array, \"N\"]\n",
|
||||
" \"\"\"\n",
|
||||
" distortion coefficients; usually 5\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
"class Extrinsic(TypedDict):\n",
|
||||
" rvec: Num[NDArray, \"3\"]\n",
|
||||
" tvec: Num[NDArray, \"3\"]\n",
|
||||
"\n",
|
||||
"class ExternalCameraParams(TypedDict):\n",
|
||||
" name: str\n",
|
||||
" port: int\n",
|
||||
" intrinsic: Intrinsic\n",
|
||||
" extrinsic: Extrinsic\n",
|
||||
" resolution: Resolution\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def read_dataset_by_port(port: int) -> ak.Array:\n",
|
||||
" P = DATASET_PATH / f\"{port}.parquet\"\n",
|
||||
" return ak.from_parquet(P)\n",
|
||||
"\n",
|
||||
"KEYPOINT_DATASET = {int(p): read_dataset_by_port(p) for p in ak.to_numpy(AK_CAMERA_DATASET[\"port\"])}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"KEYPOINT_DATASET[5601]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class KeypointDataset(TypedDict):\n",
|
||||
" frame_index: int\n",
|
||||
" boxes: Num[NDArray, \"N 4\"]\n",
|
||||
" kps: Num[NDArray, \"N J 2\"]\n",
|
||||
" kps_scores: Num[NDArray, \"N J\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def to_transformation_matrix(\n",
|
||||
" rvec: Num[NDArray, \"3\"], tvec: Num[NDArray, \"3\"]\n",
|
||||
") -> Num[NDArray, \"4 4\"]:\n",
|
||||
" res = np.eye(4)\n",
|
||||
" res[:3, :3] = R.from_rotvec(rvec).as_matrix()\n",
|
||||
" res[:3, 3] = tvec\n",
|
||||
" return res\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def undistort_points(\n",
|
||||
" points: Num[NDArray, \"M 2\"],\n",
|
||||
" camera_matrix: Num[NDArray, \"3 3\"],\n",
|
||||
" dist_coeffs: Num[NDArray, \"N\"],\n",
|
||||
") -> Num[NDArray, \"M 2\"]:\n",
|
||||
" K = camera_matrix\n",
|
||||
" dist = dist_coeffs\n",
|
||||
" res = undistortPoints(points, K, dist, P=K) # type: ignore\n",
|
||||
" return res.reshape(-1, 2)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def from_camera_params(camera: ExternalCameraParams) -> Camera:\n",
|
||||
" rt = jnp.array(\n",
|
||||
" to_transformation_matrix(\n",
|
||||
" ak.to_numpy(camera[\"extrinsic\"][\"rvec\"]),\n",
|
||||
" ak.to_numpy(camera[\"extrinsic\"][\"tvec\"]),\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" K = jnp.array(camera[\"intrinsic\"][\"camera_matrix\"]).reshape(3, 3)\n",
|
||||
" dist_coeffs = jnp.array(camera[\"intrinsic\"][\"distortion_coefficients\"])\n",
|
||||
" image_size = jnp.array(\n",
|
||||
" (camera[\"resolution\"][\"width\"], camera[\"resolution\"][\"height\"])\n",
|
||||
" )\n",
|
||||
" return Camera(\n",
|
||||
" id=camera[\"name\"],\n",
|
||||
" params=CameraParams(\n",
|
||||
" K=K,\n",
|
||||
" Rt=rt,\n",
|
||||
" dist_coeffs=dist_coeffs,\n",
|
||||
" image_size=image_size,\n",
|
||||
" ),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def preprocess_keypoint_dataset(\n",
|
||||
" dataset: Sequence[KeypointDataset],\n",
|
||||
" camera: Camera,\n",
|
||||
" fps: float,\n",
|
||||
" start_timestamp: datetime,\n",
|
||||
") -> Generator[Detection, None, None]:\n",
|
||||
" frame_interval_s = 1 / fps\n",
|
||||
" for el in dataset:\n",
|
||||
" frame_index = el[\"frame_index\"]\n",
|
||||
" timestamp = start_timestamp + timedelta(seconds=frame_index * frame_interval_s)\n",
|
||||
" for kp, kp_score in zip(el[\"kps\"], el[\"kps_scores\"]):\n",
|
||||
" yield Detection(\n",
|
||||
" keypoints=jnp.array(kp),\n",
|
||||
" confidences=jnp.array(kp_score),\n",
|
||||
" camera=camera,\n",
|
||||
" timestamp=timestamp,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DetectionGenerator: TypeAlias = Generator[Detection, None, None]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def sync_batch_gen(gens: list[DetectionGenerator], diff: timedelta):\n",
|
||||
" \"\"\"\n",
|
||||
" given a list of detection generators, return a generator that yields a batch of detections\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" gens: list of detection generators\n",
|
||||
" diff: maximum timestamp difference between detections to consider them part of the same batch\n",
|
||||
" \"\"\"\n",
|
||||
" N = len(gens)\n",
|
||||
" last_batch_timestamp: Optional[datetime] = None\n",
|
||||
" next_batch_timestamp: Optional[datetime] = None\n",
|
||||
" current_batch: list[Detection] = []\n",
|
||||
" next_batch: list[Detection] = []\n",
|
||||
" paused: list[bool] = [False] * N\n",
|
||||
" finished: list[bool] = [False] * N\n",
|
||||
"\n",
|
||||
" def reset_paused():\n",
|
||||
" \"\"\"\n",
|
||||
" reset paused list based on finished list\n",
|
||||
" \"\"\"\n",
|
||||
" for i in range(N):\n",
|
||||
" if not finished[i]:\n",
|
||||
" paused[i] = False\n",
|
||||
" else:\n",
|
||||
" paused[i] = True\n",
|
||||
"\n",
|
||||
" EPS = 1e-6\n",
|
||||
" # a small epsilon to avoid floating point precision issues\n",
|
||||
" diff_esp = diff - timedelta(seconds=EPS)\n",
|
||||
" while True:\n",
|
||||
" for i, gen in enumerate(gens):\n",
|
||||
" try:\n",
|
||||
" if finished[i] or paused[i]:\n",
|
||||
" continue\n",
|
||||
" val = next(gen)\n",
|
||||
" if last_batch_timestamp is None:\n",
|
||||
" last_batch_timestamp = val.timestamp\n",
|
||||
" current_batch.append(val)\n",
|
||||
" else:\n",
|
||||
" if abs(val.timestamp - last_batch_timestamp) >= diff_esp:\n",
|
||||
" next_batch.append(val)\n",
|
||||
" if next_batch_timestamp is None:\n",
|
||||
" next_batch_timestamp = val.timestamp\n",
|
||||
" paused[i] = True\n",
|
||||
" if all(paused):\n",
|
||||
" yield current_batch\n",
|
||||
" current_batch = next_batch\n",
|
||||
" next_batch = []\n",
|
||||
" last_batch_timestamp = next_batch_timestamp\n",
|
||||
" next_batch_timestamp = None\n",
|
||||
" reset_paused()\n",
|
||||
" else:\n",
|
||||
" current_batch.append(val)\n",
|
||||
" except StopIteration:\n",
|
||||
" finished[i] = True\n",
|
||||
" paused[i] = True\n",
|
||||
" if all(finished):\n",
|
||||
" if len(current_batch) > 0:\n",
|
||||
" # All generators exhausted, flush remaining batch and exit\n",
|
||||
" yield current_batch\n",
|
||||
" break"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@overload\n",
|
||||
"def to_projection_matrix(\n",
|
||||
" transformation_matrix: Num[NDArray, \"4 4\"], camera_matrix: Num[NDArray, \"3 3\"]\n",
|
||||
") -> Num[NDArray, \"3 4\"]: ...\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@overload\n",
|
||||
"def to_projection_matrix(\n",
|
||||
" transformation_matrix: Num[Array, \"4 4\"], camera_matrix: Num[Array, \"3 3\"]\n",
|
||||
") -> Num[Array, \"3 4\"]: ...\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def to_projection_matrix(\n",
|
||||
" transformation_matrix: Num[Any, \"4 4\"],\n",
|
||||
" camera_matrix: Num[Any, \"3 3\"],\n",
|
||||
") -> Num[Any, \"3 4\"]:\n",
|
||||
" return camera_matrix @ transformation_matrix[:3, :]\n",
|
||||
"\n",
|
||||
"to_projection_matrix_jit = jax.jit(to_projection_matrix)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def dlt(\n",
|
||||
" H1: Num[NDArray, \"3 4\"],\n",
|
||||
" H2: Num[NDArray, \"3 4\"],\n",
|
||||
" p1: Num[NDArray, \"2\"],\n",
|
||||
" p2: Num[NDArray, \"2\"],\n",
|
||||
") -> Num[NDArray, \"3\"]:\n",
|
||||
" \"\"\"\n",
|
||||
" Direct Linear Transformation\n",
|
||||
" \"\"\"\n",
|
||||
" A = [\n",
|
||||
" p1[1] * H1[2, :] - H1[1, :],\n",
|
||||
" H1[0, :] - p1[0] * H1[2, :],\n",
|
||||
" p2[1] * H2[2, :] - H2[1, :],\n",
|
||||
" H2[0, :] - p2[0] * H2[2, :],\n",
|
||||
" ]\n",
|
||||
" A = np.array(A).reshape((4, 4))\n",
|
||||
"\n",
|
||||
" B = A.transpose() @ A\n",
|
||||
" from scipy import linalg\n",
|
||||
"\n",
|
||||
" U, s, Vh = linalg.svd(B, full_matrices=False)\n",
|
||||
" return Vh[3, 0:3] / Vh[3, 3]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@overload\n",
|
||||
"def homogeneous_to_euclidean(points: Num[NDArray, \"N 4\"]) -> Num[NDArray, \"N 3\"]: ...\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@overload\n",
|
||||
"def homogeneous_to_euclidean(points: Num[Array, \"N 4\"]) -> Num[Array, \"N 3\"]: ...\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def homogeneous_to_euclidean(\n",
|
||||
" points: Num[Any, \"N 4\"],\n",
|
||||
") -> Num[Any, \"N 3\"]:\n",
|
||||
" \"\"\"\n",
|
||||
" 将齐次坐标转换为欧几里得坐标\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" points: homogeneous coordinates (x, y, z, w) in numpy array or jax array\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" euclidean coordinates (x, y, z) in numpy array or jax array\n",
|
||||
" \"\"\"\n",
|
||||
" return points[..., :-1] / points[..., -1:]\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FPS = 24\n",
|
||||
"image_gen_5600 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5600], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET[\"port\"] == 5600][0]), FPS, datetime(2024, 4, 2, 12, 0, 0)) # type: ignore\n",
|
||||
"image_gen_5601 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5601], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET[\"port\"] == 5601][0]), FPS, datetime(2024, 4, 2, 12, 0, 0)) # type: ignore\n",
|
||||
"image_gen_5602 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5602], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET[\"port\"] == 5602][0]), FPS, datetime(2024, 4, 2, 12, 0, 0)) # type: ignore\n",
|
||||
"\n",
|
||||
"display(1/FPS)\n",
|
||||
"sync_gen = sync_batch_gen([image_gen_5600, image_gen_5601, image_gen_5602], timedelta(seconds=1/FPS))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"detections = next(sync_gen)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from app.camera import calculate_affinity_matrix_by_epipolar_constraint\n",
|
||||
"\n",
|
||||
"sorted_detections, affinity_matrix = calculate_affinity_matrix_by_epipolar_constraint(detections, \n",
|
||||
" alpha_2d=2000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display(list(map(lambda x: {\"timestamp\": str(x.timestamp), \"camera\": x.camera.id}, sorted_detections)))\n",
|
||||
"with jnp.printoptions(precision=3, suppress=True):\n",
|
||||
" display(affinity_matrix)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from app.solver._old import GLPKSolver\n",
|
||||
"\n",
|
||||
"def clusters_to_detections(clusters: list[list[int]], sorted_detections: list[Detection]) -> list[list[Detection]]:\n",
|
||||
" \"\"\"\n",
|
||||
" given a list of clusters (which is the indices of the detections in the sorted_detections list),\n",
|
||||
" extract the detections from the sorted_detections list\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" clusters: list of clusters, each cluster is a list of indices of the detections in the `sorted_detections` list\n",
|
||||
" sorted_detections: list of SORTED detections\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" list of clusters, each cluster is a list of detections\n",
|
||||
" \"\"\"\n",
|
||||
" return [[sorted_detections[i] for i in cluster] for cluster in clusters]\n",
|
||||
"\n",
|
||||
"solver = GLPKSolver()\n",
|
||||
"aff_np = np.asarray(affinity_matrix).astype(np.float64)\n",
|
||||
"clusters, sol_matrix = solver.solve(aff_np)\n",
|
||||
"display(clusters)\n",
|
||||
"display(sol_matrix)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"WIDTH = 2560\n",
|
||||
"HEIGHT = 1440\n",
|
||||
"\n",
|
||||
"clusters_detections = clusters_to_detections(clusters, sorted_detections)\n",
|
||||
"im = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)\n",
|
||||
"for el in clusters_detections[0]:\n",
|
||||
" im = visualize_whole_body(np.asarray(el.keypoints), im)\n",
|
||||
"\n",
|
||||
"p = plt.imshow(im)\n",
|
||||
"display(p)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"im_prime = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)\n",
|
||||
"for el in clusters_detections[1]:\n",
|
||||
" im_prime = visualize_whole_body(np.asarray(el.keypoints), im_prime)\n",
|
||||
"\n",
|
||||
"p_prime = plt.imshow(im_prime)\n",
|
||||
"display(p_prime)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def triangulate_one_point_from_multiple_views_linear(\n",
|
||||
" proj_matrices: Float[Array, \"N 3 4\"],\n",
|
||||
" points: Num[Array, \"N 2\"],\n",
|
||||
" confidences: Optional[Float[Array, \"N\"]] = None,\n",
|
||||
") -> Float[Array, \"3\"]:\n",
|
||||
" \"\"\"\n",
|
||||
" Args:\n",
|
||||
" proj_matrices: 形状为(N, 3, 4)的投影矩阵序列\n",
|
||||
" points: 形状为(N, 2)的点坐标序列\n",
|
||||
" confidences: 形状为(N,)的置信度序列,范围[0.0, 1.0]\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" point_3d: 形状为(3,)的三角测量得到的3D点\n",
|
||||
" \"\"\"\n",
|
||||
" assert len(proj_matrices) == len(points)\n",
|
||||
"\n",
|
||||
" N = len(proj_matrices)\n",
|
||||
" confi: Float[Array, \"N\"]\n",
|
||||
" if confidences is None:\n",
|
||||
" confi = jnp.ones(N, dtype=np.float32)\n",
|
||||
" else:\n",
|
||||
" # Use square root of confidences for weighting - more balanced approach\n",
|
||||
" confi = jnp.sqrt(jnp.clip(confidences, 0, 1))\n",
|
||||
"\n",
|
||||
" A = jnp.zeros((N * 2, 4), dtype=np.float32)\n",
|
||||
" for i in range(N):\n",
|
||||
" x, y = points[i]\n",
|
||||
" A = A.at[2 * i].set(proj_matrices[i, 2] * x - proj_matrices[i, 0])\n",
|
||||
" A = A.at[2 * i + 1].set(proj_matrices[i, 2] * y - proj_matrices[i, 1])\n",
|
||||
" A = A.at[2 * i].mul(confi[i])\n",
|
||||
" A = A.at[2 * i + 1].mul(confi[i])\n",
|
||||
"\n",
|
||||
" # https://docs.jax.dev/en/latest/_autosummary/jax.numpy.linalg.svd.html\n",
|
||||
" _, _, vh = jnp.linalg.svd(A, full_matrices=False)\n",
|
||||
" point_3d_homo = vh[-1] # shape (4,)\n",
|
||||
"\n",
|
||||
" # replace the Python `if` with a jnp.where\n",
|
||||
" point_3d_homo = jnp.where(\n",
|
||||
" point_3d_homo[3] < 0, # predicate (scalar bool tracer)\n",
|
||||
" -point_3d_homo, # if True\n",
|
||||
" point_3d_homo, # if False\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" point_3d = point_3d_homo[:3] / point_3d_homo[3]\n",
|
||||
" return point_3d\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def triangulate_points_from_multiple_views_linear(\n",
|
||||
" proj_matrices: Float[Array, \"N 3 4\"],\n",
|
||||
" points: Num[Array, \"N P 2\"],\n",
|
||||
" confidences: Optional[Float[Array, \"N P\"]] = None,\n",
|
||||
") -> Float[Array, \"P 3\"]:\n",
|
||||
" \"\"\"\n",
|
||||
" Batch-triangulate P points observed by N cameras, linearly via SVD.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" proj_matrices: (N, 3, 4) projection matrices\n",
|
||||
" points: (N, P, 2) image-coordinates per view\n",
|
||||
" confidences: (N, P, 1) optional per-view confidences in [0,1]\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" (P, 3) 3D point for each of the P tracks\n",
|
||||
" \"\"\"\n",
|
||||
" N, P, _ = points.shape\n",
|
||||
" assert proj_matrices.shape[0] == N\n",
|
||||
" if confidences is None:\n",
|
||||
" conf = jnp.ones((N, P), dtype=jnp.float32)\n",
|
||||
" else:\n",
|
||||
" conf = jnp.sqrt(jnp.clip(confidences, 0.0, 1.0))\n",
|
||||
"\n",
|
||||
" # vectorize your one‐point routine over P\n",
|
||||
" vmap_triangulate = jax.vmap(\n",
|
||||
" triangulate_one_point_from_multiple_views_linear,\n",
|
||||
" in_axes=(None, 1, 1), # proj_matrices static, map over points[:,p,:], conf[:,p]\n",
|
||||
" out_axes=0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # returns (P, 3)\n",
|
||||
" return vmap_triangulate(proj_matrices, points, conf)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dataclasses import dataclass\n",
|
||||
"from copy import copy as shallow_copy, deepcopy as deep_copy\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"@dataclass(frozen=True)\n",
|
||||
"class Tracking:\n",
|
||||
" id: int\n",
|
||||
" keypoints: Float[Array, \"J 3\"]\n",
|
||||
" last_active_timestamp: datetime\n",
|
||||
"\n",
|
||||
" def __repr__(self) -> str:\n",
|
||||
" return f\"Tracking({self.id}, {self.last_active_timestamp})\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@jaxtyped(typechecker=beartype)\n",
|
||||
"def triangle_from_cluster(\n",
|
||||
" cluster: list[Detection],\n",
|
||||
") -> tuple[Float[Array, \"N 3\"], datetime]:\n",
|
||||
" proj_matrices = jnp.array([el.camera.params.projection_matrix for el in cluster])\n",
|
||||
" points = jnp.array([el.keypoints_undistorted for el in cluster])\n",
|
||||
" confidences = jnp.array([el.confidences for el in cluster])\n",
|
||||
" latest_timestamp = max(el.timestamp for el in cluster)\n",
|
||||
" return (\n",
|
||||
" triangulate_points_from_multiple_views_linear(\n",
|
||||
" proj_matrices, points, confidences=confidences\n",
|
||||
" ),\n",
|
||||
" latest_timestamp,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# res = {\n",
|
||||
"# \"a\": triangle_from_cluster(clusters_detections[0]).tolist(),\n",
|
||||
"# \"b\": triangle_from_cluster(clusters_detections[1]).tolist(),\n",
|
||||
"# }\n",
|
||||
"# with open(\"samples/res.json\", \"wb\") as f:\n",
|
||||
"# f.write(orjson.dumps(res))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GlobalTrackingState:\n",
|
||||
" _last_id: int\n",
|
||||
" _trackings: dict[int, Tracking]\n",
|
||||
"\n",
|
||||
" def __init__(self):\n",
|
||||
" self._last_id = 0\n",
|
||||
" self._trackings = {}\n",
|
||||
"\n",
|
||||
" def __repr__(self) -> str:\n",
|
||||
" return (\n",
|
||||
" f\"GlobalTrackingState(last_id={self._last_id}, trackings={self._trackings})\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def trackings(self) -> dict[int, Tracking]:\n",
|
||||
" return shallow_copy(self._trackings)\n",
|
||||
"\n",
|
||||
" def add_tracking(self, cluster: list[Detection]) -> Tracking:\n",
|
||||
" kps_3d, latest_timestamp = triangle_from_cluster(cluster)\n",
|
||||
" next_id = self._last_id + 1\n",
|
||||
" tracking = Tracking(\n",
|
||||
" id=next_id, keypoints=kps_3d, last_active_timestamp=latest_timestamp\n",
|
||||
" )\n",
|
||||
" self._trackings[next_id] = tracking\n",
|
||||
" self._last_id = next_id\n",
|
||||
" return tracking\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"global_tracking_state = GlobalTrackingState()\n",
|
||||
"for cluster in clusters_detections:\n",
|
||||
" global_tracking_state.add_tracking(cluster)\n",
|
||||
"display(global_tracking_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"next_group = next(sync_gen)\n",
|
||||
"display(next_group)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from app.camera import classify_by_camera\n",
|
||||
"\n",
|
||||
"# let's do cross-view association\n",
|
||||
"trackings = sorted(global_tracking_state.trackings.values(), key=lambda x: x.id)\n",
|
||||
"detections = shallow_copy(next_group)\n",
|
||||
"# cross-view association matrix with shape (T, D), where T is the number of trackings, D is the number of detections\n",
|
||||
"affinity = np.zeros((len(trackings), len(detections)))\n",
|
||||
"detection_by_camera = classify_by_camera(detections)\n",
|
||||
"for i, tracking in enumerate(trackings):\n",
|
||||
" for c, detections in detection_by_camera.items():\n",
|
||||
" camera = next(iter(detections)).camera\n",
|
||||
" # pixel space, unnormalized\n",
|
||||
" tracking_2d_projection = camera.project(tracking.keypoints)\n",
|
||||
" \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
562
playground.py
Normal file
562
playground.py
Normal file
@ -0,0 +1,562 @@
|
||||
# ---
|
||||
# jupyter:
|
||||
# jupytext:
|
||||
# text_representation:
|
||||
# extension: .py
|
||||
# format_name: percent
|
||||
# format_version: '1.3'
|
||||
# jupytext_version: 1.17.0
|
||||
# kernelspec:
|
||||
# display_name: .venv
|
||||
# language: python
|
||||
# name: python3
|
||||
# ---
|
||||
|
||||
# %%
|
||||
from copy import deepcopy
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
Generator,
|
||||
Optional,
|
||||
Sequence,
|
||||
TypeAlias,
|
||||
TypedDict,
|
||||
cast,
|
||||
overload,
|
||||
)
|
||||
|
||||
import awkward as ak
|
||||
import jax
|
||||
import jax.numpy as jnp
|
||||
import numpy as np
|
||||
import orjson
|
||||
from beartype import beartype
|
||||
from cv2 import undistortPoints
|
||||
from jaxtyping import Array, Float, Num, jaxtyped
|
||||
from matplotlib import pyplot as plt
|
||||
from numpy.typing import ArrayLike
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from app.camera import Camera, CameraParams, Detection
|
||||
from app.visualize.whole_body import visualize_whole_body
|
||||
from IPython.display import display
|
||||
|
||||
# Short alias for numpy's array type, used in the shape annotations below.
NDArray: TypeAlias = np.ndarray
|
||||
|
||||
# %%
|
||||
# Directory of the sample recording; the parquet files below are read from it.
DATASET_PATH = Path("samples") / "04_02"
# Camera parameter table loaded from `camera_params.parquet`.
AK_CAMERA_DATASET: ak.Array = ak.from_parquet(DATASET_PATH / "camera_params.parquet")
display(AK_CAMERA_DATASET)
|
||||
|
||||
|
||||
# %%
|
||||
class Resolution(TypedDict):
    """Width and height of a camera image (units presumably pixels -- confirm)."""

    width: int
    height: int
|
||||
|
||||
|
||||
class Intrinsic(TypedDict):
    """Intrinsic parameters of one camera."""

    # K
    camera_matrix: Num[Array, "3 3"]
    # distortion coefficients; usually 5
    distortion_coefficients: Num[Array, "N"]
|
||||
|
||||
|
||||
class Extrinsic(TypedDict):
    """Extrinsic parameters of one camera."""

    # rotation vector; `from_camera_params` feeds it to `Rotation.from_rotvec`
    rvec: Num[NDArray, "3"]
    # translation vector; becomes the last column of the 4x4 transform
    tvec: Num[NDArray, "3"]
|
||||
|
||||
|
||||
class ExternalCameraParams(TypedDict):
    """One camera record as consumed by `from_camera_params`."""

    # used as the `Camera.id` by `from_camera_params`
    name: str
    # port number; the per-camera dataset file is named `<port>.parquet`
    port: int
    intrinsic: Intrinsic
    extrinsic: Extrinsic
    resolution: Resolution
|
||||
|
||||
|
||||
# %%
|
||||
def read_dataset_by_port(port: int) -> ak.Array:
    """
    Load the parquet file `<port>.parquet` located under `DATASET_PATH`.

    Args:
        port: camera port; only used to build the file name

    Returns:
        the awkward array read from that file
    """
    return ak.from_parquet(DATASET_PATH / f"{port}.parquet")
|
||||
|
||||
|
||||
# One dataset per entry of the `port` column of AK_CAMERA_DATASET, keyed by the
# port converted to a plain `int`.
KEYPOINT_DATASET = {
    int(p): read_dataset_by_port(p) for p in ak.to_numpy(AK_CAMERA_DATASET["port"])
}
|
||||
|
||||
|
||||
# %%
|
||||
class KeypointDataset(TypedDict):
    """
    One frame of a per-camera keypoint dataset.

    In the shape strings, N is presumably the number of detections in the
    frame and J the number of keypoints per detection -- confirm against the
    dataset producer.
    """

    # index of the frame; converted to a timestamp in `preprocess_keypoint_dataset`
    frame_index: int
    boxes: Num[NDArray, "N 4"]
    # 2D keypoints, iterated per detection in `preprocess_keypoint_dataset`
    kps: Num[NDArray, "N J 2"]
    # one score per keypoint, paired with `kps`
    kps_scores: Num[NDArray, "N J"]
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
def to_transformation_matrix(
    rvec: Num[NDArray, "3"], tvec: Num[NDArray, "3"]
) -> Num[NDArray, "4 4"]:
    """
    Build a 4x4 homogeneous transform from a rotation vector and a translation.

    Args:
        rvec: rotation vector, converted to a 3x3 matrix by `Rotation.from_rotvec`
        tvec: translation, written into the last column

    Returns:
        the matrix `[[R, t], [0, 1]]`
    """
    res = np.eye(4)
    res[:3, :3] = R.from_rotvec(rvec).as_matrix()
    res[:3, 3] = tvec
    return res
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
def undistort_points(
    points: Num[NDArray, "M 2"],
    camera_matrix: Num[NDArray, "3 3"],
    dist_coeffs: Num[NDArray, "N"],
) -> Num[NDArray, "M 2"]:
    """
    Undistort 2D points with OpenCV's `undistortPoints`.

    Args:
        points: 2D points to undistort
        camera_matrix: camera matrix K
        dist_coeffs: distortion coefficients

    Returns:
        the undistorted points, reshaped to (M, 2)
    """
    # K is also passed as P so the result is expressed with the same camera
    # matrix as the input instead of in normalized coordinates.
    res = undistortPoints(points, camera_matrix, dist_coeffs, P=camera_matrix)  # type: ignore
    return res.reshape(-1, 2)
|
||||
|
||||
|
||||
def from_camera_params(camera: ExternalCameraParams) -> Camera:
    """
    Convert one external camera record into a `Camera`.

    Args:
        camera: record with `name`, `intrinsic`, `extrinsic` and `resolution`

    Returns:
        a `Camera` whose id is the record's name and whose params hold K
        (reshaped to 3x3), the 4x4 transform built from rvec/tvec, the
        distortion coefficients and the (width, height) image size
    """
    extrinsic = camera["extrinsic"]
    intrinsic = camera["intrinsic"]
    resolution = camera["resolution"]

    rt = jnp.array(
        to_transformation_matrix(
            ak.to_numpy(extrinsic["rvec"]),
            ak.to_numpy(extrinsic["tvec"]),
        )
    )
    K = jnp.array(intrinsic["camera_matrix"]).reshape(3, 3)
    dist_coeffs = jnp.array(intrinsic["distortion_coefficients"])
    image_size = jnp.array((resolution["width"], resolution["height"]))
    return Camera(
        id=camera["name"],
        params=CameraParams(
            K=K,
            Rt=rt,
            dist_coeffs=dist_coeffs,
            image_size=image_size,
        ),
    )
|
||||
|
||||
|
||||
def preprocess_keypoint_dataset(
    dataset: Sequence[KeypointDataset],
    camera: Camera,
    fps: float,
    start_timestamp: datetime,
) -> Generator[Detection, None, None]:
    """
    Turn per-frame keypoint records into a stream of `Detection` objects.

    Args:
        dataset: per-frame records (`frame_index`, `kps`, `kps_scores`)
        camera: camera attached to every yielded detection
        fps: frame rate used to convert a frame index into a time offset
        start_timestamp: timestamp assigned to frame index 0

    Yields:
        one `Detection` per (`kps`, `kps_scores`) pair of each frame; all
        detections of a frame share the frame's timestamp
    """
    frame_interval_s = 1 / fps
    for el in dataset:
        frame_index = el["frame_index"]
        timestamp = start_timestamp + timedelta(seconds=frame_index * frame_interval_s)
        for kp, kp_score in zip(el["kps"], el["kps_scores"]):
            yield Detection(
                keypoints=jnp.array(kp),
                confidences=jnp.array(kp_score),
                camera=camera,
                timestamp=timestamp,
            )
|
||||
|
||||
|
||||
# %%
|
||||
# A generator that yields `Detection` objects and neither receives nor returns a value.
DetectionGenerator: TypeAlias = Generator[Detection, None, None]
|
||||
|
||||
|
||||
def sync_batch_gen(gens: list[DetectionGenerator], diff: timedelta):
    """
    given a list of detection generators, return a generator that yields a batch of detections

    The generators are polled round-robin. The first detection of a batch
    fixes the batch's reference timestamp. A detection whose timestamp is at
    least `diff` (minus one microsecond of tolerance) away from that reference
    is held back for the next batch and its generator is paused. Once every
    generator is paused or exhausted, the current batch is yielded and the
    held-back detections start the next batch.

    Args:
        gens: list of detection generators
        diff: maximum timestamp difference between detections to consider them part of the same batch

    Yields:
        lists of detections, one list per batch
    """
    N = len(gens)
    last_batch_timestamp: Optional[datetime] = None
    next_batch_timestamp: Optional[datetime] = None
    current_batch: list[Detection] = []
    next_batch: list[Detection] = []
    paused: list[bool] = [False] * N
    finished: list[bool] = [False] * N

    def reset_paused():
        """
        reset paused list based on finished list
        """
        # exhausted generators stay paused; every other one is resumed
        paused[:] = finished

    EPS = 1e-6
    # a small epsilon to avoid floating point precision issues
    diff_eps = diff - timedelta(seconds=EPS)
    while True:
        for i, gen in enumerate(gens):
            if finished[i] or paused[i]:
                continue
            try:
                val = next(gen)
            except StopIteration:
                finished[i] = True
                paused[i] = True
            else:
                if last_batch_timestamp is None:
                    last_batch_timestamp = val.timestamp
                    current_batch.append(val)
                elif abs(val.timestamp - last_batch_timestamp) >= diff_eps:
                    # belongs to a later batch: hold it and stop polling this
                    # generator until the current batch has been emitted
                    next_batch.append(val)
                    if next_batch_timestamp is None:
                        next_batch_timestamp = val.timestamp
                    paused[i] = True
                else:
                    current_batch.append(val)

            # Checked after every event, including exhaustion: if a generator
            # runs dry while all others are already paused, the batch must be
            # flushed here, otherwise nothing is ever polled again and the
            # outer loop spins forever.
            if all(paused) and not all(finished):
                yield current_batch
                current_batch = next_batch
                next_batch = []
                last_batch_timestamp = next_batch_timestamp
                next_batch_timestamp = None
                reset_paused()
        if all(finished):
            if len(current_batch) > 0:
                # All generators exhausted, flush remaining batch and exit
                yield current_batch
            break
|
||||
|
||||
|
||||
# %%
|
||||
@overload
def to_projection_matrix(
    transformation_matrix: Num[NDArray, "4 4"], camera_matrix: Num[NDArray, "3 3"]
) -> Num[NDArray, "3 4"]: ...


@overload
def to_projection_matrix(
    transformation_matrix: Num[Array, "4 4"], camera_matrix: Num[Array, "3 3"]
) -> Num[Array, "3 4"]: ...


@jaxtyped(typechecker=beartype)
def to_projection_matrix(
    transformation_matrix: Num[Any, "4 4"],
    camera_matrix: Num[Any, "3 3"],
) -> Num[Any, "3 4"]:
    """
    Compute the 3x4 projection matrix `K @ [R | t]`.

    Args:
        transformation_matrix: 4x4 homogeneous transform; only its first three rows are used
        camera_matrix: 3x3 camera matrix K

    Returns:
        the 3x4 product, as a numpy or jax array matching the inputs
    """
    rt_3x4 = transformation_matrix[:3, :]
    return camera_matrix @ rt_3x4
|
||||
|
||||
|
||||
# `to_projection_matrix` wrapped with `jax.jit`.
to_projection_matrix_jit = jax.jit(to_projection_matrix)
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
def dlt(
    H1: Num[NDArray, "3 4"],
    H2: Num[NDArray, "3 4"],
    p1: Num[NDArray, "2"],
    p2: Num[NDArray, "2"],
) -> Num[NDArray, "3"]:
    """
    Direct Linear Transformation

    Triangulates one 3D point from its 2D observations in two views.

    Args:
        H1: 3x4 projection matrix of the first view
        H2: 3x4 projection matrix of the second view
        p1: (x, y) observation in the first view
        p2: (x, y) observation in the second view

    Returns:
        the 3D point, obtained by dehomogenizing the solution of `A X = 0`
    """
    from scipy import linalg

    A = np.array(
        [
            p1[1] * H1[2, :] - H1[1, :],
            H1[0, :] - p1[0] * H1[2, :],
            p2[1] * H2[2, :] - H2[1, :],
            H2[0, :] - p2[0] * H2[2, :],
        ]
    ).reshape((4, 4))

    # The solution is the right singular vector of A belonging to the smallest
    # singular value (the last row of Vh, since singular values are sorted in
    # descending order). Decomposing A directly gives the same vector as
    # decomposing A^T A, without squaring the condition number.
    _, _, Vh = linalg.svd(A, full_matrices=False)
    return Vh[3, 0:3] / Vh[3, 3]
|
||||
|
||||
|
||||
@overload
def homogeneous_to_euclidean(points: Num[NDArray, "N 4"]) -> Num[NDArray, "N 3"]: ...


@overload
def homogeneous_to_euclidean(points: Num[Array, "N 4"]) -> Num[Array, "N 3"]: ...


@jaxtyped(typechecker=beartype)
def homogeneous_to_euclidean(
    points: Num[Any, "N 4"],
) -> Num[Any, "N 3"]:
    """
    Convert homogeneous coordinates to Euclidean coordinates.

    Args:
        points: homogeneous coordinates (x, y, z, w) in numpy array or jax array

    Returns:
        euclidean coordinates (x, y, z) in numpy array or jax array
    """
    xyz = points[..., :-1]
    # keep the last axis on w so it broadcasts against xyz
    w = points[..., -1:]
    return xyz / w
|
||||
|
||||
|
||||
# %%
|
||||
# Frame rate handed to every preprocess_keypoint_dataset call below; its
# reciprocal is also the time window passed to sync_batch_gen.
FPS = 24
# One generator per camera port (5600, 5601, 5602). Each call pairs that port's
# keypoint data with the matching row of AK_CAMERA_DATASET and uses the same
# start datetime (2024-04-02 12:00:00).
image_gen_5600 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5600], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET["port"] == 5600][0]), FPS, datetime(2024, 4, 2, 12, 0, 0))  # type: ignore
image_gen_5601 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5601], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET["port"] == 5601][0]), FPS, datetime(2024, 4, 2, 12, 0, 0))  # type: ignore
image_gen_5602 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5602], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET["port"] == 5602][0]), FPS, datetime(2024, 4, 2, 12, 0, 0))  # type: ignore

# Show the duration of one frame in seconds.
display(1 / FPS)
# Merge the three per-camera streams into batches.
# NOTE(review): the timedelta is presumably the maximum timestamp spread
# allowed inside one batch -- confirm against sync_batch_gen.
sync_gen = sync_batch_gen(
    [image_gen_5600, image_gen_5601, image_gen_5602], timedelta(seconds=1 / FPS)
)
|
||||
|
||||
# %%
|
||||
# Pull the first batch of detections from the synchronised stream.
detections = next(sync_gen)

# %%
from app.camera import calculate_affinity_matrix_by_epipolar_constraint

# Returns the detections in the order used by the matrix, plus the matrix itself.
sorted_detections, affinity_matrix = calculate_affinity_matrix_by_epipolar_constraint(
    detections,
    alpha_2d=2000,
)

# %%
# Summarise each detection (timestamp and camera id) in matrix order.
display(
    [
        {"timestamp": str(det.timestamp), "camera": det.camera.id}
        for det in sorted_detections
    ]
)
with jnp.printoptions(precision=3, suppress=True):
    display(affinity_matrix)
|
||||
|
||||
# %%
|
||||
from app.solver._old import GLPKSolver
|
||||
|
||||
|
||||
def clusters_to_detections(
    clusters: list[list[int]], sorted_detections: list[Detection]
) -> list[list[Detection]]:
    """
    Resolve clusters of indices into clusters of detections.

    Args:
        clusters: list of clusters, each cluster is a list of indices into `sorted_detections`
        sorted_detections: list of SORTED detections that the indices refer to

    Returns:
        list of clusters in the same order, each cluster being the list of
        detections its indices point at
    """
    resolved = []
    for index_group in clusters:
        members = [sorted_detections[idx] for idx in index_group]
        resolved.append(members)
    return resolved
|
||||
|
||||
|
||||
solver = GLPKSolver()
# The solver is fed a float64 numpy copy of the affinity matrix.
aff_np = np.asarray(affinity_matrix).astype(np.float64)
clusters, sol_matrix = solver.solve(aff_np)
display(clusters)
display(sol_matrix)

# %%
# Canvas size in pixels used for the visualisations below.
# NOTE(review): presumably the camera image resolution -- confirm.
WIDTH = 2560
HEIGHT = 1440

# Turn index clusters into detection clusters, then draw every detection of
# the first cluster onto one black canvas.
clusters_detections = clusters_to_detections(clusters, sorted_detections)
im = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)
for el in clusters_detections[0]:
    im = visualize_whole_body(np.asarray(el.keypoints), im)

p = plt.imshow(im)
display(p)

# %%
# Same drawing for the second cluster; indexing [1] requires that the solver
# returned at least two clusters.
im_prime = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)
for el in clusters_detections[1]:
    im_prime = visualize_whole_body(np.asarray(el.keypoints), im_prime)

p_prime = plt.imshow(im_prime)
display(p_prime)
|
||||
|
||||
|
||||
# %%
|
||||
@jaxtyped(typechecker=beartype)
def triangulate_one_point_from_multiple_views_linear(
    proj_matrices: Float[Array, "N 3 4"],
    points: Num[Array, "N 2"],
    confidences: Optional[Float[Array, "N"]] = None,
) -> Float[Array, "3"]:
    """
    Triangulate a single 3D point from N views with the linear (DLT) method.

    Every view contributes two rows to a homogeneous system `A @ X = 0`; the
    solution is the right singular vector of `A` that belongs to the smallest
    singular value.

    Args:
        proj_matrices: projection matrices, shape (N, 3, 4)
        points: 2D point coordinates, one per view, shape (N, 2)
        confidences: optional per-view confidences, shape (N,), range [0.0, 1.0];
            values are clipped to that range and their square root is used as
            the weight of the view's two rows. All weights are 1 when omitted.

    Returns:
        point_3d: the triangulated 3D point, shape (3,)
    """
    assert len(proj_matrices) == len(points)

    N = len(proj_matrices)
    if confidences is None:
        confi = jnp.ones(N, dtype=jnp.float32)
    else:
        # Use square root of confidences for weighting - more balanced approach
        confi = jnp.sqrt(jnp.clip(confidences, 0, 1))

    # Build all rows at once instead of 4 * N scatter updates in a Python loop.
    x = points[:, 0:1]  # (N, 1), broadcasts against the (N, 4) matrix rows
    y = points[:, 1:2]
    rows_x = proj_matrices[:, 2] * x - proj_matrices[:, 0]  # (N, 4)
    rows_y = proj_matrices[:, 2] * y - proj_matrices[:, 1]  # (N, 4)

    # Stack to (N, 2, 4), weight each view, then flatten so the rows interleave
    # as x0, y0, x1, y1, ... . The cast keeps the float32 system matrix.
    weighted = jnp.stack([rows_x, rows_y], axis=1) * confi[:, None, None]
    A = weighted.reshape(2 * N, 4).astype(jnp.float32)

    # https://docs.jax.dev/en/latest/_autosummary/jax.numpy.linalg.svd.html
    _, _, vh = jnp.linalg.svd(A, full_matrices=False)
    point_3d_homo = vh[-1]  # shape (4,)

    # No sign normalisation is needed: negating the whole homogeneous vector
    # leaves the quotient below unchanged.
    point_3d = point_3d_homo[:3] / point_3d_homo[3]
    return point_3d
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
def triangulate_points_from_multiple_views_linear(
    proj_matrices: Float[Array, "N 3 4"],
    points: Num[Array, "N P 2"],
    confidences: Optional[Float[Array, "N P"]] = None,
) -> Float[Array, "P 3"]:
    """
    Batch-triangulate P points observed by N cameras, linearly via SVD.

    Args:
        proj_matrices: (N, 3, 4) projection matrices
        points: (N, P, 2) image-coordinates per view
        confidences: (N, P) optional per-view, per-point confidences in [0, 1]

    Returns:
        (P, 3) 3D point for each of the P tracks
    """
    N, P, _ = points.shape
    assert proj_matrices.shape[0] == N
    if confidences is None:
        conf = jnp.ones((N, P), dtype=jnp.float32)
    else:
        # Forward the raw confidences: the one-point routine already clips
        # them to [0, 1] and takes the square root. Doing that here as well
        # would weight the rows by confidence ** 0.25 instead of ** 0.5.
        conf = confidences

    # Vectorize the one-point routine over the P axis.
    vmap_triangulate = jax.vmap(
        triangulate_one_point_from_multiple_views_linear,
        in_axes=(None, 1, 1),  # proj_matrices shared; map over points[:, p, :] and conf[:, p]
        out_axes=0,
    )

    # returns (P, 3)
    return vmap_triangulate(proj_matrices, points, conf)
|
||||
|
||||
|
||||
# %%
|
||||
from dataclasses import dataclass
|
||||
from copy import copy as shallow_copy, deepcopy as deep_copy
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
@dataclass(frozen=True)
class Tracking:
    """Immutable record of one tracked target."""

    # Identifier of the tracking.
    id: int
    # 3D keypoints, one (x, y, z) row per joint.
    keypoints: Float[Array, "J 3"]
    # Timestamp attached to the tracking when it was created.
    last_active_timestamp: datetime

    def __repr__(self) -> str:
        # Keypoints are left out on purpose to keep the representation short.
        tracking_id = self.id
        last_seen = self.last_active_timestamp
        return f"Tracking({tracking_id}, {last_seen})"
|
||||
|
||||
|
||||
@jaxtyped(typechecker=beartype)
def triangle_from_cluster(
    cluster: list[Detection],
) -> tuple[Float[Array, "N 3"], datetime]:
    """
    Triangulate the 3D keypoints of one cluster of detections.

    Args:
        cluster: detections to triangulate together; each supplies its camera's
            projection matrix, its undistorted 2D keypoints and its confidences

    Returns:
        a tuple of the triangulated 3D keypoints and the latest timestamp
        among the detections of the cluster
    """
    projections = jnp.array([det.camera.params.projection_matrix for det in cluster])
    observed_2d = jnp.array([det.keypoints_undistorted for det in cluster])
    weights = jnp.array([det.confidences for det in cluster])
    newest_timestamp = max(det.timestamp for det in cluster)

    keypoints_3d = triangulate_points_from_multiple_views_linear(
        projections, observed_2d, confidences=weights
    )
    return keypoints_3d, newest_timestamp
|
||||
|
||||
|
||||
# res = {
|
||||
# "a": triangle_from_cluster(clusters_detections[0]).tolist(),
|
||||
# "b": triangle_from_cluster(clusters_detections[1]).tolist(),
|
||||
# }
|
||||
# with open("samples/res.json", "wb") as f:
|
||||
# f.write(orjson.dumps(res))
|
||||
|
||||
|
||||
class GlobalTrackingState:
    """Registry of all trackings, keyed by an id that increases by one per new tracking."""

    # Id given to the most recently added tracking (0 while the registry is empty).
    _last_id: int
    # All trackings, keyed by their id.
    _trackings: dict[int, Tracking]

    def __init__(self):
        self._trackings = {}
        self._last_id = 0

    def __repr__(self) -> str:
        last_id = self._last_id
        trackings = self._trackings
        return f"GlobalTrackingState(last_id={last_id}, trackings={trackings})"

    @property
    def trackings(self) -> dict[int, Tracking]:
        """A shallow copy of the registry, so callers cannot add or remove entries."""
        return dict(self._trackings)

    def add_tracking(self, cluster: list[Detection]) -> Tracking:
        """
        Triangulate `cluster` and register the result as a new tracking.

        Args:
            cluster: detections to triangulate into one tracking

        Returns:
            the newly created tracking; its id is the previous last id plus one
        """
        kps_3d, latest_timestamp = triangle_from_cluster(cluster)
        new_id = self._last_id + 1
        created = Tracking(
            id=new_id, keypoints=kps_3d, last_active_timestamp=latest_timestamp
        )
        # Commit to the registry only after the tracking was built successfully.
        self._trackings[new_id] = created
        self._last_id = new_id
        return created
|
||||
|
||||
|
||||
# Seed the global state with one tracking per cluster found in the first batch.
global_tracking_state = GlobalTrackingState()
for cluster in clusters_detections:
    global_tracking_state.add_tracking(cluster)
display(global_tracking_state)

# %%
# Pull the next batch of detections from the synchronised stream.
next_group = next(sync_gen)
display(next_group)
|
||||
|
||||
# %%
|
||||
from app.camera import classify_by_camera

# let's do cross-view association
trackings = sorted(global_tracking_state.trackings.values(), key=lambda x: x.id)
detections = shallow_copy(next_group)
# cross-view association matrix with shape (T, D), where T is the number of trackings, D is the number of detections
affinity = np.zeros((len(trackings), len(detections)))
detection_by_camera = classify_by_camera(detections)
for i, tracking in enumerate(trackings):
    # The loop variable must not be called `detections`: that would rebind the
    # module-level list that sized `affinity` to the last camera's subset.
    for c, camera_detections in detection_by_camera.items():
        # All detections in one group share a camera, so take it from the first.
        camera = next(iter(camera_detections)).camera
        # pixel space, unnormalized
        tracking_2d_projection = camera.project(tracking.keypoints)
|
||||
@ -11,6 +11,7 @@ dependencies = [
|
||||
"cvxopt>=1.3.2",
|
||||
"jax[cuda12]>=0.5.1",
|
||||
"jaxtyping>=0.2.38",
|
||||
"jupytext>=1.17.0",
|
||||
"matplotlib>=3.10.1",
|
||||
"opencv-python-headless>=4.11.0.86",
|
||||
"orjson>=3.10.15",
|
||||
|
||||
52
uv.lock
generated
52
uv.lock
generated
@ -450,6 +450,7 @@ dependencies = [
|
||||
{ name = "cvxopt" },
|
||||
{ name = "jax", extra = ["cuda12"] },
|
||||
{ name = "jaxtyping" },
|
||||
{ name = "jupytext" },
|
||||
{ name = "matplotlib" },
|
||||
{ name = "opencv-python-headless" },
|
||||
{ name = "orjson" },
|
||||
@ -475,6 +476,7 @@ requires-dist = [
|
||||
{ name = "cvxopt", specifier = ">=1.3.2" },
|
||||
{ name = "jax", extras = ["cuda12"], specifier = ">=0.5.1" },
|
||||
{ name = "jaxtyping", specifier = ">=0.2.38" },
|
||||
{ name = "jupytext", specifier = ">=1.17.0" },
|
||||
{ name = "matplotlib", specifier = ">=3.10.1" },
|
||||
{ name = "opencv-python-headless", specifier = ">=4.11.0.86" },
|
||||
{ name = "orjson", specifier = ">=3.10.15" },
|
||||
@ -1245,6 +1247,23 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/93/858e87edc634d628e5d752ba944c2833133a28fa87bb093e6832ced36a3e/jupyterlab_widgets-3.0.13-py3-none-any.whl", hash = "sha256:e3cda2c233ce144192f1e29914ad522b2f4c40e77214b0cc97377ca3d323db54", size = 214392 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jupytext"
|
||||
version = "1.17.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markdown-it-py" },
|
||||
{ name = "mdit-py-plugins" },
|
||||
{ name = "nbformat" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pyyaml" },
|
||||
{ name = "tomli", marker = "python_full_version < '3.11'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/17/b5/b04008ae2f3b37f0a955d9232c729589bcf40819732cd08a54bce08feb83/jupytext-1.17.0.tar.gz", hash = "sha256:c74adf6d205a778f481189e986b6066af79bdeb69099f4138d933cc15c398fb6", size = 3746613 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/46/c2fb92e01eb0423bae7fe91c3bf2ca994069f299a6455919f4a9a12960ed/jupytext-1.17.0-py3-none-any.whl", hash = "sha256:d75b7cd198b3640a12f9cdf4d610bb80c9f27a8c3318b00372f90d21466d40e1", size = 164106 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kiwisolver"
|
||||
version = "1.4.8"
|
||||
@ -1332,6 +1351,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/1d/50ad811d1c5dae091e4cf046beba925bcae0a610e79ae4c538f996f63ed5/kiwisolver-1.4.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b", size = 71762 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markdown-it-py"
|
||||
version = "3.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "mdurl" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markupsafe"
|
||||
version = "3.0.2"
|
||||
@ -1454,6 +1485,27 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mdit-py-plugins"
|
||||
version = "0.4.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markdown-it-py" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mdurl"
|
||||
version = "0.1.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mistune"
|
||||
version = "3.1.2"
|
||||
|
||||
Reference in New Issue
Block a user