{ "cells": [ { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "from datetime import datetime, timedelta\n", "from pathlib import Path\n", "from typing import Generator, Sequence, TypeAlias, TypedDict\n", "\n", "import awkward as ak\n", "import jax\n", "import jax.numpy as jnp\n", "import numpy as np\n", "from jaxtyping import Array, Num\n", "from matplotlib import pyplot as plt\n", "\n", "from app.camera import Detection\n", "from app.camera import Camera, CameraParams\n", "\n", "NDArray: TypeAlias = np.ndarray" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[{name: 'AE_01', port: 5602, intrinsic: {...}, extrinsic: {...}, ...},\n",
       " {name: 'AE_1A', port: 5601, intrinsic: {...}, extrinsic: {...}, ...},\n",
       " {name: 'AE_08', port: 5600, intrinsic: {...}, extrinsic: {...}, ...}]\n",
       "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
       "backend: cpu\n",
       "nbytes: 823 B\n",
       "type: 3 * {\n",
       "    name: string,\n",
       "    port: int64,\n",
       "    intrinsic: {\n",
       "        camera_matrix: var * var * var * float64,\n",
       "        distortion_coefficients: var * float64\n",
       "    },\n",
       "    extrinsic: {\n",
       "        rvec: var * float64,\n",
       "        tvec: var * float64\n",
       "    },\n",
       "    resolution: {\n",
       "        width: int64,\n",
       "        height: int64\n",
       "    }\n",
       "}
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "# Every sample file read below (camera parameters, per-port keypoints) lives in this directory.\n",
    "DATASET_PATH = Path(\"samples\", \"04_02\")\n",
    "AK_CAMERA_DATASET: ak.Array = ak.from_parquet(DATASET_PATH.joinpath(\"camera_params.parquet\"))\n",
    "display(AK_CAMERA_DATASET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Resolution(TypedDict):\n",
    "    \"\"\"Width and height stored in the `resolution` field of a camera record.\"\"\"\n",
    "\n",
    "    width: int\n",
    "    height: int\n",
    "\n",
    "\n",
    "class Intrinsic(TypedDict):\n",
    "    \"\"\"Contents of the `intrinsic` field of a camera record.\"\"\"\n",
    "\n",
    "    camera_matrix: Num[Array, \"3 3\"]\n",
    "    \"\"\"\n",
    "    K\n",
    "    \"\"\"\n",
    "    distortion_coefficients: Num[Array, \"N\"]\n",
    "    \"\"\"\n",
    "    distortion coefficients; usually 5\n",
    "    \"\"\"\n",
    "\n",
    "\n",
    "class Extrinsic(TypedDict):\n",
    "    \"\"\"Contents of the `extrinsic` field of a camera record: rotation vector and translation vector.\"\"\"\n",
    "\n",
    "    rvec: Num[NDArray, \"3\"]\n",
    "    tvec: Num[NDArray, \"3\"]\n",
    "\n",
    "\n",
    "class ExternalCameraParams(TypedDict):\n",
    "    \"\"\"One camera record as laid out in `camera_params.parquet`.\"\"\"\n",
    "\n",
    "    name: str\n",
    "    port: int\n",
    "    intrinsic: Intrinsic\n",
    "    extrinsic: Extrinsic\n",
    "    resolution: Resolution\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_dataset_by_port(port: int) -> ak.Array:\n",
    "    \"\"\"\n",
    "    Load the keypoint dataset stored for one camera port.\n",
    "\n",
    "    Args:\n",
    "        port: camera port; the file read is `DATASET_PATH / \"<port>.parquet\"`\n",
    "\n",
    "    Returns:\n",
    "        the parquet file's content as an awkward array\n",
    "    \"\"\"\n",
    "    parquet_path = DATASET_PATH / f\"{port}.parquet\"\n",
    "    return ak.from_parquet(parquet_path)\n",
    "\n",
    "\n",
    "# One dataset per port listed in the camera dataset, keyed by the port as a plain Python int.\n",
    "KEYPOINT_DATASET = {\n",
    "    int(port): read_dataset_by_port(port)\n",
    "    for port in ak.to_numpy(AK_CAMERA_DATASET[\"port\"])\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [ { "data": { "text/html": [ "
[{frame_index: 0, boxes: [[599, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 1, boxes: [[599, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 2, boxes: [[599, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 3, boxes: [[599, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 4, boxes: [[598, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 5, boxes: [[596, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 6, boxes: [[594, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 7, boxes: [[595, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 8, boxes: [[595, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " {frame_index: 9, boxes: [[595, ...], [...]], kps: [...], kps_scores: ..., ...},\n",
       " ...,\n",
       " {frame_index: 520, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 521, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 522, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 523, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 524, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 525, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 526, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 527, boxes: [[1.09e+03, ...], ...], kps: [...], ...},\n",
       " {frame_index: 528, boxes: [[1.09e+03, ...], ...], kps: [...], ...}]\n",
       "-----------------------------------------------------------------------------------------------------------------------------------------------\n",
       "backend: cpu\n",
       "nbytes: 4.6 MB\n",
       "type: 529 * {\n",
       "    frame_index: int64,\n",
       "    boxes: var * var * float64,\n",
       "    kps: var * var * var * float64,\n",
       "    kps_scores: var * var * float64\n",
       "}
" ], "text/plain": [ "" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ],
   "source": [
    "KEYPOINT_DATASET[5601]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.spatial.transform import Rotation as R"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "class KeypointDataset(TypedDict):\n",
    "    \"\"\"One frame record of a per-port keypoint dataset.\"\"\"\n",
    "\n",
    "    frame_index: int\n",
    "    boxes: Num[NDArray, \"N 4\"]\n",
    "    kps: Num[NDArray, \"N J 2\"]\n",
    "    kps_scores: Num[NDArray, \"N J\"]\n",
    "\n",
    "\n",
    "def to_transformation_matrix(rvec: Num[NDArray, \"3\"], tvec: Num[NDArray, \"3\"]) -> Num[NDArray, \"4 4\"]:\n",
    "    \"\"\"\n",
    "    Build a 4x4 homogeneous transformation matrix from a rotation vector and a translation vector.\n",
    "\n",
    "    Args:\n",
    "        rvec: rotation vector; converted to a 3x3 rotation matrix with `Rotation.from_rotvec`\n",
    "        tvec: translation vector; becomes the last column of the upper 3x4 part\n",
    "\n",
    "    Returns:\n",
    "        the matrix `[[R, t], [0, 0, 0, 1]]`\n",
    "    \"\"\"\n",
    "    r = R.from_rotvec(rvec)  # type: ignore\n",
    "    t = tvec.reshape(3, 1)\n",
    "    upper = np.concatenate([r.as_matrix(), t], axis=1)\n",
    "    bottom = np.array([[0, 0, 0, 1]])\n",
    "    return np.concatenate([upper, bottom], axis=0)\n",
    "\n",
    "\n",
    "def from_camera_params(camera: ExternalCameraParams) -> Camera:\n",
    "    \"\"\"\n",
    "    Convert one record of the camera dataset into a `Camera`.\n",
    "\n",
    "    Args:\n",
    "        camera: record with `name`, `intrinsic`, `extrinsic` and `resolution` fields\n",
    "\n",
    "    Returns:\n",
    "        a `Camera` whose id is the record's name and whose params are JAX arrays\n",
    "    \"\"\"\n",
    "    rt = jnp.array(to_transformation_matrix(ak.to_numpy(camera[\"extrinsic\"][\"rvec\"]), ak.to_numpy(camera[\"extrinsic\"][\"tvec\"])))\n",
    "    # the parquet schema nests the camera matrix one level deeper than 3x3, so force the shape\n",
    "    K = jnp.array(camera[\"intrinsic\"][\"camera_matrix\"]).reshape(3, 3)\n",
    "    dist_coeffs = jnp.array(camera[\"intrinsic\"][\"distortion_coefficients\"])\n",
    "    image_size = jnp.array((camera[\"resolution\"][\"width\"], camera[\"resolution\"][\"height\"]))\n",
    "    return Camera(\n",
    "        id=camera[\"name\"],\n",
    "        params=CameraParams(\n",
    "            K=K,\n",
    "            Rt=rt,\n",
    "            dist_coeffs=dist_coeffs,\n",
    "            image_size=image_size,\n",
    "        )\n",
    "    )\n",
    "\n",
    "\n",
    "def preprocess_keypoint_dataset(dataset: Sequence[KeypointDataset], camera: Camera, fps: float, start_timestamp: datetime) -> Generator[Detection, None, None]:\n",
    "    \"\"\"\n",
    "    Turn the frames of a keypoint dataset into a stream of `Detection` objects.\n",
    "\n",
    "    Args:\n",
    "        dataset: frame records to convert\n",
    "        camera: camera attached to every yielded detection\n",
    "        fps: frame rate used to convert a frame index into a time offset\n",
    "        start_timestamp: timestamp assigned to frame index 0\n",
    "\n",
    "    Yields:\n",
    "        one `Detection` per `(kps, kps_scores)` pair of every frame; all detections of a\n",
    "        frame share the timestamp `start_timestamp + frame_index * (1 / fps)` seconds\n",
    "    \"\"\"\n",
    "    frame_interval_s = 1 / fps\n",
    "    for el in dataset:\n",
    "        frame_index = el[\"frame_index\"]\n",
    "        timestamp = start_timestamp + timedelta(seconds=frame_index * frame_interval_s)\n",
    "        for kp, kp_score in zip(el[\"kps\"], el[\"kps_scores\"]):\n",
    "            yield Detection(\n",
    "                keypoints=jnp.array(kp),\n",
    "                confidences=jnp.array(kp_score),\n",
    "                camera=camera,\n",
    "                timestamp=timestamp,\n",
    "            )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Optional\n",
    "from copy import deepcopy\n",
    "\n",
    "DetectionGenerator: TypeAlias = Generator[Detection, None, None]\n",
    "\n",
    "\n",
    "def sync_batch_gen(gens: list[DetectionGenerator], diff: timedelta):\n",
    "    \"\"\"\n",
    "    given a list of detection generators, return a generator that yields a batch of detections\n",
    "\n",
    "    Every generator is advanced until it produces a detection whose timestamp is at least\n",
    "    `diff` away from the timestamp of the current batch; that detection is held back for the\n",
    "    next batch and the generator is paused. As soon as every generator is paused or exhausted,\n",
    "    the current batch is yielded and the held-back detections become the new current batch.\n",
    "\n",
    "    Args:\n",
    "        gens: list of detection generators\n",
    "        diff: maximum timestamp difference between detections to consider them part of the same batch\n",
    "\n",
    "    Yields:\n",
    "        a list of detections forming one batch; the remaining batch is flushed once all\n",
    "        generators are exhausted\n",
    "    \"\"\"\n",
    "    N = len(gens)\n",
    "    last_batch_timestamp: Optional[datetime] = None\n",
    "    next_batch_timestamp: Optional[datetime] = None\n",
    "    current_batch: list[Detection] = []\n",
    "    next_batch: list[Detection] = []\n",
    "    paused: list[bool] = [False] * N\n",
    "    finished: list[bool] = [False] * N\n",
    "\n",
    "    def reset_paused():\n",
    "        \"\"\"\n",
    "        reset paused list based on finished list\n",
    "        \"\"\"\n",
    "        for i in range(N):\n",
    "            # exhausted generators stay paused, all others resume\n",
    "            paused[i] = finished[i]\n",
    "\n",
    "    EPS = 1e-6\n",
    "    # a small epsilon to avoid floating point precision issues\n",
    "    diff_esp = diff - timedelta(seconds=EPS)\n",
    "    while True:\n",
    "        for i, gen in enumerate(gens):\n",
    "            if finished[i] or paused[i]:\n",
    "                continue\n",
    "            try:\n",
    "                val = next(gen)\n",
    "            except StopIteration:\n",
    "                finished[i] = True\n",
    "                paused[i] = True\n",
    "            else:\n",
    "                if last_batch_timestamp is None:\n",
    "                    # very first detection defines the timestamp of the first batch\n",
    "                    last_batch_timestamp = val.timestamp\n",
    "                    current_batch.append(val)\n",
    "                elif abs(val.timestamp - last_batch_timestamp) >= diff_esp:\n",
    "                    # belongs to the next batch: hold it back and stop reading this generator\n",
    "                    next_batch.append(val)\n",
    "                    if next_batch_timestamp is None:\n",
    "                        next_batch_timestamp = val.timestamp\n",
    "                    paused[i] = True\n",
    "                else:\n",
    "                    current_batch.append(val)\n",
    "            # Checked after exhaustion as well as after a pause: if a generator runs out while\n",
    "            # all the others are already paused, nobody would ever resume them and the outer\n",
    "            # loop would spin forever. The all-finished case is handled by the flush below.\n",
    "            if all(paused) and not all(finished):\n",
    "                yield current_batch\n",
    "                current_batch = next_batch\n",
    "                next_batch = []\n",
    "                last_batch_timestamp = next_batch_timestamp\n",
    "                next_batch_timestamp = None\n",
    "                reset_paused()\n",
    "        if all(finished):\n",
    "            if len(current_batch) > 0:\n",
    "                # All generators exhausted, flush remaining batch and exit\n",
    "                yield current_batch\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [ { "data": { "text/plain": [ "0.041666666666666664" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "FPS = 24\n",
    "# every camera stream is stamped from the same start time\n",
    "START_TIMESTAMP = datetime(2024, 4, 2, 12, 0, 0)\n",
    "\n",
    "\n",
    "def detection_gen_for_port(port: int) -> DetectionGenerator:\n",
    "    \"\"\"Build the detection generator for the camera whose `port` field equals `port`.\"\"\"\n",
    "    camera_record = AK_CAMERA_DATASET[AK_CAMERA_DATASET[\"port\"] == port][0]\n",
    "    return preprocess_keypoint_dataset(KEYPOINT_DATASET[port], from_camera_params(camera_record), FPS, START_TIMESTAMP)  # type: ignore\n",
    "\n",
    "\n",
    "image_gen_5600 = detection_gen_for_port(5600)\n",
    "image_gen_5601 = detection_gen_for_port(5601)\n",
    "image_gen_5602 = detection_gen_for_port(5602)\n",
    "\n",
    "display(1/FPS)\n",
    "sync_gen = sync_batch_gen([image_gen_5600, image_gen_5601, image_gen_5602], timedelta(seconds=1/FPS))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "detections = next(sync_gen)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "from app.camera import calculate_affinity_matrix_by_epipolar_constraint\n",
    "\n",
    "# NOTE(review): alpha_2d is passed as a bare literal; its meaning is defined in app.camera - confirm and document\n",
    "sorted_detections, affinity_matrix = calculate_affinity_matrix_by_epipolar_constraint(detections,\n",
    "                                                                                      alpha_2d=1800)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [ { "data": { "text/plain": [ "[{'timestamp': '2024-04-02 12:00:00', 'camera': 'AE_08'},\n", " {'timestamp': '2024-04-02 12:00:00', 'camera': 'AE_08'},\n", " {'timestamp': '2024-04-02 12:00:00', 'camera': 'AE_1A'},\n", " {'timestamp': 
'2024-04-02 12:00:00', 'camera': 'AE_1A'},\n", " {'timestamp': '2024-04-02 12:00:00', 'camera': 'AE_01'},\n", " {'timestamp': '2024-04-02 12:00:00', 'camera': 'AE_01'}]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Array([[ -inf, -inf, 0.625, 0.321, -0.243, 0.018],\n", " [ -inf, -inf, 0.9 , 0.795, 0.293, 0.568],\n", " [ 0.625, 0.9 , -inf, -inf, 0.211, 0.371],\n", " [ 0.321, 0.795, -inf, -inf, 0.684, 0.793],\n", " [-0.243, 0.293, 0.211, 0.684, -inf, -inf],\n", " [ 0.018, 0.568, 0.371, 0.793, -inf, -inf]], dtype=float32)" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "# Show the timestamp and camera id of every detection, in the order of `sorted_detections`.\n",
    "detection_summary = [\n",
    "    {\"timestamp\": str(det.timestamp), \"camera\": det.camera.id}\n",
    "    for det in sorted_detections\n",
    "]\n",
    "display(detection_summary)\n",
    "with jnp.printoptions(precision=3, suppress=True):\n",
    "    display(affinity_matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [ { "data": { "text/plain": [ "[[0, 2, 5], [1, 3, 4]]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "array([[0, 0, 1, 0, 0, 1],\n", " [0, 0, 0, 1, 1, 0],\n", " [1, 0, 0, 0, 0, 1],\n", " [0, 1, 0, 0, 1, 0],\n", " [0, 1, 0, 1, 0, 0],\n", " [1, 0, 1, 0, 0, 0]])" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "from app.solver._old import GLPKSolver\n",
    "\n",
    "solver = GLPKSolver()\n",
    "# the solver receives a float64 NumPy copy of the affinity matrix\n",
    "aff_np = np.asarray(affinity_matrix).astype(np.float64)\n",
    "clusters, sol_matrix = solver.solve(aff_np)\n",
    "display(clusters, sol_matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [ { "data": { "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "from app.visualize.whole_body import visualize_whole_body\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "# size of the blank canvas the keypoints are drawn on\n",
    "WIDTH = 2560\n",
    "HEIGHT = 1440\n",
    "\n",
    "# draw every detection of the first cluster onto one black canvas\n",
    "im = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)\n",
    "for i in clusters[0]:\n",
    "    el = sorted_detections[i]\n",
    "    keypoints_np = np.asarray(el.keypoints)\n",
    "    im = visualize_whole_body(keypoints_np, im)\n",
    "\n",
    "p = plt.imshow(im)\n",
    "display(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [ { "data": { "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "# same drawing as the previous cell, for the second cluster\n",
    "im_prime = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)\n",
    "for i in clusters[1]:\n",
    "    el = sorted_detections[i]\n",
    "    keypoints_np = np.asarray(el.keypoints)\n",
    "    im_prime = visualize_whole_body(keypoints_np, im_prime)\n",
    "\n",
    "p_prime = plt.imshow(im_prime)\n",
    "display(p_prime)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}