Refactor find_cute_box_with_image.ipynb to reset execution counts, enhance type annotations, and add new functions for marker processing and UV-to-3D interpolation. Introduce a TypedDict for marker representation and improve code organization.
This commit is contained in:
@ -2,26 +2,30 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 50,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import cv2\n",
|
|
||||||
"from cv2 import aruco\n",
|
|
||||||
"from datetime import datetime\n",
|
"from datetime import datetime\n",
|
||||||
"from loguru import logger\n",
|
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"from typing import cast, Final, TypeAlias\n",
|
"from typing import Any, Final, TypeAlias, cast, TypedDict\n",
|
||||||
"from cv2.typing import MatLike\n",
|
"\n",
|
||||||
"from matplotlib import pyplot as plt\n",
|
"import cv2\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
|
"from cv2 import aruco\n",
|
||||||
|
"from cv2.typing import MatLike\n",
|
||||||
|
"from loguru import logger\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"from numpy.typing import ArrayLike\n",
|
||||||
|
"from numpy.typing import NDArray as NDArrayT\n",
|
||||||
|
"import orjson\n",
|
||||||
"\n",
|
"\n",
|
||||||
"NDArray: TypeAlias = np.ndarray"
|
"NDArray: TypeAlias = np.ndarray"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 51,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -34,7 +38,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 52,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -46,7 +50,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 53,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -58,7 +62,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 54,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -80,7 +84,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 55,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -111,6 +115,57 @@
|
|||||||
" )"
|
" )"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class Marker(TypedDict):\n",
|
||||||
|
" id: int\n",
|
||||||
|
" center: NDArray\n",
|
||||||
|
" corners: NDArray\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"output_markers: list[Marker] = []\n",
|
||||||
|
"if ids is not None:\n",
|
||||||
|
" IMAGE_WIDTH = frame.shape[1]\n",
|
||||||
|
" IMAGE_HEIGHT = frame.shape[0]\n",
|
||||||
|
"\n",
|
||||||
|
" def normalize_point(point: NDArrayT[Any]) -> NDArrayT[np.float64]:\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" input could be: [N, 2] or [2]\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" if point.ndim == 1:\n",
|
||||||
|
" return point / np.array([IMAGE_WIDTH, IMAGE_HEIGHT])\n",
|
||||||
|
" elif point.ndim == 2:\n",
|
||||||
|
" return point / np.array([IMAGE_WIDTH, IMAGE_HEIGHT])\n",
|
||||||
|
" else:\n",
|
||||||
|
" raise ValueError(f\"Invalid shape: {point.shape}\")\n",
|
||||||
|
"\n",
|
||||||
|
" def flip_y(point: NDArrayT[Any], y_max: int) -> NDArrayT[Any]:\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" flip y axis;\n",
|
||||||
|
"\n",
|
||||||
|
" Usually OpenCV image y-axis is inverted. (origin at top-left)\n",
|
||||||
|
" In UV layout, the origin is at bottom-left.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" return np.array([point[0], y_max - point[1]])\n",
|
||||||
|
"\n",
|
||||||
|
" for m, i in zip(markers, ids):\n",
|
||||||
|
" center = np.mean(m, axis=0).astype(int) # type: ignore\n",
|
||||||
|
" output_markers.append(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"id\": i[0],\n",
|
||||||
|
" \"center\": flip_y(normalize_point(center), 1),\n",
|
||||||
|
" \"corners\": np.array([flip_y(normalize_point(c), 1) for c in m]),\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"output/aruco_3d_coords.json\", \"wb\") as f:\n",
|
||||||
|
" f.write(orjson.dumps(output_markers, option=orjson.OPT_SERIALIZE_NUMPY))"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -128,6 +183,107 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"cv2.imwrite(\"merged_uv_layout_with_markers.png\", out)"
|
"cv2.imwrite(\"merged_uv_layout_with_markers.png\", out)"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from typing import Optional, Union\n",
|
||||||
|
"\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import trimesh\n",
|
||||||
|
"from jaxtyping import Float, Int, Num, jaxtyped\n",
|
||||||
|
"from beartype import beartype\n",
|
||||||
|
"\n",
|
||||||
|
"@jaxtyped(typechecker=beartype)\n",
|
||||||
|
"def interpolate_uvs_to_3d(\n",
|
||||||
|
" uv_points: Num[NDArray, \"N 2\"],\n",
|
||||||
|
" vertices: Num[NDArray, \"V 3\"],\n",
|
||||||
|
" uvs: Num[NDArray, \"V 2\"],\n",
|
||||||
|
" faces: Num[NDArray, \"F 3\"],\n",
|
||||||
|
" epsilon: float = 1e-6,\n",
|
||||||
|
") -> Num[NDArray, \"N 3\"]:\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Map multiple UV points to 3D coordinates using barycentric interpolation.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" uv_points: (N, 2) array of UV coordinates in [0,1]\n",
|
||||||
|
" vertices: (V, 3) array of mesh vertex positions\n",
|
||||||
|
" uvs: (V, 2) array of per-vertex UV coordinates\n",
|
||||||
|
" faces: (F, 3) array of triangle vertex indices\n",
|
||||||
|
" epsilon: barycentric inside-triangle tolerance\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" (N, 3) array of interpolated 3D coordinates (NaNs if no triangle found)\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" results = np.full((uv_points.shape[0], 3), np.nan, dtype=np.float64)\n",
|
||||||
|
"\n",
|
||||||
|
" for pi, uv_point in enumerate(uv_points):\n",
|
||||||
|
" for face in faces:\n",
|
||||||
|
" uv_tri = uvs[face] # (3,2)\n",
|
||||||
|
" v_tri = vertices[face] # (3,3)\n",
|
||||||
|
"\n",
|
||||||
|
" A = np.array(\n",
|
||||||
|
" [\n",
|
||||||
|
" [uv_tri[0, 0] - uv_tri[2, 0], uv_tri[1, 0] - uv_tri[2, 0]],\n",
|
||||||
|
" [uv_tri[0, 1] - uv_tri[2, 1], uv_tri[1, 1] - uv_tri[2, 1]],\n",
|
||||||
|
" ]\n",
|
||||||
|
" )\n",
|
||||||
|
" b = uv_point - uv_tri[2]\n",
|
||||||
|
"\n",
|
||||||
|
" try:\n",
|
||||||
|
" w0, w1 = np.linalg.solve(A, b)\n",
|
||||||
|
" w2 = 1.0 - w0 - w1\n",
|
||||||
|
" if min(w0, w1, w2) >= -epsilon:\n",
|
||||||
|
" results[pi] = w0 * v_tri[0] + w1 * v_tri[1] + w2 * v_tri[2]\n",
|
||||||
|
" break # Stop after first matching triangle\n",
|
||||||
|
" except np.linalg.LinAlgError:\n",
|
||||||
|
" continue\n",
|
||||||
|
"\n",
|
||||||
|
" return results\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"@jaxtyped(typechecker=beartype)\n",
|
||||||
|
"def interpolate_uvs_to_3d_trimesh(\n",
|
||||||
|
" uv_points: Num[NDArray, \"N 2\"],\n",
|
||||||
|
" mesh: Union[trimesh.Trimesh, trimesh.Scene],\n",
|
||||||
|
" epsilon: float = 1e-6,\n",
|
||||||
|
") -> Num[NDArray, \"N 3\"]:\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Wrapper for batched UV-to-3D interpolation using a trimesh mesh or scene.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" uv_points: (N, 2) UV coordinates to convert\n",
|
||||||
|
" mesh: a Trimesh or Scene object\n",
|
||||||
|
" epsilon: barycentric epsilon tolerance\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" (N, 3) array of 3D positions (NaN if outside mesh)\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" if isinstance(mesh, trimesh.Scene):\n",
|
||||||
|
" if len(mesh.geometry) == 0:\n",
|
||||||
|
" raise ValueError(\"Scene has no geometry.\")\n",
|
||||||
|
" mesh = list(mesh.geometry.values())[0]\n",
|
||||||
|
"\n",
|
||||||
|
" if not isinstance(mesh, trimesh.Trimesh):\n",
|
||||||
|
" raise TypeError(\"Expected a Trimesh or Scene with geometry.\")\n",
|
||||||
|
"\n",
|
||||||
|
" if mesh.visual is None:\n",
|
||||||
|
" raise ValueError(\"Mesh does not have visual.\")\n",
|
||||||
|
"\n",
|
||||||
|
" if mesh.visual.uv is None:\n",
|
||||||
|
" raise ValueError(\"Mesh does not have UVs.\")\n",
|
||||||
|
"\n",
|
||||||
|
" return interpolate_uvs_to_3d(\n",
|
||||||
|
" uv_points=uv_points,\n",
|
||||||
|
" vertices=mesh.vertices,\n",
|
||||||
|
" uvs=mesh.visual.uv,\n",
|
||||||
|
" faces=mesh.faces,\n",
|
||||||
|
" epsilon=epsilon,\n",
|
||||||
|
" )"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
Reference in New Issue
Block a user