# Code cells of find_cute_box_with_image.ipynb that are visible in this
# excerpt, written out as a plain script ("# %%" marks a notebook cell).
# NOTE(review): the cells that load `frame`, detect `markers` / `ids` and draw
# `out` are not part of this excerpt; they must run before the cells below.

# %%
from datetime import datetime
from pathlib import Path
from typing import Any, Final, TypeAlias, cast, TypedDict

import cv2
import numpy as np
from cv2 import aruco
from cv2.typing import MatLike
from loguru import logger
from matplotlib import pyplot as plt
from numpy.typing import ArrayLike
from numpy.typing import NDArray as NDArrayT
import orjson

NDArray: TypeAlias = np.ndarray


# %%
class Marker(TypedDict):
    """One detected marker, expressed in normalized, y-flipped image space."""

    # Marker identifier taken from the detector's `ids` output.
    id: int
    # (2,) mean of the corner points, normalized by image size and y-flipped.
    center: NDArray
    # (K, 2) corner points, normalized by image size and y-flipped.
    corners: NDArray


output_markers: list[Marker] = []
if ids is not None:
    IMAGE_WIDTH = frame.shape[1]
    IMAGE_HEIGHT = frame.shape[0]

    def normalize_point(point: NDArrayT[Any]) -> NDArrayT[np.float64]:
        """
        Scale pixel coordinates by the image size (x / width, y / height).

        Args:
            point: a single point of shape [2] or a batch of shape [N, 2]

        Returns:
            Array of the same shape holding the normalized coordinates.

        Raises:
            ValueError: if `point` is neither 1- nor 2-dimensional.
        """
        if point.ndim not in (1, 2):
            raise ValueError(f"Invalid shape: {point.shape}")
        # Broadcasting covers both the [2] and the [N, 2] case.
        return point / np.array([IMAGE_WIDTH, IMAGE_HEIGHT])

    def flip_y(point: NDArrayT[Any], y_max: int) -> NDArrayT[Any]:
        """
        Flip the y axis of one point ([2]) or a batch of points ([N, 2]).

        Usually OpenCV image y-axis is inverted. (origin at top-left)
        In UV layout, the origin is at bottom-left.

        Args:
            point: array whose last axis is (x, y)
            y_max: value that the y coordinate is mirrored against

        Returns:
            New array of the same shape with y replaced by `y_max - y`.
        """
        pts = np.asarray(point)
        # Stacking on the last axis keeps [2] -> [2] and [N, 2] -> [N, 2].
        return np.stack([pts[..., 0], y_max - pts[..., 1]], axis=-1)

    # np.ravel accepts both an (N, 1) and a flat (N,) id array.
    for marker_corners, marker_id in zip(markers, np.ravel(ids)):
        # reshape(-1, 2) tolerates a leading singleton axis, e.g. (1, 4, 2).
        # NOTE(review): the layout of `markers` is set in a cell not shown
        # here -- confirm it holds per-marker corner arrays.
        corners_px = np.asarray(marker_corners, dtype=np.float64).reshape(-1, 2)
        corners_uv = flip_y(normalize_point(corners_px), 1)
        output_markers.append(
            {
                # Plain int so the value matches the TypedDict annotation.
                "id": int(marker_id),
                # Normalizing and flipping are affine, so the mean of the
                # transformed corners equals the transformed pixel center;
                # no integer truncation of the center is applied any more.
                "center": corners_uv.mean(axis=0),
                "corners": corners_uv,
            }
        )

# Create the target directory first so a fresh checkout does not fail here.
output_path = Path("output/aruco_3d_coords.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_bytes(
    orjson.dumps(output_markers, option=orjson.OPT_SERIALIZE_NUMPY)
)

# %%
# `out` is produced by a cell that is not part of this excerpt.
cv2.imwrite("merged_uv_layout_with_markers.png", out)

# %%
from typing import Optional, Union

import numpy as np
import trimesh
from jaxtyping import Float, Int, Num, jaxtyped
from beartype import beartype


@jaxtyped(typechecker=beartype)
def interpolate_uvs_to_3d(
    uv_points: Num[NDArray, "N 2"],
    vertices: Num[NDArray, "V 3"],
    uvs: Num[NDArray, "V 2"],
    faces: Num[NDArray, "F 3"],
    epsilon: float = 1e-6,
) -> Num[NDArray, "N 3"]:
    """
    Map multiple UV points to 3D coordinates using barycentric interpolation.

    For every UV point the faces are tested in order and the first triangle
    whose barycentric weights are all >= -epsilon supplies the 3D position.

    Args:
        uv_points: (N, 2) array of UV coordinates in [0,1]
        vertices: (V, 3) array of mesh vertex positions
        uvs: (V, 2) array of per-vertex UV coordinates
        faces: (F, 3) array of triangle vertex indices
        epsilon: barycentric inside-triangle tolerance

    Returns:
        (N, 3) array of interpolated 3D coordinates (NaNs if no triangle found)
    """
    results = np.full((uv_points.shape[0], 3), np.nan, dtype=np.float64)
    if uv_points.shape[0] == 0 or faces.shape[0] == 0:
        return results

    tri_uv = uvs[faces]  # (F, 3, 2)
    tri_xyz = vertices[faces]  # (F, 3, 3)

    # Per-face 2x2 system [e0 e1] @ (w0, w1) = uv_point - uv2. It does not
    # depend on the query point, so it is built once for all points.
    origin = tri_uv[:, 2]  # (F, 2)
    e0 = tri_uv[:, 0] - origin
    e1 = tri_uv[:, 1] - origin
    det = e0[:, 0] * e1[:, 1] - e1[:, 0] * e0[:, 1]

    # Triangles that are degenerate in UV space have no unique solution and
    # are skipped; a dummy divisor keeps the arithmetic below finite.
    solvable = det != 0
    safe_det = np.where(solvable, det, 1.0)

    with np.errstate(over="ignore", invalid="ignore"):
        for pi, uv_point in enumerate(uv_points):
            rel = uv_point - origin  # (F, 2)
            # Cramer's rule, evaluated for every face at once.
            w0 = (rel[:, 0] * e1[:, 1] - e1[:, 0] * rel[:, 1]) / safe_det
            w1 = (e0[:, 0] * rel[:, 1] - rel[:, 0] * e0[:, 1]) / safe_det
            w2 = 1.0 - w0 - w1

            inside = solvable & (np.minimum(np.minimum(w0, w1), w2) >= -epsilon)
            hits = np.flatnonzero(inside)
            if hits.size == 0:
                continue  # Row stays NaN: no triangle contains this point.

            fi = hits[0]  # First matching triangle wins.
            results[pi] = (
                w0[fi] * tri_xyz[fi, 0]
                + w1[fi] * tri_xyz[fi, 1]
                + w2[fi] * tri_xyz[fi, 2]
            )

    return results


@jaxtyped(typechecker=beartype)
def interpolate_uvs_to_3d_trimesh(
    uv_points: Num[NDArray, "N 2"],
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    epsilon: float = 1e-6,
) -> Num[NDArray, "N 3"]:
    """
    Wrapper for batched UV-to-3D interpolation using a trimesh mesh or scene.

    When a Scene is given, only the first entry of `mesh.geometry` is used.

    Args:
        uv_points: (N, 2) UV coordinates to convert
        mesh: a Trimesh or Scene object
        epsilon: barycentric epsilon tolerance

    Returns:
        (N, 3) array of 3D positions (NaN if outside mesh)

    Raises:
        ValueError: if the scene is empty, or the mesh has no visual or no UVs
        TypeError: if the selected geometry is not a Trimesh
    """
    if isinstance(mesh, trimesh.Scene):
        if not mesh.geometry:
            raise ValueError("Scene has no geometry.")
        # Take the first geometry without materializing the whole list.
        mesh = next(iter(mesh.geometry.values()))

    if not isinstance(mesh, trimesh.Trimesh):
        raise TypeError("Expected a Trimesh or Scene with geometry.")

    visual = mesh.visual
    if visual is None:
        raise ValueError("Mesh does not have visual.")

    # Visuals without texture coordinates may not define `uv` at all, so a
    # plain attribute access could raise AttributeError instead of ValueError.
    uv = getattr(visual, "uv", None)
    if uv is None:
        raise ValueError("Mesh does not have UVs.")

    return interpolate_uvs_to_3d(
        uv_points=uv_points,
        vertices=mesh.vertices,
        uvs=uv,
        faces=mesh.faces,
        epsilon=epsilon,
    )