# %% Imports
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Any, Final, Optional, TypeAlias, TypedDict, Union, cast

import cv2
import numpy as np
import orjson
import trimesh
from beartype import beartype
from cv2 import aruco
from cv2.typing import MatLike
from jaxtyping import Float, Int, Num, jaxtyped
from loguru import logger
from matplotlib import pyplot as plt
from numpy.typing import ArrayLike
from numpy.typing import NDArray as NDArrayT

NDArray: TypeAlias = np.ndarray

# %% Configuration
INPUT_IMAGE = Path("merged_uv_layout.png")
# Files written / read further down; collected here so they are easy to change.
OUTPUT_JSON = Path("output/aruco_2d_uv_coords_normalized.json")
ANNOTATED_IMAGE = Path("merged_uv_layout_with_markers.png")
MESH_PATH = Path("sample/standard_box.glb")
# 7x7
DICTIONARY: Final[int] = aruco.DICT_7X7_1000
# 400mm
MARKER_LENGTH: Final[float] = 0.4

# %% ArUco detector
aruco_dict = aruco.getPredefinedDictionary(DICTIONARY)
detector = aruco.ArucoDetector(
    dictionary=aruco_dict, detectorParams=aruco.DetectorParameters()
)

# %% Load the UV layout image and detect markers
frame = cv2.imread(str(INPUT_IMAGE))
if frame is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {INPUT_IMAGE}")
grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# pylint: disable-next=unpacking-non-sequence
markers, ids, rejected = detector.detectMarkers(grey)

# `markers` is [N, 1, 4, 2]
# `ids` is [N, 1]
# Flatten once, right after detection, so that every later cell can rely on
# `markers` being [N, 4, 2] no matter which cells were executed before it.
if ids is not None:
    markers = np.reshape(markers, (-1, 4, 2))
    ids = np.reshape(ids, (-1, 1))

# %% Drawing constants
# Note: BGR
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)
YELLOW = (0, 255, 255)
GREY = (128, 128, 128)
CYAN = (255, 255, 0)
MAGENTA = (255, 0, 255)
ORANGE = (0, 165, 255)
PINK = (147, 20, 255)

UI_SCALE = 10
UI_SCALE_FONT = 8
UI_SCALE_FONT_WEIGHT = 20

# One colour per corner, in the order the detector reports the corners.
CORNER_COLORS = [RED, GREEN, BLUE, YELLOW]

# %% Annotate a copy of the image with the detected markers
out = frame.copy()
if ids is not None:
    for marker_corners, marker_id in zip(markers, ids):
        # Plain Python ints: the drawing calls expect integer pixel positions.
        center_px = tuple(int(v) for v in np.mean(marker_corners, axis=0))
        for color, corner in zip(CORNER_COLORS, marker_corners):
            corner_px = tuple(int(v) for v in corner)
            cv2.circle(out, corner_px, 5 * UI_SCALE, color, -1)
        cv2.circle(out, center_px, 5 * UI_SCALE, CYAN, -1)
        cv2.putText(
            out,
            # `marker_id` is a length-1 array; str() of it would render "[7]".
            str(int(marker_id[0])),
            center_px,
            cv2.FONT_HERSHEY_SIMPLEX,
            1 * UI_SCALE_FONT,
            MAGENTA,
            UI_SCALE_FONT_WEIGHT,
        )


# %% Export normalised UV coordinates of every marker
@jaxtyped(typechecker=beartype)
@dataclass
class Marker:
    """A detected marker expressed in normalised, y-flipped image coordinates."""

    # Marker id as reported by the detector.
    id: int
    # Mean of the four corners.
    center: Num[NDArray, "2"]
    # The four corners, in detector order.
    corners: Num[NDArray, "4 2"]


IMAGE_WIDTH = frame.shape[1]
IMAGE_HEIGHT = frame.shape[0]


def normalize_point(point: NDArrayT[Any]) -> NDArrayT[np.float64]:
    """
    Divide pixel coordinates by the image size.

    input could be: [N, 2] or [2]

    Raises:
        ValueError: if `point` is neither 1-D nor 2-D.
    """
    if point.ndim not in (1, 2):
        raise ValueError(f"Invalid shape: {point.shape}")
    # Broadcasting covers both the single-point and the [N, 2] case.
    return point / np.array([IMAGE_WIDTH, IMAGE_HEIGHT])


def flip_y(point: NDArrayT[Any], y_max: float) -> NDArrayT[Any]:
    """
    flip y axis;

    Usually OpenCV image y-axis is inverted. (origin at top-left)
    In UV layout, the origin is at bottom-left.

    Works on a single [2] point as well as on an [N, 2] array of points;
    the last axis is interpreted as (x, y).
    """
    return np.stack([point[..., 0], y_max - point[..., 1]], axis=-1)


output_markers: list[Marker] = []
if ids is not None:
    for marker_corners, marker_id in zip(markers, ids):
        corners_uv = flip_y(normalize_point(marker_corners), 1)
        output_markers.append(
            Marker(
                id=int(marker_id[0]),
                # Normalising and flipping are affine, so the mean of the
                # converted corners is the converted centre, and it keeps the
                # same sub-pixel precision as the corners.
                center=corners_uv.mean(axis=0),
                corners=corners_uv,
            )
        )

# The target directory may not exist yet on a fresh checkout.
OUTPUT_JSON.parent.mkdir(parents=True, exist_ok=True)
with open(OUTPUT_JSON, "wb") as f:
    f.write(orjson.dumps(output_markers, option=orjson.OPT_SERIALIZE_NUMPY))

# %% Show the annotated image
plt.imshow(cv2.cvtColor(out, cv2.COLOR_BGR2RGB))

# %% Save the annotated image
# cv2.imwrite reports failure through its return value, not an exception.
if not cv2.imwrite(str(ANNOTATED_IMAGE), out):
    raise OSError(f"Could not write image: {ANNOTATED_IMAGE}")


# %% UV -> 3D interpolation
@jaxtyped(typechecker=beartype)
def interpolate_uvs_to_3d(
    uv_points: Num[NDArray, "N 2"],
    vertices: Num[NDArray, "V 3"],
    uvs: Num[NDArray, "V 2"],
    faces: Num[NDArray, "F 3"],
    epsilon: float = 1e-6,
) -> Num[NDArray, "N 3"]:
    """
    Map multiple UV points to 3D coordinates using barycentric interpolation.

    For every UV point the faces are tested in order and the first triangle
    whose barycentric weights are all >= -epsilon is used. Triangles that are
    degenerate in UV space (zero determinant) are skipped.

    Args:
        uv_points: (N, 2) array of UV coordinates in [0,1]
        vertices: (V, 3) array of mesh vertex positions
        uvs: (V, 2) array of per-vertex UV coordinates
        faces: (F, 3) array of triangle vertex indices
        epsilon: barycentric inside-triangle tolerance

    Returns:
        (N, 3) array of interpolated 3D coordinates (NaNs if no triangle found)
    """
    results = np.full((uv_points.shape[0], 3), np.nan, dtype=np.float64)
    if faces.shape[0] == 0:
        return results

    # Everything below depends only on the mesh, so it is computed once
    # instead of once per (point, face) pair.
    tri_uv = uvs[faces]  # (F, 3, 2)
    tri_xyz = vertices[faces]  # (F, 3, 3)
    edge0 = tri_uv[:, 0] - tri_uv[:, 2]  # (F, 2)
    edge1 = tri_uv[:, 1] - tri_uv[:, 2]  # (F, 2)
    # Determinant of the 2x2 system [edge0 edge1] @ [w0, w1] = uv - uv_tri[2].
    det = edge0[:, 0] * edge1[:, 1] - edge1[:, 0] * edge0[:, 1]
    solvable = det != 0.0
    # Placeholder denominator for degenerate faces; they are masked out below.
    safe_det = np.where(solvable, det, 1.0)

    for pi, uv_point in enumerate(uv_points):
        delta = uv_point - tri_uv[:, 2]  # (F, 2)
        # Cramer's rule, evaluated for all faces at once.
        w0 = (delta[:, 0] * edge1[:, 1] - edge1[:, 0] * delta[:, 1]) / safe_det
        w1 = (edge0[:, 0] * delta[:, 1] - delta[:, 0] * edge0[:, 1]) / safe_det
        w2 = 1.0 - w0 - w1
        inside = solvable & (np.minimum(np.minimum(w0, w1), w2) >= -epsilon)
        hits = np.flatnonzero(inside)
        if hits.size:
            first = hits[0]  # Stop at the first matching triangle
            results[pi] = (
                w0[first] * tri_xyz[first, 0]
                + w1[first] * tri_xyz[first, 1]
                + w2[first] * tri_xyz[first, 2]
            )

    return results


@jaxtyped(typechecker=beartype)
def interpolate_uvs_to_3d_trimesh(
    uv_points: Num[NDArray, "N 2"],
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    epsilon: float = 1e-6,
) -> Num[NDArray, "N 3"]:
    """
    Wrapper for batched UV-to-3D interpolation using a trimesh mesh or scene.

    When a Scene is given, only its first geometry is used, exactly as stored
    in `scene.geometry` (no scene-graph transform is applied here).

    Args:
        uv_points: (N, 2) UV coordinates to convert
        mesh: a Trimesh or Scene object
        epsilon: barycentric epsilon tolerance

    Returns:
        (N, 3) array of 3D positions (NaN if outside mesh)

    Raises:
        ValueError: if the scene is empty, or the mesh has no visual / no UVs.
        TypeError: if the selected geometry is not a Trimesh.
    """
    if isinstance(mesh, trimesh.Scene):
        if len(mesh.geometry) == 0:
            raise ValueError("Scene has no geometry.")
        mesh = next(iter(mesh.geometry.values()))

    if not isinstance(mesh, trimesh.Trimesh):
        raise TypeError("Expected a Trimesh or Scene with geometry.")

    if mesh.visual is None:
        raise ValueError("Mesh does not have visual.")

    # Not every visual kind exposes a `uv` attribute; getattr turns a missing
    # attribute into the intended ValueError instead of an AttributeError.
    mesh_uv = getattr(mesh.visual, "uv", None)
    if mesh_uv is None:
        raise ValueError("Mesh does not have UVs.")

    return interpolate_uvs_to_3d(
        uv_points=uv_points,
        vertices=mesh.vertices,
        uvs=mesh_uv,
        faces=mesh.faces,
        epsilon=epsilon,
    )


# %% Map every marker's corners onto the mesh
# Named `box_mesh` (not `m`) so it cannot be confused with a loop variable.
box_mesh = trimesh.load_mesh(str(MESH_PATH))


def marker_to_3d_coords(
    marker: Marker, mesh: Union[trimesh.Trimesh, trimesh.Scene]
):
    """Return the (4, 3) 3D positions of `marker`'s corners on `mesh`."""
    return interpolate_uvs_to_3d_trimesh(marker.corners, mesh)


id_to_3d_coords = {
    marker.id: marker_to_3d_coords(marker, box_mesh) for marker in output_markers
}
# note that the glb is Y up
# when visualizing with matplotlib, it's Z up
OPEN_GL_TO_BLENDER = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
# The matrix is a pure rotation, so its inverse equals its transpose
# (which is NOT the same as the matrix itself).
# `display` is the builtin provided by the Jupyter/IPython kernel.
display(np.linalg.inv(OPEN_GL_TO_BLENDER))

# %% Colour helpers
# matplotlib default colors scheme
colors: list[str] = plt.rcParams["axes.prop_cycle"].by_key()["color"]


def hex_to_rgb(hex_color: str) -> tuple[float, float, float]:
    """Convert a "#rrggbb" string into an (r, g, b) tuple of floats in [0, 1]."""
    assert hex_color.startswith("#")
    assert len(hex_color) == 7
    return tuple(int(hex_color[i : i + 2], 16) / 255.0 for i in (1, 3, 5))  # type: ignore


# %% Face table and 3D visualisation
@dataclass
class Face:
    """A box face: its plot colour and the ids of the markers placed on it."""

    color: tuple[float, float, float]
    marker_ids: list[int]


# NOTE(review): `layout` is not referenced anywhere else in this notebook.
# fmt: off
layout:list[list[Optional[int]]] = [
    [None, None, 0,    None, None],
    [None, None, 1,    None, None],
    [None, 5,    2,    4,    None],
    [None, None, 3,    None, None],
]
# fmt: on

faces = {
    "bottom": Face(color=hex_to_rgb(colors[0]), marker_ids=[0, 1, 2, 3]),
    "back": Face(color=hex_to_rgb(colors[1]), marker_ids=[4, 5, 6, 7]),
    "top": Face(color=hex_to_rgb(colors[2]), marker_ids=[8, 9, 10, 11]),
    "front": Face(color=hex_to_rgb(colors[3]), marker_ids=[12, 13, 14, 15]),
    "right": Face(color=hex_to_rgb(colors[4]), marker_ids=[16, 17, 18, 19]),
    "left": Face(color=hex_to_rgb(colors[5]), marker_ids=[20, 21, 22, 23]),
}


@lru_cache
def get_face_by_marker_id(marker_id: int) -> Optional[Face]:
    """Return the Face whose `marker_ids` contains `marker_id`, or None."""
    for face in faces.values():
        if marker_id in face.marker_ids:
            return face
    return None


# 3D Visualization (with flipped and fully valid data)
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection="3d")

for tag_id, corners in id_to_3d_coords.items():
    corners = np.array(corners)
    if np.isnan(corners).any():
        # interpolate_uvs_to_3d leaves NaN where no triangle was found.
        logger.warning("marker {}: no UV triangle found for a corner, skipped", tag_id)
        continue
    face = get_face_by_marker_id(tag_id)
    assert face is not None, f"marker {tag_id} is not assigned to any face"
    color = face.color
    # Rotate all four corners at once: row-wise `R @ p` is `corners @ R.T`.
    outline = corners @ OPEN_GL_TO_BLENDER.T
    # Repeat the first corner so the outline is drawn as a closed loop.
    closed = np.vstack([outline, outline[:1]])
    ax.plot(closed[:, 0], closed[:, 1], closed[:, 2], color=color)
    center = outline.mean(axis=0)
    ax.scatter(*center, color=color)
    ax.text(*center, str(tag_id), fontsize=9, color="black")  # type: ignore

ax.set_box_aspect([1, 1, 1])  # type: ignore
ax.set_title("ArUco Corners in 3D")
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")  # type: ignore

# Set the viewing angle
# ax.view_init(elev=60, azim=35)  # type: ignore

plt.show()