ChArUcoBoardExp/find_cute_box_with_image.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "from pathlib import Path\n",
    "from typing import Any, Final, TypeAlias, cast, TypedDict\n",
    "\n",
    "import cv2\n",
    "import numpy as np\n",
    "from cv2 import aruco\n",
    "from cv2.typing import MatLike\n",
    "from loguru import logger\n",
    "from matplotlib import pyplot as plt\n",
    "from numpy.typing import ArrayLike\n",
    "from numpy.typing import NDArray as NDArrayT\n",
    "import orjson\n",
    "\n",
    "NDArray: TypeAlias = np.ndarray"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Image to scan for markers (path is relative to the notebook's working directory).\n",
    "INPUT_IMAGE: Final[Path] = Path(\"merged_uv_layout.png\")\n",
    "# Predefined ArUco dictionary handed to the detector: 7x7 markers.\n",
    "DICTIONARY: Final[int] = aruco.DICT_7X7_1000\n",
    "# Marker side length; the original note reads \"400mm\", so 0.4 is presumably metres.\n",
    "MARKER_LENGTH: Final[float] = 0.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the detector step by step: resolve the predefined dictionary selected\n",
    "# by DICTIONARY, then pair it with a default-constructed parameter set.\n",
    "aruco_dict = aruco.getPredefinedDictionary(DICTIONARY)\n",
    "detector_params = aruco.DetectorParameters()\n",
    "detector = aruco.ArucoDetector(dictionary=aruco_dict, detectorParams=detector_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cv2.imread does not raise when the file is missing or unreadable; it returns\n",
    "# None, which would otherwise only surface as an opaque error inside cvtColor.\n",
    "frame = cv2.imread(str(INPUT_IMAGE))\n",
    "if frame is None:\n",
    "    raise FileNotFoundError(f\"Could not read image: {INPUT_IMAGE.resolve()}\")\n",
    "# Marker detection is run on a single-channel (greyscale) copy of the image.\n",
    "grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n",
    "# pylint: disable-next=unpacking-non-sequence\n",
    "markers, ids, rejected = detector.detectMarkers(grey)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note: BGR\n",
    "RED = (0, 0, 255)\n",
    "GREEN = (0, 255, 0)\n",
    "BLUE = (255, 0, 0)\n",
    "YELLOW = (0, 255, 255)\n",
    "GREY = (128, 128, 128)\n",
    "CYAN = (255, 255, 0)\n",
    "MAGENTA = (255, 0, 255)\n",
    "ORANGE = (0, 165, 255)\n",
    "PINK = (147, 20, 255)\n",
    "\n",
    "UI_SCALE = 10\n",
    "UI_SCALE_FONT = 8\n",
    "UI_SCALE_FONT_WEIGHT = 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Annotate a copy so the loaded frame itself stays untouched.\n",
    "out = frame.copy()\n",
    "# Per the original notes, `markers` arrives as [N, 1, 4, 2] and `ids` as [N, 1];\n",
    "# both are flattened so that each marker is a plain (4, 2) array of pixel corners.\n",
    "if ids is not None:\n",
    "    markers = np.reshape(markers, (-1, 4, 2))\n",
    "    ids = np.reshape(ids, (-1, 1))\n",
    "    # One BGR colour per corner, applied in the order the corners are stored.\n",
    "    corner_colors = (RED, GREEN, BLUE, YELLOW)\n",
    "    dot_radius = 5 * UI_SCALE\n",
    "    for marker_corners, marker_id in zip(markers, ids):\n",
    "        # Filled dot on every corner (thickness -1 means filled).\n",
    "        for corner_color, corner in zip(corner_colors, marker_corners):\n",
    "            cv2.circle(out, corner.astype(int), dot_radius, corner_color, -1)\n",
    "        # Centre = mean of the four corners, truncated to whole pixels for drawing.\n",
    "        center = np.mean(marker_corners, axis=0).astype(int)  # type: ignore\n",
    "        anchor = tuple(center)\n",
    "        cv2.circle(out, anchor, dot_radius, CYAN, -1)\n",
    "        # Label the marker with its id at the centre point.\n",
    "        cv2.putText(\n",
    "            out,\n",
    "            str(marker_id),\n",
    "            anchor,\n",
    "            cv2.FONT_HERSHEY_SIMPLEX,\n",
    "            1 * UI_SCALE_FONT,\n",
    "            MAGENTA,\n",
    "            UI_SCALE_FONT_WEIGHT,\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Marker(TypedDict):\n",
    "    \"\"\"One detected marker expressed in UV space (origin bottom-left, range [0, 1]).\"\"\"\n",
    "\n",
    "    # Marker id as reported by the detector.\n",
    "    id: int\n",
    "    # (2,) UV position of the marker centre (mean of the four corners).\n",
    "    center: NDArray\n",
    "    # (4, 2) UV positions of the corners, in the order the detector returned them.\n",
    "    corners: NDArray\n",
    "\n",
    "\n",
    "# Pixel size of the source image; OpenCV image shape is (rows, cols, channels).\n",
    "IMAGE_WIDTH = frame.shape[1]\n",
    "IMAGE_HEIGHT = frame.shape[0]\n",
    "\n",
    "\n",
    "def normalize_point(point: NDArrayT[Any]) -> NDArrayT[np.float64]:\n",
    "    \"\"\"\n",
    "    Scale pixel coordinates into [0, 1] by dividing by the image size.\n",
    "\n",
    "    Args:\n",
    "        point: a single (x, y) point of shape [2], or a batch of shape [N, 2]\n",
    "\n",
    "    Returns:\n",
    "        Array of the same shape with x / IMAGE_WIDTH and y / IMAGE_HEIGHT.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `point` is neither 1- nor 2-dimensional.\n",
    "    \"\"\"\n",
    "    if point.ndim not in (1, 2):\n",
    "        raise ValueError(f\"Invalid shape: {point.shape}\")\n",
    "    # Broadcasting covers both the [2] and the [N, 2] case.\n",
    "    return point / np.array([IMAGE_WIDTH, IMAGE_HEIGHT])\n",
    "\n",
    "\n",
    "def flip_y(point: NDArrayT[Any], y_max: int) -> NDArrayT[Any]:\n",
    "    \"\"\"\n",
    "    Flip the y axis: y -> y_max - y.\n",
    "\n",
    "    OpenCV images have their origin at the top-left, whereas a UV layout has\n",
    "    its origin at the bottom-left.\n",
    "\n",
    "    Args:\n",
    "        point: array whose last axis is (x, y), e.g. shape [2] or [N, 2]\n",
    "        y_max: largest y value of the coordinate range (1 for normalized input)\n",
    "\n",
    "    Returns:\n",
    "        New array of the same shape with the y component flipped.\n",
    "    \"\"\"\n",
    "    return np.stack([point[..., 0], y_max - point[..., 1]], axis=-1)\n",
    "\n",
    "\n",
    "output_markers: list[Marker] = []\n",
    "if ids is not None:\n",
    "    # Reshape locally instead of relying on another cell having already rewritten\n",
    "    # `markers` / `ids` in place; reshaping already-flat data is a no-op.\n",
    "    corners_by_marker = np.reshape(markers, (-1, 4, 2))\n",
    "    flat_ids = np.reshape(ids, (-1,))\n",
    "    for corners_px, marker_id in zip(corners_by_marker, flat_ids):\n",
    "        corners_uv = flip_y(normalize_point(corners_px), 1)\n",
    "        output_markers.append(\n",
    "            {\n",
    "                # Plain Python int, matching the declared type of Marker.id.\n",
    "                \"id\": int(marker_id),\n",
    "                # Mean of the UV corners: keeps sub-pixel precision instead of\n",
    "                # truncating the centre to whole pixels before normalising.\n",
    "                \"center\": corners_uv.mean(axis=0),\n",
    "                \"corners\": corners_uv,\n",
    "            }\n",
    "        )\n",
    "\n",
    "# Note: despite the file name, the values written here are normalized UV coordinates.\n",
    "output_path = Path(\"output/aruco_3d_coords.json\")\n",
    "# Create the target directory on a fresh checkout instead of failing in open().\n",
    "output_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "with output_path.open(\"wb\") as f:\n",
    "    f.write(orjson.dumps(output_markers, option=orjson.OPT_SERIALIZE_NUMPY))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Matplotlib expects RGB channel order, whereas `out` is a BGR image.\n",
    "out_rgb = cv2.cvtColor(out, cv2.COLOR_BGR2RGB)\n",
    "plt.imshow(out_rgb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cv2.imwrite signals failure through its boolean return value, which is easy to\n",
    "# miss when the notebook is run end to end; turn a failed write into an error.\n",
    "OUTPUT_IMAGE = Path(\"merged_uv_layout_with_markers.png\")\n",
    "if not cv2.imwrite(str(OUTPUT_IMAGE), out):\n",
    "    raise OSError(f\"Failed to write annotated image: {OUTPUT_IMAGE}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Optional, Union\n",
    "\n",
    "import numpy as np\n",
    "import trimesh\n",
    "from jaxtyping import Float, Int, Num, jaxtyped\n",
    "from beartype import beartype\n",
    "\n",
    "@jaxtyped(typechecker=beartype)\n",
    "def interpolate_uvs_to_3d(\n",
    "    uv_points: Num[NDArray, \"N 2\"],\n",
    "    vertices: Num[NDArray, \"V 3\"],\n",
    "    uvs: Num[NDArray, \"V 2\"],\n",
    "    faces: Num[NDArray, \"F 3\"],\n",
    "    epsilon: float = 1e-6,\n",
    ") -> Num[NDArray, \"N 3\"]:\n",
    "    \"\"\"\n",
    "    Map multiple UV points to 3D coordinates using barycentric interpolation.\n",
    "\n",
    "    For every query point the triangles are tested in face order and the first\n",
    "    one whose barycentric weights are all >= -epsilon is used.\n",
    "\n",
    "    Args:\n",
    "        uv_points: (N, 2) array of UV coordinates in [0,1]\n",
    "        vertices: (V, 3) array of mesh vertex positions\n",
    "        uvs:      (V, 2) array of per-vertex UV coordinates\n",
    "        faces:    (F, 3) array of triangle vertex indices\n",
    "        epsilon:  barycentric inside-triangle tolerance\n",
    "\n",
    "    Returns:\n",
    "        (N, 3) array of interpolated 3D coordinates (NaNs if no triangle found)\n",
    "    \"\"\"\n",
    "    results = np.full((uv_points.shape[0], 3), np.nan, dtype=np.float64)\n",
    "    if uv_points.shape[0] == 0 or faces.shape[0] == 0:\n",
    "        return results\n",
    "\n",
    "    # Everything below depends only on the mesh, not on the query point, so it is\n",
    "    # gathered once instead of being rebuilt for every (point, face) pair.\n",
    "    tri_uv = np.asarray(uvs, dtype=np.float64)[faces]  # (F, 3, 2)\n",
    "    tri_xyz = np.asarray(vertices, dtype=np.float64)[faces]  # (F, 3, 3)\n",
    "    origin = tri_uv[:, 2]  # (F, 2), third UV corner of each triangle\n",
    "    edge0 = tri_uv[:, 0] - origin  # (F, 2)\n",
    "    edge1 = tri_uv[:, 1] - origin  # (F, 2)\n",
    "\n",
    "    # Determinant of the 2x2 system [edge0 edge1] @ (w0, w1) = uv_point - origin.\n",
    "    det = edge0[:, 0] * edge1[:, 1] - edge1[:, 0] * edge0[:, 1]\n",
    "    # Triangles that are degenerate in UV space have no solution and are skipped.\n",
    "    solvable = det != 0.0\n",
    "    safe_det = np.where(solvable, det, 1.0)  # placeholder avoids division by zero\n",
    "\n",
    "    for pi, uv_point in enumerate(np.asarray(uv_points, dtype=np.float64)):\n",
    "        rel = uv_point - origin  # (F, 2)\n",
    "        # Cramer's rule, evaluated for all triangles at once.\n",
    "        w0 = (rel[:, 0] * edge1[:, 1] - edge1[:, 0] * rel[:, 1]) / safe_det\n",
    "        w1 = (edge0[:, 0] * rel[:, 1] - rel[:, 0] * edge0[:, 1]) / safe_det\n",
    "        w2 = 1.0 - w0 - w1\n",
    "\n",
    "        inside = solvable & (np.minimum(np.minimum(w0, w1), w2) >= -epsilon)\n",
    "        hits = np.flatnonzero(inside)\n",
    "        if hits.size == 0:\n",
    "            continue  # no containing triangle: the row stays NaN\n",
    "\n",
    "        fi = hits[0]  # first matching triangle in face order\n",
    "        results[pi] = (\n",
    "            w0[fi] * tri_xyz[fi, 0] + w1[fi] * tri_xyz[fi, 1] + w2[fi] * tri_xyz[fi, 2]\n",
    "        )\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "@jaxtyped(typechecker=beartype)\n",
    "def interpolate_uvs_to_3d_trimesh(\n",
    "    uv_points: Num[NDArray, \"N 2\"],\n",
    "    mesh: Union[trimesh.Trimesh, trimesh.Scene],\n",
    "    epsilon: float = 1e-6,\n",
    ") -> Num[NDArray, \"N 3\"]:\n",
    "    \"\"\"\n",
    "    Wrapper for batched UV-to-3D interpolation using a trimesh mesh or scene.\n",
    "\n",
    "    Args:\n",
    "        uv_points: (N, 2) UV coordinates to convert\n",
    "        mesh: a Trimesh or Scene object; for a Scene only its first geometry is used\n",
    "        epsilon: barycentric epsilon tolerance\n",
    "\n",
    "    Returns:\n",
    "        (N, 3) array of 3D positions (NaN if outside mesh)\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the scene is empty, or the mesh has no visual or no UVs\n",
    "        TypeError: if the resolved geometry is not a Trimesh\n",
    "    \"\"\"\n",
    "    if isinstance(mesh, trimesh.Scene):\n",
    "        if len(mesh.geometry) == 0:\n",
    "            raise ValueError(\"Scene has no geometry.\")\n",
    "        # Take the first geometry without materialising the whole list.\n",
    "        mesh = next(iter(mesh.geometry.values()))\n",
    "\n",
    "    if not isinstance(mesh, trimesh.Trimesh):\n",
    "        raise TypeError(\"Expected a Trimesh or Scene with geometry.\")\n",
    "\n",
    "    visual = mesh.visual\n",
    "    if visual is None:\n",
    "        raise ValueError(\"Mesh does not have visual.\")\n",
    "\n",
    "    # Not every visual exposes a `uv` attribute; getattr turns a would-be\n",
    "    # AttributeError into the same ValueError as an unset UV array.\n",
    "    uv = getattr(visual, \"uv\", None)\n",
    "    if uv is None:\n",
    "        raise ValueError(\"Mesh does not have UVs.\")\n",
    "\n",
    "    return interpolate_uvs_to_3d(\n",
    "        uv_points=uv_points,\n",
    "        vertices=mesh.vertices,\n",
    "        uvs=uv,\n",
    "        faces=mesh.faces,\n",
    "        epsilon=epsilon,\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}