Refactor Jupyter notebooks for marker processing: deleted boom.ipynb and compute_3d_maybe.ipynb, added calculate_box_coord_naive.ipynb, calculate_box_face_coord_naive.ipynb, estimate_extrinstic.ipynb, find_aruco_points_with_image.ipynb, find_aruco_points.py, and find_extrinsic_object.py. Updated .gitignore to include new output files.

2025-04-30 11:43:59 +08:00
parent 733c6f8670
commit c8f4a7ab26
15 changed files with 845 additions and 2586 deletions
--- a/preprocess_camera_parameters.ipynb
+++ b/preprocess_camera_parameters.ipynb
@ -0,0 +1,288 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "from cv2 import aruco\n",
+    "from datetime import datetime\n",
+    "from loguru import logger\n",
+    "from pathlib import Path\n",
+    "from typing import Optional, cast, Final\n",
+    "import awkward as ak\n",
+    "from cv2.typing import MatLike\n",
+    "import numpy as np\n",
+    "from matplotlib import pyplot as plt\n",
+    "import awkward as ak\n",
+    "from awkward import Record as AwkwardRecord, Array as AwkwardArray"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Shorthand used in annotations throughout the notebook.\n",
+    "NDArray = np.ndarray\n",
+    "\n",
+    "# ArUco configuration.\n",
+    "DICTIONARY: Final[int] = aruco.DICT_4X4_50\n",
+    "# 0.4 corresponds to the 400mm noted for the marker (i.e. presumably metres);\n",
+    "# this value is also used as the length of the drawn pose axes.\n",
+    "MARKER_LENGTH: Final[float] = 0.4\n",
+    "\n",
+    "# Inputs read from the output directory: object points and one calibration file per camera.\n",
+    "OBJECT_POINTS_PARQUET = Path(\"output\", \"object_points.parquet\")\n",
+    "A_CALIBRATION_PARQUET = Path(\"output\", \"a-ae_08.parquet\")\n",
+    "B_CALIBRATION_PARQUET = Path(\"output\", \"b-ae_09.parquet\")\n",
+    "C_CALIBRATION_PARQUET = Path(\"output\", \"c-af_03.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "aruco_dict = aruco.getPredefinedDictionary(DICTIONARY)\n",
+    "def read_camera_calibration(path: Path) -> tuple[MatLike, MatLike]:\n",
+    "    \"\"\"\n",
+    "    Load one camera's calibration from a parquet file.\n",
+    "\n",
+    "    Only the first record of the file is used.\n",
+    "\n",
+    "    Returns:\n",
+    "        `(camera_matrix, distortion_coefficients)` as numpy arrays.\n",
+    "    \"\"\"\n",
+    "    record = ak.from_parquet(path)[0]\n",
+    "    camera_matrix, distortion_coefficients = (\n",
+    "        cast(MatLike, ak.to_numpy(record[field]))\n",
+    "        for field in (\"camera_matrix\", \"distortion_coefficients\")\n",
+    "    )\n",
+    "    return camera_matrix, distortion_coefficients\n",
+    "\n",
+    "# Reference object points: marker ids and, per marker, four 3-component corner coordinates.\n",
+    "ops = ak.from_parquet(OBJECT_POINTS_PARQUET)\n",
+    "detector = aruco.ArucoDetector(\n",
+    "    dictionary=aruco_dict, detectorParams=aruco.DetectorParameters()\n",
+    ")\n",
+    "\n",
+    "total_ids = cast(NDArray, ak.to_numpy(ops[\"ids\"])).flatten()\n",
+    "total_corners = cast(NDArray, ak.to_numpy(ops[\"corners\"])).reshape(-1, 4, 3)\n",
+    "# strict=True: a mismatch between the number of ids and the number of corner sets\n",
+    "# raises ValueError instead of silently dropping the surplus entries.\n",
+    "# int(...): keys are plain Python ints, matching the declared dict[int, NDArray] type.\n",
+    "ops_map: dict[int, NDArray] = {\n",
+    "    int(marker_id): corners\n",
+    "    for marker_id, corners in zip(total_ids, total_corners, strict=True)\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process(\n",
+    "    frame: MatLike,\n",
+    "    cam_mtx: MatLike,\n",
+    "    dist_coeffs: MatLike,\n",
+    "    target: Optional[MatLike] = None,\n",
+    ") -> tuple[MatLike, Optional[MatLike], Optional[MatLike]]:\n",
+    "    \"\"\"\n",
+    "    Detect ArUco markers in `frame`, annotate them on `target`, and solve for the\n",
+    "    pose of the known marker layout (module-level `ops_map`) with `cv2.solvePnP`.\n",
+    "\n",
+    "    Args:\n",
+    "        frame: BGR image the module-level `detector` is run on.\n",
+    "        cam_mtx: camera matrix forwarded to `cv2.solvePnP` / `cv2.drawFrameAxes`.\n",
+    "        dist_coeffs: distortion coefficients forwarded alongside `cam_mtx`.\n",
+    "        target: image to draw the annotations on; defaults to a copy of `frame`.\n",
+    "\n",
+    "    Returns:\n",
+    "        `(target, rvec, tvec)`. `rvec` and `tvec` are `None` when no marker is\n",
+    "        detected, no detected id is present in `ops_map`, or `solvePnP` reports failure.\n",
+    "    \"\"\"\n",
+    "    # Drawing colours, BGR order.\n",
+    "    marker_color = (128, 128, 128)  # grey\n",
+    "    # red, green, blue, yellow: one colour per corner, in detection order\n",
+    "    corner_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255)]\n",
+    "\n",
+    "    if target is None:\n",
+    "        target = frame.copy()\n",
+    "    # Detect on the untouched input frame; a caller-supplied `target` is only a canvas.\n",
+    "    grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n",
+    "    # pylint: disable-next=unpacking-non-sequence\n",
+    "    markers, ids, _rejected = detector.detectMarkers(grey)\n",
+    "    # `markers` is [N, 1, 4, 2]\n",
+    "    # `ids` is [N, 1]\n",
+    "    if ids is None:\n",
+    "        return target, None, None\n",
+    "\n",
+    "    marker_quads = np.reshape(markers, (-1, 4, 2))\n",
+    "    # Flatten so each id is a scalar: int() on a 1-element array is deprecated in NumPy >= 1.25.\n",
+    "    flat_ids = np.reshape(ids, -1)\n",
+    "    ips_map: dict[int, NDArray] = {}\n",
+    "    for quad, raw_id in zip(marker_quads, flat_ids):\n",
+    "        marker_id = int(raw_id)\n",
+    "        quad = cast(NDArray, quad)\n",
+    "        ips_map[marker_id] = quad\n",
+    "        # OpenCV point arguments are passed as tuples of plain Python ints.\n",
+    "        center = tuple(int(v) for v in np.mean(quad, axis=0))\n",
+    "        cv2.circle(target, center, 5, marker_color, -1)\n",
+    "        cv2.putText(\n",
+    "            target,\n",
+    "            str(marker_id),\n",
+    "            center,\n",
+    "            cv2.FONT_HERSHEY_SIMPLEX,\n",
+    "            1,\n",
+    "            marker_color,\n",
+    "            2,\n",
+    "        )\n",
+    "        for color, corner in zip(corner_colors, quad):\n",
+    "            corner_px = tuple(int(v) for v in corner)\n",
+    "            target = cv2.circle(target, corner_px, 5, color, -1)\n",
+    "\n",
+    "    # https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga50620f0e26e02caa2e9adc07b5fbf24e\n",
+    "    # Collect matching 3D/2D corners, then concatenate once. The empty float32 seeds\n",
+    "    # keep the result well-formed (and the dtype promotion unchanged) when nothing matches.\n",
+    "    object_chunks: list[NDArray] = [np.empty((0, 3), dtype=np.float32)]\n",
+    "    image_chunks: list[NDArray] = [np.empty((0, 2), dtype=np.float32)]\n",
+    "    for marker_id, image_corners in ips_map.items():\n",
+    "        object_corners = ops_map.get(marker_id)\n",
+    "        if object_corners is None:\n",
+    "            logger.warning(\"No object points for id={}\", marker_id)\n",
+    "            continue\n",
+    "        assert image_corners.shape == (4, 2), f\"corners.shape={image_corners.shape}\"\n",
+    "        assert object_corners.shape == (4, 3), f\"op.shape={object_corners.shape}\"\n",
+    "        object_chunks.append(object_corners)\n",
+    "        image_chunks.append(image_corners)\n",
+    "    object_points = np.concatenate(object_chunks, axis=0)\n",
+    "    image_points = np.concatenate(image_chunks, axis=0)\n",
+    "    assert len(object_points) == len(\n",
+    "        image_points\n",
+    "    ), f\"len(ops)={len(object_points)} != len(ips)={len(image_points)}\"\n",
+    "\n",
+    "    ret_rvec: Optional[MatLike] = None\n",
+    "    ret_tvec: Optional[MatLike] = None\n",
+    "    if len(object_points) > 0:\n",
+    "        # https://docs.opencv.org/4.x/d5/d1f/calib3d_solvePnP.html\n",
+    "        # https://docs.opencv.org/4.x/d5/d1f/calib3d_solvePnP.html#calib3d_solvePnP_flags\n",
+    "        ret, rvec, tvec = cv2.solvePnP(\n",
+    "            objectPoints=object_points,\n",
+    "            imagePoints=image_points,\n",
+    "            cameraMatrix=cam_mtx,\n",
+    "            distCoeffs=dist_coeffs,\n",
+    "            flags=cv2.SOLVEPNP_SQPNP,\n",
+    "        )\n",
+    "        if ret:\n",
+    "            # Draw the solved coordinate frame so the pose can be checked visually.\n",
+    "            cv2.drawFrameAxes(\n",
+    "                target,\n",
+    "                cam_mtx,\n",
+    "                dist_coeffs,\n",
+    "                rvec,\n",
+    "                tvec,\n",
+    "                MARKER_LENGTH,\n",
+    "            )\n",
+    "            ret_rvec = rvec\n",
+    "            ret_tvec = tvec\n",
+    "    return target, ret_rvec, ret_tvec"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "A_IMG = Path(\"dumped/batch_three/video-20241224-154256-a.png\")\n",
+    "B_IMG = Path(\"dumped/batch_three/video-20241224-154302-b.png\")\n",
+    "C_IMG = Path(\"dumped/batch_three/video-20241224-154252-c.png\")\n",
+    "C_PRIME_IMG = Path(\"dumped/batch_three/video-20241224-153926-c-prime.png\")\n",
+    "\n",
+    "\n",
+    "def read_image(path: Path) -> MatLike:\n",
+    "    \"\"\"\n",
+    "    Load an image with `cv2.imread`.\n",
+    "\n",
+    "    Raises:\n",
+    "        FileNotFoundError: if OpenCV could not read the file. `cv2.imread` signals\n",
+    "            this by returning `None`, which would otherwise only fail later on.\n",
+    "    \"\"\"\n",
+    "    image = cv2.imread(str(path))\n",
+    "    if image is None:\n",
+    "        raise FileNotFoundError(f\"Could not read image: {path}\")\n",
+    "    return image\n",
+    "\n",
+    "\n",
+    "a_img = read_image(A_IMG)\n",
+    "b_img = read_image(B_IMG)\n",
+    "c_img = read_image(C_IMG)\n",
+    "c_prime_img = read_image(C_PRIME_IMG)\n",
+    "\n",
+    "# The C and C-prime frames share camera C's calibration in the cells below.\n",
+    "a_mtx, a_dist = read_camera_calibration(A_CALIBRATION_PARQUET)\n",
+    "b_mtx, b_dist = read_camera_calibration(B_CALIBRATION_PARQUET)\n",
+    "c_mtx, c_dist = read_camera_calibration(C_CALIBRATION_PARQUET)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Camera A: annotated frame plus the solved pose (rvec/tvec stay None when no pose was found).\n",
+    "a_result_img, a_rvec, a_tvec = process(frame=a_img, cam_mtx=a_mtx, dist_coeffs=a_dist)\n",
+    "# plt.imshow(cv2.cvtColor(a_result_img, cv2.COLOR_BGR2RGB))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Camera B: annotated frame plus the solved pose (rvec/tvec stay None when no pose was found).\n",
+    "b_result_img, b_rvec, b_tvec = process(frame=b_img, cam_mtx=b_mtx, dist_coeffs=b_dist)\n",
+    "# plt.imshow(cv2.cvtColor(b_result_img, cv2.COLOR_BGR2RGB))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Camera C and the C-prime frame are both processed with camera C's calibration.\n",
+    "c_result_img, c_rvec, c_tvec = process(frame=c_img, cam_mtx=c_mtx, dist_coeffs=c_dist)\n",
+    "c_prime_result_img, c_prime_rvec, c_prime_tvec = process(\n",
+    "    frame=c_prime_img, cam_mtx=c_mtx, dist_coeffs=c_dist\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One row per processed frame: (name, rvec, tvec, camera_matrix, distortion_coefficients).\n",
+    "# The c-prime row reuses camera C's intrinsics.\n",
+    "camera_views = [\n",
+    "    (\"a-ae_08\", a_rvec, a_tvec, a_mtx, a_dist),\n",
+    "    (\"b-ae_09\", b_rvec, b_tvec, b_mtx, b_dist),\n",
+    "    (\"c-af_03\", c_rvec, c_tvec, c_mtx, c_dist),\n",
+    "    (\"c-prime-af_03\", c_prime_rvec, c_prime_tvec, c_mtx, c_dist),\n",
+    "]\n",
+    "\n",
+    "# `process` returns None for rvec/tvec when no pose could be solved; make that visible\n",
+    "# instead of letting it pass unnoticed into the saved file.\n",
+    "missing_pose = [\n",
+    "    name for name, rvec, tvec, *_ in camera_views if rvec is None or tvec is None\n",
+    "]\n",
+    "if missing_pose:\n",
+    "    logger.warning(\"No pose (rvec/tvec) estimated for: {}\", missing_pose)\n",
+    "\n",
+    "params = AwkwardArray(\n",
+    "    [\n",
+    "        {\n",
+    "            \"name\": name,\n",
+    "            \"rvec\": rvec,\n",
+    "            \"tvec\": tvec,\n",
+    "            \"camera_matrix\": mtx,\n",
+    "            \"distortion_coefficients\": dist,\n",
+    "        }\n",
+    "        for name, rvec, tvec, mtx, dist in camera_views\n",
+    "    ]\n",
+    ")\n",
+    "display(\"params\", params)\n",
+    "ak.to_parquet(params, Path(\"output\") / \"params.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Annotated frames keyed by their destination path.\n",
+    "result_images = {\n",
+    "    \"output/a_result_img.png\": a_result_img,\n",
+    "    \"output/b_result_img.png\": b_result_img,\n",
+    "    \"output/c_result_img.png\": c_result_img,\n",
+    "    \"output/c_prime_result_img.png\": c_prime_result_img,\n",
+    "}\n",
+    "for out_path, image in result_images.items():\n",
+    "    # cv2.imwrite reports failure through its boolean return value, so check it.\n",
+    "    if not cv2.imwrite(out_path, image):\n",
+    "        raise OSError(f\"Failed to write {out_path}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}