Refactor code structure for improved readability and maintainability

2026-02-06 17:02:42 +08:00
parent aeff8fd5c2
commit 816d11a5f5
8 changed files with 2219 additions and 6 deletions
@@ -0,0 +1,255 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#   "numpy",
+#   "opencv-python",
+#   "trimesh",
+#   "awkward",
+#   "orjson",
+#   "click",
+# ]
+# ///
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, cast
+
+import awkward as ak
+import click
+import cv2
+import numpy as np
+import orjson
+import trimesh
+from cv2 import aruco
+from numpy.typing import NDArray
+
+
+@dataclass
+class Marker:
+    id: int
+    center: NDArray[np.float64]
+    corners: NDArray[np.float64]
+
+
+def normalize_point(
+    point: NDArray[Any], width: int, height: int
+) -> NDArray[np.float64]:
+    return cast(
+        NDArray[np.float64], point / np.array([width, height], dtype=np.float64)
+    )
+
+
+def flip_y(point: NDArray[Any], y_max: float = 1.0) -> NDArray[np.float64]:
+    return np.array([point[0], y_max - point[1]], dtype=np.float64)
+
+
+def detect_markers_as_uv(
+    input_image: Path,
+    dictionary: int,
+) -> list[Marker]:
+    frame = cv2.imread(str(input_image))
+    if frame is None:
+        raise FileNotFoundError(f"Failed to read image: {input_image}")
+
+    detector = aruco.ArucoDetector(
+        dictionary=aruco.getPredefinedDictionary(dictionary),
+        detectorParams=aruco.DetectorParameters(),
+    )
+    grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+    markers, ids, _ = detector.detectMarkers(grey)
+    if ids is None:
+        return []
+
+    markers = np.reshape(markers, (-1, 4, 2))
+    ids = np.reshape(ids, (-1, 1))
+    image_width = frame.shape[1]
+    image_height = frame.shape[0]
+
+    output_markers: list[Marker] = []
+    for m, marker_id in zip(markers, ids):
+        center = np.mean(m, axis=0)
+        output_markers.append(
+            Marker(
+                id=int(marker_id[0]),
+                center=flip_y(normalize_point(center, image_width, image_height)),
+                corners=np.array(
+                    [
+                        flip_y(normalize_point(corner, image_width, image_height))
+                        for corner in m
+                    ],
+                    dtype=np.float64,
+                ),
+            )
+        )
+
+    return output_markers
+
+
+def interpolate_uvs_to_3d(
+    uv_points: NDArray[np.float64],
+    vertices: NDArray[np.float64],
+    uvs: NDArray[np.float64],
+    faces: NDArray[np.int64],
+    epsilon: float = 1e-6,
+) -> NDArray[np.float64]:
+    results = np.full((uv_points.shape[0], 3), np.nan, dtype=np.float64)
+    for point_index, uv_point in enumerate(uv_points):
+        for face in faces:
+            uv_tri = uvs[face]
+            v_tri = vertices[face]
+            matrix = np.array(
+                [
+                    [uv_tri[0, 0] - uv_tri[2, 0], uv_tri[1, 0] - uv_tri[2, 0]],
+                    [uv_tri[0, 1] - uv_tri[2, 1], uv_tri[1, 1] - uv_tri[2, 1]],
+                ],
+                dtype=np.float64,
+            )
+            rhs = uv_point - uv_tri[2]
+            try:
+                w0, w1 = np.linalg.solve(matrix, rhs)
+            except np.linalg.LinAlgError:
+                continue
+            w2 = 1.0 - w0 - w1
+            if min(w0, w1, w2) >= -epsilon:
+                results[point_index] = w0 * v_tri[0] + w1 * v_tri[1] + w2 * v_tri[2]
+                break
+    return results
+
+
+def interpolate_uvs_to_3d_trimesh(
+    uv_points: NDArray[np.float64],
+    mesh: trimesh.Trimesh,
+    epsilon: float = 1e-6,
+) -> NDArray[np.float64]:
+    if mesh.visual is None:
+        raise ValueError("Mesh has no visual")
+    uv_data = cast(Any, mesh.visual).uv
+    if uv_data is None:
+        raise ValueError("Mesh has no UV")
+    return interpolate_uvs_to_3d(
+        uv_points=uv_points,
+        vertices=cast(NDArray[np.float64], mesh.vertices),
+        uvs=cast(NDArray[np.float64], uv_data),
+        faces=cast(NDArray[np.int64], mesh.faces),
+        epsilon=epsilon,
+    )
+
+
+def scale_mesh_for_box_size_mm(
+    mesh: trimesh.Trimesh,
+    box_size_mm: float,
+    unit_box_side: float = 2.0,
+) -> trimesh.Trimesh:
+    if box_size_mm <= 0:
+        raise ValueError("box_size_mm must be positive")
+    if unit_box_side <= 0:
+        raise ValueError("unit_box_side must be positive")
+
+    scale = (box_size_mm / 1000.0) / unit_box_side
+    scaled = mesh.copy()
+    scaled.vertices = cast(NDArray[np.float64], scaled.vertices * scale)
+    return scaled
+
+
+def marker_to_3d_coords(marker: Marker, mesh: trimesh.Trimesh) -> NDArray[np.float64]:
+    return interpolate_uvs_to_3d_trimesh(marker.corners, mesh)
+
+
+def parse_dictionary(value: str) -> int:
+    if not hasattr(aruco, value):
+        raise ValueError(f"Unknown aruco dictionary name: {value}")
+    return int(getattr(aruco, value))
+
+
+@click.command(
+    help="Convert draw_uv marker detections into 3D object points with real-world box sizing"
+)
+@click.option(
+    "--input-image",
+    type=click.Path(path_type=Path),
+    default=Path("merged_uv_layout.png"),
+    show_default=True,
+)
+@click.option(
+    "--mesh",
+    type=click.Path(path_type=Path),
+    default=Path("sample/standard_box.glb"),
+    show_default=True,
+)
+@click.option(
+    "--dictionary", type=str, default="DICT_APRILTAG_36H11", show_default=True
+)
+@click.option("--box-size-mm", type=float, default=600.0, show_default=True)
+@click.option("--unit-box-side", type=float, default=2.0, show_default=True)
+@click.option(
+    "--output-json",
+    type=click.Path(path_type=Path),
+    default=Path("output/aruco_2d_uv_coords_normalized.json"),
+    show_default=True,
+)
+@click.option(
+    "--output-parquet",
+    type=click.Path(path_type=Path),
+    default=Path("output/standard_box_markers.parquet"),
+    show_default=True,
+)
+def main(
+    input_image: Path,
+    mesh: Path,
+    dictionary: str,
+    box_size_mm: float,
+    unit_box_side: float,
+    output_json: Path,
+    output_parquet: Path,
+) -> None:
+    dictionary_value = parse_dictionary(dictionary)
+    output_markers = detect_markers_as_uv(input_image, dictionary_value)
+
+    output_json.parent.mkdir(parents=True, exist_ok=True)
+    output_json.write_bytes(
+        orjson.dumps(output_markers, option=orjson.OPT_SERIALIZE_NUMPY)
+    )
+
+    loaded = trimesh.load_mesh(mesh)
+    if isinstance(loaded, trimesh.Scene):
+        if not loaded.geometry:
+            raise ValueError("Scene has no geometry")
+        mesh = list(loaded.geometry.values())[0]
+    else:
+        mesh = loaded
+    if not isinstance(mesh, trimesh.Trimesh):
+        raise TypeError("Expected Trimesh or Scene with Trimesh geometry")
+
+    mesh = scale_mesh_for_box_size_mm(mesh, box_size_mm, unit_box_side)
+    id_to_3d_coords = {
+        marker.id: marker_to_3d_coords(marker, mesh) for marker in output_markers
+    }
+
+    face_to_ids = {
+        "bottom": [21],
+        "back": [22],
+        "top": [23],
+        "front": [24],
+        "right": [26],
+        "left": [25],
+    }
+    rows: list[dict[str, Any]] = []
+    for name, marker_ids in face_to_ids.items():
+        corners = np.array([id_to_3d_coords[marker_id] for marker_id in marker_ids])
+        rows.append(
+            {
+                "name": name,
+                "ids": np.array(marker_ids),
+                "corners": corners,
+            }
+        )
+
+    output_parquet.parent.mkdir(parents=True, exist_ok=True)
+    ak.to_parquet(rows, str(output_parquet))
+
+
+if __name__ == "__main__":
+    main()