From 3f89431434d34ca77f5e14359e64d5c7e65ea90b Mon Sep 17 00:00:00 2001 From: crosstyan Date: Wed, 24 Dec 2025 15:37:35 +0800 Subject: [PATCH] init --- .gitattributes | 4 + README.md | 229 ++++++++++++++ figures/Fig2_anno.webp | 3 + figures/sample.jpg | 3 + scripts/client_example.py | 122 ++++++++ scripts/client_example.sh | 95 ++++++ scripts/vis_whole_body.py | 624 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 1080 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 figures/Fig2_anno.webp create mode 100644 figures/sample.jpg create mode 100644 scripts/client_example.py create mode 100755 scripts/client_example.sh create mode 100644 scripts/vis_whole_body.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b1b4dc7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..dcd6818 --- /dev/null +++ b/README.md @@ -0,0 +1,229 @@ +# 人体姿态估计接口 + +本文档描述了人体姿态估计(Human Pose Estimation, HPE)服务的 HTTP API 接口规范。 + +## 目录 + +- [人体姿态估计接口](#人体姿态估计接口) + - [目录](#目录) + - [服务概述](#服务概述) + - [接口说明](#接口说明) + - [请求](#请求) + - [请求头](#请求头) + - [响应](#响应) + - [成功响应](#成功响应) + - [响应体示例](#响应体示例) + - [错误码](#错误码) + - [错误响应示例](#错误响应示例) + - [响应数据结构](#响应数据结构) + - [关键点定义](#关键点定义) + - [骨骼连接定义](#骨骼连接定义) + - [可视化示例](#可视化示例) + - [使用示例](#使用示例) + - [Python 示例](#python-示例) + - [Bash/cURL 示例](#bashcurl-示例) + - [cURL 快速示例](#curl-快速示例) + - [注意事项](#注意事项) + +--- + +## 服务概述 + +HPE 服务提供基于深度学习的人体姿态估计能力,可从输入图像中检测人体并识别 133 个关键点(全身姿态,包含身体、手部及面部关键点)。 + +- **服务端点**:`https://api.pose.weihua-iot.cn/hpe` + +--- + +## 接口说明 + +### 请求 + +| 属性 | 说明 | +| ------------------ | --------------------------- | +| **URL** | `/hpe` | +| **方法** | `POST` | +| **Content-Type** | `image/jpeg` 或 `image/png` | +| **请求体** | 
原始图像二进制数据 | +| **最大请求体大小** | 10 MB | +| **超时时间** | 30 秒 | + +#### 请求头 + +| 名称 | 必填 | 说明 | +| ---------------- | ---- | ---------------------------------- | +| `Content-Type` | 是 | 必须为 `image/jpeg` 或 `image/png` | +| `Content-Length` | 否 | 请求体字节数(推荐提供) | + +### 响应 + +#### 成功响应 + +| HTTP 状态码 | Content-Type | 说明 | +| ----------- | ------------------ | ------------------------------ | +| `200` | `application/json` | 检测到姿态,返回 JSON 数据 | +| `200` | 无内容 | 未检测到人体姿态,返回空响应体 | + +#### 响应体示例 + +```json +{ + "frame_index": 0, + "reference_size": [1920, 1080], + "bbox": [[100, 150, 400, 800], [500, 200, 750, 850]], + "bbox_confidence": [0.95, 0.87], + "keypoints": [ + [[x1, y1], [x2, y2], ...], + [[x1, y1], [x2, y2], ...] + ], + "keypoints_confidence": [ + [0.9, 0.85, ...], + [0.88, 0.92, ...] + ] +} +``` + +### 错误码 + +| HTTP 状态码 | 错误类型 | 说明 | +| ----------- | ---------------------- | --------------------------------------------- | +| `400` | Bad Request | 请求体为空或图像解码失败 | +| `408` | Request Timeout | 推理超时(超过 30 秒) | +| `413` | Payload Too Large | 请求体超过 10 MB 限制 | +| `415` | Unsupported Media Type | Content-Type 不是 `image/jpeg` 或 `image/png` | +| `503` | Service Unavailable | 服务过载,请求队列已满(最大 16 个请求) | + +#### 错误响应示例 + +```json +{ + "error": "Unsupported Media Type", + "detail": "Expected Content-Type: image/jpeg or image/png, got: text/plain" +} +``` + +--- + +## 响应数据结构 + +| 字段 | 类型 | 说明 | +| ---------------------- | ------------------------------- | -------------------------------------------------------------------------------- | +| `frame_index` | `int` | 帧索引,HTTP 接口始终返回 `0` | +| `reference_size` | `[int, int]` | 输入图像尺寸,格式为 `[宽度, 高度]` | +| `bbox` | `[[x1, y1, x2, y2], ...]` | 检测到的人体边界框列表,每个边界框包含左上角 `(x1, y1)` 和右下角 `(x2, y2)` 坐标 | +| `bbox_confidence` | `[float, ...]` 或 `null` | 每个边界框的置信度分数(0-1),可能为空 | +| `keypoints` | `[[[x, y], ...], ...]` | 每个检测到的人体的 133 个关键点坐标列表 | +| `keypoints_confidence` | `[[float, ...], ...]` 或 `null` | 每个关键点的置信度分数(0-1),可能为空 | + +--- + +## 关键点定义 + +本服务使用 
COCO WholeBody 格式,共包含 133 个关键点: + +| 索引范围 | 数量 | 描述 | +| -------- | ---- | -------------------------------------------------------------------- | +| 0-16 | 17 | 身体关键点(鼻子、眼睛、耳朵、肩膀、肘部、手腕、髋部、膝盖、脚踝等) | +| 17-22 | 6 | 脚部关键点 | +| 23-90 | 68 | 面部关键点 | +| 91-111 | 21 | 左手关键点 | +| 112-132 | 21 | 右手关键点 | + +![关键点示意图](figures/Fig2_anno.webp) + +### 骨骼连接定义 + +关键点之间通过骨骼(Bone)连接形成人体骨架结构。主要骨骼连接如下: + +| 部位 | 连接关系(0-based 索引) | +| ---- | ------------------------------------------------ | +| 腿部 | (15, 13), (13, 11), (16, 14), (14, 12), (11, 12) | +| 躯干 | (5, 11), (6, 12), (5, 6) | +| 手臂 | (5, 7), (7, 9), (6, 8), (8, 10) | +| 头部 | (1, 2), (0, 1), (0, 2), (1, 3), (2, 4) | +| 左脚 | (15, 17), (15, 18), (15, 19) | +| 右脚 | (16, 20), (16, 21), (16, 22) | +| 左手 | 手腕(91)连接至各指根,每指4个关节依次连接 | +| 右手 | 手腕(112)连接至各指根,每指4个关节依次连接 | + +### 可视化示例 + +项目提供了完整的可视化脚本 [scripts/vis_whole_body.py](scripts/vis_whole_body.py),包含: + +- **关键点定义**:`body_landmarks`、`foot_landmarks`、`face_landmarks`、`hand_landmarks` 字典,包含每个关键点的索引、名称和颜色 +- **骨骼定义**:`body_bones`、`hand_bones` 列表,定义了关键点之间的连接关系 +- **可视化函数**: + - `visualize_whole_body()` - 可视化单人 133 个关键点 + - `visualize_17_keypoints()` - 仅可视化 17 个身体关键点 + +--- + +## 使用示例 + +完整的客户端示例代码位于 [scripts](scripts/) 目录下。 + +### Python 示例 + +参见 [scripts/client_example.py](scripts/client_example.py)。 + +该脚本使用 [PEP 723](https://peps.python.org/pep-0723/) 内联脚本元数据,可通过 [uv](https://docs.astral.sh/uv/) 直接运行,无需手动安装依赖: + +```bash +# 使用 uv 直接运行(自动安装依赖) +uv run scripts/client_example.py photo.jpg + +# 指定自定义 URL +uv run scripts/client_example.py photo.png --url https://api.pose.weihua-iot.cn/hpe + +# 或使用传统方式(需先安装 httpx) +pip install httpx +python scripts/client_example.py photo.jpg +``` + +### Bash/cURL 示例 + +参见 [scripts/client_example.sh](scripts/client_example.sh)。 + +```bash +# 赋予执行权限 +chmod +x scripts/client_example.sh + +# 发送图像 +./scripts/client_example.sh figures/sample.jpg + +# 指定自定义 URL +./scripts/client_example.sh figures/sample.jpg https://api.pose.weihua-iot.cn/hpe +``` + +### cURL 
快速示例 + +```bash +# 发送 JPEG 图像 +curl -X POST \ + -H "Content-Type: image/jpeg" \ + --data-binary @figures/sample.jpg \ + https://api.pose.weihua-iot.cn/hpe + +# 发送 PNG 图像并使用 jq 格式化输出 +curl -s -X POST \ + -H "Content-Type: image/png" \ + --data-binary @figures/sample.png \ + https://api.pose.weihua-iot.cn/hpe | jq . + +# 将结果保存到文件 +curl -X POST \ + -H "Content-Type: image/jpeg" \ + --data-binary @figures/sample.jpg \ + -o result.json \ + https://api.pose.weihua-iot.cn/hpe +``` + +--- + +## 注意事项 + +1. **图像格式**:仅支持 JPEG 和 PNG 格式,其他格式将返回 415 错误。 +2. **图像大小**:请确保图像文件不超过 10 MB,否则将返回 413 错误。 +3. **并发限制**:服务最多同时处理 16 个请求,超出后将返回 503 错误。 +4. **超时处理**:单次推理最长等待 30 秒,超时将返回 408 错误。 +5. **空结果处理**:当未检测到人体时,服务返回 HTTP 200 但响应体为空,请在客户端代码中正确处理此情况。 diff --git a/figures/Fig2_anno.webp b/figures/Fig2_anno.webp new file mode 100644 index 0000000..94e1c98 --- /dev/null +++ b/figures/Fig2_anno.webp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6431cb3080dba39d00b4d0b4141fa1c8b2144d3f71deecd25cb2f15a05c39b28 +size 328622 diff --git a/figures/sample.jpg b/figures/sample.jpg new file mode 100644 index 0000000..acb3954 --- /dev/null +++ b/figures/sample.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17aba77b59d60e2603fdb372d84e705a92c07aefea76b75a7e2783b886acc102 +size 148683 diff --git a/scripts/client_example.py b/scripts/client_example.py new file mode 100644 index 0000000..a99cc43 --- /dev/null +++ b/scripts/client_example.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "httpx", +# ] +# /// +""" +Example client for the Human Pose Estimation server. 
+ +Usage: + python client_example.py [--url URL] + uv run client_example.py [--url URL] + +Examples: + python client_example.py photo.jpg + uv run client_example.py photo.png --url https://api.pose.weihua-iot.cn/hpe +""" + +import argparse +import sys +from pathlib import Path + +import httpx + + +def detect_poses( + image_path: Path, url: str = "https://api.pose.weihua-iot.cn/hpe" +) -> dict | None: + """ + Send an image to the HPE server and return pose detection results. + + Args: + image_path: Path to the image file (JPEG or PNG) + url: HPE server endpoint URL + + Returns: + Dictionary with pose detection info, or None if no poses detected + """ + # Determine content type from file extension + suffix = image_path.suffix.lower() + content_type_map = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + } + + content_type = content_type_map.get(suffix) + if content_type is None: + raise ValueError(f"Unsupported image format: {suffix}. Use JPEG or PNG.") + + # Read image bytes + image_bytes = image_path.read_bytes() + + # Send request + with httpx.Client(timeout=60.0) as client: + response = client.post( + url, + content=image_bytes, + headers={"Content-Type": content_type}, + ) + + # Handle response + response.raise_for_status() + + if response.status_code == 200 and len(response.content) == 0: + # No poses detected + return None + + return response.json() + + +def main(): + parser = argparse.ArgumentParser(description="Human Pose Estimation client example") + parser.add_argument( + "image", + type=Path, + help="Path to the image file (JPEG or PNG)", + ) + parser.add_argument( + "--url", + default="https://api.pose.weihua-iot.cn/hpe", + help="HPE server endpoint URL (default: https://api.pose.weihua-iot.cn/hpe)", + ) + args = parser.parse_args() + + image_path: Path = args.image + + if not image_path.exists(): + print(f"Error: Image file not found: {image_path}", file=sys.stderr) + sys.exit(1) + + try: + result = detect_poses(image_path, 
#!/usr/bin/env bash
#
# Example curl client for the Human Pose Estimation server.
#
# Usage:
#   ./client_example.sh <image> [url]
#
# Examples:
#   ./client_example.sh photo.jpg
#   ./client_example.sh photo.png https://api.pose.weihua-iot.cn/hpe
#

set -euo pipefail

# Default server URL
DEFAULT_URL="https://api.pose.weihua-iot.cn/hpe"

# Parse arguments: the image path is required, the URL is optional.
if [[ $# -lt 1 ]]; then
    echo "Usage: $0 <image> [url]" >&2
    echo "" >&2
    echo "Examples:" >&2
    echo "  $0 photo.jpg" >&2
    echo "  $0 photo.png http://192.168.1.100:8245/hpe" >&2
    exit 1
fi

IMAGE_PATH="$1"
URL="${2:-$DEFAULT_URL}"

# Check if image exists
if [[ ! -f "$IMAGE_PATH" ]]; then
    echo "Error: Image file not found: $IMAGE_PATH" >&2
    exit 1
fi

# Print the Content-Type matching the file extension of $1.
# Returns non-zero (after printing an error) for unsupported extensions.
get_content_type() {
    local ext="${1##*.}"
    # tr instead of ${ext,,}: the latter needs bash >= 4 and fails with
    # "bad substitution" on bash 3.2.
    ext=$(printf '%s' "$ext" | tr '[:upper:]' '[:lower:]')
    case "$ext" in
        jpg|jpeg)
            echo "image/jpeg"
            ;;
        png)
            echo "image/png"
            ;;
        *)
            echo "Error: Unsupported image format: .$ext. Use JPEG or PNG." >&2
            return 1
            ;;
    esac
}

# Exit explicitly instead of relying on `set -e` noticing the failed
# command substitution.
if ! CONTENT_TYPE=$(get_content_type "$IMAGE_PATH"); then
    exit 1
fi

# Send request and capture response; the status code is appended as the
# final line by -w.
echo "Sending image to $URL ..." >&2

# -S keeps curl's own error message visible when the transfer itself fails
# (DNS, TLS, connection refused); without the check `set -e` would abort
# the script without any explanation.
if ! HTTP_RESPONSE=$(curl -sS -w "\n%{http_code}" \
    -X POST \
    -H "Content-Type: $CONTENT_TYPE" \
    --data-binary "@$IMAGE_PATH" \
    "$URL"); then
    echo "Error: request to $URL failed" >&2
    exit 1
fi

# Split response body (everything but the last line) and status code
# (the last line).
HTTP_BODY=$(printf '%s\n' "$HTTP_RESPONSE" | sed '$d')
HTTP_CODE=$(printf '%s\n' "$HTTP_RESPONSE" | tail -n1)

# Handle response
case "$HTTP_CODE" in
    200)
        if [[ -z "$HTTP_BODY" ]]; then
            echo "No poses detected in the image."
        else
            # Pretty print JSON if jq is available
            if command -v jq &> /dev/null; then
                printf '%s\n' "$HTTP_BODY" | jq .
            else
                printf '%s\n' "$HTTP_BODY"
            fi
        fi
        ;;
    *)
        echo "HTTP Error: $HTTP_CODE" >&2
        if [[ -n "$HTTP_BODY" ]]; then
            if command -v jq &> /dev/null; then
                printf '%s\n' "$HTTP_BODY" | jq . >&2
            else
                printf '%s\n' "$HTTP_BODY" >&2
            fi
        fi
        exit 1
        ;;
esac
# hand landmarks 92-133 (92-112 right, 113-133 left)
# NOTE(review): the project README lists 0-based 91-111 as the left hand and
# 112-132 as the right hand, i.e. the opposite of the line above - confirm
# which labelling is intended.


Color = Tuple[int, int, int]
# NOTE(review): the colour words below describe the tuples read as RGB;
# confirm the channel order of the frames they are drawn on.
COLOR_SPINE = (138, 201, 38)  # green, spine & head
COLOR_ARMS = (255, 202, 58)  # yellow, arms & shoulders
COLOR_LEGS = (25, 130, 196)  # blue, legs & hips
COLOR_FINGERS = (255, 0, 0)  # red, fingers
COLOR_FACE = (255, 200, 0)  # yellow, face
COLOR_FOOT = (255, 128, 0)  # orange, foot
COLOR_HEAD = (255, 0, 255)  # purple, head


@dataclass(frozen=True)
class Landmark:
    """
    A single whole-body keypoint: its index, name and drawing colour.

    Note the index is 1-based, corresponding to the COCO WholeBody dataset.
    https://github.com/jin-s13/COCO-WholeBody/blob/master/imgs/Fig2_anno.png
    """

    index: int
    name: str
    color: Color

    @property
    def index_base_0(self) -> int:
        """Returns the 0-based index of the landmark."""
        return self.index - 1


def _numbered_landmarks(
    first_index: int,
    groups: Tuple[Tuple[str, int], ...],
    color: Color,
) -> dict[int, Landmark]:
    """Build ``{0-based key: Landmark}`` for consecutive numbered groups.

    Each ``(prefix, count)`` group yields ``count`` landmarks named
    ``prefix_0`` .. ``prefix_{count-1}``; 1-based indices run consecutively
    from ``first_index`` across all groups.
    """
    table: dict[int, Landmark] = {}
    index = first_index
    for prefix, count in groups:
        for n in range(count):
            table[index - 1] = Landmark(index=index, name=f"{prefix}_{n}", color=color)
            index += 1
    return table


# Keys are 0-based positions in the keypoint array; Landmark.index is 1-based.
body_landmarks: dict[int, Landmark] = {
    0: Landmark(index=1, name="nose", color=COLOR_SPINE),
    1: Landmark(index=2, name="left_eye", color=COLOR_SPINE),
    2: Landmark(index=3, name="right_eye", color=COLOR_SPINE),
    3: Landmark(index=4, name="left_ear", color=COLOR_SPINE),
    4: Landmark(index=5, name="right_ear", color=COLOR_SPINE),
    5: Landmark(index=6, name="left_shoulder", color=COLOR_ARMS),
    6: Landmark(index=7, name="right_shoulder", color=COLOR_ARMS),
    7: Landmark(index=8, name="left_elbow", color=COLOR_ARMS),
    8: Landmark(index=9, name="right_elbow", color=COLOR_ARMS),
    9: Landmark(index=10, name="left_wrist", color=COLOR_ARMS),
    10: Landmark(index=11, name="right_wrist", color=COLOR_ARMS),
    11: Landmark(index=12, name="left_hip", color=COLOR_LEGS),
    12: Landmark(index=13, name="right_hip", color=COLOR_LEGS),
    13: Landmark(index=14, name="left_knee", color=COLOR_LEGS),
    14: Landmark(index=15, name="right_knee", color=COLOR_LEGS),
    15: Landmark(index=16, name="left_ankle", color=COLOR_LEGS),
    16: Landmark(index=17, name="right_ankle", color=COLOR_LEGS),
}

foot_landmarks: dict[int, Landmark] = {
    17: Landmark(index=18, name="left_big_toe", color=COLOR_FOOT),
    18: Landmark(index=19, name="left_small_toe", color=COLOR_FOOT),
    19: Landmark(index=20, name="left_heel", color=COLOR_FOOT),
    20: Landmark(index=21, name="right_big_toe", color=COLOR_FOOT),
    21: Landmark(index=22, name="right_small_toe", color=COLOR_FOOT),
    22: Landmark(index=23, name="right_heel", color=COLOR_FOOT),
}

# (name prefix, number of points) per face region, in index order.
# Generating the table keeps every name unique; the hand-written table
# repeated lip_0..lip_9 for indices 82-91.
_FACE_GROUPS: Tuple[Tuple[str, int], ...] = (
    ("chin", 17),  # 24-40, chin contour
    ("right_eyebrow", 5),  # 41-45
    ("left_eyebrow", 5),  # 46-50
    ("nasal_bridge", 4),  # 51-54
    ("nasal_base", 5),  # 55-59
    ("right_eye", 6),  # 60-65
    ("left_eye", 6),  # 66-71
    ("lip", 20),  # 72-91
)

face_landmarks: dict[int, Landmark] = _numbered_landmarks(24, _FACE_GROUPS, COLOR_FACE)
color=COLOR_FINGERS + ), # proximal interphalangeal joint + 103: Landmark( + index=104, name="right_middle_tip", color=COLOR_FINGERS + ), # tip of middle + 104: Landmark( + index=105, name="right_ring_metacarpal", color=COLOR_FINGERS + ), # ring metacarpal + 105: Landmark( + index=106, name="right_ring_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 106: Landmark( + index=107, name="right_ring_pip", color=COLOR_FINGERS + ), # proximal interphalangeal joint + 107: Landmark(index=108, name="right_ring_tip", color=COLOR_FINGERS), # tip of ring + 108: Landmark( + index=109, name="right_pinky_metacarpal", color=COLOR_FINGERS + ), # pinky metacarpal + 109: Landmark( + index=110, name="right_pinky_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 110: Landmark( + index=111, name="right_pinky_pip", color=COLOR_FINGERS + ), # proximal interphalangeal joint + 111: Landmark( + index=112, name="right_pinky_tip", color=COLOR_FINGERS + ), # tip of pinky + # Left hand (113-133) + 112: Landmark(index=113, name="left_wrist", color=COLOR_FINGERS), # wrist/carpus + 113: Landmark( + index=114, name="left_thumb_metacarpal", color=COLOR_FINGERS + ), # thumb metacarpal + 114: Landmark( + index=115, name="left_thumb_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 115: Landmark( + index=116, name="left_thumb_ip", color=COLOR_FINGERS + ), # interphalangeal joint + 116: Landmark( + index=117, name="left_thumb_tip", color=COLOR_FINGERS + ), # tip of thumb + 117: Landmark( + index=118, name="left_index_metacarpal", color=COLOR_FINGERS + ), # index metacarpal + 118: Landmark( + index=119, name="left_index_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 119: Landmark( + index=120, name="left_index_pip", color=COLOR_FINGERS + ), # proximal interphalangeal joint + 120: Landmark( + index=121, name="left_index_tip", color=COLOR_FINGERS + ), # tip of index + 121: Landmark( + index=122, name="left_middle_metacarpal", color=COLOR_FINGERS + ), # middle 
metacarpal + 122: Landmark( + index=123, name="left_middle_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 123: Landmark( + index=124, name="left_middle_pip", color=COLOR_FINGERS + ), # proximal interphalangeal joint + 124: Landmark( + index=125, name="left_middle_tip", color=COLOR_FINGERS + ), # tip of middle + 125: Landmark( + index=126, name="left_ring_metacarpal", color=COLOR_FINGERS + ), # ring metacarpal + 126: Landmark( + index=127, name="left_ring_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 127: Landmark( + index=128, name="left_ring_pip", color=COLOR_FINGERS + ), # proximal interphalangeal joint + 128: Landmark(index=129, name="left_ring_tip", color=COLOR_FINGERS), # tip of ring + 129: Landmark( + index=130, name="left_pinky_metacarpal", color=COLOR_FINGERS + ), # pinky metacarpal + 130: Landmark( + index=131, name="left_pinky_mcp", color=COLOR_FINGERS + ), # metacarpophalangeal joint + 131: Landmark( + index=132, name="left_pinky_pip", color=COLOR_FINGERS + ), # proximal interphalangeal joint + 132: Landmark( + index=133, name="left_pinky_tip", color=COLOR_FINGERS + ), # tip of pinky +} +""" +Key corrections made: + 1. Each finger has a metacarpal bone in the palm + 2. Used standard anatomical abbreviations: + - MCP: MetaCarpoPhalangeal joint + - PIP: Proximal InterPhalangeal joint + - IP: InterPhalangeal joint (for thumb) + 3. The thumb has a different structure: + - Only one interphalangeal joint (IP) + - Different metacarpal orientation + 4. Used "tip" instead of specific phalanx names for endpoints + 5. Removed redundant bone naming since landmarks represent joints/connections +This better reflects the actual skeletal and joint structure of human hands while maintaining compatibility with the COCO-WholeBody dataset's keypoint system. 
@dataclass(frozen=True)
class Bone:
    """A named, coloured segment joining two landmarks.

    The ``Landmark`` and ``Color`` annotations are quoted forward references,
    so creating the class does not require those names to be bound yet.
    """

    start: "Landmark"  # landmark the segment starts at
    end: "Landmark"  # landmark the segment ends at
    name: str
    color: "Color"

    @staticmethod
    def from_landmarks(
        landmarks: "Iterable[Landmark]",
        start_idx: int,
        end_idx: int,
        name: str,
        color: "Color",
    ) -> "Bone":
        """Create a Bone from landmark indices (1-based, COCO WholeBody).

        Args:
            landmarks: Landmarks to search; consumed in a single pass, so a
                one-shot iterator works as well as a list or dict view.
            start_idx: ``Landmark.index`` of the start point.
            end_idx: ``Landmark.index`` of the end point.
            name: Name stored on the bone.
            color: Colour stored on the bone.

        Returns:
            A Bone joining the first landmark matching each index.

        Raises:
            ValueError: If either index matches no landmark.
        """
        start = end = None
        # One pass only: iterating twice silently fails on generators, where
        # the second search starts after the first match.
        for lm in landmarks:
            if start is None and lm.index == start_idx:
                start = lm
            if end is None and lm.index == end_idx:
                end = lm
            if start is not None and end is not None:
                break

        missing = [
            idx for idx, found in ((start_idx, start), (end_idx, end)) if found is None
        ]
        if missing:
            # A bare StopIteration from next() gives no hint which index failed.
            raise ValueError(f"no landmark with index {missing} for bone {name!r}")
        return Bone(start=start, end=end, name=name, color=color)
# Hand skeleton. Per hand the 21 landmarks are: wrist, then four consecutive
# points per finger (thumb, index, middle, ring, pinky). Every finger is a
# chain wrist -> point 1 -> point 2 -> point 3 -> point 4, i.e. four bones,
# which in 0-based pairs is e.g. (91, 92), (92, 93), (93, 94), (94, 95) for
# the first thumb and (112, 113), ..., (115, 116) for the second.
# NOTE(review): the README and the original 0-based listing call 91-111 the
# left hand, while hand_landmarks names those points "right_*" - confirm.
_FINGERS = ("thumb", "index", "middle", "ring", "pinky")
_FINGER_SEGMENTS = (
    "metacarpal",
    "proximal_phalanx",
    "middle_phalanx",
    "distal_phalanx",
)
# The thumb has no middle phalanx. The first three names are kept bound to the
# same endpoints as before; "tip_segment" is the link to the thumb tip that
# was previously missing, which left the tip landmark unconnected.
_THUMB_SEGMENTS = (
    "metacarpal",
    "proximal_phalanx",
    "distal_phalanx",
    "tip_segment",
)


def _hand_bones(side: str, wrist_idx: int) -> list[Bone]:
    """Return the 20 bones of one hand; ``wrist_idx`` is the 1-based wrist index."""
    bones = []
    for finger_no, finger in enumerate(_FINGERS):
        first_joint = wrist_idx + 1 + 4 * finger_no
        chain = [wrist_idx, *range(first_joint, first_joint + 4)]
        segments = _THUMB_SEGMENTS if finger == "thumb" else _FINGER_SEGMENTS
        for segment, start_idx, end_idx in zip(segments, chain, chain[1:]):
            bones.append(
                Bone.from_landmarks(
                    hand_landmarks.values(),
                    start_idx,
                    end_idx,
                    f"{side}_{finger}_{segment}",
                    COLOR_FINGERS,
                )
            )
    return bones


# Landmarks 92-112 first, then 113-133, using the same side prefixes as the
# names in hand_landmarks.
hand_bones: list[Bone] = [*_hand_bones("right", 92), *_hand_bones("left", 113)]

total_bones = body_bones + hand_bones


def visualize_whole_body(
    keypoints: NDArray[np.floating],
    frame: NDArray[np.uint8],
    *,
    landmark_size: int = 1,
    bone_size: int = 2,
    output: Optional[NDArray[np.uint8]] = None,
    confidences: Optional[NDArray[np.floating]] = None,
    confidence_threshold: float = 0.1,
) -> NDArray[np.uint8]:
    """Visualize the whole body keypoints on the given frame.

    Args:
        keypoints: Array of shape (133, 2) with x, y coordinates.
        frame: Input image.
        landmark_size: Radius of landmark circles.
        bone_size: Thickness of bone lines.
        output: Optional output array (defaults to copy of frame). When
            given, it is drawn on in place.
        confidences: Optional array of shape (133,) with confidence scores.
        confidence_threshold: Minimum confidence to draw a landmark/bone.

    Returns:
        The array that was drawn on (``output``, or a copy of ``frame``).
    """
    if output is None:
        output = frame.copy()

    def is_low(idx: int) -> bool:
        """True when confidences are given and point ``idx`` is below threshold."""
        return confidences is not None and confidences[idx] < confidence_threshold

    def as_pixel(idx: int) -> Tuple[int, int]:
        """Keypoint ``idx`` truncated to plain Python ints for cv2."""
        point = keypoints[idx]
        return int(point[0]), int(point[1])

    for bone in total_bones:
        start_idx = bone.start.index_base_0
        end_idx = bone.end.index_base_0
        # A bone needs both endpoints to be trustworthy: one unreliable end
        # would draw a line to an arbitrary location.
        if is_low(start_idx) or is_low(end_idx):
            continue
        cv2.line(output, as_pixel(start_idx), as_pixel(end_idx), bone.color, bone_size)

    for landmark in skeleton_joints.values():
        idx = landmark.index_base_0
        if is_low(idx):
            continue
        # Thickness -1 draws a filled circle.
        cv2.circle(output, as_pixel(idx), landmark_size, landmark.color, -1)
    return output
+ """ + if output is None: + output = frame.copy() + for bone in total_bones[:17]: + start = keypoints[bone.start.index_base_0] + end = keypoints[bone.end.index_base_0] + start = tuple(start.astype(int)) + end = tuple(end.astype(int)) + if ( + confidences is not None + and confidences[bone.start.index_base_0] < confidence_threshold + and confidences[bone.end.index_base_0] < confidence_threshold + ): + continue + cv2.line(output, start, end, bone.color, bone_size) + for landmark in list(body_landmarks.values())[:17]: + point = keypoints[landmark.index_base_0] + point = tuple(point.astype(int)) + if ( + confidences is not None + and confidences[landmark.index_base_0] < confidence_threshold + ): + continue + cv2.circle(output, point, landmark_size, landmark.color, -1) + return output