1
0
forked from HQU-gxy/CVTH3PE

feat: Enhance play notebook with new data structures and visualization utilities

- Added new TypedDict classes for camera parameters, including Resolution, Intrinsic, and Extrinsic.
- Updated dataset reading logic to accommodate new camera parameters structure.
- Introduced functions for reading datasets by port and visualizing whole body keypoints.
- Improved the affinity matrix calculation logic in the camera module.
- Updated dependencies in pyproject.toml to include Plotly and SciPy for enhanced functionality.
This commit is contained in:
2025-04-16 18:53:05 +08:00
parent 3f32333de4
commit 3cc93e5eae
8 changed files with 1394 additions and 80 deletions

View File

@ -350,7 +350,7 @@ def calculate_affinity_matrix_by_epipolar_constraint(
else:
camera_wise_split = classify_by_camera(detections)
num_entries = sum(len(entries) for entries in camera_wise_split.values())
affinity_matrix = jnp.zeros((num_entries, num_entries), dtype=jnp.float32)
affinity_matrix = jnp.ones((num_entries, num_entries), dtype=jnp.float32) * -jnp.inf
affinity_matrix_mask = jnp.zeros((num_entries, num_entries), dtype=jnp.bool_)
acc = 0
@ -361,7 +361,7 @@ def calculate_affinity_matrix_by_epipolar_constraint(
sorted_detections: list[Detection] = []
for camera_id, entries in camera_wise_split.items():
for i, _ in enumerate(entries):
camera_id_index_map[camera_id].add(acc + i)
camera_id_index_map[camera_id].add(acc)
sorted_detections.append(entries[i])
acc += 1
camera_id_index_map_inverse[camera_id] = (
@ -374,6 +374,8 @@ def calculate_affinity_matrix_by_epipolar_constraint(
for i, det in enumerate(sorted_detections):
other_indices = camera_id_index_map_inverse[det.camera.id]
for j in other_indices:
if i == j:
continue
if affinity_matrix_mask[i, j] or affinity_matrix_mask[j, i]:
continue
a = compute_affinity_epipolar_constraint_with_pairs(

View File

@ -151,7 +151,7 @@ class _BIPSolver:
clusters[int(label)].append(i)
return list(clusters.values())
def solve(self, affinity_matrix: NDArray, rtn_matrix=False):
def solve(self, affinity_matrix: NDArray):
n_nodes = affinity_matrix.shape[0]
if n_nodes <= 1:
solution_x, sol_matrix = (
@ -197,8 +197,6 @@ class _BIPSolver:
sol_matrix += sol_matrix.T
clusters = self.solution_mat_clusters(sol_matrix)
if not rtn_matrix:
return clusters
return clusters, sol_matrix

221
app/solver/_old.py Normal file
View File

@ -0,0 +1,221 @@
import itertools
from collections import defaultdict
import numpy as np
from cvxopt import glpk, matrix, spmatrix # type:ignore
from jaxtyping import jaxtyped, Num
from app._typing import NDArray
# Replace cvxopt's GLPK option dict so that only the "GLP_MSG_ERR" message
# level is requested (see the cvxopt GLPK interface docs for the levels).
glpk.options = {"msg_lev": "GLP_MSG_ERR"}
# Sentinel edge ids written into the edge-index tables by _BIPSolver._create_bip.
# Real (unfrozen) edges get ids >= 0, so every sentinel is negative.
FROZEN_POS_EDGE = -1  # edge whose affinity is >= max_affinity (forced to 1)
FROZEN_NEG_EDGE = -2  # edge whose affinity is <= min_affinity (forced to 0)
INVALID_EDGE = -100  # filler for node pairs outside the strict upper triangle
class _BIPSolver:
    """
    Binary Integer Programming (BIP) solver base class.

    Builds a 0/1 program over the edges of an affinity graph (one variable per
    node pair in the strict upper triangle) with triangle constraints of the
    form ``x_ij + x_ik - x_jk <= 1``, and turns the edge solution into node
    clusters. Edges whose affinity is ``>= max_affinity`` are frozen to 1 and
    edges whose affinity is ``<= min_affinity`` are frozen to 0; only the
    remaining edges become solver variables.

    Subclasses must implement ``_solve_bip``.
    """

    min_affinity: float
    max_affinity: float

    def __init__(self, min_affinity: float = -np.inf, max_affinity: float = np.inf):
        """Store the freezing thresholds (defaults freeze nothing finite)."""
        self.min_affinity = min_affinity
        self.max_affinity = max_affinity

    @staticmethod
    def _create_bip(
        affinity_matrix: Num[NDArray, "N N"], min_affinity: float, max_affinity: float
    ):
        """
        Translate an affinity matrix into BIP data.

        Returns a 6-tuple:
            objective_coefficients: affinities of the unfrozen edges
            unfrozen_mask, frozen_pos_mask, frozen_neg_mask: boolean masks over
                the flattened strict upper triangle
            (coefficients, row_idx, col_idx): constraint matrix in triplet form
            upper_bounds: right-hand side of each constraint row
        """
        n_nodes = affinity_matrix.shape[0]

        # mask for selecting pairs of nodes (strict upper triangle)
        triu_mask = np.triu(np.ones_like(affinity_matrix, dtype=bool), 1)

        affinities = affinity_matrix[triu_mask]
        frozen_pos_mask = affinities >= max_affinity
        frozen_neg_mask = affinities <= min_affinity
        unfrozen_mask = np.logical_not(frozen_pos_mask | frozen_neg_mask)

        # generate objective coefficients
        objective_coefficients = affinities[unfrozen_mask]

        if len(objective_coefficients) == 0:  # no unfrozen edges
            # Keep one placeholder variable so the solver still gets a
            # non-empty problem; solve() overwrites frozen edges afterwards.
            objective_coefficients = np.asarray([affinity_matrix[0, -1]])
            # builtin ``bool``: the ``np.bool`` alias is missing on NumPy 1.24-1.26
            unfrozen_mask = np.zeros_like(unfrozen_mask, dtype=bool)
            unfrozen_mask[affinity_matrix.shape[1] - 1] = 1

        # create matrix whose rows are the indices of the three edges in a
        # constraint x_ij + x_ik - x_jk <= 1
        constraints_edges_idx = []
        if n_nodes >= 3:
            edges_idx = np.empty_like(affinities, dtype=int)
            edges_idx[frozen_pos_mask] = FROZEN_POS_EDGE
            edges_idx[frozen_neg_mask] = FROZEN_NEG_EDGE
            edges_idx[unfrozen_mask] = np.arange(len(objective_coefficients))
            nodes_to_edge_matrix = np.empty_like(affinity_matrix, dtype=int)
            nodes_to_edge_matrix.fill(INVALID_EDGE)
            nodes_to_edge_matrix[triu_mask] = edges_idx

            triplets = np.asarray(
                tuple(itertools.combinations(range(n_nodes), 3)), dtype=int
            )
            constraints_edges_idx = np.zeros_like(triplets)
            constraints_edges_idx[:, 0] = nodes_to_edge_matrix[
                (triplets[:, 0], triplets[:, 1])
            ]
            constraints_edges_idx[:, 1] = nodes_to_edge_matrix[
                (triplets[:, 0], triplets[:, 2])
            ]
            constraints_edges_idx[:, 2] = nodes_to_edge_matrix[
                (triplets[:, 1], triplets[:, 2])
            ]
            # drop triangles that contain no unfrozen edge at all
            constraints_edges_idx = constraints_edges_idx[
                np.any(constraints_edges_idx >= 0, axis=1)
            ]

        if len(constraints_edges_idx) == 0:  # no constraints
            constraints_edges_idx = np.asarray([0, 0, 0], dtype=int).reshape(-1, 3)

        # add remaining constraints by permutation (each edge of a triangle
        # takes the "-1" slot once)
        constraints_edges_idx = np.vstack(
            (
                constraints_edges_idx,
                np.roll(constraints_edges_idx, 1, axis=1),
                np.roll(constraints_edges_idx, 2, axis=1),
            )
        )

        # clean redundant constraints
        # x1 + x2 <= 2
        constraints_edges_idx = constraints_edges_idx[
            constraints_edges_idx[:, 2] != FROZEN_POS_EDGE
        ]
        # x1 - x2 <= 1
        constraints_edges_idx = constraints_edges_idx[
            np.all(constraints_edges_idx[:, 0:2] != FROZEN_NEG_EDGE, axis=1)
        ]

        if len(constraints_edges_idx) == 0:  # no constraints
            constraints_edges_idx = np.asarray([0, 0, 0], dtype=int).reshape(-1, 3)

        # generate constraint coefficients (+1, +1, -1 per row)
        constraints_coefficients = np.ones_like(constraints_edges_idx)
        constraints_coefficients[:, 2] = -1

        # generate constraint upper bounds; frozen-positive edges are constants
        # equal to 1, so their contribution moves to the right-hand side
        upper_bounds = np.ones(len(constraints_coefficients), dtype=float)
        upper_bounds -= np.sum(
            constraints_coefficients * (constraints_edges_idx == FROZEN_POS_EDGE),
            axis=1,
        )

        # flatten constraints data into sparse matrix format
        constraints_idx = np.repeat(np.arange(len(constraints_edges_idx)), 3)
        constraints_edges_idx = constraints_edges_idx.reshape(-1)
        constraints_coefficients = constraints_coefficients.reshape(-1)

        # only unfrozen edges (ids >= 0) are real columns of the matrix
        unfrozen_edges = constraints_edges_idx >= 0
        constraints_idx = constraints_idx[unfrozen_edges]
        constraints_edges_idx = constraints_edges_idx[unfrozen_edges]
        constraints_coefficients = constraints_coefficients[unfrozen_edges]

        return (
            objective_coefficients,
            unfrozen_mask,
            frozen_pos_mask,
            frozen_neg_mask,
            (constraints_coefficients, constraints_idx, constraints_edges_idx),
            upper_bounds,
        )

    @staticmethod
    def _solve_bip(objective_coefficients, sparse_constraints, upper_bounds):
        """Solve the BIP built by ``_create_bip``; provided by subclasses."""
        raise NotImplementedError

    @staticmethod
    def solution_mat_clusters(solution_mat: NDArray) -> list[list[int]]:
        """
        Group node indices into clusters from a 0/1 solution matrix.

        Every node starts with its own label; whenever ``solution_mat[i, j] > 0``
        for ``i < j``, node ``j`` takes over the current label of node ``i``.
        Returns one list of node indices per distinct label.
        """
        n = solution_mat.shape[0]
        labels = np.arange(1, n + 1)
        for i in range(n):
            for j in range(i + 1, n):
                if solution_mat[i, j] > 0:
                    labels[j] = labels[i]

        clusters = defaultdict(list)
        for i, label in enumerate(labels):
            clusters[label].append(i)
        return list(clusters.values())

    def solve(self, affinity_matrix: Num[NDArray, "N N"]):
        """
        Cluster the nodes described by ``affinity_matrix``.

        Returns ``(clusters, sol_matrix)`` where ``clusters`` is a list of lists
        of node indices and ``sol_matrix`` is the 0/1 edge solution. For two or
        more nodes ``sol_matrix`` is a symmetric ``N x N`` matrix; for zero or
        one node it has shape ``(N, 1)``.
        """
        n_nodes = affinity_matrix.shape[0]
        if n_nodes <= 1:
            # nothing to pair up
            sol_matrix = np.zeros((n_nodes, 1), dtype=int)
        elif n_nodes == 2:
            # single edge: join the two nodes iff their affinity is positive
            sol_matrix = np.zeros_like(affinity_matrix, dtype=int)
            sol_matrix[0, 1] = affinity_matrix[0, 1] > 0
            sol_matrix += sol_matrix.T
        else:
            # create BIP problem
            (
                objective_coefficients,
                unfrozen_mask,
                frozen_pos_mask,
                frozen_neg_mask,
                sparse_constraints,
                upper_bounds,
            ) = self._create_bip(affinity_matrix, self.min_affinity, self.max_affinity)

            # solve
            solution_x = self._solve_bip(
                objective_coefficients, sparse_constraints, upper_bounds
            )

            # solution to matrix; frozen edges are written last so they win
            # over any placeholder variable
            all_sols = np.zeros_like(unfrozen_mask, dtype=int)
            all_sols[unfrozen_mask] = np.array(solution_x, dtype=int).reshape(-1)
            all_sols[frozen_neg_mask] = 0
            all_sols[frozen_pos_mask] = 1
            sol_matrix = np.zeros_like(affinity_matrix, dtype=int)
            sol_matrix[np.triu(np.ones([n_nodes, n_nodes], dtype=int), 1) > 0] = (
                all_sols
            )
            sol_matrix += sol_matrix.T

        clusters = self.solution_mat_clusters(sol_matrix)
        return clusters, sol_matrix
class GLPKSolver(_BIPSolver):
    """``_BIPSolver`` whose integer program is handed to GLPK through cvxopt."""

    def __init__(self, min_affinity=-np.inf, max_affinity=np.inf):
        super().__init__(min_affinity, max_affinity)

    @staticmethod
    def _solve_bip(objective_coefficients, sparse_constraints, upper_bounds):
        """
        Run ``glpk.ilp`` on the problem and return the solution as a flat
        integer array. All variables are declared binary.
        """
        # The coefficients are negated before being passed as the cost
        # vector (the original author's note: "max -> min").
        cost = matrix(-objective_coefficients)
        # Constraint system  lhs * x <= rhs, built from (values, rows, cols).
        shape = (len(upper_bounds), len(objective_coefficients))
        lhs = spmatrix(*sparse_constraints, size=shape)
        rhs = matrix(upper_bounds, tc="d")
        binary_variables = set(range(len(cost)))
        status, solution = glpk.ilp(cost, lhs, rhs, B=binary_variables)
        assert solution is not None, "Solver error: {}".format(status)
        return np.asarray(solution, int).reshape(-1)

680
app/visualize/whole_body.py Normal file
View File

@ -0,0 +1,680 @@
from dataclasses import dataclass, field
from typing import (
Any,
Dict,
Iterable,
List,
Literal,
Optional,
Sequence,
Tuple,
TypedDict,
cast,
)
import cv2
import matplotlib.pyplot as plt
import numpy as np
from beartype import beartype
from cv2.typing import MatLike
from jaxtyping import Float, Int, Num, jaxtyped
from app._typing import NDArray
# https://www.researchgate.net/figure/Whole-body-keypoints-as-defined-in-the-COCO-WholeBody-Dataset_fig3_358873962
# https://github.com/jin-s13/COCO-WholeBody/blob/master/imgs/Fig2_anno.png
# body landmarks 1-17
# foot landmarks 18-23 (18-20 left, 21-23 right)
# face landmarks 24-91
# 24 start, counterclockwise to 40 as chin
# 41-45 right eyebrow, 46-50 left eyebrow
# https://www.neiltanna.com/face/rhinoplasty/nasal-analysis/
# 51-54 nose (vertical), 55-59 nose (horizontal)
# 60-65 right eye, 66-71 left eye
# 72-83 outer lips (contour, counterclockwise)
# ...
# hand landmarks 92-133 (92-112 right, 113-133 left)
# A colour is a triple of ints; the values below are handed unchanged to
# cv2.line / cv2.circle by the visualize_* functions in this module.
# NOTE(review): the colour words in the comments read each triple as
# (R, G, B). OpenCV applies the values in the frame's own channel order
# (BGR for frames read by cv2) - confirm which order callers use.
Color = Tuple[int, int, int]
COLOR_SPINE = (138, 201, 38)  # green, spine & head
COLOR_ARMS = (255, 202, 58)  # yellow, arms & shoulders
COLOR_LEGS = (25, 130, 196)  # blue, legs & hips
COLOR_FINGERS = (255, 0, 0)  # red, fingers
COLOR_FACE = (255, 200, 0)  # yellow, face
COLOR_FOOT = (255, 128, 0)  # orange, foot
COLOR_HEAD = (255, 0, 255)  # magenta/purple, head
@dataclass
class Landmark:
    """
    One keypoint of the COCO WholeBody layout.
    https://github.com/jin-s13/COCO-WholeBody/blob/master/imgs/Fig2_anno.png
    """

    # 1-based keypoint number, as used by the COCO WholeBody annotation figure
    index: int
    name: str
    color: Color

    def __post_init__(self):
        # reject 0 and negatives: the numbering starts at 1
        if self.index < 1:
            raise ValueError(f"Index must be positive, got {self.index}")

    @property
    def index_base_0(self) -> int:
        """
        The same keypoint number shifted to start at 0, ready to be used as
        a list or array position.
        """
        zero_based = self.index - 1
        return zero_based
# Body keypoints 1-17, keyed by their 0-based position.
body_landmarks: dict[int, Landmark] = {
    position: Landmark(index=position + 1, name=label, color=tint)
    for position, (label, tint) in enumerate(
        (
            ("nose", COLOR_SPINE),
            ("left_eye", COLOR_SPINE),
            ("right_eye", COLOR_SPINE),
            ("left_ear", COLOR_SPINE),
            ("right_ear", COLOR_SPINE),
            ("left_shoulder", COLOR_ARMS),
            ("right_shoulder", COLOR_ARMS),
            ("left_elbow", COLOR_ARMS),
            ("right_elbow", COLOR_ARMS),
            ("left_wrist", COLOR_ARMS),
            ("right_wrist", COLOR_ARMS),
            ("left_hip", COLOR_LEGS),
            ("right_hip", COLOR_LEGS),
            ("left_knee", COLOR_LEGS),
            ("right_knee", COLOR_LEGS),
            ("left_ankle", COLOR_LEGS),
            ("right_ankle", COLOR_LEGS),
        )
    )
}
# Foot keypoints 18-23 (left foot first), keyed by their 0-based position.
foot_landmarks: dict[int, Landmark] = {
    position: Landmark(index=position + 1, name=label, color=COLOR_FOOT)
    for position, label in enumerate(
        (
            "left_big_toe",
            "left_small_toe",
            "left_heel",
            "right_big_toe",
            "right_small_toe",
            "right_heel",
        ),
        start=17,
    )
}
def _build_face_landmarks() -> dict[int, Landmark]:
    """
    Create the face keypoints 24-91, keyed by their 0-based position.

    Each group is numbered from 0 within itself, e.g. ``chin_0`` .. ``chin_16``.
    The 20 lip points are numbered ``lip_0`` .. ``lip_19`` so that every
    landmark name is unique (points 82-91 used to repeat ``lip_0`` .. ``lip_9``).
    """
    # (name prefix, number of points), in keypoint order starting at 24
    groups = (
        ("chin", 17),  # chin contour (24-40)
        ("right_eyebrow", 5),  # 41-45
        ("left_eyebrow", 5),  # 46-50
        ("nasal_bridge", 4),  # 51-54
        ("nasal_base", 5),  # 55-59
        ("right_eye", 6),  # 60-65
        ("left_eye", 6),  # 66-71
        ("lip", 20),  # 72-91
    )
    table: dict[int, Landmark] = {}
    index = 24
    for prefix, count in groups:
        for ordinal in range(count):
            table[index - 1] = Landmark(
                index=index, name=f"{prefix}_{ordinal}", color=COLOR_FACE
            )
            index += 1
    return table


face_landmarks: dict[int, Landmark] = _build_face_landmarks()
def _build_hand_landmarks() -> dict[int, Landmark]:
    """
    Create the hand keypoints 92-133, keyed by their 0-based position.

    Each hand is a wrist (carpus) point followed by four points per finger:
    metacarpal, mcp (metacarpophalangeal joint), pip (proximal
    interphalangeal joint; ``ip`` for the thumb, which has a single
    interphalangeal joint) and tip.

    NOTE(review): 92-112 are labelled "right" and 113-133 "left" here, while
    the reference connectivity comment next to ``hand_bones`` labels the
    first range as the left hand - confirm which side is correct.
    """
    suffixes = ["wrist"]
    for finger in ("thumb", "index", "middle", "ring", "pinky"):
        middle_joint = "ip" if finger == "thumb" else "pip"
        for joint in ("metacarpal", "mcp", middle_joint, "tip"):
            suffixes.append(f"{finger}_{joint}")

    table: dict[int, Landmark] = {}
    # right hand occupies 92-112, left hand 113-133
    for side, first_index in (("right", 92), ("left", 113)):
        for offset, suffix in enumerate(suffixes):
            index = first_index + offset
            table[index - 1] = Landmark(
                index=index, name=f"{side}_{suffix}", color=COLOR_FINGERS
            )
    return table


hand_landmarks: dict[int, Landmark] = _build_hand_landmarks()
"""
Key corrections made:
1. Each finger has a metacarpal bone in the palm
2. Used standard anatomical abbreviations:
- MCP: MetaCarpoPhalangeal joint
- PIP: Proximal InterPhalangeal joint
- IP: InterPhalangeal joint (for thumb)
3. The thumb has a different structure:
- Only one interphalangeal joint (IP)
- Different metacarpal orientation
4. Used "tip" instead of specific phalanx names for endpoints
5. Removed redundant bone naming since landmarks represent joints/connections
This better reflects the actual skeletal and joint structure of human hands while maintaining compatibility with the COCO-WholeBody dataset's keypoint system.
"""
# Every landmark of the layout (body, feet, face, hands) in one lookup table,
# keyed by 0-based position. The four source tables use disjoint keys.
skeleton_joints = body_landmarks | foot_landmarks | face_landmarks | hand_landmarks
@dataclass
class Bone:
    """A drawable segment between two landmarks."""

    start: Landmark
    end: Landmark
    name: str
    color: Color

    @staticmethod
    def from_landmarks(
        landmarks: Iterable[Landmark],
        start_idx: int,
        end_idx: int,
        name: str,
        color: Color,
    ) -> "Bone":
        """
        Build a bone from the landmarks whose 1-based ``index`` equals
        ``start_idx`` and ``end_idx``.

        Note that the start and end indices are 1-based, corresponding to the
        COCO WholeBody dataset.

        ``landmarks`` is consumed in a single pass, so one-shot iterables
        (generators, iterators) are supported. If several landmarks share an
        index, the first one encountered is used.

        Raises:
            ValueError: if no landmark has the requested start or end index.
        """
        by_index: dict[int, Landmark] = {}
        for landmark in landmarks:
            # setdefault keeps the first landmark seen for each index
            by_index.setdefault(landmark.index, landmark)

        for wanted in (start_idx, end_idx):
            if wanted not in by_index:
                raise ValueError(
                    f"No landmark with index {wanted} found for bone {name!r}"
                )
        return Bone(
            start=by_index[start_idx], end=by_index[end_idx], name=name, color=color
        )
# Reference connectivity (0-based keypoint positions):
# (15, 13), (13, 11), (16, 14), (14, 12), (11, 12),  # legs
# (5, 11), (6, 12), (5, 6),  # hips and torso
# (5, 7), (7, 9), (6, 8), (8, 10),  # arms
# (1, 2), (0, 1), (0, 2), (1, 3), (2, 4),  # head
# (15, 17), (15, 18), (15, 19),  # left foot
# (16, 20), (16, 21), (16, 22),  # right foot
#
# The table below lists the same segments with 1-based indices, which is
# what Bone.from_landmarks expects.
body_bones: list[Bone] = [
    Bone.from_landmarks(skeleton_joints.values(), first, second, label, tint)
    for first, second, label, tint in (
        # legs
        (16, 14, "left_tibia", COLOR_LEGS),  # tibia & fibula
        (14, 12, "left_femur", COLOR_LEGS),
        (17, 15, "right_tibia", COLOR_LEGS),
        (15, 13, "right_femur", COLOR_LEGS),
        (12, 13, "pelvis", COLOR_LEGS),
        # torso: contour of rib cage & pelvis (parallel to spine)
        (6, 12, "left_contour", COLOR_SPINE),
        (7, 13, "right_contour", COLOR_SPINE),
        (6, 7, "clavicle", COLOR_SPINE),
        # arms
        (6, 8, "left_humerus", COLOR_ARMS),
        (8, 10, "left_radius", COLOR_ARMS),  # radius & ulna
        (7, 9, "right_humerus", COLOR_ARMS),
        (9, 11, "right_radius", COLOR_ARMS),
        # head
        (2, 3, "head", COLOR_HEAD),
        (1, 2, "left_eye", COLOR_HEAD),
        (1, 3, "right_eye", COLOR_HEAD),
        (2, 4, "left_ear", COLOR_HEAD),
        (3, 5, "right_ear", COLOR_HEAD),
        # feet, each fanning out from the ankle
        (16, 18, "left_foot_toe", COLOR_FOOT),
        (16, 19, "left_foot_small_toe", COLOR_FOOT),
        (16, 20, "left_foot_heel", COLOR_FOOT),
        (17, 21, "right_foot_toe", COLOR_FOOT),
        (17, 22, "right_foot_small_toe", COLOR_FOOT),
        (17, 23, "right_foot_heel", COLOR_FOOT),
    )
]
# Reference connectivity (0-based keypoint positions):
# (91, 92), (92, 93), (93, 94), (94, 95),  # left thumb
# (91, 96), (96, 97), (97, 98), (98, 99),  # left index finger
# (91, 100), (100, 101), (101, 102), (102, 103),  # left middle finger
# (91, 104), (104, 105), (105, 106), (106, 107),  # left ring finger
# (91, 108), (108, 109), (109, 110), (110, 111),  # left little finger
# (112, 113), (113, 114), (114, 115), (115, 116),  # right thumb
# (112, 117), (117, 118), (118, 119), (119, 120),  # right index finger
# (112, 121), (121, 122), (122, 123), (123, 124),  # right middle finger
# (112, 125), (125, 126), (126, 127), (127, 128),  # right ring finger
# (112, 129), (129, 130), (130, 131), (131, 132)  # right little finger
#
# NOTE(review): the reference above labels 91-111 as the left hand, whereas
# the bone names below (like the names in hand_landmarks) call that range
# "right" - confirm which side is correct.
#
# Key points about the hand bone structure:
# 1. Every finger is a chain of four segments that starts at the wrist
#    (1-based index 92 for the first hand, 113 for the second).
# 2. For the index, middle, ring and pinky fingers the segments are named
#    metacarpal, proximal phalanx, middle phalanx and distal phalanx.
# 3. The thumb has no middle phalanx; its first three segments keep the
#    names metacarpal, proximal phalanx and distal phalanx, and a fourth
#    segment connects the last joint to the thumb tip so that the tip
#    keypoints (96 and 117) are part of the drawn skeleton.
# 4. Thumb = Pollex, the other fingers = Digits II-V.
def _build_hand_bones() -> list[Bone]:
    """Create the bones of both hands, right-labelled hand first."""
    finger_segments = (
        "metacarpal",
        "proximal_phalanx",
        "middle_phalanx",
        "distal_phalanx",
    )
    thumb_segments = (
        "metacarpal",
        "proximal_phalanx",
        "distal_phalanx",
        "tip_segment",  # previously missing: joins the thumb tip to the chain
    )
    fingers = ("thumb", "index", "middle", "ring", "pinky")

    bones: list[Bone] = []
    for side, wrist in (("right", 92), ("left", 113)):
        for finger_number, finger in enumerate(fingers):
            # each finger owns four consecutive keypoints after the wrist
            first_joint = wrist + 1 + 4 * finger_number
            chain = (wrist, *range(first_joint, first_joint + 4))
            segments = thumb_segments if finger == "thumb" else finger_segments
            for start_idx, end_idx, segment in zip(chain, chain[1:], segments):
                bones.append(
                    Bone.from_landmarks(
                        hand_landmarks.values(),
                        start_idx,
                        end_idx,
                        f"{side}_{finger}_{segment}",
                        COLOR_FINGERS,
                    )
                )
    return bones


hand_bones: list[Bone] = _build_hand_bones()
# All drawable bones: body (including feet) first, then both hands.
total_bones = [*body_bones, *hand_bones]
@jaxtyped(typechecker=beartype)
def visualize_whole_body(
    keypoints: Num[NDArray, "133 2"],
    frame: MatLike,
    landmark_size: int = 1,
    bone_size: int = 2,
    output: Optional[MatLike] = None,
    confidences: Optional[Num[NDArray, "133 1"]] = None,
    confidence_threshold: float = 0.1,
):
    """
    Visualize the whole body keypoints on the given frame.

    Args:
        keypoints: one (x, y) pair per keypoint, in 0-based keypoint order.
        frame: image to draw on; it is copied unless ``output`` is given.
        landmark_size: radius passed to ``cv2.circle`` for each keypoint.
        bone_size: line thickness passed to ``cv2.line`` for each bone.
        output: image to draw into in place; defaults to a copy of ``frame``.
        confidences: optional per-keypoint confidence values.
        confidence_threshold: keypoints whose confidence is below this value
            are not drawn, and neither is any bone that ends at one of them.

    Returns:
        The image that was drawn into.
    """
    if output is None:
        output = frame.copy()

    def is_suppressed(position: int) -> bool:
        # without confidences every keypoint is drawn
        return confidences is not None and bool(
            confidences[position] < confidence_threshold
        )

    def pixel(position: int) -> tuple:
        # truncate to integer pixel coordinates, as plain Python ints for cv2
        return tuple(keypoints[position].astype(int).tolist())

    for bone in total_bones:
        start_position = bone.start.index_base_0
        end_position = bone.end.index_base_0
        # a bone is only meaningful when both of its endpoints are trusted
        if is_suppressed(start_position) or is_suppressed(end_position):
            continue
        cv2.line(
            output, pixel(start_position), pixel(end_position), bone.color, bone_size
        )

    for landmark in skeleton_joints.values():
        position = landmark.index_base_0
        if is_suppressed(position):
            continue
        # thickness -1 draws a filled circle
        cv2.circle(output, pixel(position), landmark_size, landmark.color, -1)
    return output
@jaxtyped(typechecker=beartype)
def visualize_17_keypoints(
    keypoints: Num[NDArray, "17 2"],
    frame: MatLike,
    output: Optional[MatLike] = None,
    confidences: Optional[Num[NDArray, "17 1"]] = None,
    confidence_threshold: float = 0.1,
    landmark_size: int = 1,
    bone_size: int = 2,
):
    """
    Visualize the first 17 keypoints on the given frame.

    Args:
        keypoints: one (x, y) pair per keypoint, in 0-based keypoint order.
        frame: image to draw on; it is copied unless ``output`` is given.
        output: image to draw into in place; defaults to a copy of ``frame``.
        confidences: optional per-keypoint confidence values.
        confidence_threshold: keypoints whose confidence is below this value
            are not drawn, and neither is any bone that ends at one of them.
        landmark_size: radius passed to ``cv2.circle`` for each keypoint.
        bone_size: line thickness passed to ``cv2.line`` for each bone.

    Returns:
        The image that was drawn into.
    """
    if output is None:
        output = frame.copy()

    keypoint_count = len(keypoints)

    def is_suppressed(position: int) -> bool:
        # without confidences every keypoint is drawn
        return confidences is not None and bool(
            confidences[position] < confidence_threshold
        )

    def pixel(position: int) -> tuple:
        # truncate to integer pixel coordinates, as plain Python ints for cv2
        return tuple(keypoints[position].astype(int).tolist())

    for bone in total_bones:
        start_position = bone.start.index_base_0
        end_position = bone.end.index_base_0
        # Select bones by their endpoints rather than by their position in
        # total_bones, so foot/hand bones are excluded whatever the list order.
        if start_position >= keypoint_count or end_position >= keypoint_count:
            continue
        # a bone is only meaningful when both of its endpoints are trusted
        if is_suppressed(start_position) or is_suppressed(end_position):
            continue
        cv2.line(
            output, pixel(start_position), pixel(end_position), bone.color, bone_size
        )

    for landmark in body_landmarks.values():
        position = landmark.index_base_0
        if position >= keypoint_count or is_suppressed(position):
            continue
        # thickness -1 draws a filled circle
        cv2.circle(output, pixel(position), landmark_size, landmark.color, -1)
    return output
@jaxtyped(typechecker=beartype)
def visualize_whole_body_many(
    keypoints: Num[NDArray, "N 133 2"],
    frame: MatLike,
    landmark_size: int = 1,
    bone_size: int = 2,
    output: Optional[MatLike] = None,
    confidences: Optional[Num[NDArray, "N 133 1"]] = None,
    confidence_threshold: float = 0.1,
):
    """
    Visualize a batch of whole body keypoints on the given frame.

    Every entry of ``keypoints`` (and the matching entry of ``confidences``,
    when given) is drawn with ``visualize_whole_body`` into the same image.

    Returns:
        ``output`` when it was supplied, otherwise a copy of ``frame`` with
        the skeletons drawn on it. This also holds for an empty batch, in
        which case nothing is drawn.
    """
    if output is None:
        output = frame.copy()
    if len(keypoints) == 0:
        # nothing to draw, but still hand back the caller's output image
        return output

    if confidences is None:
        per_person_confidences = [None] * len(keypoints)
    else:
        assert len(keypoints) == len(
            confidences
        ), f"Expected same length, got {len(keypoints)} and {len(confidences)}"
        per_person_confidences = confidences

    for keypoint, confidence in zip(keypoints, per_person_confidences):
        output = visualize_whole_body(
            keypoint,
            frame,
            landmark_size,
            bone_size,
            output=output,
            confidences=confidence,
            confidence_threshold=confidence_threshold,
        )
    return output