9 Commits

21 changed files with 12289 additions and 1727 deletions

.gitignore (vendored, 2 changed lines)

@ -10,3 +10,5 @@ wheels/
.venv
.hypothesis
samples
*.jpg
*.parquet


@ -2,3 +2,4 @@
```bash
jupytext --to py:percent <script>.ipynb
```
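The reverse conversion, from the percent script back to a notebook, is `jupytext --to notebook <script>.py`.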


@ -1,6 +1,7 @@
from collections import OrderedDict, defaultdict
from dataclasses import dataclass
from datetime import datetime
import string
from typing import Any, TypeAlias, TypedDict, Optional, Sequence
from beartype import beartype
@ -522,10 +523,14 @@ def to_homogeneous(points: Num[Array, "N 2"] | Num[Array, "N 3"]) -> Num[Array,
raise ValueError(f"Invalid shape for points: {points.shape}")
import awkward as ak
@jaxtyped(typechecker=beartype)
def point_line_distance(
points: Num[Array, "N 3"] | Num[Array, "N 2"],
line: Num[Array, "N 3"],
description: str,
eps: float = 1e-9,
):
"""
@ -544,6 +549,12 @@ def point_line_distance(
"""
numerator = abs(line[:, 0] * points[:, 0] + line[:, 1] * points[:, 1] + line[:, 2])
denominator = jnp.sqrt(line[:, 0] * line[:, 0] + line[:, 1] * line[:, 1])
# line_data = {"a": line[:, 0], "b": line[:, 1], "c": line[:, 2]}
# line_x_y = {"x": points[:, 0], "y": points[:, 1]}
# ak.to_parquet(
# line_data, f"/home/admin/Code/CVTH3PE/line_a_b_c_{description}.parquet"
# )
# ak.to_parquet(line_x_y, f"/home/admin/Code/CVTH3PE/line_x_y_{description}.parquet")
return numerator / (denominator + eps)
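A quick numeric check of the formula above (a sketch; it just re-evaluates the numerator and denominator for a line whose point-line distance is known in closed form):

```python
import jax.numpy as jnp

line = jnp.array([[3.0, 4.0, -5.0]])   # the line 3x + 4y - 5 = 0
points = jnp.array([[0.0, 0.0]])       # the origin, at distance |-5| / 5 = 1
numerator = jnp.abs(line[:, 0] * points[:, 0] + line[:, 1] * points[:, 1] + line[:, 2])
denominator = jnp.sqrt(line[:, 0] ** 2 + line[:, 1] ** 2)
print(numerator / denominator)         # [1.]
```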
@ -571,7 +582,7 @@ def left_to_right_epipolar_distance(
"""
F_t = fundamental_matrix.transpose()
line1_in_2 = jnp.matmul(left, F_t)
return point_line_distance(right, line1_in_2)
return point_line_distance(right, line1_in_2, "left_to_right")
@jaxtyped(typechecker=beartype)
@ -597,7 +608,7 @@ def right_to_left_epipolar_distance(
$$x^{\\prime T}Fx = 0$$
"""
line2_in_1 = jnp.matmul(right, fundamental_matrix)
return point_line_distance(left, line2_in_1)
return point_line_distance(left, line2_in_1, "right_to_left")
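A minimal sanity check of the left-to-right direction (a sketch; the argument order of `left_to_right_epipolar_distance` is assumed to be `(left, right, fundamental_matrix)` from the body above). With identity intrinsics and a pure translation along x, F = [t]_x, epipolar lines are horizontal, so a right-image point on the same row has distance zero:

```python
import jax.numpy as jnp

F = jnp.array([[0.0, 0.0, 0.0],
               [0.0, 0.0, -1.0],
               [0.0, 1.0, 0.0]])              # [t]_x for t = (1, 0, 0)
left = jnp.array([[0.2, 0.3, 1.0]])           # homogeneous left-image point
right = jnp.array([[0.7, 0.3, 1.0]])          # same row in the right image
print(left_to_right_epipolar_distance(left, right, F))  # ~ [0.]
```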
def distance_between_epipolar_lines(


@ -1,3 +1,4 @@
import warnings
import weakref
from collections import deque
from dataclasses import dataclass
@ -114,13 +115,23 @@ class LastDifferenceVelocityFilter(GenericVelocityFilter):
def predict(self, timestamp: datetime) -> TrackingPrediction:
delta_t_s = (timestamp - self._last_timestamp).total_seconds()
assert delta_t_s >= 0, f"delta_t_s is negative: {delta_t_s}"
if delta_t_s <= 0:
warnings.warn(
"delta_t={}; last={}; current={}".format(
delta_t_s, self._last_timestamp, timestamp
)
)
if self._last_velocity is None:
return TrackingPrediction(
velocity=None,
keypoints=self._last_keypoints,
)
else:
if delta_t_s <= 0:
return TrackingPrediction(
velocity=self._last_velocity,
keypoints=self._last_keypoints,
)
return TrackingPrediction(
velocity=self._last_velocity,
keypoints=self._last_keypoints + self._last_velocity * delta_t_s,
@ -128,10 +139,12 @@ class LastDifferenceVelocityFilter(GenericVelocityFilter):
def update(self, keypoints: Float[Array, "J 3"], timestamp: datetime) -> None:
delta_t_s = (timestamp - self._last_timestamp).total_seconds()
assert delta_t_s >= 0, f"delta_t_s is negative: {delta_t_s}"
self._last_velocity = (keypoints - self._last_keypoints) / delta_t_s
if delta_t_s <= 0:
pass
else:
self._last_timestamp = timestamp
self._last_velocity = (keypoints - self._last_keypoints) / delta_t_s
self._last_keypoints = keypoints
self._last_timestamp = timestamp
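A sketch of what the new guard buys (the `LastDifferenceVelocityFilter(keypoints, timestamp)` constructor is taken from its use later in this diff; the arrays and timestamps are made up):

```python
from datetime import datetime, timedelta
import jax.numpy as jnp

t0 = datetime(2024, 4, 2, 12, 0, 0)
f = LastDifferenceVelocityFilter(jnp.zeros((17, 3)), t0)
f.update(jnp.ones((17, 3)), t0)  # delta_t_s == 0: velocity update skipped, no division by zero
f.update(jnp.full((17, 3), 2.0), t0 + timedelta(seconds=1))  # normal update: velocity = (2 - 1) / 1 s
pred = f.predict(t0 + timedelta(seconds=2))  # extrapolates one second past the last update
```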
def get(self) -> TrackingPrediction:
if self._last_velocity is None:
@ -162,20 +175,8 @@ class LeastMeanSquareVelocityFilter(GenericVelocityFilter):
historical_timestamps: Sequence[datetime],
max_samples: int = 10,
):
"""
Args:
historical_3d_poses: sequence of 3D poses, at least one element is required
historical_timestamps: sequence of timestamps, whose length is the same as `historical_3d_poses`
max_samples: maximum number of samples to keep
"""
assert (N := len(historical_3d_poses)) == len(
historical_timestamps
), f"the length of `historical_3d_poses` and `historical_timestamps` must be the same; got {N} and {len(historical_timestamps)}"
if N < 1:
raise ValueError("at least one historical 3D pose is required")
assert len(historical_3d_poses) == len(historical_timestamps)
temp = zip(historical_3d_poses, historical_timestamps)
# sorted by timestamp
temp_sorted = sorted(temp, key=lambda x: x[1])
self._historical_3d_poses = deque(
map(lambda x: x[0], temp_sorted), maxlen=max_samples
@ -201,7 +202,6 @@ class LeastMeanSquareVelocityFilter(GenericVelocityFilter):
latest_timestamp = self._historical_timestamps[-1]
delta_t_s = (timestamp - latest_timestamp).total_seconds()
assert delta_t_s >= 0, f"delta_t_s is negative: {delta_t_s}"
if self._velocity is None:
return TrackingPrediction(
@ -243,6 +243,7 @@ class LeastMeanSquareVelocityFilter(GenericVelocityFilter):
keypoints_reshaped = keypoints.reshape(n_samples, -1)
# Use JAX's lstsq to solve the least squares problem
# This is more numerically stable than manually computing pseudoinverse
coefficients, _, _, _ = jnp.linalg.lstsq(X, keypoints_reshaped, rcond=None)
# Coefficients shape is [2, J*3]
@ -559,8 +560,8 @@ class AffinityResult:
matrix: Float[Array, "T D"]
trackings: Sequence[Tracking]
detections: Sequence[Detection]
indices_T: Int[Array, "T"] # pylint: disable=invalid-name
indices_D: Int[Array, "D"] # pylint: disable=invalid-name
indices_T: Int[Array, "A"] # pylint: disable=invalid-name
indices_D: Int[Array, "A"] # pylint: disable=invalid-name
def tracking_association(
self,

filter_object_by_box.ipynb (new file, 1452 lines)

File diff suppressed because one or more lines are too long

filter_object_by_box.py (new file, 208 lines)

@ -0,0 +1,208 @@
import numpy as np
import cv2
from typing import (
TypeAlias,
TypedDict,
)
from jaxtyping import Num
from shapely.geometry import Polygon
NDArray: TypeAlias = np.ndarray
# 3D triangles (by vertex key) covering every face of the box
box_triangles_list = [
["4", "6", "7"],
["4", "5", "6"],
["2", "5", "6"],
["1", "2", "5"],
["1", "2", "3"],
["0", "1", "3"],
["0", "3", "7"],
["0", "4", "7"],
["2", "6", "7"],
["2", "3", "7"],
["0", "4", "5"],
["0", "1", "5"],
]
class Camera_Params(TypedDict):
rvec: Num[NDArray, "3"]
tvec: Num[NDArray, "3"]
camera_matrix: Num[NDArray, "3 3"]
dist: Num[NDArray, "N"]
width: int
height: int
class KeypointDataset(TypedDict):
frame_index: int
boxes: Num[NDArray, "N 4"]
kps: Num[NDArray, "N J 2"]
kps_scores: Num[NDArray, "N J"]
# Reproject a 3D point into a camera's 2D image plane using its intrinsics and extrinsics
def reproject_3d_to_2d(point_3d, camera_param):
point_2d, _ = cv2.projectPoints(
objectPoints=point_3d,
rvec=np.array(camera_param.params.Rt[:3, :3]),
tvec=np.array(camera_param.params.Rt[:3, 3]),
cameraMatrix=np.array(camera_param.params.K),
distCoeffs=np.array(camera_param.params.dist_coeffs),
)
point_2d = point_2d.reshape(-1).astype(int)
return point_2d
# Compute the eight 3D vertices of the box
def calculate_cube_vertices(position, dimensions):
[cx, cy, cz] = position
[width, height, depth] = dimensions
halfWidth = width / 2
halfHeight = height / 2
halfDepth = depth / 2
return [
[cx - halfWidth, cy - halfHeight, cz - halfDepth],
[cx + halfWidth, cy - halfHeight, cz - halfDepth],
[cx + halfWidth, cy + halfHeight, cz - halfDepth],
[cx - halfWidth, cy + halfHeight, cz - halfDepth],
[cx - halfWidth, cy - halfHeight, cz + halfDepth],
[cx + halfWidth, cy - halfHeight, cz + halfDepth],
[cx + halfWidth, cy + halfHeight, cz + halfDepth],
[cx - halfWidth, cy + halfHeight, cz + halfDepth],
]
# Assemble the box's 3D vertices, keyed "0" through "7"
def calculate_box_3d_points():
# box origin, offset from the center of the hexahedron
box_origin_position = [0.205 + 0.2, 0.205 + 0.50, -0.205 - 0.45]
# box dimensions in meters: width 0.65, height 1.8, depth 1.0
box_geometry = [0.65, 1.8, 1]
filter_box_points_3d = calculate_cube_vertices(box_origin_position, box_geometry)
filter_box_points_3d = {
str(index): element for index, element in enumerate(filter_box_points_3d)
}
return filter_box_points_3d
# Reproject the box vertices into 2D image coordinates
def calculate_box_2d_points(filter_box_points_3d, camera_param):
box_points_2d = dict()
for element_index, element_point_3d in enumerate(filter_box_points_3d.values()):
box_points_2d[str(element_index)] = reproject_3d_to_2d(
np.array(element_point_3d), camera_param
).tolist()
return box_points_2d
# Collect the 2D triangle vertices for every face of the box
def calculate_box_common_scope(box_points_2d):
box_triangles_all_points = []
# iterate over the triangles
for i in range(len(box_triangles_list)):
# gather the 2D vertices of one triangle
single_triangles = []
for element_key in box_triangles_list[i]:
single_triangles.append(box_points_2d[element_key])
box_triangles_all_points.append(single_triangles)
return box_triangles_all_points
def calculate_triangle_union(triangles):
"""
计算多个三角形的并集区域
参数:
triangles: 包含多个三角形的列表,每个三角形由三个点的坐标组成
返回:
union_area: 并集区域的面积
union_polygon: 表示并集区域的多边形对象
"""
# 创建多边形对象列表
polygons = [Polygon(tri) for tri in triangles]
# fold the polygons into their union
union_polygon = polygons[0]
for polygon in polygons[1:]:
union_polygon = union_polygon.union(polygon)
# area of the union
union_area = union_polygon.area
return union_area, union_polygon
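shapely also ships this fold as a single call, which scales better when there are many polygons:

```python
from shapely.geometry import Polygon
from shapely.ops import unary_union

union_polygon = unary_union([Polygon(tri) for tri in triangles])
```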
# Ray casting: test whether a point lies inside the box's reprojected 2D region
def point_in_polygon(p, polygon):
x, y = p
n = len(polygon)
intersections = 0
on_boundary = False
for i in range(n):
xi, yi = polygon[i]
xj, yj = polygon[(i + 1) % n] # wrap around to close the polygon
# is the point exactly on a vertex?
if (x == xi and y == yi) or (x == xj and y == yj):
on_boundary = True
break
# is the point on an edge (non-vertex case)?
if (min(xi, xj) <= x <= max(xi, xj)) and (min(yi, yj) <= y <= max(yi, yj)):
cross = (x - xi) * (yj - yi) - (y - yi) * (xj - xi)
if cross == 0:
on_boundary = True
break
# intersection of the horizontal ray with this edge (skipping horizontal edges)
if (yi > y) != (yj > y):
slope = (xj - xi) / (yj - yi) if (yj - yi) != 0 else float("inf")
x_intersect = xi + (y - yi) * slope
if x <= x_intersect:
intersections += 1
if on_boundary:
return False
return intersections % 2 == 1 # an odd crossing count means inside; boundary counts as outside
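A quick check of the ray-casting test on a square (a sketch; note that boundary points deliberately count as outside):

```python
square = [(0, 0), (4, 0), (4, 4), (0, 4)]
print(point_in_polygon((2, 2), square))  # True: interior
print(point_in_polygon((2, 0), square))  # False: on an edge
print(point_in_polygon((5, 2), square))  # False: outside
```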
# Extract the contour points of the union region
def get_contours(union_polygon):
if union_polygon.geom_type == "Polygon":
# single polygon: read off the exterior ring
x, y = union_polygon.exterior.xy
contours = [(list(x)[i], list(y)[i]) for i in range(len(x))]
contours = np.array(contours, np.int32)
return contours
# note: MultiPolygon unions are not handled, so the function returns None for them
# Keep keypoint sets whose reference centers fall inside the box's reprojected region
def filter_kps_in_contours(kps, contours) -> bool:
# midpoints of the keypoint pairs (4, 5) and (16, 17)
keypoint_index: list[list[int]] = [[4, 5], [16, 17]]
centers = []
for element_keypoint in keypoint_index:
x1, y1 = kps[element_keypoint[0]]
x2, y2 = kps[element_keypoint[1]]
centers.append([(x1 + x2) / 2, (y1 + y2) / 2])
return point_in_polygon(centers[0], contours) and point_in_polygon(
centers[1], contours
)
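Putting the helpers together, a hypothetical end-to-end filter for one camera view might look like this (a sketch: `camera` stands in for a calibrated camera object exposing the `.params.Rt` / `.params.K` / `.params.dist_coeffs` layout that `reproject_3d_to_2d` reads, and `kps` is a placeholder keypoint array):

```python
import numpy as np

box_points_3d = calculate_box_3d_points()                        # 8 labelled vertices
box_points_2d = calculate_box_2d_points(box_points_3d, camera)   # reproject into this view
triangles = calculate_box_common_scope(box_points_2d)            # 12 face triangles in 2D
_, union_polygon = calculate_triangle_union(triangles)           # union of their footprints
contours = get_contours(union_polygon)                           # integer contour ring
kps = np.zeros((133, 2))                                         # placeholder whole-body keypoints
keep = filter_kps_in_contours(kps, contours)                     # True if both centers are inside
```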


@ -0,0 +1,282 @@
[
{
"kps": [
419.0,
154.0
],
"kps_scores": 1.0,
"index": 0
},
{
"kps": [
419.0521240234375,
154.07498168945312
],
"kps_scores": 1.0,
"index": 1
},
{
"kps": [
418.5992736816406,
154.3507080078125
],
"kps_scores": 1.0,
"index": 2
},
{
"kps": [
417.0777893066406,
154.17327880859375
],
"kps_scores": 1.0,
"index": 3
},
{
"kps": [
416.8981628417969,
154.15330505371094
],
"kps_scores": 1.0,
"index": 4
},
{
"kps": [
415.1317443847656,
153.68324279785156
],
"kps_scores": 1.0,
"index": 5
},
{
"kps": [
413.2596130371094,
153.39761352539062
],
"kps_scores": 1.0,
"index": 6
},
{
"kps": [
412.7089538574219,
153.3645782470703
],
"kps_scores": 1.0,
"index": 7
},
{
"kps": [
409.3253173828125,
152.9347686767578
],
"kps_scores": 1.0,
"index": 8
},
{
"kps": [
404.74853515625,
152.21153259277344
],
"kps_scores": 1.0,
"index": 9
},
{
"kps": [
404.3977355957031,
152.19647216796875
],
"kps_scores": 1.0,
"index": 10
},
{
"kps": [
396.53131103515625,
152.09912109375
],
"kps_scores": 1.0,
"index": 11
},
{
"kps": [
393.76605224609375,
151.91282653808594
],
"kps_scores": 1.0,
"index": 12
},
{
"kps": [
393.28106689453125,
151.76124572753906
],
"kps_scores": 1.0,
"index": 13
},
{
"kps": [
383.2342834472656,
152.3790740966797
],
"kps_scores": 1.0,
"index": 14
},
{
"kps": [
379.7545471191406,
152.79055786132812
],
"kps_scores": 1.0,
"index": 15
},
{
"kps": [
379.8231506347656,
152.8155975341797
],
"kps_scores": 1.0,
"index": 16
},
{
"kps": [
370.0028076171875,
155.16213989257812
],
"kps_scores": 1.0,
"index": 17
},
{
"kps": [
366.5267639160156,
155.72059631347656
],
"kps_scores": 1.0,
"index": 18
},
{
"kps": [
366.69610595703125,
156.3056182861328
],
"kps_scores": 1.0,
"index": 19
},
{
"kps": [
359.8770751953125,
158.69798278808594
],
"kps_scores": 1.0,
"index": 20
},
{
"kps": [
356.67681884765625,
160.0414581298828
],
"kps_scores": 1.0,
"index": 21
},
{
"kps": [
348.1063232421875,
163.32858276367188
],
"kps_scores": 1.0,
"index": 22
},
{
"kps": [
343.6862487792969,
165.0043182373047
],
"kps_scores": 1.0,
"index": 23
},
{
"kps": [
339.2411804199219,
167.18580627441406
],
"kps_scores": 1.0,
"index": 24
},
{
"kps": [
330.0,
170.0
],
"kps_scores": 0.0,
"index": 25
},
{
"kps": [
322.0425720214844,
174.9293975830078
],
"kps_scores": 1.0,
"index": 26
},
{
"kps": [
310.0,
176.0
],
"kps_scores": 0.0,
"index": 27
},
{
"kps": [
305.0433349609375,
178.03123474121094
],
"kps_scores": 1.0,
"index": 28
},
{
"kps": [
293.71295166015625,
183.8294219970703
],
"kps_scores": 1.0,
"index": 29
},
{
"kps": [
291.28656005859375,
184.33445739746094
],
"kps_scores": 1.0,
"index": 30
},
{
"kps": [
281.0,
190.0
],
"kps_scores": 0.0,
"index": 31
},
{
"kps": [
272.0,
200.0
],
"kps_scores": 0.0,
"index": 32
},
{
"kps": [
261.0457763671875,
211.67132568359375
],
"kps_scores": 1.0,
"index": 33
},
{
"kps": [
239.03567504882812,
248.68519592285156
],
"kps_scores": 1.0,
"index": 34
}
]


@ -0,0 +1,282 @@
[
{
"kps": [
474.0,
215.00003051757812
],
"kps_scores": 1.0,
"index": 0
},
{
"kps": [
474.0710754394531,
215.04542541503906
],
"kps_scores": 1.0,
"index": 1
},
{
"kps": [
476.81365966796875,
215.0387420654297
],
"kps_scores": 1.0,
"index": 2
},
{
"kps": [
479.3288269042969,
214.4371795654297
],
"kps_scores": 1.0,
"index": 3
},
{
"kps": [
479.3817443847656,
214.49256896972656
],
"kps_scores": 1.0,
"index": 4
},
{
"kps": [
483.0047302246094,
213.85231018066406
],
"kps_scores": 1.0,
"index": 5
},
{
"kps": [
484.1208801269531,
213.64219665527344
],
"kps_scores": 1.0,
"index": 6
},
{
"kps": [
484.140869140625,
213.63470458984375
],
"kps_scores": 1.0,
"index": 7
},
{
"kps": [
487.458251953125,
213.45497131347656
],
"kps_scores": 1.0,
"index": 8
},
{
"kps": [
488.8343505859375,
213.4651336669922
],
"kps_scores": 1.0,
"index": 9
},
{
"kps": [
488.899658203125,
213.48526000976562
],
"kps_scores": 1.0,
"index": 10
},
{
"kps": [
493.831787109375,
214.70533752441406
],
"kps_scores": 1.0,
"index": 11
},
{
"kps": [
495.60980224609375,
215.26271057128906
],
"kps_scores": 1.0,
"index": 12
},
{
"kps": [
495.5881042480469,
215.2436065673828
],
"kps_scores": 1.0,
"index": 13
},
{
"kps": [
502.015380859375,
217.81201171875
],
"kps_scores": 1.0,
"index": 14
},
{
"kps": [
504.2356262207031,
218.78392028808594
],
"kps_scores": 1.0,
"index": 15
},
{
"kps": [
504.2625427246094,
218.81021118164062
],
"kps_scores": 1.0,
"index": 16
},
{
"kps": [
511.97552490234375,
222.26150512695312
],
"kps_scores": 1.0,
"index": 17
},
{
"kps": [
514.9180908203125,
224.3387908935547
],
"kps_scores": 1.0,
"index": 18
},
{
"kps": [
514.7620239257812,
224.2892608642578
],
"kps_scores": 1.0,
"index": 19
},
{
"kps": [
524.9593505859375,
230.30003356933594
],
"kps_scores": 1.0,
"index": 20
},
{
"kps": [
528.3402709960938,
232.76568603515625
],
"kps_scores": 1.0,
"index": 21
},
{
"kps": [
528.371826171875,
232.73399353027344
],
"kps_scores": 1.0,
"index": 22
},
{
"kps": [
538.7906494140625,
240.9889678955078
],
"kps_scores": 1.0,
"index": 23
},
{
"kps": [
538.7630004882812,
241.00299072265625
],
"kps_scores": 1.0,
"index": 24
},
{
"kps": [
550.0248413085938,
248.24708557128906
],
"kps_scores": 1.0,
"index": 25
},
{
"kps": [
554.3512573242188,
250.6501922607422
],
"kps_scores": 1.0,
"index": 26
},
{
"kps": [
554.0921020507812,
250.47769165039062
],
"kps_scores": 1.0,
"index": 27
},
{
"kps": [
567.93212890625,
266.1629943847656
],
"kps_scores": 1.0,
"index": 28
},
{
"kps": [
571.8528442382812,
273.5104675292969
],
"kps_scores": 1.0,
"index": 29
},
{
"kps": [
571.9888305664062,
273.5711669921875
],
"kps_scores": 1.0,
"index": 30
},
{
"kps": [
586.6533203125,
309.09576416015625
],
"kps_scores": 1.0,
"index": 31
},
{
"kps": [
591.8392944335938,
325.38385009765625
],
"kps_scores": 1.0,
"index": 32
},
{
"kps": [
592.3212280273438,
325.2934265136719
],
"kps_scores": 1.0,
"index": 33
},
{
"kps": [
603.3639526367188,
362.4980773925781
],
"kps_scores": 1.0,
"index": 34
}
]


@ -0,0 +1,282 @@
[
{
"kps": [
461.0,
164.0
],
"kps_scores": 1.0,
"index": 0
},
{
"kps": [
460.9234619140625,
164.2275390625
],
"kps_scores": 1.0,
"index": 1
},
{
"kps": [
460.93524169921875,
164.19480895996094
],
"kps_scores": 1.0,
"index": 2
},
{
"kps": [
460.4592590332031,
164.14320373535156
],
"kps_scores": 1.0,
"index": 3
},
{
"kps": [
459.9245910644531,
164.054931640625
],
"kps_scores": 1.0,
"index": 4
},
{
"kps": [
459.8656921386719,
164.08154296875
],
"kps_scores": 1.0,
"index": 5
},
{
"kps": [
456.9087219238281,
163.1707305908203
],
"kps_scores": 1.0,
"index": 6
},
{
"kps": [
455.7566223144531,
162.69784545898438
],
"kps_scores": 1.0,
"index": 7
},
{
"kps": [
455.740478515625,
162.74818420410156
],
"kps_scores": 1.0,
"index": 8
},
{
"kps": [
449.8667907714844,
161.95462036132812
],
"kps_scores": 1.0,
"index": 9
},
{
"kps": [
447.55975341796875,
162.12559509277344
],
"kps_scores": 1.0,
"index": 10
},
{
"kps": [
447.5325012207031,
162.12460327148438
],
"kps_scores": 1.0,
"index": 11
},
{
"kps": [
439.9998474121094,
162.59873962402344
],
"kps_scores": 1.0,
"index": 12
},
{
"kps": [
437.3090515136719,
162.88577270507812
],
"kps_scores": 1.0,
"index": 13
},
{
"kps": [
437.2088623046875,
162.84994506835938
],
"kps_scores": 1.0,
"index": 14
},
{
"kps": [
429.199951171875,
164.5860595703125
],
"kps_scores": 1.0,
"index": 15
},
{
"kps": [
429.32745361328125,
164.66001892089844
],
"kps_scores": 1.0,
"index": 16
},
{
"kps": [
424.8293762207031,
166.40106201171875
],
"kps_scores": 1.0,
"index": 17
},
{
"kps": [
419.6496887207031,
168.80294799804688
],
"kps_scores": 1.0,
"index": 18
},
{
"kps": [
419.6795349121094,
168.93418884277344
],
"kps_scores": 1.0,
"index": 19
},
{
"kps": [
414.8919677734375,
172.65428161621094
],
"kps_scores": 1.0,
"index": 20
},
{
"kps": [
410.0992431640625,
175.77218627929688
],
"kps_scores": 1.0,
"index": 21
},
{
"kps": [
410.0442810058594,
175.911376953125
],
"kps_scores": 1.0,
"index": 22
},
{
"kps": [
400.20159912109375,
184.33380126953125
],
"kps_scores": 1.0,
"index": 23
},
{
"kps": [
396.4606628417969,
186.7172088623047
],
"kps_scores": 1.0,
"index": 24
},
{
"kps": [
396.3185119628906,
186.76808166503906
],
"kps_scores": 1.0,
"index": 25
},
{
"kps": [
382.623291015625,
192.941650390625
],
"kps_scores": 1.0,
"index": 26
},
{
"kps": [
376.8236999511719,
195.2269744873047
],
"kps_scores": 1.0,
"index": 27
},
{
"kps": [
376.66937255859375,
195.1109161376953
],
"kps_scores": 1.0,
"index": 28
},
{
"kps": [
362.7231750488281,
209.30923461914062
],
"kps_scores": 1.0,
"index": 29
},
{
"kps": [
355.9901123046875,
216.26303100585938
],
"kps_scores": 1.0,
"index": 30
},
{
"kps": [
356.3956298828125,
216.3310546875
],
"kps_scores": 1.0,
"index": 31
},
{
"kps": [
343.6780090332031,
235.2663116455078
],
"kps_scores": 1.0,
"index": 32
},
{
"kps": [
332.50238037109375,
261.8990783691406
],
"kps_scores": 1.0,
"index": 33
},
{
"kps": [
332.8721923828125,
261.7060546875
],
"kps_scores": 1.0,
"index": 34
}
]

File diff suppressed because one or more lines are too long

play.ipynb (new file, 3268 lines)

File diff suppressed because it is too large


@ -64,7 +64,6 @@ from app.tracking import (
TrackingID,
AffinityResult,
LastDifferenceVelocityFilter,
LeastMeanSquareVelocityFilter,
Tracking,
TrackingState,
)
@ -73,19 +72,19 @@ from app.visualize.whole_body import visualize_whole_body
NDArray: TypeAlias = np.ndarray
# %%
DATASET_PATH = Path("samples") / "04_02"
AK_CAMERA_DATASET: ak.Array = ak.from_parquet(DATASET_PATH / "camera_params.parquet") # type: ignore
DELTA_T_MIN = timedelta(milliseconds=1)
display(AK_CAMERA_DATASET)
DATASET_PATH = Path("samples") / "04_02" # dataset root
AK_CAMERA_DATASET: ak.Array = ak.from_parquet(DATASET_PATH / "camera_params.parquet") # type: ignore # camera parameters from parquet
DELTA_T_MIN = timedelta(milliseconds=1) # minimum time delta
display(AK_CAMERA_DATASET) # show the camera parameters
# %%
class Resolution(TypedDict):
class Resolution(TypedDict): # image resolution in pixels
width: int
height: int
class Intrinsic(TypedDict):
class Intrinsic(TypedDict): # camera intrinsic parameters
camera_matrix: Num[Array, "3 3"]
"""
K
@ -96,12 +95,12 @@ class Intrinsic(TypedDict):
"""
class Extrinsic(TypedDict):
class Extrinsic(TypedDict): # camera extrinsic parameters
rvec: Num[NDArray, "3"]
tvec: Num[NDArray, "3"]
class ExternalCameraParams(TypedDict):
class ExternalCameraParams(TypedDict): # camera parameters as stored on disk
name: str
port: int
intrinsic: Intrinsic
@ -110,93 +109,93 @@ class ExternalCameraParams(TypedDict):
# %%
def read_dataset_by_port(port: int) -> ak.Array:
P = DATASET_PATH / f"{port}.parquet"
return ak.from_parquet(P)
def read_dataset_by_port(port: int) -> ak.Array: # load the keypoint dataset for one camera port
P = DATASET_PATH / f"{port}.parquet" # dataset file path
return ak.from_parquet(P) # read the parquet file
KEYPOINT_DATASET = {
KEYPOINT_DATASET = { # keypoint datasets keyed by camera port
int(p): read_dataset_by_port(p) for p in ak.to_numpy(AK_CAMERA_DATASET["port"])
}
# %%
class KeypointDataset(TypedDict):
frame_index: int
boxes: Num[NDArray, "N 4"]
kps: Num[NDArray, "N J 2"]
kps_scores: Num[NDArray, "N J"]
class KeypointDataset(TypedDict): # one frame of keypoint detections
frame_index: int # frame index
boxes: Num[NDArray, "N 4"] # bounding boxes: N boxes, 4 coordinates each
kps: Num[NDArray, "N J 2"] # keypoints: N objects, J joints, 2D coordinates
kps_scores: Num[NDArray, "N J"] # keypoint scores: N objects, J joints
@jaxtyped(typechecker=beartype)
def to_transformation_matrix(
rvec: Num[NDArray, "3"], tvec: Num[NDArray, "3"]
@jaxtyped(typechecker=beartype) # runtime check of array shapes against the annotations
def to_transformation_matrix( # build a 4x4 homogeneous transform from rvec and tvec
rvec: Num[NDArray, "3"], tvec: Num[NDArray, "3"]
) -> Num[NDArray, "4 4"]:
res = np.eye(4)
res[:3, :3] = R.from_rotvec(rvec).as_matrix()
res[:3, 3] = tvec
res = np.eye(4) # start from the 4x4 identity
res[:3, :3] = R.from_rotvec(rvec).as_matrix() # rotation vector -> 3x3 rotation block
res[:3, 3] = tvec # translation in the last column
return res
@jaxtyped(typechecker=beartype)
def undistort_points(
points: Num[NDArray, "M 2"],
camera_matrix: Num[NDArray, "3 3"],
dist_coeffs: Num[NDArray, "N"],
) -> Num[NDArray, "M 2"]:
K = camera_matrix
def undistort_points( # undo lens distortion on image points
points: Num[NDArray, "M 2"], # M points, each an (x, y) pixel coordinate
camera_matrix: Num[NDArray, "3 3"], # 3x3 camera intrinsic matrix
dist_coeffs: Num[NDArray, "N"], # N distortion coefficients
) -> Num[NDArray, "M 2"]: # returns the M undistorted points
K = camera_matrix # short aliases
dist = dist_coeffs
res = undistortPoints(points, K, dist, P=K) # type: ignore
return res.reshape(-1, 2)
res = undistortPoints(points, K, dist, P=K) # type: ignore # OpenCV point undistortion, reprojected with P=K
return res.reshape(-1, 2) # reshape to M x 2
def from_camera_params(camera: ExternalCameraParams) -> Camera:
def from_camera_params(camera: ExternalCameraParams) -> Camera: # convert on-disk camera parameters into a Camera object
rt = jnp.array(
to_transformation_matrix(
ak.to_numpy(camera["extrinsic"]["rvec"]),
to_transformation_matrix( # combine rvec and tvec into a homogeneous transform
ak.to_numpy(camera["extrinsic"]["rvec"]), # awkward -> NumPy
ak.to_numpy(camera["extrinsic"]["tvec"]),
)
)
K = jnp.array(camera["intrinsic"]["camera_matrix"]).reshape(3, 3)
dist_coeffs = jnp.array(camera["intrinsic"]["distortion_coefficients"])
image_size = jnp.array(
K = jnp.array(camera["intrinsic"]["camera_matrix"]).reshape(3, 3) # intrinsic matrix, reshaped to 3x3
dist_coeffs = jnp.array(camera["intrinsic"]["distortion_coefficients"]) # distortion coefficients
image_size = jnp.array( # image width and height as a JAX array
(camera["resolution"]["width"], camera["resolution"]["height"])
)
return Camera(
id=camera["name"],
params=CameraParams(
K=K,
Rt=rt,
dist_coeffs=dist_coeffs,
image_size=image_size,
params=CameraParams( # bundle all camera parameters
K=K, # intrinsic matrix
Rt=rt, # extrinsic homogeneous transform
dist_coeffs=dist_coeffs, # distortion coefficients
image_size=image_size, # image resolution
),
)
def preprocess_keypoint_dataset(
dataset: Sequence[KeypointDataset],
camera: Camera,
fps: float,
start_timestamp: datetime,
) -> Generator[Detection, None, None]:
frame_interval_s = 1 / fps
def preprocess_keypoint_dataset( # turn a sequence of KeypointDataset frames into a stream of Detection objects
dataset: Sequence[KeypointDataset], # input keypoint frames
camera: Camera, # camera the frames came from
fps: float, # frame rate (frames per second)
start_timestamp: datetime, # timestamp of frame 0
) -> Generator[Detection, None, None]: # yields one Detection per person per frame
frame_interval_s = 1 / fps # seconds per frame
for el in dataset:
frame_index = el["frame_index"]
frame_index = el["frame_index"] # current frame index
timestamp = start_timestamp + timedelta(seconds=frame_index * frame_interval_s)
for kp, kp_score in zip(el["kps"], el["kps_scores"]):
yield Detection(
keypoints=jnp.array(kp),
confidences=jnp.array(kp_score),
camera=camera,
timestamp=timestamp,
keypoints=jnp.array(kp), # keypoint coordinates
confidences=jnp.array(kp_score), # keypoint confidences
camera=camera, # source camera
timestamp=timestamp, # detection timestamp
)
# %%
DetectionGenerator: TypeAlias = Generator[Detection, None, None]
DetectionGenerator: TypeAlias = Generator[Detection, None, None] # alias for a stream of detections
def sync_batch_gen(gens: Sequence[DetectionGenerator], diff: timedelta):
# Align several asynchronous detection streams by timestamp, yielding time-aligned batches.
def sync_batch_gen(gens: Sequence[DetectionGenerator], diff: timedelta): # gens: detection generators; diff: maximum timestamp spread within one batch
"""
given a list of detection generators, return a generator that yields a batch of detections
@ -204,13 +203,13 @@ def sync_batch_gen(gens: Sequence[DetectionGenerator], diff: timedelta):
gens: list of detection generators
diff: maximum timestamp difference between detections to consider them part of the same batch
"""
N = len(gens)
last_batch_timestamp: Optional[datetime] = None
next_batch_timestamp: Optional[datetime] = None
current_batch: list[Detection] = []
next_batch: list[Detection] = []
paused: list[bool] = [False] * N
finished: list[bool] = [False] * N
N = len(gens) # number of generators
last_batch_timestamp: Optional[datetime] = None # timestamp of the current batch
next_batch_timestamp: Optional[datetime] = None # timestamp of the next batch
current_batch: list[Detection] = [] # detections in the current batch
next_batch: list[Detection] = [] # detections already read for the next batch
paused: list[bool] = [False] * N # per-generator pause flags
finished: list[bool] = [False] * N # per-generator exhaustion flags
def reset_paused():
"""
@ -224,56 +223,56 @@ def sync_batch_gen(gens: Sequence[DetectionGenerator], diff: timedelta):
EPS = 1e-6
# a small epsilon to avoid floating point precision issues
diff_esp = diff - timedelta(seconds=EPS)
diff_esp = diff - timedelta(seconds=EPS) # shrink the window slightly to absorb floating-point jitter
while True:
for i, gen in enumerate(gens):
try:
if finished[i] or paused[i]:
continue
val = next(gen)
if last_batch_timestamp is None:
val = next(gen) # pull the next detection
if last_batch_timestamp is None: # timestamp comparison and batch assignment
last_batch_timestamp = val.timestamp
current_batch.append(val)
current_batch.append(val) # seed the first batch
else:
if abs(val.timestamp - last_batch_timestamp) >= diff_esp:
next_batch.append(val)
next_batch.append(val) # past the window: belongs to the next batch
if next_batch_timestamp is None:
next_batch_timestamp = val.timestamp
paused[i] = True
paused[i] = True # pause this generator until the batch flips
if all(paused):
yield current_batch
yield current_batch # every generator paused: emit the current batch
current_batch = next_batch
next_batch = []
last_batch_timestamp = next_batch_timestamp
next_batch_timestamp = None
reset_paused()
reset_paused() # clear the pause flags
else:
current_batch.append(val)
current_batch.append(val) # within the window: same batch
except StopIteration:
finished[i] = True
paused[i] = True
if all(finished):
if len(current_batch) > 0:
# All generators exhausted, flush remaining batch and exit
yield current_batch
yield current_batch # flush the final batch
break
# %%
@overload
def to_projection_matrix(
def to_projection_matrix( # compose a 4x4 transform and a 3x3 intrinsic matrix into a 3x4 projection matrix
transformation_matrix: Num[NDArray, "4 4"], camera_matrix: Num[NDArray, "3 3"]
) -> Num[NDArray, "3 4"]: ...
@overload
def to_projection_matrix(
def to_projection_matrix( # compose a 4x4 transform and a 3x3 intrinsic matrix into a 3x4 projection matrix
transformation_matrix: Num[Array, "4 4"], camera_matrix: Num[Array, "3 3"]
) -> Num[Array, "3 4"]: ...
@jaxtyped(typechecker=beartype)
def to_projection_matrix(
def to_projection_matrix( # actual implementation; jit-compiled below for speed
transformation_matrix: Num[Any, "4 4"],
camera_matrix: Num[Any, "3 3"],
) -> Num[Any, "3 4"]:
@ -284,28 +283,29 @@ to_projection_matrix_jit = jax.jit(to_projection_matrix)
@jaxtyped(typechecker=beartype)
def dlt(
H1: Num[NDArray, "3 4"],
H2: Num[NDArray, "3 4"],
p1: Num[NDArray, "2"],
p2: Num[NDArray, "2"],
) -> Num[NDArray, "3"]:
def dlt( # Direct Linear Transformation
H1: Num[NDArray, "3 4"], # projection matrix of the first camera
H2: Num[NDArray, "3 4"], # projection matrix of the second camera
p1: Num[NDArray, "2"], # image point (u1, v1) in the first camera
p2: Num[NDArray, "2"], # image point (u2, v2) in the second camera
) -> Num[NDArray, "3"]: # the triangulated 3D point (X, Y, Z)
"""
Direct Linear Transformation
"""
A = [
p1[1] * H1[2, :] - H1[1, :],
H1[0, :] - p1[0] * H1[2, :],
p2[1] * H2[2, :] - H2[1, :],
H2[0, :] - p2[0] * H2[2, :],
A = [ # build the 4x4 constraint matrix
p1[1] * H1[2, :] - H1[1, :], # row 1: v1*H1[2,:] - H1[1,:]
H1[0, :] - p1[0] * H1[2, :], # row 2: H1[0,:] - u1*H1[2,:]
p2[1] * H2[2, :] - H2[1, :], # row 3: v2*H2[2,:] - H2[1,:]
H2[0, :] - p2[0] * H2[2, :], # row 4: H2[0,:] - u2*H2[2,:]
]
A = np.array(A).reshape((4, 4))
A = np.array(A).reshape((4, 4)) # stack into a 4x4 matrix
B = A.transpose() @ A
# solve the overdetermined homogeneous system
B = A.transpose() @ A # form A^T A (4x4)
from scipy import linalg
U, s, Vh = linalg.svd(B, full_matrices=False)
return Vh[3, 0:3] / Vh[3, 3]
U, s, Vh = linalg.svd(B, full_matrices=False) # SVD; the last right singular vector spans the null space
return Vh[3, 0:3] / Vh[3, 3] # dehomogenize the solution
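A tiny sanity check of `dlt` (a sketch: two ideal cameras with identity intrinsics, the second translated one unit along x, both observing the point (0, 0, 5)):

```python
import numpy as np

H1 = np.hstack([np.eye(3), np.zeros((3, 1))])                   # [I | 0]
H2 = np.hstack([np.eye(3), np.array([[-1.0], [0.0], [0.0]])])   # camera shifted along x
X = np.array([0.0, 0.0, 5.0, 1.0])                              # homogeneous ground-truth point
p1 = (H1 @ X)[:2] / (H1 @ X)[2]                                 # its projection in view 1
p2 = (H2 @ X)[:2] / (H2 @ X)[2]                                 # its projection in view 2
print(dlt(H1, H2, p1, p2))                                      # ~ [0. 0. 5.]
```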
@overload
@ -317,7 +317,7 @@ def homogeneous_to_euclidean(points: Num[Array, "N 4"]) -> Num[Array, "N 3"]: ..
@jaxtyped(typechecker=beartype)
def homogeneous_to_euclidean(
def homogeneous_to_euclidean( # convert homogeneous coordinates to Euclidean
points: Num[Any, "N 4"],
) -> Num[Any, "N 3"]:
"""
@ -332,25 +332,31 @@ def homogeneous_to_euclidean(
return points[..., :-1] / points[..., -1:]
# %%
FPS = 24
# %% Build per-camera detection generators and synchronize them into time-aligned batches.
FPS = 24 # frame rate: 24 frames per second
# one detection generator per camera (ports 5600, 5601, 5602)
image_gen_5600 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5600], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET["port"] == 5600][0]), FPS, datetime(2024, 4, 2, 12, 0, 0)) # type: ignore
image_gen_5601 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5601], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET["port"] == 5601][0]), FPS, datetime(2024, 4, 2, 12, 0, 0)) # type: ignore
image_gen_5602 = preprocess_keypoint_dataset(KEYPOINT_DATASET[5602], from_camera_params(AK_CAMERA_DATASET[AK_CAMERA_DATASET["port"] == 5602][0]), FPS, datetime(2024, 4, 2, 12, 0, 0)) # type: ignore
display(1 / FPS)
display(1 / FPS) # per-frame interval, about 0.0417 s
# synchronize the three generators; detections within 1/FPS s of each other share a batch
sync_gen = sync_batch_gen(
[image_gen_5600, image_gen_5601, image_gen_5602], timedelta(seconds=1 / FPS)
)
# %%
# %% Compute the affinity matrix between detections from different cameras via the epipolar constraint.
# input: next(sync_gen), one batch of near-simultaneous detections from several cameras
# alpha_2d=2000 weights the 2D-distance term against other cues (appearance, motion)
# output: sorted_detections, the detections in matrix order
# affinity_matrix, where matrix[i][j] scores how likely detections i and j are the same target (higher is more likely)
sorted_detections, affinity_matrix = calculate_affinity_matrix_by_epipolar_constraint(
next(sync_gen), alpha_2d=2000
)
display(sorted_detections)
# %%
display(
# %% Visualize the batch: detection timestamps and the affinity matrix.
display( # show each detection's timestamp and camera id as a list of dicts
list(
map(
lambda x: {"timestamp": str(x.timestamp), "camera": x.camera.id},
@ -358,12 +364,12 @@ display(
)
)
)
with jnp.printoptions(precision=3, suppress=True):
with jnp.printoptions(precision=3, suppress=True): # 3 decimal places, no scientific notation
display(affinity_matrix)
# %%
def clusters_to_detections(
# %% Cluster the affinity matrix so detections likely belonging to the same target group together.
def clusters_to_detections( # map index clusters back to Detection objects
clusters: Sequence[Sequence[int]], sorted_detections: Sequence[Detection]
) -> list[list[Detection]]:
"""
@ -380,17 +386,17 @@ def clusters_to_detections(
return [[sorted_detections[i] for i in cluster] for cluster in clusters]
solver = GLPKSolver()
aff_np = np.asarray(affinity_matrix).astype(np.float64)
clusters, sol_matrix = solver.solve(aff_np)
solver = GLPKSolver() # GLPK-based linear-programming solver
aff_np = np.asarray(affinity_matrix).astype(np.float64) # affinity matrix as float64 NumPy
clusters, sol_matrix = solver.solve(aff_np) # solve the clustering problem
display(clusters)
display(sol_matrix)
# %%
# %% Two helpers for flattening nested mappings.
T = TypeVar("T")
def flatten_values(
def flatten_values( # flatten a mapping of sequences into one list
d: Mapping[Any, Sequence[T]],
) -> list[T]:
"""
@ -399,7 +405,7 @@ def flatten_values(
return [v for vs in d.values() for v in vs]
def flatten_values_len(
def flatten_values_len( # total number of elements across all sequence values
d: Mapping[Any, Sequence[T]],
) -> int:
"""
@ -409,19 +415,22 @@ def flatten_values_len(
return val
# %%
# %% Overlay one cluster's keypoints from all cameras on a single image to eyeball the association.
WIDTH = 2560
HEIGHT = 1440
# clusters of indices -> clusters of Detection objects
clusters_detections = clusters_to_detections(clusters, sorted_detections)
# blank (black) canvas
im = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)
# draw every detection in the first cluster (one target seen from several cameras)
for el in clusters_detections[0]:
im = visualize_whole_body(np.asarray(el.keypoints), im)
# show the result
p = plt.imshow(im)
display(p)
# %%
# %% Same visualization for the second cluster, typically the second person in the scene.
im_prime = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)
for el in clusters_detections[1]:
im_prime = visualize_whole_body(np.asarray(el.keypoints), im_prime)
@ -430,21 +439,21 @@ p_prime = plt.imshow(im_prime)
display(p_prime)
# %%
# %% Triangulate 3D points from multiple views.
@jaxtyped(typechecker=beartype)
def triangulate_one_point_from_multiple_views_linear(
def triangulate_one_point_from_multiple_views_linear( # triangulate a single point
proj_matrices: Float[Array, "N 3 4"],
points: Num[Array, "N 2"],
confidences: Optional[Float[Array, "N"]] = None,
) -> Float[Array, "3"]:
"""
Args:
proj_matrices: (N, 3, 4) projection matrices
points: (N, 2) image-coordinates per view
confidences: (N,) optional per-view confidences in [0,1]
proj_matrices: projection matrices, shape (N, 3, 4)
points: per-view image coordinates, shape (N, 2)
confidences: per-view confidences, shape (N,), in [0.0, 1.0]
Returns:
(3,) 3D point
point_3d: the triangulated 3D point, shape (3,)
"""
assert len(proj_matrices) == len(points)
@ -470,7 +479,7 @@ def triangulate_one_point_from_multiple_views_linear(
# replace the Python `if` with a jnp.where
point_3d_homo = jnp.where(
point_3d_homo[3] <= 0, # predicate (scalar bool tracer)
point_3d_homo[3] < 0, # predicate (scalar bool tracer)
-point_3d_homo, # if True
point_3d_homo, # if False
)
@ -480,7 +489,7 @@ def triangulate_one_point_from_multiple_views_linear(
@jaxtyped(typechecker=beartype)
def triangulate_points_from_multiple_views_linear(
def triangulate_points_from_multiple_views_linear( # batched triangulation
proj_matrices: Float[Array, "N 3 4"],
points: Num[Array, "N P 2"],
confidences: Optional[Float[Array, "N P"]] = None,
@ -494,14 +503,14 @@ def triangulate_points_from_multiple_views_linear(
confidences: (N, P, 1) optional per-view confidences in [0,1]
Returns:
(P, 3) 3D point for each of the P
(P, 3) 3D point for each of the P tracks
"""
N, P, _ = points.shape
assert proj_matrices.shape[0] == N
if confidences is None:
conf = jnp.ones((N, P), dtype=jnp.float32)
else:
conf = confidences
conf = jnp.sqrt(jnp.clip(confidences, 0.0, 1.0)) # sqrt so the squared least-squares residual scales linearly with confidence
# vectorize your one-point routine over P
vmap_triangulate = jax.vmap(
@ -512,9 +521,9 @@ def triangulate_points_from_multiple_views_linear(
return vmap_triangulate(proj_matrices, points, conf)
# %%
# %% Time-weighted variants of the linear multi-view triangulation.
@jaxtyped(typechecker=beartype)
def triangulate_one_point_from_multiple_views_linear_time_weighted(
def triangulate_one_point_from_multiple_views_linear_time_weighted( # single point, time-weighted
proj_matrices: Float[Array, "N 3 4"],
points: Num[Array, "N 2"],
delta_t: Num[Array, "N"],
@ -579,7 +588,7 @@ def triangulate_one_point_from_multiple_views_linear_time_weighted(
# Ensure homogeneous coordinate is positive
point_3d_homo = jnp.where(
point_3d_homo[3] <= 0,
point_3d_homo[3] < 0,
-point_3d_homo,
point_3d_homo,
)
@ -590,7 +599,7 @@ def triangulate_one_point_from_multiple_views_linear_time_weighted(
@jaxtyped(typechecker=beartype)
def triangulate_points_from_multiple_views_linear_time_weighted(
def triangulate_points_from_multiple_views_linear_time_weighted( # batched, time-weighted
proj_matrices: Float[Array, "N 3 4"],
points: Num[Array, "N P 2"],
delta_t: Num[Array, "N"],
@ -648,9 +657,7 @@ def triangulate_points_from_multiple_views_linear_time_weighted(
)
# %%
# %% Triangulate one cluster's detections into 3D points and return the cluster's latest timestamp.
@jaxtyped(typechecker=beartype)
def triangle_from_cluster(
cluster: Sequence[Detection],
@ -667,8 +674,8 @@ def triangle_from_cluster(
)
# %%
def group_by_cluster_by_camera(
# %% Core tracking logic: create and manage global tracking state from clustered detections.
def group_by_cluster_by_camera( # keep only the newest detection per camera within a cluster
cluster: Sequence[Detection],
) -> PMap[CameraID, Detection]:
"""
@ -680,12 +687,10 @@ def group_by_cluster_by_camera(
eld = r[el.camera.id]
preserved = max([eld, el], key=lambda x: x.timestamp)
r[el.camera.id] = preserved
else:
r[el.camera.id] = el
return pmap(r)
class GlobalTrackingState:
class GlobalTrackingState: # global registry of active trackings
_last_id: int
_trackings: dict[int, Tracking]
@ -702,7 +707,7 @@ class GlobalTrackingState:
def trackings(self) -> dict[int, Tracking]:
return shallow_copy(self._trackings)
def add_tracking(self, cluster: Sequence[Detection]) -> Tracking:
def add_tracking(self, cluster: Sequence[Detection]) -> Tracking: # create a new tracking from one cluster
if len(cluster) < 2:
raise ValueError(
"cluster must contain at least 2 detections to form a tracking"
@ -717,11 +722,7 @@ class GlobalTrackingState:
tracking = Tracking(
id=next_id,
state=tracking_state,
# velocity_filter=LastDifferenceVelocityFilter(kps_3d, latest_timestamp),
velocity_filter=LeastMeanSquareVelocityFilter(
historical_3d_poses=[kps_3d],
historical_timestamps=[latest_timestamp],
),
velocity_filter=LastDifferenceVelocityFilter(kps_3d, latest_timestamp),
)
self._trackings[next_id] = tracking
self._last_id = next_id
@ -733,14 +734,14 @@ for cluster in clusters_detections:
global_tracking_state.add_tracking(cluster)
display(global_tracking_state)
# %%
next_group = next(sync_gen)
display(next_group)
# %% Pull the next time-aligned batch from sync_gen and display it.
next_group = next(sync_gen) # next batch of detections
display(next_group) # show it in Jupyter
# %%
# %% Core affinity computation between trackings and detections across cameras.
@jaxtyped(typechecker=beartype)
def calculate_distance_2d(
def calculate_distance_2d( # normalized 2D distance
left: Num[Array, "J 2"],
right: Num[Array, "J 2"],
image_size: tuple[int, int] = (1, 1),
@ -769,7 +770,7 @@ def calculate_distance_2d(
@jaxtyped(typechecker=beartype)
def calculate_affinity_2d(
def calculate_affinity_2d( # 2D affinity score
distance_2d: Float[Array, "J"],
delta_t: timedelta,
w_2d: float,
@ -802,7 +803,7 @@ def calculate_affinity_2d(
@jaxtyped(typechecker=beartype)
def perpendicular_distance_point_to_line_two_points(
def perpendicular_distance_point_to_line_two_points( # perpendicular distance from a point to a ray
point: Num[Array, "3"], line: tuple[Num[Array, "3"], Num[Array, "3"]]
) -> Float[Array, ""]:
"""
@ -825,6 +826,7 @@ def perpendicular_distance_point_to_line_two_points(
@jaxtyped(typechecker=beartype)
# Raycasting distance for multi-camera reconstruction: how well a 2D detection lines up with a 3D tracking.
def perpendicular_distance_camera_2d_points_to_tracking_raycasting(
detection: Detection,
tracking: Tracking,
@ -868,7 +870,7 @@ def perpendicular_distance_camera_2d_points_to_tracking_raycasting(
@jaxtyped(typechecker=beartype)
def calculate_affinity_3d(
def calculate_affinity_3d( # 3D affinity score
distances: Float[Array, "J"],
delta_t: timedelta,
w_3d: float,
@ -899,7 +901,7 @@ def calculate_affinity_3d(
@beartype
def calculate_tracking_detection_affinity(
def calculate_tracking_detection_affinity( # combined affinity pipeline
tracking: Tracking,
detection: Detection,
w_2d: float,
@ -961,9 +963,9 @@ def calculate_tracking_detection_affinity(
return jnp.sum(total_affinity).item()
# %%
# %% Efficient affinity-matrix computation linking Tracking objects to new Detection objects.
@beartype
def calculate_camera_affinity_matrix_jax(
def calculate_camera_affinity_matrix_jax( # affinity matrix for a single camera
trackings: Sequence[Tracking],
camera_detections: Sequence[Detection],
w_2d: float,
@ -1125,7 +1127,7 @@ def calculate_camera_affinity_matrix_jax(
@beartype
def calculate_affinity_matrix(
def calculate_affinity_matrix( # affinity matrices across all cameras
trackings: Sequence[Tracking],
detections: Sequence[Detection] | Mapping[CameraID, list[Detection]],
w_2d: float,
@ -1177,7 +1179,7 @@ def calculate_affinity_matrix(
return res
# %%
# %% Cross-view association pipeline.
# let's do cross-view association
W_2D = 1.0
ALPHA_2D = 1.0
@ -1201,8 +1203,8 @@ affinities = calculate_affinity_matrix(
display(affinities)
# %%
def affinity_result_by_tracking(
# %% Aggregate association results and update trackings.
def affinity_result_by_tracking( # group matched detections by tracking id
results: Iterable[AffinityResult],
min_affinity: float = 0.0,
) -> dict[TrackingID, list[Detection]]:
@ -1224,7 +1226,7 @@ def affinity_result_by_tracking(
return res
def update_tracking(
def update_tracking( # apply matched detections to one tracking
tracking: Tracking,
detections: Sequence[Detection],
max_delta_t: timedelta = timedelta(milliseconds=100),
@ -1275,9 +1277,9 @@ def update_tracking(
tracking.state = new_state
# %%
affinity_results_by_tracking = affinity_result_by_tracking(affinities.values())
for tracking_id, detections in affinity_results_by_tracking.items():
# %% Core update loop of the multi-target tracker.
affinity_results_by_tracking = affinity_result_by_tracking(affinities.values()) # 1. aggregate matched detections per tracking id
for tracking_id, detections in affinity_results_by_tracking.items(): # 2. update each tracking with its matches
update_tracking(global_tracking_state.trackings[tracking_id], detections)
# %%

plot_epipolar_lines.ipynb (new file, 406 lines)

File diff suppressed because one or more lines are too long


@ -14,6 +14,7 @@ dependencies = [
"jaxtyping>=0.2.38",
"jupytext>=1.17.0",
"matplotlib>=3.10.1",
"more-itertools>=10.7.0",
"opencv-python-headless>=4.11.0.86",
"optax>=0.2.4",
"orjson>=3.10.15",
@ -23,6 +24,7 @@ dependencies = [
"pyrsistent>=0.20.0",
"pytest>=8.3.5",
"scipy>=1.15.2",
"shapely>=2.1.1",
"torch>=2.6.0",
"torchvision>=0.21.0",
"typeguard>=4.4.2",

File diff suppressed because one or more lines are too long

rebuild_by_epipolar_line.py (new file, 1062 lines)

File diff suppressed because it is too large

File diff suppressed because it is too large

smooth_3d_kps.ipynb (new file, 122 lines)

@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"id": "0d48b7eb",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "dfd27584",
"metadata": {},
"outputs": [],
"source": [
"KPS_PATH = Path(\"samples/WeiHua_03.json\")\n",
"with open(KPS_PATH, \"r\") as file:\n",
" data = json.load(file)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "360f9c50",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'index:1, shape: (33, 133, 3)'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'index:2, shape: (662, 133, 3)'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for item_object_index in data.keys():\n",
" item_object = np.array(data[item_object_index])\n",
" display(f'index:{item_object_index}, shape: {item_object.shape}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 对data['2']的662帧3d关键点数据进行滑动窗口平滑处理\n",
"object_points = np.array(data['2']) # shape: (662, 133, 3)\n",
"window_size = 5\n",
"kernel = np.ones(window_size) / window_size\n",
"# 对每个关键点的每个坐标轴分别做滑动平均\n",
"smoothed_points = np.zeros_like(object_points)\n",
"# 遍历133个关节\n",
"for kp_idx in range(object_points.shape[1]):\n",
" # 遍历每个关节的空间三维坐标点\n",
" for axis in range(3):\n",
" # 对第i帧的滑动平滑方式 smoothed[i] = (point[i-2] + point[i-1] + point[i] + point[i+1] + point[i+2]) / 5\n",
" smoothed_points[:, kp_idx, axis] = np.convolve(object_points[:, kp_idx, axis], kernel, mode='same')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "24c6c0c9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'smoothed_points shape: (662, 133, 3)'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(f'smoothed_points shape: {smoothed_points.shape}')\n",
"with open(\"samples/smoothed_3d_kps.json\", \"w\") as file:\n",
" json.dump({'1':smoothed_points.tolist()}, file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cvth3pe",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
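The notebook's double loop can be collapsed into one vectorized call (a sketch; `scipy.ndimage.uniform_filter1d` with `mode="constant"` zero-pads at the ends, matching the behavior of `np.convolve(..., mode='same')` for an odd window):

```python
import numpy as np
from scipy.ndimage import uniform_filter1d

object_points = np.random.rand(662, 133, 3)  # placeholder for np.array(data['2'])
# one call smooths along the frame axis for every joint and coordinate at once
smoothed_points = uniform_filter1d(object_points, size=5, axis=0, mode="constant")
```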


@ -0,0 +1,193 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"id": "11cc2345",
"metadata": {},
"outputs": [],
"source": [
"import awkward as ak\n",
"import numpy as np\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "84348d97",
"metadata": {},
"outputs": [],
"source": [
"CAMERA_INDEX ={\n",
" 2:\"5602\",\n",
" 4:\"5604\",\n",
"}\n",
"index = 4\n",
"CAMERA_PATH = Path(\"/home/admin/Documents/ActualTest_QuanCheng/camera_ex_params_1_2025_4_20/camera_params\")\n",
"camera_data = ak.from_parquet(CAMERA_PATH / CAMERA_INDEX[index]/ \"extrinsic.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1d771740",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>[{rvec: [[-2.26], [0.0669], [-2.15]], tvec: [[0.166], ...]},\n",
" {rvec: [[2.07], [0.144], [2.21]], tvec: [[0.143], ...]},\n",
" {rvec: [[2.09], [0.0872], [2.25]], tvec: [[0.141], ...]},\n",
" {rvec: [[2.16], [0.172], [2.09]], tvec: [[0.162], ...]},\n",
" {rvec: [[2.15], [0.18], [2.09]], tvec: [[0.162], ...]},\n",
" {rvec: [[-2.22], [0.117], [-2.14]], tvec: [[0.162], ...]},\n",
" {rvec: [[2.18], [0.176], [2.08]], tvec: [[0.166], ...]},\n",
" {rvec: [[2.18], [0.176], [2.08]], tvec: [[0.166], ...]},\n",
" {rvec: [[-2.26], [0.116], [-2.1]], tvec: [[0.17], ...]},\n",
" {rvec: [[-2.26], [0.124], [-2.09]], tvec: [[0.171], ...]},\n",
" ...,\n",
" {rvec: [[-2.2], [0.0998], [-2.17]], tvec: [[0.158], ...]},\n",
" {rvec: [[-2.2], [0.0998], [-2.17]], tvec: [[0.158], ...]},\n",
" {rvec: [[2.12], [0.151], [2.16]], tvec: [[0.152], ...]},\n",
" {rvec: [[-2.3], [0.0733], [-2.1]], tvec: [[0.175], ...]},\n",
" {rvec: [[2.1], [0.16], [2.17]], tvec: [[0.149], ...]},\n",
" {rvec: [[2.1], [0.191], [2.13]], tvec: [[0.153], ...]},\n",
" {rvec: [[2.11], [0.196], [2.12]], tvec: [[0.154], ...]},\n",
" {rvec: [[2.19], [0.171], [2.08]], tvec: [[0.166], ...]},\n",
" {rvec: [[2.24], [0.0604], [2.12]], tvec: [[0.166], ...]}]\n",
"---------------------------------------------------------------------------\n",
"backend: cpu\n",
"nbytes: 10.1 kB\n",
"type: 90 * {\n",
" rvec: var * var * float64,\n",
" tvec: var * var * float64\n",
"}</pre>"
],
"text/plain": [
"<Array [{rvec: [...], tvec: [...]}, ..., {...}] type='90 * {rvec: var * var...'>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(camera_data)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "59fde11b",
"metadata": {},
"outputs": [],
"source": [
"data = []\n",
"for element in camera_data:\n",
" rvec = element[\"rvec\"]\n",
" if rvec[0]<0:\n",
" data.append({\"rvec\": rvec, \"tvec\": element[\"tvec\"]})"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4792cbc4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pyarrow._parquet.FileMetaData object at 0x7799cbf62d40>\n",
" created_by: parquet-cpp-arrow version 19.0.1\n",
" num_columns: 2\n",
" num_rows: 30\n",
" num_row_groups: 1\n",
" format_version: 2.6\n",
" serialized_size: 0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ak.to_parquet(ak.from_iter(data),\"/home/admin/Documents/ActualTest_QuanCheng/camera_ex_params_1_2025_4_20/camera_params/5604/re_extrinsic.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8225ee33",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>[{rvec: [[-2.26], [0.0669], [-2.15]], tvec: [[0.166], ...]},\n",
" {rvec: [[-2.22], [0.117], [-2.14]], tvec: [[0.162], ...]},\n",
" {rvec: [[-2.26], [0.116], [-2.1]], tvec: [[0.17], ...]},\n",
" {rvec: [[-2.26], [0.124], [-2.09]], tvec: [[0.171], ...]},\n",
" {rvec: [[-2.24], [0.133], [-2.11]], tvec: [[0.167], ...]},\n",
" {rvec: [[-2.22], [0.0556], [-2.2]], tvec: [[0.158], ...]},\n",
" {rvec: [[-2.27], [0.119], [-2.09]], tvec: [[0.172], ...]},\n",
" {rvec: [[-2.34], [0.0663], [-2.06]], tvec: [[0.181], ...]},\n",
" {rvec: [[-2.21], [0.117], [-2.15]], tvec: [[0.161], ...]},\n",
" {rvec: [[-2.33], [0.0731], [-2.08]], tvec: [[0.179], ...]},\n",
" ...,\n",
" {rvec: [[-2.23], [0.106], [-2.13]], tvec: [[0.166], ...]},\n",
" {rvec: [[-2.21], [0.054], [-2.2]], tvec: [[0.157], ...]},\n",
" {rvec: [[-2.19], [0.0169], [-2.25]], tvec: [[0.151], ...]},\n",
" {rvec: [[-2.2], [0.0719], [-2.19]], tvec: [[0.157], ...]},\n",
" {rvec: [[-2.22], [0.0726], [-2.18]], tvec: [[0.161], ...]},\n",
" {rvec: [[-2.2], [0.0742], [-2.19]], tvec: [[0.158], ...]},\n",
" {rvec: [[-2.2], [0.0998], [-2.17]], tvec: [[0.158], ...]},\n",
" {rvec: [[-2.2], [0.0998], [-2.17]], tvec: [[0.158], ...]},\n",
" {rvec: [[-2.3], [0.0733], [-2.1]], tvec: [[0.175], ...]}]\n",
"---------------------------------------------------------------------------\n",
"backend: cpu\n",
"nbytes: 3.4 kB\n",
"type: 30 * {\n",
" rvec: var * var * float64,\n",
" tvec: var * var * float64\n",
"}</pre>"
],
"text/plain": [
"<Array [{rvec: [...], tvec: [...]}, ..., {...}] type='30 * {rvec: var * var...'>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"temp_data = ak.from_parquet(\"/home/admin/Documents/ActualTest_QuanCheng/camera_ex_params_1_2025_4_20/camera_params/5604/re_extrinsic.parquet\")\n",
"display(temp_data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
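What this notebook is doing: an angle-axis rotation of theta about axis a is identical to one of 2*pi - theta about -a, so the solver's raw rvec samples split into two sign clusters that represent the same extrinsic; keeping only the rvec[0] < 0 cluster avoids averaging across the flip. An alternative to discarding half the samples is to canonicalize them first (a sketch; `canonicalize_rvec` is a hypothetical helper, and it assumes flat (3,) vectors rather than the nested (3, 1) lists in the parquet):

```python
import numpy as np

def canonicalize_rvec(rvec: np.ndarray) -> np.ndarray:
    """Map theta*a onto the equivalent (2*pi - theta)*(-a) so every sample
    lands in the rvec[0] <= 0 half-space used by this notebook."""
    theta = np.linalg.norm(rvec)
    if theta == 0.0 or rvec[0] <= 0:
        return rvec
    axis = rvec / theta
    return (2.0 * np.pi - theta) * (-axis)
```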

uv.lock (generated, 3745 changed lines)

File diff suppressed because it is too large Load Diff