Merge branch 'cpp' into 'master'

Convert to cpp for faster runtime

See merge request Percipiote/SimplePoseTriangulation!1
This commit is contained in:
DANBER
2024-10-09 08:30:16 +00:00
32 changed files with 10790 additions and 3393 deletions

37
.gitignore vendored
View File

@ -1,3 +1,40 @@
spt_wrap.*
spt.py
*.bin
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]

View File

@ -37,6 +37,11 @@ RUN python3 -c "from utils_2d_pose import load_wb_model; load_wb_model();"
# Fix an undefined symbol error with ompi # Fix an undefined symbol error with ompi
RUN echo "ldconfig" >> ~/.bashrc RUN echo "ldconfig" >> ~/.bashrc
# Install swig and later dependencies
RUN apt-get update && apt-get install -y --no-install-recommends build-essential
RUN apt-get update && apt-get install -y --no-install-recommends swig
RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev
COPY ./skelda/ /skelda/ COPY ./skelda/ /skelda/
RUN pip3 install --no-cache-dir -e /skelda/ RUN pip3 install --no-cache-dir -e /skelda/

View File

@ -34,3 +34,11 @@ Triangulation of multiple persons from multiple camera views.
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python3 /SimplePoseTriangulation/scripts/test_skelda_dataset.py python3 /SimplePoseTriangulation/scripts/test_skelda_dataset.py
``` ```
<br>
## Debugging
```bash
cd /SimplePoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py
```

12
data/e1/README.md Normal file
View File

@ -0,0 +1,12 @@
Image source:
```bash
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam01/images/00002.jpg' ./c1_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam02/images/00002.jpg' ./c2_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam03/images/00002.jpg' ./c3_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam04/images/00002.jpg' ./c4_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam05/images/00002.jpg' ./c5_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam06/images/00002.jpg' ./c6_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam07/images/00002.jpg' ./c7_00002.jpg
cp '/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam08/images/00002.jpg' ./c8_00002.jpg
```

BIN
data/e1/c1_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 315 KiB

BIN
data/e1/c2_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 298 KiB

BIN
data/e1/c3_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 391 KiB

BIN
data/e1/c4_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 KiB

BIN
data/e1/c5_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 316 KiB

BIN
data/e1/c6_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 491 KiB

BIN
data/e1/c7_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

BIN
data/e1/c8_00002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 332 KiB

483
data/e1/sample.json Normal file
View File

@ -0,0 +1,483 @@
{
"id": "006_legoassemble-000002",
"imgpaths": [
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam01/images/c1_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam02/images/c2_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam03/images/c3_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam04/images/c4_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam05/images/c5_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam06/images/c6_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam07/images/c7_00002.jpg",
"/datasets/egohumans/02_legoassemble/006_legoassemble/exo/cam08/images/c8_00002.jpg"
],
"dataset_name": "egohumans",
"room_size": [
6.0,
5.0,
2.0
],
"room_center": [
1.5,
1.0,
-0.5
],
"num_persons": 3,
"cameras": [
{
"type": "fisheye",
"name": "cam01",
"width": 3840,
"height": 2160,
"K": [
[
1905.440429,
0.0,
1920.0
],
[
0.0,
1908.738384,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.022721,
0.096403,
-0.122399,
0.06457
],
"R": [
[
-1.320967,
1.091159,
-0.053999
],
[
0.324639,
0.311149,
-1.654174
],
[
-1.043145,
-1.284934,
-0.446418
]
],
"T": [
[
1.938363
],
[
2.912456
],
[
-0.05983
]
]
},
{
"type": "fisheye",
"name": "cam02",
"width": 3840,
"height": 2160,
"K": [
[
1932.244518,
0.0,
1920.0
],
[
0.0,
1936.559524,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.038159,
0.063352,
-0.103183,
0.057205
],
"R": [
[
-0.559664,
1.606496,
-0.210818
],
[
0.354514,
-0.096222,
-1.674383
],
[
-1.58101,
-0.590262,
-0.300823
]
],
"T": [
[
3.363792
],
[
2.221809
],
[
-0.150734
]
]
},
{
"type": "fisheye",
"name": "cam03",
"width": 3840,
"height": 2160,
"K": [
[
1894.357534,
0.0,
1920.0
],
[
0.0,
1895.186619,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.050168,
0.041318,
-0.141156,
0.109365
],
"R": [
[
1.33355,
1.075326,
-0.06178
],
[
0.254785,
-0.410461,
-1.64472
],
[
-1.046531,
1.270312,
-0.479142
]
],
"T": [
[
3.350025
],
[
-0.796138
],
[
-0.123788
]
]
},
{
"type": "fisheye",
"name": "cam04",
"width": 3840,
"height": 2160,
"K": [
[
1954.001155,
0.0,
1920.0
],
[
0.0,
1961.448327,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.002428,
0.182701,
-0.217826,
0.085325
],
"R": [
[
-1.267021,
-1.154138,
-0.033506
],
[
-0.150745,
0.214669,
-1.694016
],
[
1.144741,
-1.249152,
-0.260162
]
],
"T": [
[
-0.017013
],
[
3.310275
],
[
-0.109274
]
]
},
{
"type": "fisheye",
"name": "cam05",
"width": 3840,
"height": 2160,
"K": [
[
1935.020765,
0.0,
1920.0
],
[
0.0,
1932.219587,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.058948,
-0.009451,
0.031702,
-0.02819
],
"R": [
[
1.702269,
0.12581,
-0.157957
],
[
-0.14946,
-0.116741,
-1.703681
],
[
-0.135795,
1.705591,
-0.104959
]
],
"T": [
[
1.404604
],
[
-1.586799
],
[
-0.106577
]
]
},
{
"type": "fisheye",
"name": "cam06",
"width": 3840,
"height": 2160,
"K": [
[
1922.984553,
0.0,
1920.0
],
[
0.0,
1913.40327,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.051617,
-0.019883,
0.131883,
-0.148917
],
"R": [
[
0.859208,
-1.483123,
-0.024598
],
[
-0.452399,
-0.234941,
-1.636654
],
[
1.412655,
0.826829,
-0.509173
]
],
"T": [
[
-1.069183
],
[
0.162843
],
[
-0.081835
]
]
},
{
"type": "fisheye",
"name": "cam07",
"width": 3840,
"height": 2160,
"K": [
[
1921.007178,
0.0,
1920.0
],
[
0.0,
1922.985943,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
0.012135,
0.192931,
-0.334768,
0.182519
],
"R": [
[
-0.069061,
1.712667,
-0.022347
],
[
0.123643,
-0.017322,
-1.709652
],
[
-1.708344,
-0.07049,
-0.122835
]
],
"T": [
[
4.379272
],
[
1.482245
],
[
-0.111096
]
]
},
{
"type": "fisheye",
"name": "cam08",
"width": 3840,
"height": 2160,
"K": [
[
1944.872901,
0.0,
1920.0
],
[
0.0,
1952.784761,
1080.0
],
[
0.0,
0.0,
1.0
]
],
"DC": [
-0.011357,
0.225262,
-0.308,
0.148322
],
"R": [
[
-0.32595,
-1.682675,
-0.029302
],
[
-0.137145,
0.056305,
-1.707782
],
[
1.677333,
-0.322385,
-0.145329
]
],
"T": [
[
-1.092642
],
[
1.923018
],
[
-0.068911
]
]
}
]
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 MiB

After

Width:  |  Height:  |  Size: 1.9 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 MiB

After

Width:  |  Height:  |  Size: 1.9 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 541 KiB

After

Width:  |  Height:  |  Size: 542 KiB

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
import json import json
import os import os
import sys
import time import time
import cv2 import cv2
@ -8,10 +9,12 @@ import numpy as np
import tqdm import tqdm
import test_triangulate import test_triangulate
import triangulate_poses
import utils_2d_pose import utils_2d_pose
from skelda import evals, utils_pose from skelda import evals, utils_pose
sys.path.append("/SimplePoseTriangulation/swig/")
import spt
# ================================================================================================== # ==================================================================================================
# dataset_use = "panoptic" # dataset_use = "panoptic"
@ -185,111 +188,6 @@ def load_labels(dataset: dict):
# ================================================================================================== # ==================================================================================================
def add_extra_joints(poses3D, poses2D, joint_names_3d):
# Update "head" joint as average of "ear" joints
idx_h = joint_names_3d.index("head")
idx_el = joint_names_3d.index("ear_left")
idx_er = joint_names_3d.index("ear_right")
for i in range(len(poses3D)):
if poses3D[i, idx_h, 3] == 0:
ear_left = poses3D[i, idx_el]
ear_right = poses3D[i, idx_er]
if ear_left[3] > 0.1 and ear_right[3] > 0.1:
head = (ear_left + ear_right) / 2
head[3] = min(ear_left[3], ear_right[3])
poses3D[i, idx_h] = head
for j in range(len(poses2D)):
ear_left = poses2D[j][i, idx_el]
ear_right = poses2D[j][i, idx_er]
if ear_left[2] > 0.1 and ear_right[2] > 0.1:
head = (ear_left + ear_right) / 2
head[2] = min(ear_left[2], ear_right[2])
poses2D[j][i, idx_h] = head
return poses3D, poses2D
# ==================================================================================================
def add_missing_joints(poses3D, joint_names_3d):
"""Replace missing joints with their nearest adjacent joints"""
adjacents = {
"hip_right": ["hip_middle", "hip_left"],
"hip_left": ["hip_middle", "hip_right"],
"knee_right": ["hip_right", "knee_left"],
"knee_left": ["hip_left", "knee_right"],
"ankle_right": ["knee_right", "ankle_left"],
"ankle_left": ["knee_left", "ankle_right"],
"shoulder_right": ["shoulder_middle", "shoulder_left"],
"shoulder_left": ["shoulder_middle", "shoulder_right"],
"elbow_right": ["shoulder_right", "hip_right"],
"elbow_left": ["shoulder_left", "hip_left"],
"wrist_right": ["elbow_right"],
"wrist_left": ["elbow_left"],
"nose": ["shoulder_middle", "shoulder_right", "shoulder_left"],
"head": ["shoulder_middle", "shoulder_right", "shoulder_left"],
"foot_*_left_*": ["ankle_left"],
"foot_*_right_*": ["ankle_right"],
"face_*": ["nose"],
"hand_*_left_*": ["wrist_left"],
"hand_*_right_*": ["wrist_right"],
}
for i in range(len(poses3D)):
valid_joints = np.where(poses3D[i, :, 3] > 0.1)[0]
if len(valid_joints) == 0:
continue
body_center = np.mean(poses3D[i, valid_joints, :3], axis=0)
for j in range(len(joint_names_3d)):
adname = ""
if joint_names_3d[j][0:5] == "foot_" and "_left" in joint_names_3d[j]:
adname = "foot_*_left_*"
elif joint_names_3d[j][0:5] == "foot_" and "_right" in joint_names_3d[j]:
adname = "foot_*_right_*"
elif joint_names_3d[j][0:5] == "face_":
adname = "face_*"
elif joint_names_3d[j][0:5] == "hand_" and "_left" in joint_names_3d[j]:
adname = "hand_*_left_*"
elif joint_names_3d[j][0:5] == "hand_" and "_right" in joint_names_3d[j]:
adname = "hand_*_right_*"
elif joint_names_3d[j] in adjacents:
adname = joint_names_3d[j]
if adname == "":
continue
if poses3D[i, j, 3] == 0:
if joint_names_3d[j] in adjacents or joint_names_3d[j][0:5] in [
"foot_",
"face_",
"hand_",
]:
adjacent_joints = [
poses3D[i, joint_names_3d.index(a), :]
for a in adjacents[adname]
]
adjacent_joints = [a[0:3] for a in adjacent_joints if a[3] > 0.1]
if len(adjacent_joints) > 0:
poses3D[i, j, :3] = np.mean(adjacent_joints, axis=0)
else:
poses3D[i, j, :3] = body_center
else:
poses3D[i, j, :3] = body_center
poses3D[i, j, 3] = 0.1
return poses3D
# ==================================================================================================
def main(): def main():
global joint_names_3d, eval_joints global joint_names_3d, eval_joints
@ -316,20 +214,44 @@ def main():
# Print a dataset sample for debugging # Print a dataset sample for debugging
print(labels[0]) print(labels[0])
minscores = {
# Choose this depending on the fraction of invalid/missing persons
# A higher value reduces the number of proposals
"panoptic": 0.94,
"human36m": 0.94,
"mvor": 0.86,
"campus": 0.96,
"shelf": 0.96,
"ikeaasm": 0.89,
"tsinghua": 0.96,
"human36m_wb": 0.94,
"koarob": 0.91,
}
minscore = minscores.get(dataset_use, 0.95)
min_group_sizes = {
# If the number of cameras is high, and the views are not occluded, use a higher value
"panoptic": 1,
"shelf": 2,
"tsinghua": 2,
}
min_group_size = min_group_sizes.get(dataset_use, 1)
if dataset_use == "panoptic" and len(datasets["panoptic"]["cams"]) == 10:
min_group_size = 4
print("\nRunning predictions ...") print("\nRunning predictions ...")
all_poses = [] all_poses = []
all_ids = [] all_ids = []
all_paths = [] all_paths = []
times = [] times = []
last_poses_3d = np.array([]) triangulator = spt.Triangulator(min_score=minscore, min_group_size=min_group_size)
old_scene = "" old_scene = ""
for label in tqdm.tqdm(labels): for label in tqdm.tqdm(labels):
images_2d = [] images_2d = []
if old_scene != label.get("scene", ""): if old_scene != label.get("scene", "") or dataset_use == "human36m_wb":
# Reset last poses if scene changes # Reset last poses if scene changes
old_scene = label.get("scene", "") old_scene = label.get("scene", "")
last_poses_3d = np.array([]) triangulator.reset()
try: try:
start = time.time() start = time.time()
@ -355,62 +277,34 @@ def main():
cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1]) cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
images_2d[i] = cv2.resize(img, (1000, 1000)) images_2d[i] = cv2.resize(img, (1000, 1000))
roomparams = {
"room_size": label["room_size"],
"room_center": label["room_center"],
}
start = time.time() start = time.time()
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d) poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d) poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
time_2d = time.time() - start time_2d = time.time() - start
print("2D time:", time_2d) print("2D time:", time_2d)
minscores = {
# Choose this depending on the fraction of invalid/missing persons
# A higher value reduces the number of proposals
"panoptic": 0.94,
"human36m": 0.94,
"mvor": 0.86,
"campus": 0.96,
"shelf": 0.96,
"ikeaasm": 0.89,
"tsinghua": 0.96,
"human36m_wb": 0.94,
}
minscore = minscores.get(dataset_use, 0.95)
start = time.time() start = time.time()
if sum(np.sum(p) for p in poses_2d) == 0: if sum(np.sum(p) for p in poses_2d) == 0:
poses3D = np.zeros([1, len(joint_names_3d), 4]) poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist()
poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
else: else:
poses3D = triangulate_poses.get_3d_pose( spt_cameras = spt.convert_cameras(label["cameras"])
poses_2d, label["cameras"], roomparams, joint_names_2d, last_poses_3d, minscore roomparams = [label["room_size"], label["room_center"]]
poses3D = triangulator.triangulate_poses(
poses_2d, spt_cameras, roomparams, joint_names_2d
) )
poses2D = []
for cam in label["cameras"]:
poses_2d, _ = utils_pose.project_poses(poses3D, cam)
poses2D.append(poses_2d)
poses3D, poses2D = add_extra_joints(poses3D, poses2D, joint_names_3d)
poses3D, poses2D = test_triangulate.filter_poses(
poses3D,
poses2D,
roomparams,
joint_names_3d,
drop_few_limbs=(dataset_use != "mvor"),
)
poses3D = add_missing_joints(poses3D, joint_names_3d)
last_poses_3d = poses3D
time_3d = time.time() - start time_3d = time.time() - start
print("3D time:", time_3d) print("3D time:", time_3d)
all_poses.append(poses3D) all_poses.append(np.array(poses3D))
all_ids.append(label["id"]) all_ids.append(label["id"])
all_paths.append(label["imgpaths"]) all_paths.append(label["imgpaths"])
times.append((time_2d, time_3d)) times.append((time_2d, time_3d))
# Print per-step triangulation timings
print("")
triangulator.print_stats()
warmup_iters = 10 warmup_iters = 10
if len(times) > warmup_iters: if len(times) > warmup_iters:
times = times[warmup_iters:] times = times[warmup_iters:]

View File

@ -1,6 +1,8 @@
import copy import copy
import json import json
import os import os
import sys
import time
from typing import List from typing import List
import cv2 import cv2
@ -8,10 +10,12 @@ import matplotlib
import numpy as np import numpy as np
import draw_utils import draw_utils
import triangulate_poses
import utils_2d_pose import utils_2d_pose
from skelda import utils_pose from skelda import utils_pose
sys.path.append("/SimplePoseTriangulation/swig/")
import spt
# ================================================================================================== # ==================================================================================================
filepath = os.path.dirname(os.path.realpath(__file__)) + "/" filepath = os.path.dirname(os.path.realpath(__file__)) + "/"
@ -224,124 +228,6 @@ def load_image(path: str):
# ================================================================================================== # ==================================================================================================
def filter_poses(poses3D, poses2D, roomparams, joint_names, drop_few_limbs=True):
drop = []
for i, pose in enumerate(poses3D):
pose = np.array(pose)
valid_joints = [j for j in pose if j[-1] > 0.1]
# Drop persons with too few joints
if np.sum(pose[..., -1] > 0.1) < 5:
drop.append(i)
continue
# Drop too large or too small persons
mins = np.min(valid_joints, axis=0)
maxs = np.max(valid_joints, axis=0)
diff = maxs - mins
if any(((d > 2.3) for d in diff)):
drop.append(i)
continue
if all(((d < 0.3) for d in diff)):
drop.append(i)
continue
if (
(diff[0] < 0.2 and diff[1] < 0.2)
or (diff[1] < 0.2 and diff[2] < 0.2)
or (diff[2] < 0.2 and diff[0] < 0.2)
):
drop.append(i)
continue
# Drop persons outside room
mean = np.mean(valid_joints, axis=0)
mins = np.min(valid_joints, axis=0)
maxs = np.max(valid_joints, axis=0)
rsize = [r / 2 for r in roomparams["room_size"]]
rcent = roomparams["room_center"]
if any(
(
# Center of mass outside room
mean[j] > rsize[j] + rcent[j] or mean[j] < -rsize[j] + rcent[j]
for j in range(3)
)
) or any(
(
# One limb more than 10cm outside room
maxs[j] > rsize[j] + rcent[j] + 0.1
or mins[j] < -rsize[j] + rcent[j] - 0.1
for j in range(3)
)
):
drop.append(i)
continue
if drop_few_limbs:
# Drop persons with less than 3 limbs
found_limbs = 0
for limb in main_limbs:
start_idx = joint_names.index(limb[0])
end_idx = joint_names.index(limb[1])
if pose[start_idx, -1] > 0.1 and pose[end_idx, -1] > 0.1:
found_limbs += 1
if found_limbs < 3:
drop.append(i)
continue
# Drop persons with too small or high average limb length
total_length = 0
total_limbs = 0
for limb in main_limbs:
start_idx = joint_names.index(limb[0])
end_idx = joint_names.index(limb[1])
if pose[start_idx, -1] < 0.1 or pose[end_idx, -1] < 0.1:
continue
limb_length = np.linalg.norm(pose[end_idx, :3] - pose[start_idx, :3])
total_length += limb_length
total_limbs += 1
if total_limbs == 0:
drop.append(i)
continue
average_length = total_length / total_limbs
if average_length < 0.1:
drop.append(i)
continue
if total_limbs > 4 and average_length > 0.5:
drop.append(i)
continue
new_poses3D = []
new_poses2D = [[] for _ in range(len(poses2D))]
for i in range(len(poses3D)):
if len(poses3D[i]) != len(joint_names):
# Sometimes some joints of a poor detection are missing
continue
if i not in drop:
new_poses3D.append(poses3D[i])
for j in range(len(poses2D)):
new_poses2D[j].append(poses2D[j][i])
else:
new_pose = np.array(poses3D[i])
new_pose[..., -1] = 0.001
new_poses3D.append(new_pose)
for j in range(len(poses2D)):
new_pose = np.array(poses2D[j][i])
new_pose[..., -1] = 0.001
new_poses2D[j].append(new_pose)
new_poses3D = np.array(new_poses3D)
new_poses2D = np.array(new_poses2D)
if new_poses3D.size == 0:
new_poses3D = np.zeros([1, len(joint_names), 4])
new_poses2D = np.zeros([len(poses2D), 1, len(joint_names), 3])
return new_poses3D, new_poses2D
# ==================================================================================================
def update_keypoints(poses_2d: list, joint_names: List[str]) -> list: def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
new_views = [] new_views = []
for view in poses_2d: for view in poses_2d:
@ -409,10 +295,11 @@ def main():
if not os.path.isdir(dirpath): if not os.path.isdir(dirpath):
continue continue
if (dirname[0] not in ["p", "h"]) or len(dirname) != 2: if (dirname[0] not in ["p", "h", "e"]) or len(dirname) != 2:
continue continue
# Load sample infos # Load sample infos
print("\n" + dirpath)
with open(os.path.join(dirpath, "sample.json"), "r", encoding="utf-8") as file: with open(os.path.join(dirpath, "sample.json"), "r", encoding="utf-8") as file:
sample = json.load(file) sample = json.load(file)
sample = update_sample(sample, dirpath) sample = update_sample(sample, dirpath)
@ -431,8 +318,11 @@ def main():
images_2d.append(img) images_2d.append(img)
# Get 2D poses # Get 2D poses
stime = time.time()
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d) poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = update_keypoints(poses_2d, joint_names_2d) poses_2d = update_keypoints(poses_2d, joint_names_2d)
print("2D time:", time.time() - stime)
# print([np.array(p).round(6).tolist() for p in poses_2d])
fig1 = draw_utils.show_poses2d( fig1 = draw_utils.show_poses2d(
poses_2d, np.array(images_2d), joint_names_2d, "2D detections" poses_2d, np.array(images_2d), joint_names_2d, "2D detections"
@ -449,23 +339,27 @@ def main():
poses3D = np.zeros([1, len(joint_names_3d), 4]) poses3D = np.zeros([1, len(joint_names_3d), 4])
poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3]) poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
else: else:
poses3D = triangulate_poses.get_3d_pose( cameras = spt.convert_cameras(camparams)
poses_2d, camparams, roomparams, joint_names_2d roomp = [roomparams["room_size"], roomparams["room_center"]]
triangulator = spt.Triangulator(min_score=0.95)
stime = time.time()
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomp, joint_names_2d
) )
poses3D = np.array(poses_3d)
if len(poses3D) == 0:
poses3D = np.zeros([1, len(joint_names_3d), 4])
print("3D time:", time.time() - stime)
poses2D = [] poses2D = []
for cam in camparams: for cam in camparams:
poses_2d, _ = utils_pose.project_poses(poses3D, cam) poses_2d, _ = utils_pose.project_poses(poses3D, cam)
poses2D.append(poses_2d) poses2D.append(poses_2d)
poses3D, poses2D = filter_poses(
poses3D,
poses2D,
roomparams,
joint_names_3d,
)
print("\n" + dirpath)
print(poses3D) print(poses3D)
# print(poses2D) # print(poses2D)
# print(poses3D.round(3).tolist())
fig2 = draw_utils.utils_view.show_poses3d( fig2 = draw_utils.utils_view.show_poses3d(
poses3D, joint_names_3d, roomparams, camparams poses3D, joint_names_3d, roomparams, camparams

View File

@ -1,456 +0,0 @@
import copy
import math
import cv2
import numpy as np
from skelda import utils_pose
# ==================================================================================================
core_joints = [
"shoulder_left",
"shoulder_right",
"hip_left",
"hip_right",
"elbow_left",
"elbow_right",
"knee_left",
"knee_right",
"wrist_left",
"wrist_right",
"ankle_left",
"ankle_right",
]
# ==================================================================================================
def undistort_points(points: np.ndarray, caminfo: dict):
"""Undistorts 2D pixel coordinates"""
K = np.asarray(caminfo["K"], dtype=np.float32)
DC = np.asarray(caminfo["DC"][0:5], dtype=np.float32)
# Undistort camera matrix
w = caminfo["width"]
h = caminfo["height"]
newK, _ = cv2.getOptimalNewCameraMatrix(K, DC, (w, h), 1, (w, h))
caminfo["K"] = newK
caminfo["DC"] = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
# Undistort points
pshape = points.shape
points = np.reshape(points, (-1, 1, 2))
points = cv2.undistortPoints(points, K, DC, P=newK)
points = points.reshape(pshape)
return points, caminfo
# ==================================================================================================
def get_camera_P(cam):
"""Calculate opencv-style projection matrix"""
R = np.asarray(cam["R"])
T = np.asarray(cam["T"])
K = np.asarray(cam["K"])
Tr = R @ (T * -1)
P = K @ np.hstack([R, Tr])
return P
# ==================================================================================================
def calc_pose_score(pose1, pose2, dist1, cam1, joint_names, use_joints):
"""Calculates the score between two poses"""
# Select core joints
jids = [joint_names.index(j) for j in use_joints]
pose1 = pose1[jids]
pose2 = pose2[jids]
dist1 = dist1[jids]
mask = (pose1[:, 2] > 0.1) & (pose2[:, 2] > 0.1)
if np.sum(mask) < 3:
return 0.0
iscale = (cam1["width"] + cam1["height"]) / 2
scores = score_projection(pose1, pose2, dist1, mask, iscale)
score = np.mean(scores)
return score
# ==================================================================================================
def calc_pair_score(pair, poses_2d, camparams, roomparams, joint_names_2d, use_joints):
"""Triangulates a pair of persons and scores them based on the reprojection error"""
cam1 = camparams[pair[0][0]]
cam2 = camparams[pair[0][1]]
pose1 = poses_2d[pair[0][0]][pair[0][2]]
pose2 = poses_2d[pair[0][1]][pair[0][3]]
# Select core joints
jids = [joint_names_2d.index(j) for j in use_joints]
pose1 = pose1[jids]
pose2 = pose2[jids]
poses_3d, score = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams)
return poses_3d, score
# ==================================================================================================
def score_projection(pose1, repro1, dists1, mask, iscale):
min_score = 0.1
error1 = np.linalg.norm(pose1[mask, 0:2] - repro1[mask, 0:2], axis=1)
# Set errors of invisible reprojections to a high value
penalty = iscale
mask1b = (repro1[:, 2] < min_score)[mask]
error1[mask1b] = penalty
# Scale error by image size and distance to the camera
error1 = error1.clip(0, iscale / 4) / iscale
dscale1 = np.sqrt(np.mean(dists1[mask]) / 3.5)
error1 = error1 * dscale1
# Convert errors to a score
score1 = 1.0 / (1.0 + error1 * 10)
return score1
# ==================================================================================================
def triangulate_and_score(pose1, pose2, cam1, cam2, roomparams):
"""Triangulates a pair of persons and scores them based on the reprojection error"""
# Mask out invisible joints
min_score = 0.1
mask1a = pose1[:, 2] >= min_score
mask2a = pose2[:, 2] >= min_score
mask = mask1a & mask2a
# If too few joints are visible return a low score
if np.sum(mask) < 3:
pose3d = np.zeros([len(pose1), 4])
score = 0.0
return pose3d, score
# Triangulate points
points1 = pose1[mask, 0:2].T
points2 = pose2[mask, 0:2].T
points3d = cv2.triangulatePoints(cam1["P"], cam2["P"], points1, points2)
points3d = (points3d / points3d[3, :])[0:3, :].T
pose3d = np.zeros([len(pose1), 4])
pose3d[mask] = np.concatenate([points3d, np.ones([points3d.shape[0], 1])], axis=-1)
# If the triangulated points are outside the room drop it
mean = np.mean(pose3d[mask][:, 0:3], axis=0)
mins = np.min(pose3d[mask][:, 0:3], axis=0)
maxs = np.max(pose3d[mask][:, 0:3], axis=0)
rsize = np.array(roomparams["room_size"]) / 2
rcent = np.array(roomparams["room_center"])
wdist = 0.1
center_outside = np.any((mean > rsize + rcent) | (mean < -rsize + rcent))
limb_outside = np.any(
(maxs > rsize + rcent + wdist) | (mins < -rsize + rcent - wdist)
)
if center_outside or limb_outside:
pose3d[:, 3] = 0.001
score = 0.001
return pose3d, score
# Calculate reprojection error
poses_3d = np.expand_dims(pose3d, axis=0)
repro1, dists1 = utils_pose.project_poses(poses_3d, cam1, calc_dists=True)
repro2, dists2 = utils_pose.project_poses(poses_3d, cam2, calc_dists=True)
repro1, dists1 = repro1[0], dists1[0]
repro2, dists2 = repro2[0], dists2[0]
# Calculate scores for each view
iscale = (cam1["width"] + cam1["height"]) / 2
score1 = score_projection(pose1, repro1, dists1, mask, iscale)
score2 = score_projection(pose2, repro2, dists2, mask, iscale)
# Combine scores
scores = (score1 + score2) / 2
# Drop lowest scores
drop_k = math.floor(len(pose1) * 0.2)
score = np.mean(np.sort(scores, axis=-1)[drop_k:])
# Add score to 3D pose
full_scores = np.zeros([poses_3d.shape[1], 1])
full_scores[mask] = np.expand_dims(scores, axis=-1)
pose3d[:, 3] = full_scores[:, 0]
return pose3d, score
# ==================================================================================================
def calc_grouping(all_pairs, min_score: float):
"""Groups pairs that share a person"""
# Calculate the pose center for each pair
for i in range(len(all_pairs)):
pair = all_pairs[i]
pose_3d = pair[2][0]
mask = pose_3d[:, 3] > min_score
center = np.mean(pose_3d[mask, 0:3], axis=0)
all_pairs[i] = all_pairs[i] + [center]
groups = []
for i in range(len(all_pairs)):
pair = all_pairs[i]
# Create new group if non exists
if len(groups) == 0:
groups.append([pair[3], pair[2][0], [pair]])
continue
# Check if the pair matches to an existing group
max_center_dist = 0.6
max_joint_avg_dist = 0.3
best_dist = math.inf
best_group = -1
for j in range(len(groups)):
g0 = groups[j]
center0 = g0[0]
center1 = pair[3]
if np.linalg.norm(center0 - center1) < max_center_dist:
pose0 = g0[1]
pose1 = pair[2][0]
# Calculate the distance between the two poses
mask0 = pose0[:, 3] > min_score
mask1 = pose1[:, 3] > min_score
mask = mask0 & mask1
dists = np.linalg.norm(pose0[mask, 0:3] - pose1[mask, 0:3], axis=1)
dist = np.mean(dists)
if dist < max_joint_avg_dist:
if dist < best_dist:
best_dist = dist
best_group = j
if best_group >= 0:
# Add pair to existing group and update the mean positions
group = groups[best_group]
new_center = (group[0] * len(group[2]) + pair[3]) / (len(group[2]) + 1)
new_pose = (group[1] * len(group[2]) + pair[2][0]) / (len(group[2]) + 1)
group[2].append(pair)
group[0] = new_center
group[1] = new_pose
else:
# Create new group if no match was found
groups.append([pair[3], pair[2][0], [pair]])
return groups
# ==================================================================================================
def merge_group(poses_3d: np.ndarray, min_score: float):
"""Merges a group of poses into a single pose"""
# Merge poses to create initial pose
# Use only those triangulations with a high score
imask = poses_3d[:, :, 3:4] > min_score
sum_poses = np.sum(poses_3d * imask, axis=0)
sum_mask = np.sum(imask, axis=0)
initial_pose_3d = np.divide(
sum_poses, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_poses)
)
# Use center as default if the initial pose is empty
jmask = initial_pose_3d[:, 3] > 0.0
sum_joints = np.sum(initial_pose_3d[jmask, 0:3], axis=0)
sum_mask = np.sum(jmask)
center = np.divide(
sum_joints, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_joints)
)
initial_pose_3d[~jmask, 0:3] = center
# Drop joints with low scores
offset = 0.1
mask = poses_3d[:, :, 3:4] > (min_score - offset)
# Drop outliers that are far away from the other proposals
max_dist = 1.2
distances = np.linalg.norm(
poses_3d[:, :, :3] - initial_pose_3d[np.newaxis, :, :3], axis=2
)
dmask = distances <= max_dist
mask = mask & np.expand_dims(dmask, axis=-1)
# Select the best-k proposals for each joint that are closest to the initial pose
keep_best = 3
sorted_indices = np.argsort(distances, axis=0)
best_k_mask = np.zeros_like(mask, dtype=bool)
num_joints = poses_3d.shape[1]
for i in range(num_joints):
valid_indices = sorted_indices[:, i][mask[sorted_indices[:, i], i, 0]]
best_k_mask[valid_indices[:keep_best], i, 0] = True
mask = mask & best_k_mask
# Final pose computation with combined masks
sum_poses = np.sum(poses_3d * mask, axis=0)
sum_mask = np.sum(mask, axis=0)
final_pose_3d = np.divide(
sum_poses, sum_mask, where=(sum_mask > 0), out=np.zeros_like(sum_poses)
)
return final_pose_3d
# ==================================================================================================
def get_3d_pose(
poses_2d,
camparams,
roomparams,
joint_names_2d,
last_poses_3d=np.array([]),
min_score=0.95,
):
"""Triangulates 3D poses from 2D poses of multiple views"""
# Convert poses and camparams to numpy arrays
camparams = copy.deepcopy(camparams)
for i in range(len(camparams)):
poses_2d[i] = np.asarray(poses_2d[i])
camparams[i]["K"] = np.array(camparams[i]["K"])
camparams[i]["R"] = np.array(camparams[i]["R"])
camparams[i]["T"] = np.array(camparams[i]["T"])
camparams[i]["DC"] = np.array(camparams[i]["DC"])
# Undistort 2D points
for i in range(len(camparams)):
poses = poses_2d[i]
cam = camparams[i]
poses[:, :, 0:2], cam = undistort_points(poses[:, :, 0:2], cam)
# Mask out points that are far outside the image (points slightly outside are still valid)
offset = (cam["width"] + cam["height"]) / 40
mask = (
(poses[:, :, 0] >= 0 - offset)
& (poses[:, :, 0] < cam["width"] + offset)
& (poses[:, :, 1] >= 0 - offset)
& (poses[:, :, 1] < cam["height"] + offset)
)
poses = poses * np.expand_dims(mask, axis=-1)
poses_2d[i] = poses
# Calc projection matrix with updated camera parameters
cam["P"] = get_camera_P(cam)
camparams[i] = cam
# Project last 3D poses to 2D
last_poses_2d = []
last_poses_3d = np.asarray(last_poses_3d)
if last_poses_3d.size > 0:
for i in range(len(camparams)):
poses2d, dists = utils_pose.project_poses(last_poses_3d, camparams[i])
last_poses_2d.append((poses2d, dists))
# Check matches to old poses
threshold = min_score - 0.2
scored_pasts = {}
if last_poses_3d.size > 0:
for i in range(len(camparams)):
scored_pasts[i] = {}
poses = poses_2d[i]
last_poses, dists = last_poses_2d[i]
for j in range(len(last_poses)):
scored_pasts[i][j] = []
for k in range(len(poses)):
score = calc_pose_score(
poses[k],
last_poses[j],
dists[j],
camparams[i],
joint_names_2d,
core_joints,
)
if score > threshold:
scored_pasts[i][j].append(k)
# Create pairs of persons
# Checks if the person was already matched to the last frame and if so only creates pairs with those
# Else it creates all possible pairs
num_persons = [len(p) for p in poses_2d]
all_pairs = []
for i in range(len(camparams)):
poses = poses_2d[i]
for j in range(i + 1, len(poses_2d)):
poses2 = poses_2d[j]
for k in range(len(poses)):
for l in range(len(poses2)):
pid1 = sum(num_persons[:i]) + k
pid2 = sum(num_persons[:j]) + l
match = False
if last_poses_3d.size > 0:
for m in range(len(last_poses_3d)):
if k in scored_pasts[i][m] and l in scored_pasts[j][m]:
match = True
all_pairs.append([(i, j, k, l), (pid1, pid2)])
elif k in scored_pasts[i][m] or l in scored_pasts[j][m]:
match = True
if not match:
all_pairs.append([(i, j, k, l), (pid1, pid2)])
# Calculate pair scores
for i in range(len(all_pairs)):
pair = all_pairs[i]
pose_3d, score = calc_pair_score(
pair, poses_2d, camparams, roomparams, joint_names_2d, core_joints
)
all_pairs[i].append((pose_3d, score))
# import draw_utils
# poses3D = np.array([pose_3d])
# _ = draw_utils.utils_view.show_poses3d(
# poses3D, core_joints, {}, camparams
# )
# draw_utils.utils_view.show_plots()
# Drop pairs with low scores
all_pairs = [p for p in all_pairs if p[2][1] > min_score]
# Group pairs that share a person
groups = calc_grouping(all_pairs, min_score)
# Calculate full 3D poses
poses_3d = []
for pair in all_pairs:
cam1 = camparams[pair[0][0]]
cam2 = camparams[pair[0][1]]
pose1 = poses_2d[pair[0][0]][pair[0][2]]
pose2 = poses_2d[pair[0][1]][pair[0][3]]
pose_3d, _ = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams)
pair.append(pose_3d)
# Merge groups
poses_3d = []
for group in groups:
poses = np.array([p[4] for p in group[2]])
pose_3d = merge_group(poses, min_score)
poses_3d.append(pose_3d)
if len(poses_3d) > 0:
poses3D = np.array(poses_3d)
else:
poses3D = np.zeros([1, len(joint_names_2d), 4])
return poses3D

75
spt/camera.cpp Normal file
View File

@ -0,0 +1,75 @@
#include <iomanip>
#include <sstream>
#include "camera.hpp"
// =================================================================================================
// =================================================================================================
template <size_t M, size_t N>
static const std::string print_matrix(const std::array<std::array<float, N>, M> &matrix)
{
std::ostringstream out;
out << "[";
for (size_t j = 0; j < M; ++j)
{
out << "[";
for (size_t i = 0; i < N; ++i)
{
out << matrix[j][i];
if (i < N - 1)
{
out << ", ";
}
}
out << "]";
if (j < M - 1)
{
out << ", ";
}
}
out << "]";
return out.str();
}
// =================================================================================================
// =================================================================================================
std::string Camera::to_string() const
{
std::ostringstream out;
out << std::fixed << std::setprecision(6);
out << "{";
out << "'name': '" << name << "', ";
out << "'K': " << print_matrix(K) << ", ";
out << "'DC': [";
for (size_t i = 0; i < DC.size(); ++i)
{
out << DC[i];
if (i < DC.size() - 1)
out << ", ";
}
out << "], ";
out << "'R': " << print_matrix(R) << ", ";
out << "'T': " << print_matrix(T) << ", ";
out << "'P': " << print_matrix(P) << ", ";
out << "'width': " << width << ", ";
out << "'height': " << height << ", ";
out << "'type': " << type;
out << "}";
return out.str();
}
// =================================================================================================
std::ostream &operator<<(std::ostream &out, const Camera &cam)
{
out << cam.to_string();
return out;
}

24
spt/camera.hpp Normal file
View File

@ -0,0 +1,24 @@
#pragma once
#include <array>
#include <iostream>
#include <string>
#include <vector>
// =================================================================================================
struct Camera
{
std::string name;
std::array<std::array<float, 3>, 3> K;
std::vector<float> DC;
std::array<std::array<float, 3>, 3> R;
std::array<std::array<float, 1>, 3> T;
std::array<std::array<float, 3>, 4> P;
int width;
int height;
std::string type;
friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
std::string to_string() const;
};

35
spt/interface.cpp Normal file
View File

@ -0,0 +1,35 @@
#include "triangulator.hpp"
#include "interface.hpp"
// =================================================================================================
// =================================================================================================
Triangulator::Triangulator(float min_score, size_t min_group_size)
{
this->triangulator = new TriangulatorInternal(min_score, min_group_size);
}
// =================================================================================================
std::vector<std::vector<std::array<float, 4>>> Triangulator::triangulate_poses(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names)
{
return this->triangulator->triangulate_poses(poses_2d, cameras, roomparams, joint_names);
}
// =================================================================================================
void Triangulator::reset()
{
this->triangulator->reset();
}
// =================================================================================================
void Triangulator::print_stats()
{
this->triangulator->print_stats();
}

54
spt/interface.hpp Normal file
View File

@ -0,0 +1,54 @@
#pragma once
#include <string>
#include <vector>
#include "camera.hpp"
// =================================================================================================
// Forward declaration of the class, that swig does not try to parse all its dependencies.
class TriangulatorInternal;
// =================================================================================================
class Triangulator
{
public:
/**
* Triangulator to predict poses from multiple views.
*
*
* @param min_score Minimum score to consider a triangulated joint as valid.
* @param min_group_size Minimum number of camera pairs that need to see a person.
*/
Triangulator(
float min_score = 0.95,
size_t min_group_size = 1);
/**
* Calculate a triangulation.
*
*
* @param poses_2d List of shape [views, persons, joints, 3], containing the 2D poses.
* @param cameras List of cameras.
* @param roomparams Room parameters (room size, room center).
* @param joint_names List of 2D joint names.
*
* @return List of shape [persons, joints, 4], containing the 3D poses.
*/
std::vector<std::vector<std::array<float, 4>>> triangulate_poses(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
/** Reset the triangulator. */
void reset();
/** Print triangulation statistics. */
void print_stats();
private:
TriangulatorInternal *triangulator;
};

1876
spt/triangulator.cpp Normal file

File diff suppressed because it is too large Load Diff

133
spt/triangulator.hpp Normal file
View File

@ -0,0 +1,133 @@
#pragma once
#include <array>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "camera.hpp"
// =================================================================================================
class CameraInternal
{
public:
CameraInternal(const Camera &cam);
Camera cam;
cv::Mat K;
cv::Mat DC;
cv::Mat R;
cv::Mat T;
cv::Mat P;
void update_projection_matrix();
};
// =================================================================================================
class TriangulatorInternal
{
public:
TriangulatorInternal(float min_score, size_t min_group_size);
std::vector<std::vector<std::array<float, 4>>> triangulate_poses(
const std::vector<std::vector<std::vector<std::array<float, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
void reset();
void print_stats();
private:
float min_score;
float min_group_size;
const std::vector<std::string> core_joints = {
"shoulder_left",
"shoulder_right",
"hip_left",
"hip_right",
"elbow_left",
"elbow_right",
"knee_left",
"knee_right",
"wrist_left",
"wrist_right",
"ankle_left",
"ankle_right",
};
const std::vector<std::pair<std::string, std::string>> core_limbs = {
{"knee_left", "ankle_left"},
{"hip_left", "knee_left"},
{"hip_right", "knee_right"},
{"knee_right", "ankle_right"},
{"elbow_left", "wrist_left"},
{"elbow_right", "wrist_right"},
{"shoulder_left", "elbow_left"},
{"shoulder_right", "elbow_right"},
};
std::vector<cv::Mat> last_poses_3d;
void undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam);
std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>> project_poses(
const std::vector<cv::Mat> &bodies3D, const CameraInternal &icam, bool calc_dists);
float calc_pose_score(
const cv::Mat &pose1,
const cv::Mat &pose2,
const cv::Mat &dist1,
const CameraInternal &icam);
cv::Mat score_projection(
const cv::Mat &pose1,
const cv::Mat &repro1,
const cv::Mat &dists1,
const cv::Mat &mask,
float iscale);
std::pair<cv::Mat, float> triangulate_and_score(
const cv::Mat &pose1,
const cv::Mat &pose2,
const CameraInternal &cam1,
const CameraInternal &cam2,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
std::vector<std::tuple<cv::Point3f, cv::Mat, std::vector<int>>> calc_grouping(
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
const std::vector<std::pair<cv::Mat, float>> &all_scored_poses,
float min_score);
cv::Mat merge_group(const std::vector<cv::Mat> &poses_3d, float min_score);
void add_extra_joints(std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
void filter_poses(
std::vector<cv::Mat> &poses,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<size_t> &core_joint_idx,
const std::vector<std::array<size_t, 2>> &core_limbs_idx);
void add_missing_joints(
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
// Statistics
float num_calls = 0;
float total_time = 0;
float init_time = 0;
float undistort_time = 0;
float project_time = 0;
float match_time = 0;
float pairs_time = 0;
float pair_scoring_time = 0;
float grouping_time = 0;
float full_time = 0;
float merge_time = 0;
float post_time = 0;
float convert_time = 0;
};

13
swig/Makefile Normal file
View File

@ -0,0 +1,13 @@
# Standard compile options for the C++ executable
FLAGS = -fPIC -O3 -march=native -Wall -Werror -flto -fopenmp -fopenmp-simd
# The Python interface through SWIG
PYTHONI = -I/usr/include/python3.8/
PYTHONL = -Xlinker -export-dynamic
# Default super-target
all:
cd ../spt/ && g++ $(FLAGS) -std=c++2a -I/usr/include/opencv4 -c *.cpp ; cd ../swig/
swig -c++ -python -keyword -o spt_wrap.cxx spt.i
g++ $(FLAGS) $(PYTHONI) -c spt_wrap.cxx -o spt_wrap.o
g++ $(FLAGS) $(PYTHONL) -shared ../spt/*.o spt_wrap.o -lopencv_core -lopencv_imgproc -lopencv_calib3d -o _spt.so

70
swig/spt.i Normal file
View File

@ -0,0 +1,70 @@
%module spt
%{
// Includes the header in the wrapper code
#include "../spt/camera.hpp"
#include "../spt/interface.hpp"
%}
// Some modules need extra imports beside the main .hpp file
%include "std_array.i"
%include "std_string.i"
%include "std_vector.i"
// Instantiate templates used by example
// If the template is too nested (>2), parts of it need to be declared as well
namespace std {
%template(FloatMatrix_3x3) array<array<float, 3>, 3>;
%template(VectorFloat) vector<float>;
%template(FloatMatrix_3x1) array<array<float, 1>, 3>;
%template(FloatMatrix_3x4) array<array<float, 3>, 4>;
%template(Matrix_Jx4) vector<array<float, 4>>;
%template(Matrix_NxJx4) vector<vector<array<float, 4>>>;
%template(Matrix_Jx3) vector<array<float, 3>>;
%template(Matrix_VxNxJx3) vector<vector<vector<array<float, 3>>>>;
%template(VectorCamera) vector<Camera>;
%template(FloatMatrix_2x3) array<array<float, 3>, 2>;
%template(VectorString) vector<std::string>;
}
// Convert vector to native (python) list
%naturalvar Camera::K;
%naturalvar Camera::DC;
%naturalvar Camera::R;
%naturalvar Camera::T;
// Improve printing of result objects
%extend Camera {
std::string __str__() const {
return $self->to_string();
}
}
// Ignore: Warning 503: Can't wrap 'operator <<' unless renamed to a valid identifier.
%warnfilter(503) Camera;
// Ignore: Warning 511: Can't use keyword arguments with overloaded functions.
// The warning is cause by enabling keyword arguments, which doesn't work for vectors.
#pragma SWIG nowarn=511
// Parse the header file to generate wrappers
%include "../spt/camera.hpp"
%include "../spt/interface.hpp"
// Add additional Python code to the module
%pythoncode %{
def convert_cameras(cameras):
"""Convert cameras from Python to C++."""
c_cameras = []
for cam in cameras:
camera = Camera()
camera.name = cam["name"]
camera.K = cam["K"]
camera.DC = cam["DC"]
camera.R = cam["R"]
camera.T = cam["T"]
camera.width = cam["width"]
camera.height = cam["height"]
camera.type = cam.get("type", "pinhole")
c_cameras.append(camera)
return c_cameras
%}

2428
tests/poses_e1.json Normal file

File diff suppressed because it is too large Load Diff

544
tests/poses_h1.json Normal file
View File

@ -0,0 +1,544 @@
{
"2D": [
[
[
[
445.214,
373.348,
0.805
],
[
446.448,
367.181,
0.834
],
[
446.859,
370.881,
0.447
],
[
456.726,
365.947,
0.974
],
[
484.272,
374.17,
0.745
],
[
445.625,
385.271,
0.611
],
[
505.24,
409.528,
0.761
],
[
393.411,
385.682,
0.486
],
[
484.683,
447.352,
0.477
],
[
369.976,
385.271,
0.672
],
[
452.203,
456.808,
0.292
],
[
442.336,
503.677,
0.504
],
[
478.927,
502.444,
0.505
],
[
414.79,
508.2,
0.462
],
[
428.769,
476.131,
0.332
],
[
423.835,
576.037,
0.689
],
[
379.433,
497.099,
0.485
],
[
460.632,
503.061,
0.504
],
[
475.433,
397.399,
0.611
],
[
470.499,
370.059,
0.745
]
]
],
[
[
[
502.664,
372.786,
0.81
],
[
503.643,
363.001,
0.894
],
[
492.39,
368.383,
0.862
],
[
508.535,
351.26,
0.85
],
[
479.181,
367.405,
0.837
],
[
536.422,
367.894,
0.721
],
[
476.245,
392.356,
0.785
],
[
584.367,
370.829,
0.867
],
[
464.993,
441.279,
0.717
],
[
560.883,
385.017,
0.72
],
[
470.864,
392.356,
0.687
],
[
542.292,
469.166,
0.674
],
[
503.153,
467.698,
0.639
],
[
502.664,
506.837,
0.765
],
[
458.633,
454.489,
0.798
],
[
471.842,
589.518,
0.813
],
[
530.551,
499.988,
0.71
],
[
522.723,
468.432,
0.639
],
[
506.333,
380.125,
0.721
],
[
493.858,
359.332,
0.837
]
]
],
[
[
[
635.959,
325.896,
0.26
],
[
603.03,
315.205,
0.37
],
[
647.506,
325.896,
0.366
],
[
611.155,
316.488,
0.826
],
[
641.519,
327.607,
0.878
],
[
577.37,
334.877,
0.652
],
[
652.638,
364.385,
0.766
],
[
532.466,
334.877,
0.77
],
[
650.5,
405.44,
0.57
],
[
569.672,
337.015,
0.406
],
[
631.255,
375.504,
0.375
],
[
573.521,
461.463,
0.659
],
[
617.57,
458.897,
0.573
],
[
614.576,
471.727,
0.287
],
[
656.487,
410.145,
0.653
],
[
643.657,
520.908,
0.599
],
[
631.255,
512.782,
0.262
],
[
595.546,
460.18,
0.573
],
[
615.004,
349.631,
0.652
],
[
626.337,
322.047,
0.826
]
]
],
[
[
[
512.076,
343.77,
0.704
],
[
518.561,
335.34,
0.855
],
[
502.349,
336.637,
0.799
],
[
523.1,
323.667,
0.76
],
[
480.3,
324.964,
0.887
],
[
534.773,
328.855,
0.743
],
[
446.579,
361.928,
0.849
],
[
592.488,
330.152,
0.802
],
[
479.652,
423.534,
0.795
],
[
621.022,
343.77,
0.904
],
[
491.324,
362.576,
0.811
],
[
523.1,
450.77,
0.671
],
[
472.518,
461.146,
0.7
],
[
549.688,
510.431,
0.776
],
[
491.324,
448.825,
0.757
],
[
528.937,
624.565,
0.855
],
[
587.949,
496.813,
0.741
],
[
497.809,
455.958,
0.671
],
[
490.676,
345.391,
0.743
],
[
501.7,
324.316,
0.76
]
]
]
],
"3D": [
[
[
-0.04,
-0.124,
1.088,
0.943
],
[
-0.0,
-0.069,
1.127,
0.92
],
[
-0.082,
-0.15,
1.106,
0.964
],
[
-0.005,
-0.051,
1.159,
0.952
],
[
-0.148,
-0.059,
1.111,
0.983
],
[
0.115,
0.064,
1.093,
0.981
],
[
-0.234,
0.076,
0.968,
0.98
],
[
0.354,
0.051,
1.09,
0.988
],
[
-0.189,
-0.105,
0.754,
0.964
],
[
0.315,
-0.244,
1.063,
0.969
],
[
-0.151,
-0.009,
0.955,
0.925
],
[
0.121,
0.2,
0.563,
0.952
],
[
-0.081,
0.16,
0.547,
0.967
],
[
0.071,
-0.242,
0.451,
0.955
],
[
-0.172,
-0.237,
0.703,
0.993
],
[
-0.047,
-0.396,
0.093,
0.985
],
[
0.224,
-0.293,
0.521,
0.972
],
[
0.026,
0.185,
0.55,
0.95
],
[
-0.062,
0.085,
1.026,
0.991
],
[
-0.077,
-0.051,
1.134,
0.971
]
]
]
}

2136
tests/poses_p1.json Normal file

File diff suppressed because it is too large Load Diff

132
tests/test_interface.py Normal file
View File

@ -0,0 +1,132 @@
import json
import sys
import time
import numpy as np
sys.path.append("../swig/")
import spt
# ==================================================================================================
def main():
print("")
# Test camera structure
camera = spt.Camera()
camera.name = "Camera 1"
camera.K = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
camera.DC = [0, 0, 0, 0, 0]
camera.R = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
camera.T = [[1], [2], [3]]
camera.width = 640
camera.height = 480
print(camera)
print("")
# Load input data
roomparams = [[4.8, 6.0, 2.0], [0, 0, 1.0]]
joint_names = [
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
"hip_middle",
"shoulder_middle",
"head",
]
cpath = "/SimplePoseTriangulation/data/h1/sample.json"
ppath = "/SimplePoseTriangulation/tests/poses_h1.json"
with open(cpath, "r") as file:
cdata = json.load(file)
with open(ppath, "r") as file:
pdata = json.load(file)
cams = cdata["cameras"]
poses_2d = pdata["2D"]
cameras = spt.convert_cameras(cams)
# Run triangulation
triangulator = spt.Triangulator(min_score=0.95)
stime = time.time()
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomparams, joint_names
)
print("3D time:", time.time() - stime)
print(np.array(poses_3d))
print("")
# Load input data
roomparams = [[5.6, 6.4, 2.4], [0, -0.5, 1.2]]
cpath = "/SimplePoseTriangulation/data/p1/sample.json"
ppath = "/SimplePoseTriangulation/tests/poses_p1.json"
with open(cpath, "r") as file:
cdata = json.load(file)
with open(ppath, "r") as file:
pdata = json.load(file)
cams = cdata["cameras"]
poses_2d = pdata["2D"]
cameras = spt.convert_cameras(cams)
# Run triangulation
triangulator.reset()
stime = time.time()
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomparams, joint_names
)
print("3D time:", time.time() - stime)
print(np.array(poses_3d))
print("")
# Run again to test last pose cache
stime = time.time()
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomparams, joint_names
)
print("3D time:", time.time() - stime)
print(np.array(poses_3d))
print("")
# Load input data
roomparams = [[6.0, 5.0, 2.0], [1.5, 1.0, -0.5]]
cpath = "/SimplePoseTriangulation/data/e1/sample.json"
ppath = "/SimplePoseTriangulation/tests/poses_e1.json"
with open(cpath, "r") as file:
cdata = json.load(file)
with open(ppath, "r") as file:
pdata = json.load(file)
cams = cdata["cameras"]
poses_2d = pdata["2D"]
cameras = spt.convert_cameras(cams)
# Run triangulation
triangulator.reset()
stime = time.time()
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomparams, joint_names
)
print("3D time:", time.time() - stime)
print(np.array(poses_3d))
print("")
triangulator.print_stats()
print("")
# ==================================================================================================
if __name__ == "__main__":
main()