Initial cpp reimplementation.

Daniel
2024-09-11 11:40:00 +02:00
parent 7b426d209c
commit 244f46559c
14 changed files with 1904 additions and 1 deletion

.gitignore vendored (+37)

@ -1,3 +1,40 @@
spt_wrap.*
spt.py
*.bin
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

Dockerfile

@ -37,6 +37,11 @@ RUN python3 -c "from utils_2d_pose import load_wb_model; load_wb_model();"
# Fix an undefined symbol error with ompi
RUN echo "ldconfig" >> ~/.bashrc
# Install swig and later dependencies
RUN apt-get update && apt-get install -y --no-install-recommends build-essential
RUN apt-get update && apt-get install -y --no-install-recommends swig
RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev
COPY ./skelda/ /skelda/
RUN pip3 install --no-cache-dir -e /skelda/

README.md

@ -34,3 +34,11 @@ Triangulation of multiple persons from multiple camera views.
export CUDA_VISIBLE_DEVICES=0
python3 /SimplePoseTriangulation/scripts/test_skelda_dataset.py
```
<br>
## Debugging
```bash
cd /SimplePoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py
```

scripts/test_skelda_dataset.py

@ -1,6 +1,7 @@
import copy
import json
import os
import time
from typing import List
import cv2
@ -413,6 +414,7 @@ def main():
continue
# Load sample infos
print("\n" + dirpath)
with open(os.path.join(dirpath, "sample.json"), "r", encoding="utf-8") as file:
sample = json.load(file)
sample = update_sample(sample, dirpath)
@ -431,8 +433,11 @@ def main():
images_2d.append(img)
# Get 2D poses
stime = time.time()
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = update_keypoints(poses_2d, joint_names_2d)
print("2D time:", time.time() - stime)
# print(np.array(poses_2d).round(3).tolist())
fig1 = draw_utils.show_poses2d(
poses_2d, np.array(images_2d), joint_names_2d, "2D detections"
@ -449,9 +454,11 @@ def main():
poses3D = np.zeros([1, len(joint_names_3d), 4])
poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
else:
stime = time.time()
poses3D = triangulate_poses.get_3d_pose(
poses_2d, camparams, roomparams, joint_names_2d
)
print("3D time:", time.time() - stime)
poses2D = []
for cam in camparams:
poses_2d, _ = utils_pose.project_poses(poses3D, cam)
@ -463,9 +470,9 @@ def main():
joint_names_3d,
)
print("\n" + dirpath)
print(poses3D)
# print(poses2D)
# print(poses3D.round(3).tolist())
fig2 = draw_utils.utils_view.show_poses3d(
poses3D, joint_names_3d, roomparams, camparams

spt/camera.cpp Normal file (+76)

@ -0,0 +1,76 @@
#include <iomanip>
#include <sstream>
#include "camera.hpp"
// =================================================================================================
// =================================================================================================
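// Prints an MxN matrix as a nested Python-style list, e.g. [[1.0, 2.0], [3.0, 4.0]]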
template <size_t M, size_t N>
static const std::string print_matrix(const std::array<std::array<double, N>, M> &matrix)
{
std::ostringstream out;
out << "[";
for (size_t j = 0; j < M; ++j)
{
out << "[";
for (size_t i = 0; i < N; ++i)
{
out << matrix[j][i];
if (i < N - 1)
{
out << ", ";
}
}
out << "]";
if (j < M - 1)
{
out << ", ";
}
}
out << "]";
return out.str();
}
// =================================================================================================
// =================================================================================================
std::string Camera::to_string() const
{
std::ostringstream out;
out << std::fixed << std::setprecision(6);
out << "{";
out << "'name': '" << name << "', ";
out << "'K': " << print_matrix(K) << ", ";
out << "'DC': [";
for (size_t i = 0; i < DC.size(); ++i)
{
out << DC[i];
if (i < DC.size() - 1)
out << ", ";
}
out << "], ";
out << "'R': " << print_matrix(R) << ", ";
out << "'T': " << print_matrix(T) << ", ";
out << "'P': " << print_matrix(P) << ", ";
out << "'width': " << width << ", ";
out << "'height': " << height << ", ";
out << "'type': " << type;
out << "}";
return out.str();
}
// =================================================================================================
std::ostream &operator<<(std::ostream &out, const Camera &cam)
{
out << cam.to_string();
return out;
}

spt/camera.hpp Normal file (+24)

@ -0,0 +1,24 @@
#pragma once
#include <array>
#include <iostream>
#include <string>
#include <vector>
// =================================================================================================
struct Camera
{
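// name: camera identifier
// K: 3x3 intrinsic matrix, DC: lens distortion coefficients in OpenCV order (at least 5 values)
// R: 3x3 rotation from world to camera coordinates, T: camera position in world coordinates
// (the projection uses x_cam = R * (x_world - T))
// P: projection matrix (recomputed internally from K, R and T before triangulation)
// width/height: image size in pixels, type: camera model (e.g. "fisheye")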
std::string name;
std::array<std::array<double, 3>, 3> K;
std::vector<double> DC;
std::array<std::array<double, 3>, 3> R;
std::array<std::array<double, 1>, 3> T;
std::array<std::array<double, 3>, 4> P;
int width;
int height;
std::string type;
friend std::ostream &operator<<(std::ostream &out, Camera const &camera);
std::string to_string() const;
};

spt/interface.cpp Normal file (+28)

@ -0,0 +1,28 @@
#include "triangulator.hpp"
#include "interface.hpp"
// =================================================================================================
// =================================================================================================
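// Thin wrapper that only forwards to TriangulatorInternal, so that the SWIG interface does
// not have to parse the OpenCV headers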
Triangulator::Triangulator(double min_score)
{
this->triangulator = new TriangulatorInternal(min_score);
}
// =================================================================================================
Triangulator::~Triangulator()
{
delete this->triangulator;
}
// =================================================================================================
std::vector<std::vector<std::array<double, 4>>> Triangulator::triangulate_poses(
const std::vector<std::vector<std::vector<std::array<double, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<double, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names)
{
return this->triangulator->triangulate_poses(poses_2d, cameras, roomparams, joint_names);
}
// =================================================================================================
void Triangulator::reset()
{
this->triangulator->reset();
}

spt/interface.hpp Normal file (+49)

@ -0,0 +1,49 @@
#pragma once
#include <string>
#include <vector>
#include "camera.hpp"
// =================================================================================================
// Forward declaration of the class, so that SWIG does not try to parse all of its dependencies.
class TriangulatorInternal;
// =================================================================================================
class Triangulator
{
public:
/**
* Triangulator to predict poses from multiple views.
*
*
* @param min_score Minimum score to consider a triangulated joint as valid.
*/
Triangulator(
double min_score = 0.95);
/** Destructor, releases the internal triangulator. */
~Triangulator();
/**
* Calculate a triangulation.
*
*
* @param poses_2d List of shape [views, persons, joints, 3], containing the 2D poses.
* @param cameras List of cameras.
* @param roomparams Room parameters (room center, room size).
* @param joint_names List of 2D joint names.
*
* @return List of shape [persons, joints, 4], containing the 3D poses.
*/
std::vector<std::vector<std::array<double, 4>>> triangulate_poses(
const std::vector<std::vector<std::vector<std::array<double, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<double, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
/** Reset the triangulator. */
void reset();
private:
TriangulatorInternal *triangulator;
};

spt/triangulator.cpp Normal file (+887)

@ -0,0 +1,887 @@
#include <numeric>
#include <opencv2/opencv.hpp>
#include "camera.hpp"
#include "triangulator.hpp"
// =================================================================================================
// =================================================================================================
[[maybe_unused]] static void print_2d_mat(const cv::Mat &mat)
{
// Ensure the matrix is 2D
if (mat.dims != 2)
{
std::cerr << "Error: The matrix is not 2D." << std::endl;
return;
}
// Retrieve matrix dimensions
int rows = mat.rows;
int cols = mat.cols;
// Print the matrix in a NumPy-like style
std::cout << "cv::Mat('shape': (" << rows << ", " << cols << ")";
std::cout << ", 'data': [" << std::endl;
for (int i = 0; i < rows; ++i)
{
std::cout << " [";
for (int j = 0; j < cols; ++j)
{
std::cout << std::fixed << std::setprecision(3) << mat.at<double>(i, j);
if (j < cols - 1)
{
std::cout << ", ";
}
}
std::cout << "]";
if (i < rows - 1)
{
std::cout << "," << std::endl;
}
}
std::cout << "])" << std::endl;
}
// =================================================================================================
[[maybe_unused]] static void print_allpairs(
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs)
{
std::cout << "All pairs: [" << std::endl;
for (const auto &pair : all_pairs)
{
const auto &tuple_part = pair.first;
const auto &pair_part = pair.second;
// Print the tuple part
std::cout << "("
<< std::get<0>(tuple_part) << ", "
<< std::get<1>(tuple_part) << ", "
<< std::get<2>(tuple_part) << ", "
<< std::get<3>(tuple_part) << ")";
// Print the pair part
std::cout << ", ("
<< pair_part.first << ", "
<< pair_part.second << ")"
<< std::endl;
}
std::cout << "]" << std::endl;
}
// =================================================================================================
// =================================================================================================
CameraInternal::CameraInternal(const Camera &cam)
{
this->cam = cam;
// Convert camera matrix to cv::Mat for OpenCV
K = cv::Mat(3, 3, CV_64FC1, const_cast<double *>(&cam.K[0][0])).clone();
DC = cv::Mat(cam.DC.size(), 1, CV_64FC1, const_cast<double *>(cam.DC.data())).clone();
R = cv::Mat(3, 3, CV_64FC1, const_cast<double *>(&cam.R[0][0])).clone();
T = cv::Mat(3, 1, CV_64FC1, const_cast<double *>(&cam.T[0][0])).clone();
}
// =================================================================================================
void CameraInternal::update_projection_matrix()
{
// Calculate opencv-style projection matrix
cv::Mat Tr, RT;
Tr = R * (T * -1);
cv::hconcat(R, Tr, RT);
P = K * RT;
}
// =================================================================================================
// =================================================================================================
TriangulatorInternal::TriangulatorInternal(double min_score)
{
this->min_score = min_score;
}
// =================================================================================================
std::vector<std::vector<std::array<double, 4>>> TriangulatorInternal::triangulate_poses(
const std::vector<std::vector<std::vector<std::array<double, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<double, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names)
{
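// Overall pipeline: validate inputs, convert them to OpenCV types, undistort the 2D poses,
// build cross-view person pairs, triangulate and score each pair on the core joints, drop
// low-scoring pairs, group pairs that belong to the same person, triangulate the full poses
// and merge each group into one final 3D pose per person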
// Check inputs
if (poses_2d.empty())
{
throw std::invalid_argument("No 2D poses provided.");
}
if (poses_2d.size() != cameras.size())
{
throw std::invalid_argument("Number of cameras and 2D poses must be the same.");
}
if (joint_names.size() != poses_2d[0][0].size())
{
throw std::invalid_argument("Number of joint names and 2D poses must be the same.");
}
for (const auto &joint : core_joints)
{
if (std::find(joint_names.begin(), joint_names.end(), joint) == joint_names.end())
{
throw std::invalid_argument("Core joint '" + joint + "' not found in joint names.");
}
}
// Convert inputs to internal formats
std::vector<std::vector<cv::Mat>> poses_2d_mats;
poses_2d_mats.reserve(cameras.size());
for (size_t i = 0; i < cameras.size(); ++i)
{
size_t num_persons = poses_2d[i].size();
std::vector<cv::Mat> camera_poses;
camera_poses.reserve(num_persons);
for (size_t j = 0; j < num_persons; ++j)
{
// Read the joint count per person so that views without any detections are handled safely
size_t num_joints = poses_2d[i][j].size();
std::vector<int> dims = {(int)num_joints, 3};
cv::Mat pose_mat(dims, CV_64F);
for (size_t k = 0; k < num_joints; ++k)
{
for (size_t l = 0; l < 3; ++l)
{
pose_mat.at<double>(k, l) = poses_2d[i][j][k][l];
}
}
camera_poses.push_back(pose_mat);
}
poses_2d_mats.push_back(camera_poses);
}
std::vector<CameraInternal> internal_cameras;
for (size_t i = 0; i < cameras.size(); ++i)
{
internal_cameras.emplace_back(cameras[i]);
}
std::vector<size_t> core_joint_idx;
for (const auto &joint : core_joints)
{
auto it = std::find(joint_names.begin(), joint_names.end(), joint);
core_joint_idx.push_back(std::distance(joint_names.begin(), it));
}
// Undistort 2D poses
for (size_t i = 0; i < cameras.size(); ++i)
{
undistort_poses(poses_2d_mats[i], internal_cameras[i]);
internal_cameras[i].update_projection_matrix();
}
// Create pairs of persons
// If a person was already matched in the last frame, only pairs with those matches would be
// created; otherwise all possible cross-view pairs are created (matching against the last
// frame is not implemented yet, so all pairs are used)
std::vector<int> num_persons;
for (size_t i = 0; i < cameras.size(); ++i)
{
num_persons.push_back(poses_2d[i].size());
}
std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> all_pairs;
for (size_t i = 0; i < cameras.size(); ++i)
{
for (size_t j = i + 1; j < cameras.size(); ++j)
{
for (size_t k = 0; k < poses_2d[i].size(); ++k)
{
for (size_t l = 0; l < poses_2d[j].size(); ++l)
{
int pid1 = std::accumulate(num_persons.begin(), num_persons.begin() + i, 0) + k;
int pid2 = std::accumulate(num_persons.begin(), num_persons.begin() + j, 0) + l;
bool match = false;
if (!match)
{
all_pairs.emplace_back(
std::make_tuple(i, j, k, l), std::make_pair(pid1, pid2));
}
}
}
}
}
// Calculate pair scores
std::vector<std::pair<cv::Mat, double>> all_scored_poses;
all_scored_poses.reserve(all_pairs.size());
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const auto &pids = all_pairs[i].first;
// Extract camera parameters
const auto &cam1 = internal_cameras[std::get<0>(pids)];
const auto &cam2 = internal_cameras[std::get<1>(pids)];
// Extract 2D poses
const cv::Mat &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)];
const cv::Mat &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)];
// Select core joints
std::vector<int> dims = {(int)core_joint_idx.size(), 3};
cv::Mat pose1_core(dims, pose1.type());
cv::Mat pose2_core(dims, pose2.type());
for (size_t j = 0; j < core_joint_idx.size(); ++j)
{
size_t idx = core_joint_idx[j];
pose1.row(idx).copyTo(pose1_core.row(j));
pose2.row(idx).copyTo(pose2_core.row(j));
}
// Triangulate and score
auto [pose3d, score] = triangulate_and_score(pose1_core, pose2_core, cam1, cam2, roomparams);
all_scored_poses.emplace_back(pose3d, score);
}
// Drop low scoring poses
std::vector<size_t> drop_indices;
for (size_t i = 0; i < all_scored_poses.size(); ++i)
{
if (all_scored_poses[i].second < min_score)
{
drop_indices.push_back(i);
}
}
if (!drop_indices.empty())
{
// Erase from the back so the remaining indices stay valid; a reverse iterator avoids the
// unsigned wrap-around of a descending size_t loop counter
for (auto it = drop_indices.rbegin(); it != drop_indices.rend(); ++it)
{
all_scored_poses.erase(all_scored_poses.begin() + *it);
all_pairs.erase(all_pairs.begin() + *it);
}
}
// Group pairs that share a person
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> groups;
groups = calc_grouping(all_pairs, all_scored_poses, min_score);
// Calculate full 3D poses
std::vector<cv::Mat> all_full_poses;
all_full_poses.reserve(all_pairs.size());
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const auto &pids = all_pairs[i].first;
const auto &cam1 = internal_cameras[std::get<0>(pids)];
const auto &cam2 = internal_cameras[std::get<1>(pids)];
const auto &pose1 = poses_2d_mats[std::get<0>(pids)][std::get<2>(pids)];
const auto &pose2 = poses_2d_mats[std::get<1>(pids)][std::get<3>(pids)];
auto [pose3d, score] = triangulate_and_score(pose1, pose2, cam1, cam2, roomparams);
all_full_poses.push_back(pose3d);
}
// Merge groups
std::vector<cv::Mat> all_merged_poses;
all_merged_poses.reserve(groups.size());
for (size_t i = 0; i < groups.size(); ++i)
{
const auto &group = groups[i];
std::vector<cv::Mat> poses;
for (const auto &idx : std::get<2>(group))
{
poses.push_back(all_full_poses[idx]);
}
auto merged_pose = merge_group(poses, min_score);
all_merged_poses.push_back(merged_pose);
}
// Convert to output format
std::vector<std::vector<std::array<double, 4>>> poses_3d;
poses_3d.reserve(all_merged_poses.size());
for (size_t i = 0; i < all_merged_poses.size(); ++i)
{
const auto &mat = all_merged_poses[i];
std::vector<std::array<double, 4>> pose;
size_t num_joints = mat.rows;
pose.reserve(num_joints);
for (size_t j = 0; j < num_joints; ++j)
{
std::array<double, 4> point;
for (size_t k = 0; k < 4; ++k)
{
point[k] = mat.at<double>(j, k);
}
pose.push_back(point);
}
poses_3d.push_back(std::move(pose));
}
return poses_3d;
}
// =================================================================================================
void TriangulatorInternal::reset()
{
last_poses_3d.clear();
}
// =================================================================================================
void TriangulatorInternal::undistort_points(cv::Mat &points, CameraInternal &icam)
{
int width = icam.cam.width;
int height = icam.cam.height;
// Undistort camera matrix
cv::Mat newK = cv::getOptimalNewCameraMatrix(
icam.K, icam.DC, cv::Size(width, height), 1, cv::Size(width, height));
// Undistort points
cv::undistortPoints(points, points, icam.K, icam.DC, cv::noArray(), newK);
// Update the camera parameters
icam.K = newK;
icam.DC = cv::Mat::zeros(5, 1, CV_64F);
}
// =================================================================================================
void TriangulatorInternal::undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam)
{
for (size_t p = 0; p < poses.size(); ++p)
{
int num_joints = poses[p].rows;
// Extract the (x, y) coordinates
std::vector<int> dims = {num_joints, 2};
cv::Mat points(dims, CV_64F);
for (int j = 0; j < num_joints; ++j)
{
points.at<double>(j, 0) = poses[p].at<double>(j, 0);
points.at<double>(j, 1) = poses[p].at<double>(j, 1);
}
// Undistort the points
undistort_points(points, icam);
// Update the original poses with the undistorted points
for (int j = 0; j < num_joints; ++j)
{
poses[p].at<double>(j, 0) = points.at<double>(j, 0);
poses[p].at<double>(j, 1) = points.at<double>(j, 1);
}
// Mask out points that are far outside the image (points slightly outside are still valid)
double offset = (icam.cam.width + icam.cam.height) / 40.0;
for (int j = 0; j < num_joints; ++j)
{
double x = poses[p].at<double>(j, 0);
double y = poses[p].at<double>(j, 1);
bool in_x = x >= -offset && x < icam.cam.width + offset;
bool in_y = y >= -offset && y < icam.cam.height + offset;
if (!in_x || !in_y)
{
poses[p].at<double>(j, 0) = 0;
poses[p].at<double>(j, 1) = 0;
poses[p].at<double>(j, 2) = 0;
}
}
}
}
// =================================================================================================
std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>> TriangulatorInternal::project_poses(
const std::vector<cv::Mat> &bodies3D, const CameraInternal &icam, bool calc_dists)
{
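// Projects the given 3D poses into the camera image and optionally also returns each
// joint's distance to the camera center (used for distance-weighted scoring)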
size_t num_persons = bodies3D.size();
std::vector<cv::Mat> bodies2D_list(num_persons);
std::vector<cv::Mat> dists_list(num_persons);
for (size_t i = 0; i < num_persons; ++i)
{
const cv::Mat &body3D = bodies3D[i];
size_t num_joints = body3D.size[0];
// Split up vector
std::vector<int> dimsA = {(int)num_joints, 3};
cv::Mat points3d(dimsA, CV_64F);
for (size_t i = 0; i < num_joints; ++i)
{
points3d.at<double>(i, 0) = body3D.at<double>(i, 0);
points3d.at<double>(i, 1) = body3D.at<double>(i, 1);
points3d.at<double>(i, 2) = body3D.at<double>(i, 2);
}
// Project from world to camera coordinate system
cv::Mat T_repeated = cv::repeat(icam.T, 1, num_joints);
cv::Mat xyz = (icam.R * (points3d.t() - T_repeated)).t();
// Set points behind the camera to zero
for (size_t i = 0; i < num_joints; ++i)
{
if (xyz.at<double>(i, 2) <= 0)
{
xyz.at<double>(i, 0) = 0;
xyz.at<double>(i, 1) = 0;
xyz.at<double>(i, 2) = 0;
}
}
// Calculate distance from camera center
cv::Mat dists;
if (calc_dists)
{
dists = xyz.mul(xyz);
cv::reduce(dists, dists, 1, cv::REDUCE_SUM, CV_64F);
cv::sqrt(dists, dists);
}
// Project to camera system
cv::Mat uv;
if (icam.cam.type == "fisheye")
{
// Fisheye projection is not implemented yet; fail early instead of continuing with an empty uv
throw std::runtime_error("Fisheye projection is not implemented yet.");
}
else
{
cv::Mat DCc = icam.DC.rowRange(0, 5).clone();
cv::projectPoints(
xyz, cv::Mat::zeros(3, 1, CV_64F), cv::Mat::zeros(3, 1, CV_64F), icam.K, DCc, uv);
}
uv = uv.reshape(1, {xyz.rows, 2});
// Add scores again
std::vector<int> dimsB = {(int)num_joints, 3};
cv::Mat bodies2D(dimsB, CV_64F);
for (size_t i = 0; i < num_joints; ++i)
{
bodies2D.at<double>(i, 0) = uv.at<double>(i, 0);
bodies2D.at<double>(i, 1) = uv.at<double>(i, 1);
bodies2D.at<double>(i, 2) = body3D.at<double>(i, 3);
}
// Filter invalid projections
cv::Mat valid_x = (bodies2D.col(0) >= 0) & (bodies2D.col(0) < icam.cam.width);
cv::Mat valid_y = (bodies2D.col(1) >= 0) & (bodies2D.col(1) < icam.cam.height);
cv::Mat vis = valid_x & valid_y;
for (size_t i = 0; i < num_joints; ++i)
{
if (!vis.at<uchar>(i))
{
bodies2D.at<double>(i, 0) = 0;
bodies2D.at<double>(i, 1) = 0;
bodies2D.at<double>(i, 2) = 0;
}
}
// Store results
bodies2D_list[i] = bodies2D;
dists_list[i] = dists;
}
return std::make_tuple(bodies2D_list, dists_list);
}
// =================================================================================================
cv::Mat TriangulatorInternal::score_projection(
const cv::Mat &pose1,
const cv::Mat &repro1,
const cv::Mat &dists1,
const cv::Mat &mask,
double iscale)
{
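// Scores how well the reprojection of the triangulated pose matches the original 2D
// detections: the per-joint pixel error is normalized by the image size, weighted by the
// average distance to the camera and mapped to a score in (0, 1]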
double min_score = 0.1;
double penalty = iscale;
// Calculate error
cv::Mat diff = pose1.colRange(0, 2) - repro1.colRange(0, 2);
cv::Mat error1;
error1 = diff.mul(diff);
cv::reduce(error1, error1, 1, cv::REDUCE_SUM, CV_64F);
cv::sqrt(error1, error1);
error1.setTo(0.0, ~mask);
// Set errors of invisible reprojections to a high value
cv::Mat mask_invisible = (repro1.col(2) < min_score);
error1.setTo(penalty, mask_invisible);
// Scale error by image size and distance to the camera
error1 = cv::min(error1, (iscale / 4.0)) / iscale;
// Compute scaling factor
double dscale1 = std::sqrt(cv::mean(dists1).val[0] / 3.5);
// Scale errors
error1 *= dscale1;
// Convert errors to a score
cv::Mat score1 = 1.0 / (1.0 + error1 * 10);
return score1;
}
// =================================================================================================
std::pair<cv::Mat, double> TriangulatorInternal::triangulate_and_score(
const cv::Mat &pose1,
const cv::Mat &pose2,
const CameraInternal &cam1,
const CameraInternal &cam2,
const std::array<std::array<double, 3>, 2> &roomparams)
{
// Mask out invisible joints
double min_score = 0.1;
cv::Mat mask1a = (pose1.col(2) >= min_score);
cv::Mat mask2a = (pose2.col(2) >= min_score);
cv::Mat mask = mask1a & mask2a;
// If too few joints are visible, return a low score
int num_visible = cv::countNonZero(mask);
if (num_visible < 3)
{
std::vector<int> dims = {(int)pose1.rows, 4};
cv::Mat pose3d(dims, CV_64F);
double score = 0.0;
return std::make_pair(pose3d, score);
}
// Triangulate points
std::vector<int> dimsA = {2, num_visible};
cv::Mat points1(dimsA, CV_64F);
cv::Mat points2(dimsA, CV_64F);
int idx = 0;
for (int i = 0; i < pose1.rows; ++i)
{
if (mask.at<uchar>(i) > 0)
{
points1.at<double>(0, idx) = pose1.at<double>(i, 0);
points1.at<double>(1, idx) = pose1.at<double>(i, 1);
points2.at<double>(0, idx) = pose2.at<double>(i, 0);
points2.at<double>(1, idx) = pose2.at<double>(i, 1);
idx++;
}
}
cv::Mat points3d_h, points3d;
cv::triangulatePoints(cam1.P, cam2.P, points1, points2, points3d_h);
cv::convertPointsFromHomogeneous(points3d_h.t(), points3d);
// Create the 3D pose matrix, zero-initialized so that masked-out joints stay at (0, 0, 0)
cv::Mat pose3d = cv::Mat::zeros(pose1.rows, 4, CV_64F);
int out_idx = 0;
for (int i = 0; i < pose1.rows; ++i)
{
if (mask.at<uchar>(i) > 0)
{
// points3d only contains the visible joints, so it is indexed with its own counter
pose3d.at<double>(i, 0) = points3d.at<double>(out_idx, 0);
pose3d.at<double>(i, 1) = points3d.at<double>(out_idx, 1);
pose3d.at<double>(i, 2) = points3d.at<double>(out_idx, 2);
pose3d.at<double>(i, 3) = 1.0;
out_idx++;
}
}
// Check if points are inside the room bounds
cv::Mat mean, mins, maxs;
cv::reduce(pose3d.colRange(0, 3), mean, 0, cv::REDUCE_AVG);
cv::reduce(pose3d.colRange(0, 3), mins, 0, cv::REDUCE_MIN);
cv::reduce(pose3d.colRange(0, 3), maxs, 0, cv::REDUCE_MAX);
double wdist = 0.1;
std::array<double, 3> center = roomparams[0];
std::array<double, 3> size = roomparams[1];
for (int i = 0; i < 3; ++i)
{
if (mean.at<double>(i) > center[i] + size[i] / 2 ||
mean.at<double>(i) < center[i] - size[i] / 2 ||
maxs.at<double>(i) > center[i] + size[i] / 2 + wdist ||
mins.at<double>(i) < center[i] - size[i] / 2 - wdist)
{
// Very low score if outside room
return {pose3d, 0.001};
}
}
// Calculate reprojections
std::vector<cv::Mat> poses_3d = {pose3d};
cv::Mat repro1, dists1, repro2, dists2;
auto [repro1s, dists1s] = project_poses(poses_3d, cam1, true);
auto [repro2s, dists2s] = project_poses(poses_3d, cam2, true);
repro1 = repro1s[0];
dists1 = dists1s[0];
repro2 = repro2s[0];
dists2 = dists2s[0];
// Calculate scores for each view
double iscale = (cam1.cam.width + cam1.cam.height) / 2.0;
cv::Mat score1 = score_projection(pose1, repro1, dists1, mask, iscale);
cv::Mat score2 = score_projection(pose2, repro2, dists2, mask, iscale);
// Combine scores
cv::Mat scores = (score1 + score2) / 2.0;
// Add scores to 3D pose
for (int i = 0; i < pose1.rows; ++i)
{
if (mask.at<uchar>(i) > 0)
{
pose3d.at<double>(i, 3) = scores.at<double>(i);
}
}
// Drop lowest scores
int drop_k = static_cast<int>(pose1.rows * 0.2);
std::sort(scores.begin<double>(), scores.end<double>());
// Calculate final score
double score = 0.0;
for (int i = drop_k; i < scores.rows; i++)
{
score += scores.at<double>(i);
}
score /= (pose1.rows - drop_k);
return std::make_pair(pose3d, score);
}
// =================================================================================================
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> TriangulatorInternal::calc_grouping(
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
const std::vector<std::pair<cv::Mat, double>> &all_scored_poses,
double min_score)
{
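// Greedily assigns each pairwise triangulation to an existing group if both the pose center
// and the average joint distance are close enough, otherwise a new group is started; each
// group is expected to correspond to one person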
double max_center_dist = 0.6;
double max_joint_avg_dist = 0.3;
size_t num_joints = all_scored_poses[0].first.rows;
// Calculate pose centers
std::vector<cv::Point3d> centers;
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const cv::Mat &pose_3d = all_scored_poses[i].first;
cv::Point3d center(0, 0, 0);
size_t num_valid = 0;
for (size_t j = 0; j < num_joints; ++j)
{
if (pose_3d.at<double>(j, 3) > min_score)
{
center.x += pose_3d.at<double>(j, 0);
center.y += pose_3d.at<double>(j, 1);
center.z += pose_3d.at<double>(j, 2);
num_valid++;
}
}
if (num_valid > 0)
{
center.x /= num_valid;
center.y /= num_valid;
center.z /= num_valid;
}
centers.push_back(center);
}
// Calculate Groups
// defined as a tuple of center, pose, and all-pairs-indices of members
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> groups;
for (size_t i = 0; i < all_pairs.size(); ++i)
{
const cv::Mat &pose_3d = all_scored_poses[i].first;
const cv::Point3d &center = centers[i];
double best_dist = std::numeric_limits<double>::infinity();
int best_group = -1;
for (size_t j = 0; j < groups.size(); ++j)
{
auto &group = groups[j];
cv::Point3d &group_center = std::get<0>(group);
cv::Mat &group_pose = std::get<1>(group);
// Check if the center is close enough
if (cv::norm(group_center - center) < max_center_dist)
{
// Calculate average joint distance
std::vector<double> dists;
for (size_t row = 0; row < num_joints; row++)
{
if (pose_3d.at<double>(row, 3) > min_score && group_pose.at<double>(row, 3) > min_score)
{
cv::Point3d p1(pose_3d.at<double>(row, 0), pose_3d.at<double>(row, 1), pose_3d.at<double>(row, 2));
cv::Point3d p2(group_pose.at<double>(row, 0), group_pose.at<double>(row, 1), group_pose.at<double>(row, 2));
dists.push_back(cv::norm(p1 - p2));
}
}
double dist = std::numeric_limits<double>::infinity();
if (!dists.empty())
{
dist = std::accumulate(dists.begin(), dists.end(), 0.0) / dists.size();
}
// Check if the average joint distance is close enough
if (dist < max_joint_avg_dist && dist < best_dist)
{
best_dist = dist;
best_group = j;
}
}
}
if (best_group == -1)
{
// Create a new group
std::vector<int> new_indices{static_cast<int>(i)};
groups.emplace_back(center, pose_3d.clone(), new_indices);
}
else
{
// Update existing group
auto &group = groups[best_group];
cv::Point3d &group_center = std::get<0>(group);
cv::Mat &group_pose = std::get<1>(group);
std::vector<int> &group_indices = std::get<2>(group);
double n_elems = group_indices.size();
group_center = (group_center * n_elems + center) / (n_elems + 1);
cv::Mat new_pose = group_pose.clone();
for (size_t row = 0; row < num_joints; row++)
{
new_pose.at<double>(row, 0) = (group_pose.at<double>(row, 0) * n_elems + pose_3d.at<double>(row, 0)) / (n_elems + 1);
new_pose.at<double>(row, 1) = (group_pose.at<double>(row, 1) * n_elems + pose_3d.at<double>(row, 1)) / (n_elems + 1);
new_pose.at<double>(row, 2) = (group_pose.at<double>(row, 2) * n_elems + pose_3d.at<double>(row, 2)) / (n_elems + 1);
new_pose.at<double>(row, 3) = (group_pose.at<double>(row, 3) * n_elems + pose_3d.at<double>(row, 3)) / (n_elems + 1);
}
group_pose = new_pose;
group_indices.push_back(i);
}
}
return groups;
}
// =================================================================================================
cv::Mat TriangulatorInternal::merge_group(const std::vector<cv::Mat> &poses_3d, double min_score)
{
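// Builds an initial pose from the high-scoring triangulations of a group, discards joint
// proposals that are too far from it, keeps the best few proposals per joint and averages
// them into the final pose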
int num_poses = poses_3d.size();
int num_joints = poses_3d[0].rows;
// Merge poses to create initial pose
// Use only those triangulations with a high score
cv::Mat sum_poses = cv::Mat::zeros(poses_3d[0].size(), poses_3d[0].type());
cv::Mat sum_mask = cv::Mat::zeros(poses_3d[0].size(), CV_32S);
for (const auto &pose : poses_3d)
{
for (int j = 0; j < num_joints; ++j)
{
if (pose.at<double>(j, 3) > min_score)
{
sum_poses.row(j) += pose.row(j);
sum_mask.at<int>(j, 3) += 1;
}
}
}
cv::Mat initial_pose_3d = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
for (int j = 0; j < num_joints; ++j)
{
if (sum_mask.at<int>(j, 3) > 0)
{
initial_pose_3d.row(j) = sum_poses.row(j) / sum_mask.at<int>(j, 3);
}
}
// Use center as default if the initial pose is empty
cv::Mat jmask = cv::Mat::zeros(num_joints, 1, CV_8U);
cv::Point3d center(0, 0, 0);
int valid_joints = 0;
for (int j = 0; j < num_joints; ++j)
{
if (initial_pose_3d.at<double>(j, 3) > 0.0)
{
jmask.at<uchar>(j) = 1;
center += cv::Point3d(initial_pose_3d.at<double>(j, 0), initial_pose_3d.at<double>(j, 1), initial_pose_3d.at<double>(j, 2));
valid_joints++;
}
}
if (valid_joints > 0)
{
center *= 1.0 / valid_joints;
}
for (int j = 0; j < num_joints; ++j)
{
if (jmask.at<uchar>(j) == 0)
{
initial_pose_3d.at<double>(j, 0) = center.x;
initial_pose_3d.at<double>(j, 1) = center.y;
initial_pose_3d.at<double>(j, 2) = center.z;
}
}
// Drop joints with low scores and filter outlying joints using distance threshold
double offset = 0.1;
double max_dist = 1.2;
cv::Mat mask = cv::Mat::zeros(num_poses, num_joints, CV_8U);
cv::Mat distances = cv::Mat::zeros(num_poses, num_joints, CV_64F);
for (int i = 0; i < num_poses; ++i)
{
for (int j = 0; j < num_joints; ++j)
{
cv::Point3d joint_i(poses_3d[i].at<double>(j, 0), poses_3d[i].at<double>(j, 1), poses_3d[i].at<double>(j, 2));
cv::Point3d joint_initial(initial_pose_3d.at<double>(j, 0), initial_pose_3d.at<double>(j, 1), initial_pose_3d.at<double>(j, 2));
double distance = cv::norm(joint_i - joint_initial);
distances.at<double>(i, j) = distance;
if (distance <= max_dist && poses_3d[i].at<double>(j, 3) > (min_score - offset))
{
mask.at<uchar>(i, j) = 1;
}
}
}
// Select the best-k proposals for each joint that are closest to the initial pose
int keep_best = 3;
cv::Mat best_k_mask = cv::Mat::zeros(num_poses, num_joints, CV_8U);
for (int j = 0; j < num_joints; ++j)
{
std::vector<std::pair<double, int>> valid_indices;
for (int i = 0; i < num_poses; ++i)
{
if (mask.at<uchar>(i, j))
{
valid_indices.push_back({distances.at<double>(i, j), i});
}
}
std::sort(valid_indices.begin(), valid_indices.end());
for (int k = 0; k < std::min(keep_best, static_cast<int>(valid_indices.size())); ++k)
{
best_k_mask.at<uchar>(valid_indices[k].second, j) = 1;
}
}
// Combine masks
mask = mask & best_k_mask;
// Compute the final pose
sum_poses = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
sum_mask = cv::Mat::zeros(sum_mask.size(), CV_32S);
for (int i = 0; i < num_poses; ++i)
{
for (int j = 0; j < num_joints; ++j)
{
if (mask.at<uchar>(i, j))
{
sum_poses.row(j) += poses_3d[i].row(j);
sum_mask.at<int>(j, 3) += 1;
}
}
}
cv::Mat final_pose_3d = cv::Mat::zeros(sum_poses.size(), sum_poses.type());
for (int j = 0; j < num_joints; ++j)
{
if (sum_mask.at<int>(j, 3) > 0)
{
final_pose_3d.row(j) = sum_poses.row(j) / sum_mask.at<int>(j, 3);
}
}
return final_pose_3d;
}

spt/triangulator.hpp Normal file (+89)

@ -0,0 +1,89 @@
#pragma once
#include <array>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "camera.hpp"
// =================================================================================================
class CameraInternal
{
public:
CameraInternal(const Camera &cam);
Camera cam;
cv::Mat K;
cv::Mat DC;
cv::Mat R;
cv::Mat T;
cv::Mat P;
void update_projection_matrix();
};
// =================================================================================================
class TriangulatorInternal
{
public:
TriangulatorInternal(double min_score);
std::vector<std::vector<std::array<double, 4>>> triangulate_poses(
const std::vector<std::vector<std::vector<std::array<double, 3>>>> &poses_2d,
const std::vector<Camera> &cameras,
const std::array<std::array<double, 3>, 2> &roomparams,
const std::vector<std::string> &joint_names);
void reset();
private:
double min_score;
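// Subset of joints used to score candidate cross-view pairs before full triangulation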
const std::vector<std::string> core_joints = {
"shoulder_left",
"shoulder_right",
"hip_left",
"hip_right",
"elbow_left",
"elbow_right",
"knee_left",
"knee_right",
"wrist_left",
"wrist_right",
"ankle_left",
"ankle_right",
};
std::vector<cv::Mat> last_poses_3d;
void undistort_points(cv::Mat &points, CameraInternal &icam);
void undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam);
std::tuple<std::vector<cv::Mat>, std::vector<cv::Mat>> project_poses(
const std::vector<cv::Mat> &bodies3D, const CameraInternal &icam, bool calc_dists);
cv::Mat score_projection(
const cv::Mat &pose1,
const cv::Mat &repro1,
const cv::Mat &dists1,
const cv::Mat &mask,
double iscale);
std::pair<cv::Mat, double> triangulate_and_score(
const cv::Mat &pose1,
const cv::Mat &pose2,
const CameraInternal &cam1,
const CameraInternal &cam2,
const std::array<std::array<double, 3>, 2> &roomparams);
std::vector<std::tuple<cv::Point3d, cv::Mat, std::vector<int>>> calc_grouping(
const std::vector<std::pair<std::tuple<int, int, int, int>, std::pair<int, int>>> &all_pairs,
const std::vector<std::pair<cv::Mat, double>> &all_scored_poses,
double min_score);
cv::Mat merge_group(const std::vector<cv::Mat> &poses_3d, double min_score);
};

swig/Makefile Normal file (+13)

@ -0,0 +1,13 @@
# Standard compile options for the C++ objects
FLAGS = -fPIC
# Include and linker flags for the Python interface generated through SWIG
PYTHONI = -I/usr/include/python3.8/
PYTHONL = -Xlinker -export-dynamic
# Default super-target
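# Build steps: compile the C++ sources, generate the SWIG wrapper, compile the wrapper
# and link everything into the _spt.so Python extension module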
all:
cd ../spt/ && g++ -fPIC -std=c++2a -Wall -I/usr/include/opencv4 -c *.cpp ; cd ../swig/
swig -c++ -python -keyword -o spt_wrap.cxx spt.i
g++ $(FLAGS) $(PYTHONI) -I/usr/include/opencv4 -c spt_wrap.cxx -o spt_wrap.o
g++ $(PYTHONL) $(LIBFLAGS) -shared ../spt/*.o spt_wrap.o -lopencv_core -lopencv_imgproc -lopencv_calib3d -o _spt.so

swig/spt.i Normal file (+51)

@ -0,0 +1,51 @@
%module spt
%{
// Includes the header in the wrapper code
#include "../spt/camera.hpp"
#include "../spt/interface.hpp"
%}
// Some modules need extra imports beside the main .hpp file
%include "std_array.i"
%include "std_string.i"
%include "std_vector.i"
// Instantiate the templates used by the interface
// If a template is nested more than two levels deep, its inner parts have to be declared as well
namespace std {
%template(DoubleMatrix_3x3) array<array<double, 3>, 3>;
%template(VectorDouble) vector<double>;
%template(DoubleMatrix_3x1) array<array<double, 1>, 3>;
%template(DoubleMatrix_3x4) array<array<double, 3>, 4>;
%template(Matrix_Jx4) vector<array<double, 4>>;
%template(Matrix_NxJx4) vector<vector<array<double, 4>>>;
%template(Matrix_Jx3) vector<array<double, 3>>;
%template(Matrix_VxNxJx3) vector<vector<vector<array<double, 3>>>>;
%template(VectorCamera) vector<Camera>;
%template(DoubleMatrix_2x3) array<array<double, 3>, 2>;
%template(VectorString) vector<std::string>;
}
// Convert vectors to native (Python) lists
%naturalvar Camera::K;
%naturalvar Camera::DC;
%naturalvar Camera::R;
%naturalvar Camera::T;
// Improve printing of result objects
%extend Camera {
std::string __str__() const {
return $self->to_string();
}
}
// Ignore: Warning 503: Can't wrap 'operator <<' unless renamed to a valid identifier.
%warnfilter(503) Camera;
// Ignore: Warning 511: Can't use keyword arguments with overloaded functions.
// The warning is caused by enabling keyword arguments, which does not work for vectors.
#pragma SWIG nowarn=511
// Parse the header file to generate wrappers
%include "../spt/camera.hpp"
%include "../spt/interface.hpp"

tests/poses_h1.json Normal file (+544)

@ -0,0 +1,544 @@
{
"2D": [
[
[
[
445.214,
373.348,
0.805
],
[
446.448,
367.181,
0.834
],
[
446.859,
370.881,
0.447
],
[
456.726,
365.947,
0.974
],
[
484.272,
374.17,
0.745
],
[
445.625,
385.271,
0.611
],
[
505.24,
409.528,
0.761
],
[
393.411,
385.682,
0.486
],
[
484.683,
447.352,
0.477
],
[
369.976,
385.271,
0.672
],
[
452.203,
456.808,
0.292
],
[
442.336,
503.677,
0.504
],
[
478.927,
502.444,
0.505
],
[
414.79,
508.2,
0.462
],
[
428.769,
476.131,
0.332
],
[
423.835,
576.037,
0.689
],
[
379.433,
497.099,
0.485
],
[
460.632,
503.061,
0.504
],
[
475.433,
397.399,
0.611
],
[
470.499,
370.059,
0.745
]
]
],
[
[
[
502.664,
372.786,
0.81
],
[
503.643,
363.001,
0.894
],
[
492.39,
368.383,
0.862
],
[
508.535,
351.26,
0.85
],
[
479.181,
367.405,
0.837
],
[
536.422,
367.894,
0.721
],
[
476.245,
392.356,
0.785
],
[
584.367,
370.829,
0.867
],
[
464.993,
441.279,
0.717
],
[
560.883,
385.017,
0.72
],
[
470.864,
392.356,
0.687
],
[
542.292,
469.166,
0.674
],
[
503.153,
467.698,
0.639
],
[
502.664,
506.837,
0.765
],
[
458.633,
454.489,
0.798
],
[
471.842,
589.518,
0.813
],
[
530.551,
499.988,
0.71
],
[
522.723,
468.432,
0.639
],
[
506.333,
380.125,
0.721
],
[
493.858,
359.332,
0.837
]
]
],
[
[
[
635.959,
325.896,
0.26
],
[
603.03,
315.205,
0.37
],
[
647.506,
325.896,
0.366
],
[
611.155,
316.488,
0.826
],
[
641.519,
327.607,
0.878
],
[
577.37,
334.877,
0.652
],
[
652.638,
364.385,
0.766
],
[
532.466,
334.877,
0.77
],
[
650.5,
405.44,
0.57
],
[
569.672,
337.015,
0.406
],
[
631.255,
375.504,
0.375
],
[
573.521,
461.463,
0.659
],
[
617.57,
458.897,
0.573
],
[
614.576,
471.727,
0.287
],
[
656.487,
410.145,
0.653
],
[
643.657,
520.908,
0.599
],
[
631.255,
512.782,
0.262
],
[
595.546,
460.18,
0.573
],
[
615.004,
349.631,
0.652
],
[
626.337,
322.047,
0.826
]
]
],
[
[
[
512.076,
343.77,
0.704
],
[
518.561,
335.34,
0.855
],
[
502.349,
336.637,
0.799
],
[
523.1,
323.667,
0.76
],
[
480.3,
324.964,
0.887
],
[
534.773,
328.855,
0.743
],
[
446.579,
361.928,
0.849
],
[
592.488,
330.152,
0.802
],
[
479.652,
423.534,
0.795
],
[
621.022,
343.77,
0.904
],
[
491.324,
362.576,
0.811
],
[
523.1,
450.77,
0.671
],
[
472.518,
461.146,
0.7
],
[
549.688,
510.431,
0.776
],
[
491.324,
448.825,
0.757
],
[
528.937,
624.565,
0.855
],
[
587.949,
496.813,
0.741
],
[
497.809,
455.958,
0.671
],
[
490.676,
345.391,
0.743
],
[
501.7,
324.316,
0.76
]
]
]
],
"3D": [
[
[
-0.04,
-0.124,
1.088,
0.943
],
[
-0.0,
-0.069,
1.127,
0.92
],
[
-0.082,
-0.15,
1.106,
0.964
],
[
-0.005,
-0.051,
1.159,
0.952
],
[
-0.148,
-0.059,
1.111,
0.983
],
[
0.115,
0.064,
1.093,
0.981
],
[
-0.234,
0.076,
0.968,
0.98
],
[
0.354,
0.051,
1.09,
0.988
],
[
-0.189,
-0.105,
0.754,
0.964
],
[
0.315,
-0.244,
1.063,
0.969
],
[
-0.151,
-0.009,
0.955,
0.925
],
[
0.121,
0.2,
0.563,
0.952
],
[
-0.081,
0.16,
0.547,
0.967
],
[
0.071,
-0.242,
0.451,
0.955
],
[
-0.172,
-0.237,
0.703,
0.993
],
[
-0.047,
-0.396,
0.093,
0.985
],
[
0.224,
-0.293,
0.521,
0.972
],
[
0.026,
0.185,
0.55,
0.95
],
[
-0.062,
0.085,
1.026,
0.991
],
[
-0.077,
-0.051,
1.134,
0.971
]
]
]
}

tests/test_interface.py Normal file (+85)

@ -0,0 +1,85 @@
import json
import sys
import time
import numpy as np
sys.path.append("../swig/")
import spt
# ==================================================================================================
def main():
print("")
# Test camera structure
camera = spt.Camera()
camera.name = "Camera 1"
camera.K = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
camera.DC = [0, 0, 0, 0, 0]
camera.R = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
camera.T = [[1], [2], [3]]
camera.width = 640
camera.height = 480
print(camera)
print("")
# Load input data
roomparams = [[0, 0, 1], [4, 4, 2]]
joint_names = [
"nose",
"eye_left",
"eye_right",
"ear_left",
"ear_right",
"shoulder_left",
"shoulder_right",
"elbow_left",
"elbow_right",
"wrist_left",
"wrist_right",
"hip_left",
"hip_right",
"knee_left",
"knee_right",
"ankle_left",
"ankle_right",
"hip_middle",
"shoulder_middle",
"head",
]
cpath = "/SimplePoseTriangulation/data/h1/sample.json"
ppath = "/SimplePoseTriangulation/tests/poses_h1.json"
with open(cpath, "r") as file:
cdata = json.load(file)
with open(ppath, "r") as file:
pdata = json.load(file)
cams = cdata["cameras"]
poses_2d = pdata["2D"]
cameras = []
for cam in cams:
camera = spt.Camera()
camera.name = cam["name"]
camera.K = cam["K"]
camera.DC = cam["DC"]
camera.R = cam["R"]
camera.T = cam["T"]
camera.width = cam["width"]
camera.height = cam["height"]
cameras.append(camera)
# Run triangulation
triangulator = spt.Triangulator(min_score=0.95)
stime = time.time()
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomparams, joint_names
)
print("3D time:", time.time() - stime)
print(np.array(poses_3d))
# ==================================================================================================
if __name__ == "__main__":
main()