291 lines
9.5 KiB
C++
291 lines
9.5 KiB
C++
#pragma once
|
|
|
|
#include <array>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <cmath>
|
|
#include <iostream>
|
|
|
|
// =================================================================================================
|
|
|
|
struct Track
|
|
{
|
|
std::vector<std::vector<std::array<float, 4>>> core_poses;
|
|
std::vector<std::vector<std::array<float, 4>>> full_poses;
|
|
|
|
std::vector<double> timestamps;
|
|
size_t id;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
class PoseTracker
|
|
{
|
|
public:
|
|
PoseTracker(float max_movement_speed, float max_distance);
|
|
|
|
std::vector<std::tuple<size_t, std::vector<std::array<float, 4>>>> track_poses(
|
|
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
|
|
const std::vector<std::string> &joint_names,
|
|
const double timestamp);
|
|
|
|
void reset();
|
|
|
|
private:
|
|
float max_distance;
|
|
float max_movement_speed;
|
|
size_t history_size = 3;
|
|
|
|
std::vector<double> timestamps;
|
|
std::vector<Track> pose_tracks;
|
|
|
|
const std::vector<std::string> core_joints = {
|
|
"shoulder_left",
|
|
"shoulder_right",
|
|
"hip_left",
|
|
"hip_right",
|
|
"elbow_left",
|
|
"elbow_right",
|
|
"knee_left",
|
|
"knee_right",
|
|
"wrist_left",
|
|
"wrist_right",
|
|
"ankle_left",
|
|
"ankle_right",
|
|
};
|
|
|
|
int match_to_track(const std::vector<std::array<float, 4>> &core_pose_3d);
|
|
|
|
std::vector<std::array<float, 4>> refine_pose(const Track &track);
|
|
};
|
|
|
|
// =================================================================================================
|
|
// =================================================================================================
|
|
|
|
PoseTracker::PoseTracker(float max_movement_speed, float max_distance)
|
|
{
|
|
this->max_movement_speed = max_movement_speed;
|
|
this->max_distance = max_distance;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
void PoseTracker::reset()
|
|
{
|
|
pose_tracks.clear();
|
|
timestamps.clear();
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::tuple<size_t, std::vector<std::array<float, 4>>>> PoseTracker::track_poses(
|
|
const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
|
|
const std::vector<std::string> &joint_names,
|
|
const double timestamp)
|
|
{
|
|
// Extract core joints
|
|
std::vector<size_t> core_joint_idx;
|
|
for (const auto &joint : core_joints)
|
|
{
|
|
auto it = std::find(joint_names.begin(), joint_names.end(), joint);
|
|
core_joint_idx.push_back(std::distance(joint_names.begin(), it));
|
|
}
|
|
std::vector<std::vector<std::array<float, 4>>> core_poses;
|
|
core_poses.resize(poses_3d.size());
|
|
for (size_t i = 0; i < poses_3d.size(); ++i)
|
|
{
|
|
core_poses[i].resize(core_joint_idx.size());
|
|
for (size_t j = 0; j < core_joint_idx.size(); ++j)
|
|
{
|
|
for (size_t k = 0; k < 4; ++k)
|
|
{
|
|
core_poses[i][j][k] = poses_3d[i][core_joint_idx[j]][k];
|
|
}
|
|
}
|
|
}
|
|
|
|
// Match core poses to tracks
|
|
for (size_t i = 0; i < core_poses.size(); ++i)
|
|
{
|
|
int track_idx = match_to_track(core_poses[i]);
|
|
if (track_idx == -1)
|
|
{
|
|
// Create a new track
|
|
Track new_track;
|
|
new_track.core_poses.push_back(core_poses[i]);
|
|
new_track.full_poses.push_back(poses_3d[i]);
|
|
new_track.timestamps.push_back(timestamp);
|
|
new_track.id = pose_tracks.size();
|
|
pose_tracks.push_back(new_track);
|
|
}
|
|
else
|
|
{
|
|
// Update existing track
|
|
auto &track = pose_tracks[track_idx];
|
|
track.core_poses.push_back(core_poses[i]);
|
|
track.full_poses.push_back(poses_3d[i]);
|
|
track.timestamps.push_back(timestamp);
|
|
}
|
|
}
|
|
|
|
// Remove old tracks
|
|
timestamps.push_back(timestamp);
|
|
if (timestamps.size() > history_size)
|
|
{
|
|
timestamps.erase(timestamps.begin());
|
|
}
|
|
double max_age = timestamps.front();
|
|
for (size_t i = 0; i < pose_tracks.size();)
|
|
{
|
|
auto &track = pose_tracks[i];
|
|
double last_timestamp = track.timestamps.back();
|
|
if (last_timestamp < max_age)
|
|
{
|
|
pose_tracks.erase(pose_tracks.begin() + i);
|
|
}
|
|
else
|
|
{
|
|
++i;
|
|
}
|
|
}
|
|
|
|
// Remove old poses from tracks
|
|
for (auto &track : pose_tracks)
|
|
{
|
|
while (track.core_poses.size() > history_size)
|
|
{
|
|
track.core_poses.erase(track.core_poses.begin());
|
|
track.full_poses.erase(track.full_poses.begin());
|
|
track.timestamps.erase(track.timestamps.begin());
|
|
}
|
|
}
|
|
|
|
// Refine poses
|
|
std::vector<std::tuple<size_t, std::vector<std::array<float, 4>>>> tracked_poses;
|
|
for (size_t i = 0; i < pose_tracks.size(); ++i)
|
|
{
|
|
auto &track = pose_tracks[i];
|
|
if (track.core_poses.size() > 0)
|
|
{
|
|
std::vector<std::array<float, 4>> refined_pose = refine_pose(track);
|
|
tracked_poses.emplace_back(track.id, refined_pose);
|
|
}
|
|
}
|
|
|
|
return tracked_poses;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
int PoseTracker::match_to_track(const std::vector<std::array<float, 4>> &core_pose_3d)
|
|
{
|
|
int best_track = -1;
|
|
float best_distance_sq = max_distance * max_distance;
|
|
|
|
for (size_t i = 0; i < pose_tracks.size(); ++i)
|
|
{
|
|
const auto &track = pose_tracks[i];
|
|
if (track.core_poses.size() == 0)
|
|
continue;
|
|
|
|
// Calculate distance to the last pose in the track
|
|
const auto &last_pose = track.core_poses.back();
|
|
float distance_sq = 0.0;
|
|
for (size_t j = 0; j < core_pose_3d.size(); ++j)
|
|
{
|
|
float dx = core_pose_3d[j][0] - last_pose[j][0];
|
|
float dy = core_pose_3d[j][1] - last_pose[j][1];
|
|
float dz = core_pose_3d[j][2] - last_pose[j][2];
|
|
distance_sq += dx * dx + dy * dy + dz * dz;
|
|
}
|
|
distance_sq /= core_pose_3d.size();
|
|
|
|
if (distance_sq < best_distance_sq)
|
|
{
|
|
best_distance_sq = distance_sq;
|
|
best_track = static_cast<int>(i);
|
|
}
|
|
}
|
|
return best_track;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<std::array<float, 4>> PoseTracker::refine_pose(const Track &track)
|
|
{
|
|
// Restrict maximum movement by physical constraints, by clipping the pose to the maximum
|
|
// movement distance from one of the track's history poses
|
|
//
|
|
// While advanced sensor filtering techniques, like using a Kalman-Filter, might improve the
|
|
// average accuracy of the pose, they introduce update delays on fast movement changes. For
|
|
// example, if a person stands still for a while and then suddenly moves, the first few frames
|
|
// will likely be treated as outliers, since the filter will not be able to adapt fast enough.
|
|
// This behaviour is not desired in a real-time critical applications, where the pose needs to
|
|
// be updated to the real physical position of the person as fast as possible. Therefore, only
|
|
// the movement speed is limited here.
|
|
|
|
if (track.core_poses.size() < 2)
|
|
{
|
|
return track.full_poses.back();
|
|
}
|
|
|
|
// Calculate maximum possible movement distance from each history pose
|
|
std::vector<float> max_movement_dists_sq;
|
|
max_movement_dists_sq.resize(track.core_poses.size() - 1);
|
|
double last_timestamp = track.timestamps.back();
|
|
for (size_t i = 0; i < track.core_poses.size() - 1; ++i)
|
|
{
|
|
double ts = track.timestamps[i];
|
|
float max_movement = max_movement_speed * (last_timestamp - ts);
|
|
max_movement_dists_sq[i] = max_movement * max_movement;
|
|
}
|
|
|
|
// Clip joint if required
|
|
std::vector<std::array<float, 4>> refined_pose = track.full_poses.back();
|
|
for (size_t i = 0; i < refined_pose.size(); ++i)
|
|
{
|
|
float min_dist_sq = std::numeric_limits<float>::infinity();
|
|
size_t closest_idx = 0;
|
|
bool clip = true;
|
|
|
|
for (size_t j = 0; j < max_movement_dists_sq.size(); ++j)
|
|
{
|
|
|
|
float dx = refined_pose[i][0] - track.full_poses[j][i][0];
|
|
float dy = refined_pose[i][1] - track.full_poses[j][i][1];
|
|
float dz = refined_pose[i][2] - track.full_poses[j][i][2];
|
|
float dist_sq = dx * dx + dy * dy + dz * dz;
|
|
if (dist_sq < min_dist_sq)
|
|
{
|
|
min_dist_sq = dist_sq;
|
|
closest_idx = j;
|
|
}
|
|
if (dist_sq <= max_movement_dists_sq[j])
|
|
{
|
|
clip = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (clip)
|
|
{
|
|
float dist_sq = min_dist_sq;
|
|
float scale = max_movement_dists_sq[closest_idx] / dist_sq;
|
|
|
|
float dx = refined_pose[i][0] - track.full_poses[closest_idx][i][0];
|
|
float dy = refined_pose[i][1] - track.full_poses[closest_idx][i][1];
|
|
float dz = refined_pose[i][2] - track.full_poses[closest_idx][i][2];
|
|
refined_pose[i][0] = track.full_poses[closest_idx][i][0] + dx * scale;
|
|
refined_pose[i][1] = track.full_poses[closest_idx][i][1] + dy * scale;
|
|
refined_pose[i][2] = track.full_poses[closest_idx][i][2] + dz * scale;
|
|
|
|
// Set confidence to a low value if the joint is clipped
|
|
refined_pose[i][3] = 0.1;
|
|
}
|
|
}
|
|
|
|
return refined_pose;
|
|
}
|