#pragma once #include #include #include #include #include #include #include // ================================================================================================= struct Track { std::vector>> core_poses; std::vector>> full_poses; std::vector timestamps; size_t id; }; // ================================================================================================= class PoseTracker { public: PoseTracker(float max_movement_speed, float max_distance); std::vector>>> track_poses( const std::vector>> &poses_3d, const std::vector &joint_names, const double timestamp); void reset(); private: float max_distance; float max_movement_speed; size_t history_size = 3; std::vector timestamps; std::vector pose_tracks; const std::vector core_joints = { "shoulder_left", "shoulder_right", "hip_left", "hip_right", "elbow_left", "elbow_right", "knee_left", "knee_right", "wrist_left", "wrist_right", "ankle_left", "ankle_right", }; std::tuple match_to_track(const std::vector> &core_pose_3d); std::vector> refine_pose(const Track &track); }; // ================================================================================================= // ================================================================================================= PoseTracker::PoseTracker(float max_movement_speed, float max_distance) { this->max_movement_speed = max_movement_speed; this->max_distance = max_distance; } // ================================================================================================= void PoseTracker::reset() { pose_tracks.clear(); timestamps.clear(); } // ================================================================================================= std::vector>>> PoseTracker::track_poses( const std::vector>> &poses_3d, const std::vector &joint_names, const double timestamp) { // Extract core joints std::vector core_joint_idx; for (const auto &joint : core_joints) { auto it = std::find(joint_names.begin(), joint_names.end(), joint); core_joint_idx.push_back(std::distance(joint_names.begin(), it)); } std::vector>> core_poses; core_poses.resize(poses_3d.size()); for (size_t i = 0; i < poses_3d.size(); ++i) { core_poses[i].resize(core_joint_idx.size()); for (size_t j = 0; j < core_joint_idx.size(); ++j) { for (size_t k = 0; k < 4; ++k) { core_poses[i][j][k] = poses_3d[i][core_joint_idx[j]][k]; } } } // Match core poses to tracks std::vector> matches; for (size_t i = 0; i < core_poses.size(); ++i) { auto [track_idx, distance_sq] = match_to_track(core_poses[i]); matches.emplace_back(i, track_idx, distance_sq); } std::sort(matches.begin(), matches.end(), [](const auto &a, const auto &b) { return std::get<2>(a) < std::get<2>(b); }); // If track is matched multiple times, only add the best and create new tracks for the rest std::vector used(pose_tracks.size(), false); for (size_t i = 0; i < matches.size(); ++i) { auto [pose_idx, track_idx, distance_sq] = matches[i]; if (track_idx != -1 && !used[track_idx]) { used[track_idx] = true; } else { std::get<1>(matches[i]) = -1; } } // Update tracks for (size_t i = 0; i < matches.size(); ++i) { auto [pose_idx, track_idx, distance_sq] = matches[i]; if (track_idx == -1) { // Create a new track Track new_track; new_track.core_poses.push_back(core_poses[pose_idx]); new_track.full_poses.push_back(poses_3d[pose_idx]); new_track.timestamps.push_back(timestamp); new_track.id = pose_tracks.size(); pose_tracks.push_back(new_track); } else { // Update existing track auto &track = pose_tracks[track_idx]; track.core_poses.push_back(core_poses[pose_idx]); track.full_poses.push_back(poses_3d[pose_idx]); track.timestamps.push_back(timestamp); } } // Remove old tracks timestamps.push_back(timestamp); if (timestamps.size() > history_size) { timestamps.erase(timestamps.begin()); } double max_age = timestamps.front(); for (size_t i = 0; i < pose_tracks.size();) { auto &track = pose_tracks[i]; double last_timestamp = track.timestamps.back(); if (last_timestamp < max_age) { pose_tracks.erase(pose_tracks.begin() + i); } else { ++i; } } // Remove old poses from tracks for (auto &track : pose_tracks) { while (track.core_poses.size() > history_size) { track.core_poses.erase(track.core_poses.begin()); track.full_poses.erase(track.full_poses.begin()); track.timestamps.erase(track.timestamps.begin()); } } // Refine poses std::vector>>> tracked_poses; for (size_t i = 0; i < pose_tracks.size(); ++i) { auto &track = pose_tracks[i]; if (track.core_poses.size() > 0) { std::vector> refined_pose = refine_pose(track); tracked_poses.emplace_back(track.id, refined_pose); } } return tracked_poses; } // ================================================================================================= std::tuple PoseTracker::match_to_track(const std::vector> &core_pose_3d) { int best_track = -1; float best_distance_sq = max_distance * max_distance; for (size_t i = 0; i < pose_tracks.size(); ++i) { const auto &track = pose_tracks[i]; if (track.core_poses.size() == 0) { continue; } // Calculate distance to the last pose in the track const auto &last_pose = track.core_poses.back(); float distance_sq = 0.0; for (size_t j = 0; j < core_pose_3d.size(); ++j) { float dx = core_pose_3d[j][0] - last_pose[j][0]; float dy = core_pose_3d[j][1] - last_pose[j][1]; float dz = core_pose_3d[j][2] - last_pose[j][2]; distance_sq += dx * dx + dy * dy + dz * dz; } distance_sq /= core_pose_3d.size(); if (distance_sq < best_distance_sq) { best_distance_sq = distance_sq; best_track = static_cast(i); } } return {best_track, best_distance_sq}; } // ================================================================================================= std::vector> PoseTracker::refine_pose(const Track &track) { // Restrict maximum movement by physical constraints, by clipping the pose to the maximum // movement distance from one of the track's history poses // // While advanced sensor filtering techniques, like using a Kalman-Filter, might improve the // average accuracy of the pose, they introduce update delays on fast movement changes. For // example, if a person stands still for a while and then suddenly moves, the first few frames // will likely be treated as outliers, since the filter will not be able to adapt fast enough. // This behaviour is not desired in a real-time critical applications, where the pose needs to // be updated to the real physical position of the person as fast as possible. Therefore, only // the movement speed is limited here. if (track.core_poses.size() < 2) { return track.full_poses.back(); } // Calculate maximum possible movement distance from each history pose std::vector max_movement_dists_sq; max_movement_dists_sq.resize(track.core_poses.size() - 1); double last_timestamp = track.timestamps.back(); for (size_t i = 0; i < track.core_poses.size() - 1; ++i) { double ts = track.timestamps[i]; float max_movement = max_movement_speed * (last_timestamp - ts); max_movement_dists_sq[i] = max_movement * max_movement; } // Clip joint if required std::vector> refined_pose = track.full_poses.back(); for (size_t i = 0; i < refined_pose.size(); ++i) { float min_dist_sq = std::numeric_limits::infinity(); size_t closest_idx = 0; bool clip = true; for (size_t j = 0; j < max_movement_dists_sq.size(); ++j) { float dx = refined_pose[i][0] - track.full_poses[j][i][0]; float dy = refined_pose[i][1] - track.full_poses[j][i][1]; float dz = refined_pose[i][2] - track.full_poses[j][i][2]; float dist_sq = dx * dx + dy * dy + dz * dz; if (dist_sq < min_dist_sq) { min_dist_sq = dist_sq; closest_idx = j; } if (dist_sq <= max_movement_dists_sq[j]) { clip = false; break; } } if (clip) { float dist_sq = min_dist_sq; float scale = max_movement_dists_sq[closest_idx] / dist_sq; float dx = refined_pose[i][0] - track.full_poses[closest_idx][i][0]; float dy = refined_pose[i][1] - track.full_poses[closest_idx][i][1]; float dz = refined_pose[i][2] - track.full_poses[closest_idx][i][2]; refined_pose[i][0] = track.full_poses[closest_idx][i][0] + dx * scale; refined_pose[i][1] = track.full_poses[closest_idx][i][1] + dy * scale; refined_pose[i][2] = track.full_poses[closest_idx][i][2] + dz * scale; // Set confidence to a low value if the joint is clipped refined_pose[i][3] = 0.1; } } return refined_pose; }