Merge branch 'tracking' into 'master'

Tracking

See merge request Percipiote/RapidPoseTriangulation!10
This commit is contained in:
DANBER
2025-05-28 08:25:19 +00:00
6 changed files with 2200 additions and 1739 deletions

View File

@ -16,6 +16,7 @@ using json = nlohmann::json;
#include "rpt_msgs/msg/poses.hpp" #include "rpt_msgs/msg/poses.hpp"
#include "/RapidPoseTriangulation/rpt/camera.hpp" #include "/RapidPoseTriangulation/rpt/camera.hpp"
#include "/RapidPoseTriangulation/rpt/interface.hpp" #include "/RapidPoseTriangulation/rpt/interface.hpp"
#include "/RapidPoseTriangulation/rpt/tracker.hpp"
// ================================================================================================= // =================================================================================================
@ -36,9 +37,14 @@ static const std::string pose_in_topic = "/poses/{}";
static const std::string cam_info_topic = "/{}/calibration"; static const std::string cam_info_topic = "/{}/calibration";
static const std::string pose_out_topic = "/poses/humans3d"; static const std::string pose_out_topic = "/poses/humans3d";
static const float min_match_score = 0.92; static const float min_match_score = 0.94;
static const size_t min_group_size = 1; static const size_t min_group_size = 1;
static const bool use_tracking = true;
static const float max_movement_speed = 2.0 * 1.5;
static const float cam_fps = 50;
static const float max_track_distance = 0.3 + max_movement_speed / cam_fps;
static const std::array<std::array<float, 3>, 2> roomparams = {{ static const std::array<std::array<float, 3>, 2> roomparams = {{
{4.0, 5.0, 2.2}, {4.0, 5.0, 2.2},
{1.0, 0.0, 1.1}, {1.0, 0.0, 1.1},
@ -59,9 +65,11 @@ public:
this->joint_names = {}; this->joint_names = {};
this->all_poses_set.resize(cam_ids.size(), false); this->all_poses_set.resize(cam_ids.size(), false);
// Load 3D model // Load 3D models
tri_model = std::make_unique<Triangulator>( tri_model = std::make_unique<Triangulator>(
min_match_score, min_group_size); min_match_score, min_group_size);
pose_tracker = std::make_unique<PoseTracker>(
max_movement_speed, max_track_distance);
// QoS // QoS
rclcpp::QoS qos_profile(1); rclcpp::QoS qos_profile(1);
@ -113,6 +121,7 @@ private:
rclcpp::Publisher<rpt_msgs::msg::Poses>::SharedPtr pose_pub_; rclcpp::Publisher<rpt_msgs::msg::Poses>::SharedPtr pose_pub_;
std::unique_ptr<Triangulator> tri_model; std::unique_ptr<Triangulator> tri_model;
std::unique_ptr<PoseTracker> pose_tracker;
std::vector<Camera> all_cameras; std::vector<Camera> all_cameras;
std::mutex cams_mutex, pose_mutex, model_mutex; std::mutex cams_mutex, pose_mutex, model_mutex;
@ -230,11 +239,32 @@ void Rpt3DWrapperNode::call_model()
// Since the prediction is very fast, parallel callback threads only need to wait a short time // Since the prediction is very fast, parallel callback threads only need to wait a short time
cams_mutex.lock(); cams_mutex.lock();
pose_mutex.lock(); pose_mutex.lock();
const auto valid_poses = tri_model->triangulate_poses( const auto poses_3d = tri_model->triangulate_poses(
all_poses, all_cameras, roomparams, joint_names); all_poses, all_cameras, roomparams, joint_names);
double min_ts = *std::min_element(all_timestamps.begin(), all_timestamps.end()); double min_ts = *std::min_element(all_timestamps.begin(), all_timestamps.end());
this->all_poses_set = std::vector<bool>(cam_ids.size(), false); this->all_poses_set = std::vector<bool>(cam_ids.size(), false);
std::vector<std::vector<std::array<float, 4>>> valid_poses;
std::vector<size_t> track_ids;
if (use_tracking)
{
auto pose_tracks = pose_tracker->track_poses(poses_3d, joint_names, min_ts);
std::vector<std::vector<std::array<float, 4>>> poses_3d_refined;
for (size_t j = 0; j < pose_tracks.size(); j++)
{
auto &pose = std::get<1>(pose_tracks[j]);
poses_3d_refined.push_back(pose);
auto &track_id = std::get<0>(pose_tracks[j]);
track_ids.push_back(track_id);
}
valid_poses = std::move(poses_3d_refined);
}
else
{
valid_poses = std::move(poses_3d);
track_ids = {};
}
pose_mutex.unlock(); pose_mutex.unlock();
cams_mutex.unlock(); cams_mutex.unlock();
@ -267,6 +297,7 @@ void Rpt3DWrapperNode::call_model()
} }
} }
pose_msg.joint_names = joint_names; pose_msg.joint_names = joint_names;
jdata["track_ids"] = track_ids;
pose_msg.extra_data = jdata.dump(); pose_msg.extra_data = jdata.dump();
pose_pub_->publish(pose_msg); pose_pub_->publish(pose_msg);

File diff suppressed because it is too large Load Diff

319
rpt/tracker.hpp Normal file
View File

@ -0,0 +1,319 @@
#pragma once

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <string>
#include <tuple>
#include <vector>
// =================================================================================================
struct Track
{
    // Pose history reduced to the core joints (used for track association).
    std::vector<std::vector<std::array<float, 4>>> core_poses;
    // Full pose history with all joints (used for refinement and output).
    std::vector<std::vector<std::array<float, 4>>> full_poses;
    // Capture timestamp of each stored pose, in seconds.
    std::vector<double> timestamps;
    // Unique, stable id of this track.
    size_t id;
};
// =================================================================================================
class PoseTracker
{
public:
    /// @brief Greedy nearest-neighbour tracker for triangulated 3D human poses.
    /// @param max_movement_speed Maximum plausible joint speed in m/s, used to clip jumps.
    /// @param max_distance Maximum RMS joint distance in m to match a pose to an existing track.
    PoseTracker(float max_movement_speed, float max_distance);

    /// @brief Assign the given poses to tracks and return the speed-limited poses.
    /// @param poses_3d One pose per person; each joint is {x, y, z, confidence}.
    /// @param joint_names Joint name for each joint index of the poses in poses_3d.
    /// @param timestamp Capture time of the poses in seconds.
    /// @return One (track id, refined full pose) tuple per live track.
    std::vector<std::tuple<size_t, std::vector<std::array<float, 4>>>> track_poses(
        const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
        const std::vector<std::string> &joint_names,
        const double timestamp);

    /// @brief Drop all tracks and history, e.g. on a scene change.
    void reset();

private:
    float max_distance;
    float max_movement_speed;
    // Number of frames kept per track (and globally) for matching and refinement
    size_t history_size = 3;
    // Monotonic counter so that ids stay unique even after old tracks are removed
    size_t next_track_id = 0;
    std::vector<double> timestamps;
    std::vector<Track> pose_tracks;
    // Stable torso/limb joints used for track association
    const std::vector<std::string> core_joints = {
        "shoulder_left",
        "shoulder_right",
        "hip_left",
        "hip_right",
        "elbow_left",
        "elbow_right",
        "knee_left",
        "knee_right",
        "wrist_left",
        "wrist_right",
        "ankle_left",
        "ankle_right",
    };

    // Returns (track index or -1, mean squared joint distance to that track)
    std::tuple<int, float> match_to_track(const std::vector<std::array<float, 4>> &core_pose_3d);
    // Clips the newest pose of a track to the physically possible movement range
    std::vector<std::array<float, 4>> refine_pose(const Track &track);
};
// =================================================================================================
// =================================================================================================
PoseTracker::PoseTracker(float max_movement_speed, float max_distance)
    : max_distance{max_distance}, max_movement_speed{max_movement_speed}
{
}
// =================================================================================================
void PoseTracker::reset()
{
    pose_tracks.clear();
    timestamps.clear();
}
// =================================================================================================
std::vector<std::tuple<size_t, std::vector<std::array<float, 4>>>> PoseTracker::track_poses(
    const std::vector<std::vector<std::array<float, 4>>> &poses_3d,
    const std::vector<std::string> &joint_names,
    const double timestamp)
{
    // Extract core joints; joints missing from joint_names are skipped instead of
    // producing an out-of-range index (std::find returns end() for them)
    std::vector<size_t> core_joint_idx;
    for (const auto &joint : core_joints)
    {
        auto it = std::find(joint_names.begin(), joint_names.end(), joint);
        if (it != joint_names.end())
        {
            core_joint_idx.push_back(std::distance(joint_names.begin(), it));
        }
    }
    std::vector<std::vector<std::array<float, 4>>> core_poses;
    core_poses.resize(poses_3d.size());
    for (size_t i = 0; i < poses_3d.size(); ++i)
    {
        core_poses[i].resize(core_joint_idx.size());
        for (size_t j = 0; j < core_joint_idx.size(); ++j)
        {
            for (size_t k = 0; k < 4; ++k)
            {
                core_poses[i][j][k] = poses_3d[i][core_joint_idx[j]][k];
            }
        }
    }

    // Match core poses to tracks
    std::vector<std::tuple<size_t, int, float>> matches;
    for (size_t i = 0; i < core_poses.size(); ++i)
    {
        auto [track_idx, distance_sq] = match_to_track(core_poses[i]);
        matches.emplace_back(i, track_idx, distance_sq);
    }
    // Sort by ascending distance so the closest pose claims a contested track
    std::sort(matches.begin(), matches.end(),
              [](const auto &a, const auto &b)
              { return std::get<2>(a) < std::get<2>(b); });

    // If a track is matched multiple times, only keep the best match and create
    // new tracks for the remaining poses
    std::vector<bool> used(pose_tracks.size(), false);
    for (size_t i = 0; i < matches.size(); ++i)
    {
        auto [pose_idx, track_idx, distance_sq] = matches[i];
        if (track_idx != -1 && !used[track_idx])
        {
            used[track_idx] = true;
        }
        else
        {
            std::get<1>(matches[i]) = -1;
        }
    }

    // Update tracks
    for (size_t i = 0; i < matches.size(); ++i)
    {
        auto [pose_idx, track_idx, distance_sq] = matches[i];
        if (track_idx == -1)
        {
            // Create a new track; a monotonic counter keeps ids unique even after
            // tracks have been removed (pose_tracks.size() would recycle ids)
            Track new_track;
            new_track.core_poses.push_back(core_poses[pose_idx]);
            new_track.full_poses.push_back(poses_3d[pose_idx]);
            new_track.timestamps.push_back(timestamp);
            new_track.id = next_track_id++;
            pose_tracks.push_back(new_track);
        }
        else
        {
            // Update existing track
            auto &track = pose_tracks[track_idx];
            track.core_poses.push_back(core_poses[pose_idx]);
            track.full_poses.push_back(poses_3d[pose_idx]);
            track.timestamps.push_back(timestamp);
        }
    }

    // Remove tracks that have not been updated within the kept history window
    timestamps.push_back(timestamp);
    if (timestamps.size() > history_size)
    {
        timestamps.erase(timestamps.begin());
    }
    double oldest_kept_ts = timestamps.front();
    for (size_t i = 0; i < pose_tracks.size();)
    {
        if (pose_tracks[i].timestamps.back() < oldest_kept_ts)
        {
            pose_tracks.erase(pose_tracks.begin() + i);
        }
        else
        {
            ++i;
        }
    }

    // Remove old poses from tracks
    for (auto &track : pose_tracks)
    {
        while (track.core_poses.size() > history_size)
        {
            track.core_poses.erase(track.core_poses.begin());
            track.full_poses.erase(track.full_poses.begin());
            track.timestamps.erase(track.timestamps.begin());
        }
    }

    // Refine poses
    std::vector<std::tuple<size_t, std::vector<std::array<float, 4>>>> tracked_poses;
    for (size_t i = 0; i < pose_tracks.size(); ++i)
    {
        auto &track = pose_tracks[i];
        if (track.core_poses.size() > 0)
        {
            std::vector<std::array<float, 4>> refined_pose = refine_pose(track);
            tracked_poses.emplace_back(track.id, refined_pose);
        }
    }
    return tracked_poses;
}
// =================================================================================================
std::tuple<int, float> PoseTracker::match_to_track(const std::vector<std::array<float, 4>> &core_pose_3d)
{
    int best_track = -1;
    float best_distance_sq = max_distance * max_distance;
    // Guard against division by zero below when no core joints are present
    if (core_pose_3d.empty())
    {
        return {best_track, best_distance_sq};
    }
    for (size_t i = 0; i < pose_tracks.size(); ++i)
    {
        const auto &track = pose_tracks[i];
        // Skip empty tracks and tracks stored with a different core joint set
        if (track.core_poses.size() == 0 ||
            track.core_poses.back().size() != core_pose_3d.size())
        {
            continue;
        }
        // Mean squared distance to the last pose in the track
        const auto &last_pose = track.core_poses.back();
        float distance_sq = 0.0;
        for (size_t j = 0; j < core_pose_3d.size(); ++j)
        {
            float dx = core_pose_3d[j][0] - last_pose[j][0];
            float dy = core_pose_3d[j][1] - last_pose[j][1];
            float dz = core_pose_3d[j][2] - last_pose[j][2];
            distance_sq += dx * dx + dy * dy + dz * dz;
        }
        distance_sq /= core_pose_3d.size();
        if (distance_sq < best_distance_sq)
        {
            best_distance_sq = distance_sq;
            best_track = static_cast<int>(i);
        }
    }
    return {best_track, best_distance_sq};
}
// =================================================================================================
std::vector<std::array<float, 4>> PoseTracker::refine_pose(const Track &track)
{
    // Restrict maximum movement by physical constraints, by clipping the pose to the maximum
    // movement distance from one of the track's history poses
    //
    // While advanced sensor filtering techniques, like using a Kalman-Filter, might improve the
    // average accuracy of the pose, they introduce update delays on fast movement changes. For
    // example, if a person stands still for a while and then suddenly moves, the first few frames
    // will likely be treated as outliers, since the filter will not be able to adapt fast enough.
    // This behaviour is not desired in real-time critical applications, where the pose needs to
    // be updated to the real physical position of the person as fast as possible. Therefore, only
    // the movement speed is limited here.
    if (track.core_poses.size() < 2)
    {
        return track.full_poses.back();
    }
    // Calculate maximum possible movement distance from each history pose
    std::vector<float> max_movement_dists_sq;
    max_movement_dists_sq.resize(track.core_poses.size() - 1);
    double last_timestamp = track.timestamps.back();
    for (size_t i = 0; i < track.core_poses.size() - 1; ++i)
    {
        double ts = track.timestamps[i];
        float max_movement = max_movement_speed * (last_timestamp - ts);
        max_movement_dists_sq[i] = max_movement * max_movement;
    }
    // Clip joint if required
    std::vector<std::array<float, 4>> refined_pose = track.full_poses.back();
    for (size_t i = 0; i < refined_pose.size(); ++i)
    {
        float min_dist_sq = std::numeric_limits<float>::infinity();
        size_t closest_idx = 0;
        bool clip = true;
        for (size_t j = 0; j < max_movement_dists_sq.size(); ++j)
        {
            float dx = refined_pose[i][0] - track.full_poses[j][i][0];
            float dy = refined_pose[i][1] - track.full_poses[j][i][1];
            float dz = refined_pose[i][2] - track.full_poses[j][i][2];
            float dist_sq = dx * dx + dy * dy + dz * dz;
            if (dist_sq < min_dist_sq)
            {
                min_dist_sq = dist_sq;
                closest_idx = j;
            }
            if (dist_sq <= max_movement_dists_sq[j])
            {
                clip = false;
                break;
            }
        }
        if (clip)
        {
            // Scale the offset so the joint lands exactly on the reachable sphere around the
            // closest history pose. The scale is a ratio of distances, so the square root of
            // the squared-distance ratio is required (the plain ratio would over-shrink)
            float scale = std::sqrt(max_movement_dists_sq[closest_idx] / min_dist_sq);
            float dx = refined_pose[i][0] - track.full_poses[closest_idx][i][0];
            float dy = refined_pose[i][1] - track.full_poses[closest_idx][i][1];
            float dz = refined_pose[i][2] - track.full_poses[closest_idx][i][2];
            refined_pose[i][0] = track.full_poses[closest_idx][i][0] + dx * scale;
            refined_pose[i][1] = track.full_poses[closest_idx][i][1] + dy * scale;
            refined_pose[i][2] = track.full_poses[closest_idx][i][2] + dz * scale;
            // Set confidence to a low value if the joint is clipped
            refined_pose[i][3] = 0.1;
        }
    }
    return refined_pose;
}

View File

@ -17,6 +17,7 @@ using json = nlohmann::json;
#include "/RapidPoseTriangulation/rpt/camera.hpp" #include "/RapidPoseTriangulation/rpt/camera.hpp"
#include "/RapidPoseTriangulation/rpt/interface.hpp" #include "/RapidPoseTriangulation/rpt/interface.hpp"
#include "/RapidPoseTriangulation/rpt/tracker.hpp"
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp" #include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp" #include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
@ -121,6 +122,9 @@ int main(int argc, char **argv)
const float min_match_score = config["min_match_score"]; const float min_match_score = config["min_match_score"];
const size_t min_group_size = config["min_group_size"]; const size_t min_group_size = config["min_group_size"];
const int take_interval = config["take_interval"]; const int take_interval = config["take_interval"];
const float ifps = config["fps"];
const float max_movement_speed = config["max_movement_speed"];
const float max_track_distance = config["max_track_distance"];
// Load 2D model // Load 2D model
bool use_wb = utils_pipeline::use_whole_body(whole_body); bool use_wb = utils_pipeline::use_whole_body(whole_body);
@ -128,9 +132,11 @@ int main(int argc, char **argv)
std::make_unique<utils_2d_pose::PosePredictor>( std::make_unique<utils_2d_pose::PosePredictor>(
use_wb, min_bbox_score, min_bbox_area, batch_poses); use_wb, min_bbox_score, min_bbox_area, batch_poses);
// Load 3D model // Load 3D models
std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>( std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>(
min_match_score, min_group_size); min_match_score, min_group_size);
std::unique_ptr<PoseTracker> pose_tracker = std::make_unique<PoseTracker>(
max_movement_speed, max_track_distance);
// Timers // Timers
size_t time_count = dataset.size(); size_t time_count = dataset.size();
@ -138,10 +144,12 @@ int main(int argc, char **argv)
std::vector<double> times_debayer; std::vector<double> times_debayer;
std::vector<double> times_pose2d; std::vector<double> times_pose2d;
std::vector<double> times_pose3d; std::vector<double> times_pose3d;
std::vector<double> times_tracks;
times_image.reserve(time_count); times_image.reserve(time_count);
times_debayer.reserve(time_count); times_debayer.reserve(time_count);
times_pose2d.reserve(time_count); times_pose2d.reserve(time_count);
times_pose3d.reserve(time_count); times_pose3d.reserve(time_count);
times_tracks.reserve(time_count);
size_t print_steps = (size_t)std::floor((float)time_count / 100.0f); size_t print_steps = (size_t)std::floor((float)time_count / 100.0f);
print_steps = std::max((size_t)1, print_steps); print_steps = std::max((size_t)1, print_steps);
@ -214,6 +222,7 @@ int main(int argc, char **argv)
{ {
// Reset last poses if scene changes // Reset last poses if scene changes
tri_model->reset(); tri_model->reset();
pose_tracker->reset();
old_scene = item["scene"]; old_scene = item["scene"];
} }
@ -241,6 +250,27 @@ int main(int argc, char **argv)
elapsed = std::chrono::high_resolution_clock::now() - stime; elapsed = std::chrono::high_resolution_clock::now() - stime;
times_pose3d.push_back(elapsed.count()); times_pose3d.push_back(elapsed.count());
if (ifps <= 0)
{
// Disable pose tracking if frame rate is too low
times_tracks.push_back(0.0);
}
else
{
stime = std::chrono::high_resolution_clock::now();
double ts = ((int)item["index"]) / ifps;
auto pose_tracks = pose_tracker->track_poses(poses_3d, joint_names_2d, ts);
std::vector<std::vector<std::array<float, 4>>> poses_3d_refined;
for (size_t j = 0; j < pose_tracks.size(); j++)
{
auto &pose = std::get<1>(pose_tracks[j]);
poses_3d_refined.push_back(pose);
}
poses_3d = poses_3d_refined;
elapsed = std::chrono::high_resolution_clock::now() - stime;
times_tracks.push_back(elapsed.count());
}
all_poses_3d.push_back(std::move(poses_3d)); all_poses_3d.push_back(std::move(poses_3d));
all_ids.push_back(item["id"].get<std::string>()); all_ids.push_back(item["id"].get<std::string>());
old_id = item["index"]; old_id = item["index"];
@ -254,23 +284,27 @@ int main(int argc, char **argv)
double time_debayer = 0.0; double time_debayer = 0.0;
double time_pose2d = 0.0; double time_pose2d = 0.0;
double time_pose3d = 0.0; double time_pose3d = 0.0;
double time_tracks = 0.0;
for (size_t i = warmup; i < time_count; i++) for (size_t i = warmup; i < time_count; i++)
{ {
time_image += times_image[i]; time_image += times_image[i];
time_debayer += times_debayer[i]; time_debayer += times_debayer[i];
time_pose2d += times_pose2d[i]; time_pose2d += times_pose2d[i];
time_pose3d += times_pose3d[i]; time_pose3d += times_pose3d[i];
time_tracks += times_tracks[i];
} }
double avg_time_image = time_image / (time_count - warmup); double avg_time_image = time_image / (time_count - warmup);
double avg_time_debayer = time_debayer / (time_count - warmup); double avg_time_debayer = time_debayer / (time_count - warmup);
double avg_time_pose2d = time_pose2d / (time_count - warmup); double avg_time_pose2d = time_pose2d / (time_count - warmup);
double avg_time_pose3d = time_pose3d / (time_count - warmup); double avg_time_pose3d = time_pose3d / (time_count - warmup);
double fps = 1.0 / (avg_time_debayer + avg_time_pose2d + avg_time_pose3d); double avg_time_tracks = time_tracks / (time_count - warmup);
double fps = 1.0 / (avg_time_debayer + avg_time_pose2d + avg_time_pose3d + avg_time_tracks);
std::cout << "{\n" std::cout << "{\n"
<< " \"img_loading\": " << avg_time_image << ",\n" << " \"img_loading\": " << avg_time_image << ",\n"
<< " \"demosaicing\": " << avg_time_debayer << ",\n" << " \"demosaicing\": " << avg_time_debayer << ",\n"
<< " \"avg_time_2d\": " << avg_time_pose2d << ",\n" << " \"avg_time_2d\": " << avg_time_pose2d << ",\n"
<< " \"avg_time_3d\": " << avg_time_pose3d << ",\n" << " \"avg_time_3d\": " << avg_time_pose3d << ",\n"
<< " \"time_tracks\": " << avg_time_tracks << ",\n"
<< " \"fps\": " << fps << "\n" << " \"fps\": " << fps << "\n"
<< "}" << std::endl; << "}" << std::endl;
tri_model->print_stats(); tri_model->print_stats();

View File

@ -51,15 +51,27 @@ default_min_group_size = 1
# If most of the time only one person is in an image, disable it, because it is slightly slower than # If most of the time only one person is in an image, disable it, because it is slightly slower than
default_batch_poses = True default_batch_poses = True
# Approach speed of EN ISO 13855 with 2000 mm/sec for hand speed
# and an additional factor to compensate for noise-based jumps
default_max_movement_speed = 2.0 * 1.5
# The size of an A4 sheet of paper which is assumed to fit between two different persons
# and additionally the distance a person can move between two frames (here at 10 fps)
default_max_track_distance = 0.3 + default_max_movement_speed / 10
datasets = { datasets = {
"human36m": { "human36m": {
"path": "/datasets/human36m/skelda/pose_test.json", "path": "/datasets/human36m/skelda/pose_test.json",
"take_interval": 5, "take_interval": 5,
"fps": 50,
"min_match_score": 0.95, "min_match_score": 0.95,
"min_group_size": 1, "min_group_size": 1,
"min_bbox_score": 0.4, "min_bbox_score": 0.4,
"min_bbox_area": 0.1 * 0.1, "min_bbox_area": 0.1 * 0.1,
"batch_poses": False, "batch_poses": False,
"max_movement_speed": 2.0 * 1.5,
"max_track_distance": 0.3 + default_max_movement_speed / (50 / 5),
}, },
"panoptic": { "panoptic": {
"path": "/datasets/panoptic/skelda/test.json", "path": "/datasets/panoptic/skelda/test.json",
@ -69,6 +81,7 @@ datasets = {
# "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"], # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
# "cams": [], # "cams": [],
"take_interval": 3, "take_interval": 3,
"fps": 30,
"min_match_score": 0.95, "min_match_score": 0.95,
"use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"], "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
"min_group_size": 1, "min_group_size": 1,
@ -77,101 +90,132 @@ datasets = {
# "min_group_size": 2, # "min_group_size": 2,
# "min_group_size": 11, # "min_group_size": 11,
"min_bbox_area": 0.05 * 0.05, "min_bbox_area": 0.05 * 0.05,
"max_track_distance": 0.3 + default_max_movement_speed / (30 / 3),
}, },
"mvor": { "mvor": {
"path": "/datasets/mvor/skelda/all.json", "path": "/datasets/mvor/skelda/all.json",
"take_interval": 1, "take_interval": 1,
"fps": -1,
"min_match_score": 0.81, "min_match_score": 0.81,
"min_bbox_score": 0.25, "min_bbox_score": 0.25,
}, },
"campus": { "campus": {
"path": "/datasets/campus/skelda/test.json", "path": "/datasets/campus/skelda/test.json",
"fps": 25,
"take_interval": 1, "take_interval": 1,
"min_match_score": 0.91, "min_match_score": 0.91,
"min_bbox_score": 0.5, "min_bbox_score": 0.5,
"max_track_distance": 0.3 + default_max_movement_speed / 25,
}, },
"shelf": { "shelf": {
"path": "/datasets/shelf/skelda/test.json", "path": "/datasets/shelf/skelda/test.json",
"take_interval": 1, "take_interval": 1,
"fps": 25,
"min_match_score": 0.95, "min_match_score": 0.95,
"min_group_size": 2, "min_group_size": 2,
"max_track_distance": 0.3 + default_max_movement_speed / 25,
}, },
"ikeaasm": { "ikeaasm": {
"path": "/datasets/ikeaasm/skelda/test.json", "path": "/datasets/ikeaasm/skelda/test.json",
"take_interval": 2, "take_interval": 2,
"fps": -1,
"min_match_score": 0.81, "min_match_score": 0.81,
"min_bbox_score": 0.20, "min_bbox_score": 0.20,
}, },
"chi3d": { "chi3d": {
"path": "/datasets/chi3d/skelda/all.json", "path": "/datasets/chi3d/skelda/all.json",
"take_interval": 5, "take_interval": 5,
"fps": 50,
"max_track_distance": 0.3 + default_max_movement_speed / (50 / 5),
}, },
"tsinghua": { "tsinghua": {
"path": "/datasets/tsinghua/skelda/test.json", "path": "/datasets/tsinghua/skelda/test.json",
"take_interval": 3, "take_interval": 3,
"fps": 30,
"min_match_score": 0.95, "min_match_score": 0.95,
"min_group_size": 2, "min_group_size": 2,
"max_track_distance": 0.3 + default_max_movement_speed / (30 / 3),
}, },
"human36m_wb": { "human36m_wb": {
"path": "/datasets/human36m/skelda/wb/test.json", "path": "/datasets/human36m/skelda/wb/test.json",
"take_interval": 100, "take_interval": 100,
"fps": -1,
"min_bbox_score": 0.4, "min_bbox_score": 0.4,
"batch_poses": False, "batch_poses": False,
}, },
"egohumans_tagging": { "egohumans_tagging": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "tagging", "subset": "tagging",
"min_match_score": 0.89, "min_match_score": 0.89,
"min_group_size": 1, "min_group_size": 1,
"min_bbox_score": 0.2, "min_bbox_score": 0.2,
"min_bbox_area": 0.05 * 0.05, "min_bbox_area": 0.05 * 0.05,
"max_movement_speed": 4.0 * 1.5,
"max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
}, },
"egohumans_legoassemble": { "egohumans_legoassemble": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "legoassemble", "subset": "legoassemble",
"min_group_size": 2, "min_group_size": 2,
"max_track_distance": 0.3 + default_max_movement_speed / (20 / 2),
}, },
"egohumans_fencing": { "egohumans_fencing": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "fencing", "subset": "fencing",
"min_group_size": 7, "min_group_size": 7,
"min_bbox_score": 0.5, "min_bbox_score": 0.5,
"min_bbox_area": 0.05 * 0.05, "min_bbox_area": 0.05 * 0.05,
"max_track_distance": 0.3 + default_max_movement_speed / (20 / 2),
}, },
"egohumans_basketball": { "egohumans_basketball": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "basketball", "subset": "basketball",
"min_group_size": 4, "min_group_size": 4,
"min_bbox_score": 0.25, "min_bbox_score": 0.25,
"min_bbox_area": 0.025 * 0.025, "min_bbox_area": 0.025 * 0.025,
"max_movement_speed": 4.0 * 1.5,
"max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
}, },
"egohumans_volleyball": { "egohumans_volleyball": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "volleyball", "subset": "volleyball",
"min_match_score": 0.95, "min_match_score": 0.95,
"min_group_size": 7, "min_group_size": 7,
"min_bbox_score": 0.25, "min_bbox_score": 0.25,
"min_bbox_area": 0.05 * 0.05, "min_bbox_area": 0.05 * 0.05,
"max_movement_speed": 4.0 * 1.5,
"max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
}, },
"egohumans_badminton": { "egohumans_badminton": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "badminton", "subset": "badminton",
"min_group_size": 7, "min_group_size": 7,
"min_bbox_score": 0.25, "min_bbox_score": 0.25,
"min_bbox_area": 0.05 * 0.05, "min_bbox_area": 0.05 * 0.05,
"max_movement_speed": 4.0 * 1.5,
"max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
}, },
"egohumans_tennis": { "egohumans_tennis": {
"path": "/datasets/egohumans/skelda/all.json", "path": "/datasets/egohumans/skelda/all.json",
"take_interval": 2, "take_interval": 2,
"fps": 20,
"subset": "tennis", "subset": "tennis",
"min_group_size": 11, "min_group_size": 11,
"min_bbox_area": 0.025 * 0.025, "min_bbox_area": 0.025 * 0.025,
"max_movement_speed": 4.0 * 1.5,
"max_track_distance": 0.3 + (4.0 * 1.5) / (20 / 2),
}, },
} }
@ -338,6 +382,12 @@ def main():
min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score) min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score)
min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area) min_bbox_area = datasets[dataset_use].get("min_bbox_area", default_min_bbox_area)
batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses) batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses)
max_movement_speed = datasets[dataset_use].get(
"max_movement_speed", default_max_movement_speed
)
max_track_distance = datasets[dataset_use].get(
"max_track_distance", default_max_track_distance
)
# Save config # Save config
config_path = tmp_export_dir + "config.json" config_path = tmp_export_dir + "config.json"
@ -347,8 +397,11 @@ def main():
"min_bbox_score": min_bbox_score, "min_bbox_score": min_bbox_score,
"min_bbox_area": min_bbox_area, "min_bbox_area": min_bbox_area,
"batch_poses": batch_poses, "batch_poses": batch_poses,
"max_movement_speed": max_movement_speed,
"max_track_distance": max_track_distance,
"whole_body": whole_body, "whole_body": whole_body,
"take_interval": datasets[dataset_use]["take_interval"], "take_interval": datasets[dataset_use]["take_interval"],
"fps": datasets[dataset_use]["fps"],
} }
utils_pipeline.save_json(config, config_path) utils_pipeline.save_json(config, config_path)

View File

@ -26,6 +26,9 @@ config = {
"batch_poses": True, "batch_poses": True,
"whole_body": whole_body, "whole_body": whole_body,
"take_interval": 1, "take_interval": 1,
"fps": -1,
"max_movement_speed": 0,
"max_track_distance": 0,
} }
joint_names_2d = utils_pipeline.get_joint_names(whole_body) joint_names_2d = utils_pipeline.get_joint_names(whole_body)