Improved group merging.

This commit is contained in:
Daniel
2024-10-10 12:46:11 +02:00
parent 2f093e9f82
commit b80ad139e5
2 changed files with 257 additions and 147 deletions

View File

@ -5998,58 +5998,58 @@ Results of the model in various experiments on different datasets.
(volleyball) (volleyball)
```json ```json
{ {
"avg_time_2d": 0.3441050417788394, "avg_time_2d": 0.3424515831577885,
"avg_time_3d": 0.0033576402578267966, "avg_time_3d": 0.003363656568097639,
"avg_fps": 2.878006910378003 "avg_fps": 2.8917175564404274
} }
{ {
"person_nums": { "person_nums": {
"total_frames": 121, "total_frames": 121,
"total_labels": 484, "total_labels": 484,
"total_preds": 633, "total_preds": 634,
"considered_empty": 0, "considered_empty": 0,
"valid_preds": 483, "valid_preds": 483,
"invalid_preds": 150, "invalid_preds": 151,
"missing": 1, "missing": 1,
"invalid_fraction": 0.23697, "invalid_fraction": 0.23817,
"precision": 0.76303, "precision": 0.76183,
"recall": 0.99793, "recall": 0.99793,
"f1": 0.86482, "f1": 0.86404,
"non_empty": 633 "non_empty": 634
}, },
"mpjpe": { "mpjpe": {
"count": 483, "count": 483,
"mean": 0.035428, "mean": 0.03745,
"median": 0.032584, "median": 0.035063,
"std": 0.013121, "std": 0.014194,
"sem": 0.000598, "sem": 0.000647,
"min": 0.017908, "min": 0.01794,
"max": 0.12169, "max": 0.136715,
"recall-0.025": 0.216942, "recall-0.025": 0.163223,
"recall-0.05": 0.876033, "recall-0.05": 0.842975,
"recall-0.1": 0.993802, "recall-0.1": 0.991736,
"recall-0.15": 0.997934, "recall-0.15": 0.997934,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
"num_labels": 484, "num_labels": 484,
"ap-0.025": 0.041041, "ap-0.025": 0.023957,
"ap-0.05": 0.626763, "ap-0.05": 0.580664,
"ap-0.1": 0.797009, "ap-0.1": 0.79534,
"ap-0.15": 0.803699, "ap-0.15": 0.804355,
"ap-0.25": 0.803699, "ap-0.25": 0.804355,
"ap-0.5": 0.803699 "ap-0.5": 0.804355
}, },
"head": { "head": {
"count": 483, "count": 483,
"mean": 0.036562, "mean": 0.037576,
"median": 0.030386, "median": 0.030619,
"std": 0.022881, "std": 0.023474,
"sem": 0.001042, "sem": 0.001069,
"min": 0.004382, "min": 0.004382,
"max": 0.136584, "max": 0.136584,
"recall-0.025": 0.392562, "recall-0.025": 0.378099,
"recall-0.05": 0.739669, "recall-0.05": 0.721074,
"recall-0.1": 0.979339, "recall-0.1": 0.977273,
"recall-0.15": 0.997934, "recall-0.15": 0.997934,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
@ -6057,15 +6057,15 @@ Results of the model in various experiments on different datasets.
}, },
"shoulder_left": { "shoulder_left": {
"count": 483, "count": 483,
"mean": 0.034722, "mean": 0.039129,
"median": 0.029123, "median": 0.032213,
"std": 0.020485, "std": 0.022975,
"sem": 0.000933, "sem": 0.001047,
"min": 0.004214, "min": 0.004214,
"max": 0.142662, "max": 0.142662,
"recall-0.025": 0.369835, "recall-0.025": 0.31405,
"recall-0.05": 0.81405, "recall-0.05": 0.745868,
"recall-0.1": 0.987603, "recall-0.1": 0.979339,
"recall-0.15": 0.997934, "recall-0.15": 0.997934,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
@ -6073,111 +6073,111 @@ Results of the model in various experiments on different datasets.
}, },
"shoulder_right": { "shoulder_right": {
"count": 483, "count": 483,
"mean": 0.031759, "mean": 0.035508,
"median": 0.026109, "median": 0.030929,
"std": 0.018407, "std": 0.021175,
"sem": 0.000838, "sem": 0.000965,
"min": 0.00347, "min": 0.003681,
"max": 0.110679, "max": 0.180338,
"recall-0.025": 0.466942, "recall-0.025": 0.384298,
"recall-0.05": 0.826446, "recall-0.05": 0.778926,
"recall-0.1": 0.993802, "recall-0.1": 0.987603,
"recall-0.15": 0.997934, "recall-0.15": 0.995868,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
"num_labels": 484 "num_labels": 484
}, },
"elbow_left": { "elbow_left": {
"count": 483, "count": 483,
"mean": 0.035706, "mean": 0.039093,
"median": 0.029341, "median": 0.031479,
"std": 0.024522, "std": 0.027871,
"sem": 0.001117, "sem": 0.001269,
"min": 0.004211, "min": 0.005996,
"max": 0.237523, "max": 0.237523,
"recall-0.025": 0.386364, "recall-0.025": 0.347107,
"recall-0.05": 0.77686, "recall-0.05": 0.737603,
"recall-0.1": 0.981405, "recall-0.1": 0.966942,
"recall-0.15": 0.993802, "recall-0.15": 0.987603,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
"num_labels": 484 "num_labels": 484
}, },
"elbow_right": { "elbow_right": {
"count": 483, "count": 483,
"mean": 0.03838, "mean": 0.041455,
"median": 0.031125, "median": 0.033784,
"std": 0.034151, "std": 0.038189,
"sem": 0.001556, "sem": 0.001739,
"min": 0.003944, "min": 0.003944,
"max": 0.362552, "max": 0.443462,
"recall-0.025": 0.407025, "recall-0.025": 0.367769,
"recall-0.05": 0.774793, "recall-0.05": 0.741736,
"recall-0.1": 0.964876, "recall-0.1": 0.954545,
"recall-0.15": 0.981405, "recall-0.15": 0.979339,
"recall-0.25": 0.993802, "recall-0.25": 0.993802,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
"num_labels": 484 "num_labels": 484
}, },
"wrist_left": { "wrist_left": {
"count": 483, "count": 483,
"mean": 0.047139, "mean": 0.04895,
"median": 0.03772, "median": 0.039104,
"std": 0.035565, "std": 0.036788,
"sem": 0.00162, "sem": 0.001676,
"min": 0.003421, "min": 0.002618,
"max": 0.314461, "max": 0.309556,
"recall-0.025": 0.274793, "recall-0.025": 0.258264,
"recall-0.05": 0.669421, "recall-0.05": 0.650826,
"recall-0.1": 0.923554, "recall-0.1": 0.913223,
"recall-0.15": 0.979339, "recall-0.15": 0.977273,
"recall-0.25": 0.995868, "recall-0.25": 0.995868,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
"num_labels": 484 "num_labels": 484
}, },
"wrist_right": { "wrist_right": {
"count": 482, "count": 481,
"mean": 0.047946, "mean": 0.050009,
"median": 0.036566, "median": 0.040081,
"std": 0.041584, "std": 0.040668,
"sem": 0.001896, "sem": 0.001856,
"min": 0.002446, "min": 0.003069,
"max": 0.39908, "max": 0.35488,
"recall-0.025": 0.289256, "recall-0.025": 0.262397,
"recall-0.05": 0.652893, "recall-0.05": 0.63843,
"recall-0.1": 0.919421, "recall-0.1": 0.900826,
"recall-0.15": 0.969008, "recall-0.15": 0.964876,
"recall-0.25": 0.987603, "recall-0.25": 0.987603,
"recall-0.5": 0.995868, "recall-0.5": 0.993802,
"num_labels": 484 "num_labels": 484
}, },
"hip_left": { "hip_left": {
"count": 483, "count": 483,
"mean": 0.038758, "mean": 0.040026,
"median": 0.036117, "median": 0.036626,
"std": 0.019053, "std": 0.021724,
"sem": 0.000868, "sem": 0.00099,
"min": 0.005977, "min": 0.004385,
"max": 0.133493, "max": 0.191835,
"recall-0.025": 0.239669, "recall-0.025": 0.231405,
"recall-0.05": 0.774793, "recall-0.05": 0.756198,
"recall-0.1": 0.985537, "recall-0.1": 0.979339,
"recall-0.15": 0.997934, "recall-0.15": 0.995868,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
"num_labels": 484 "num_labels": 484
}, },
"hip_right": { "hip_right": {
"count": 483, "count": 483,
"mean": 0.040711, "mean": 0.042645,
"median": 0.033421, "median": 0.034828,
"std": 0.026405, "std": 0.027596,
"sem": 0.001203, "sem": 0.001257,
"min": 0.005419, "min": 0.004215,
"max": 0.149838, "max": 0.149838,
"recall-0.025": 0.291322, "recall-0.025": 0.27686,
"recall-0.05": 0.75, "recall-0.05": 0.71281,
"recall-0.1": 0.958678, "recall-0.1": 0.948347,
"recall-0.15": 0.997934, "recall-0.15": 0.997934,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
@ -6185,14 +6185,14 @@ Results of the model in various experiments on different datasets.
}, },
"knee_left": { "knee_left": {
"count": 483, "count": 483,
"mean": 0.025004, "mean": 0.025053,
"median": 0.0211, "median": 0.021076,
"std": 0.018866, "std": 0.018987,
"sem": 0.000859, "sem": 0.000865,
"min": 0.001613, "min": 0.001403,
"max": 0.27912, "max": 0.27912,
"recall-0.025": 0.595041, "recall-0.025": 0.603306,
"recall-0.05": 0.93595, "recall-0.05": 0.923554,
"recall-0.1": 0.993802, "recall-0.1": 0.993802,
"recall-0.15": 0.995868, "recall-0.15": 0.995868,
"recall-0.25": 0.995868, "recall-0.25": 0.995868,
@ -6201,14 +6201,14 @@ Results of the model in various experiments on different datasets.
}, },
"knee_right": { "knee_right": {
"count": 483, "count": 483,
"mean": 0.025695, "mean": 0.026099,
"median": 0.022859, "median": 0.022868,
"std": 0.014864, "std": 0.015211,
"sem": 0.000677, "sem": 0.000693,
"min": 0.001415, "min": 0.001415,
"max": 0.094234, "max": 0.094234,
"recall-0.025": 0.576446, "recall-0.025": 0.557851,
"recall-0.05": 0.92562, "recall-0.05": 0.919421,
"recall-0.1": 0.997934, "recall-0.1": 0.997934,
"recall-0.15": 0.997934, "recall-0.15": 0.997934,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
@ -6217,15 +6217,15 @@ Results of the model in various experiments on different datasets.
}, },
"ankle_left": { "ankle_left": {
"count": 483, "count": 483,
"mean": 0.028829, "mean": 0.029784,
"median": 0.023676, "median": 0.023999,
"std": 0.033788, "std": 0.034378,
"sem": 0.001539, "sem": 0.001566,
"min": 0.001752, "min": 0.002215,
"max": 0.497796, "max": 0.497796,
"recall-0.025": 0.530992, "recall-0.025": 0.530992,
"recall-0.05": 0.92562, "recall-0.05": 0.909091,
"recall-0.1": 0.981405, "recall-0.1": 0.979339,
"recall-0.15": 0.991736, "recall-0.15": 0.991736,
"recall-0.25": 0.993802, "recall-0.25": 0.993802,
"recall-0.5": 0.997934, "recall-0.5": 0.997934,
@ -6233,14 +6233,14 @@ Results of the model in various experiments on different datasets.
}, },
"ankle_right": { "ankle_right": {
"count": 483, "count": 483,
"mean": 0.028136, "mean": 0.029202,
"median": 0.02508, "median": 0.026139,
"std": 0.016445, "std": 0.017443,
"sem": 0.000749, "sem": 0.000795,
"min": 0.001964, "min": 0.001964,
"max": 0.103825, "max": 0.103825,
"recall-0.025": 0.495868, "recall-0.025": 0.464876,
"recall-0.05": 0.890496, "recall-0.05": 0.880165,
"recall-0.1": 0.993802, "recall-0.1": 0.993802,
"recall-0.15": 0.997934, "recall-0.15": 0.997934,
"recall-0.25": 0.997934, "recall-0.25": 0.997934,
@ -6249,18 +6249,18 @@ Results of the model in various experiments on different datasets.
}, },
"joint_recalls": { "joint_recalls": {
"num_labels": 6292, "num_labels": 6292,
"recall-0.025": 0.40782, "recall-0.025": 0.38207,
"recall-0.05": 0.8034, "recall-0.05": 0.77718,
"recall-0.1": 0.9733, "recall-0.1": 0.96615,
"recall-0.15": 0.99189, "recall-0.15": 0.99031,
"recall-0.25": 0.99603, "recall-0.25": 0.99603,
"recall-0.5": 0.99777 "recall-0.5": 0.99762
} }
} }
{ {
"total_parts": 6776, "total_parts": 6776,
"correct_parts": 6740, "correct_parts": 6729,
"pcp": 0.994687 "pcp": 0.993064
} }
``` ```

View File

@ -1220,8 +1220,7 @@ std::vector<std::tuple<cv::Point3f, cv::Mat, std::vector<int>>> TriangulatorInte
cv::Mat &group_pose = std::get<1>(group); cv::Mat &group_pose = std::get<1>(group);
// Calculate average joint distance // Calculate average joint distance
float dist_sum = 0.0; std::vector<float> dists;
size_t count = 0;
for (size_t row = 0; row < num_joints; ++row) for (size_t row = 0; row < num_joints; ++row)
{ {
const float *pose_3d_ptr = pose_3d.ptr<float>(row); const float *pose_3d_ptr = pose_3d.ptr<float>(row);
@ -1236,15 +1235,20 @@ std::vector<std::tuple<cv::Point3f, cv::Mat, std::vector<int>>> TriangulatorInte
float dy = pose_3d_ptr[1] - group_pose_ptr[1]; float dy = pose_3d_ptr[1] - group_pose_ptr[1];
float dz = pose_3d_ptr[2] - group_pose_ptr[2]; float dz = pose_3d_ptr[2] - group_pose_ptr[2];
float dist_sq = dx * dx + dy * dy + dz * dz; float dist_sq = dx * dx + dy * dy + dz * dz;
dist_sum += std::sqrt(dist_sq); dists.push_back(std::sqrt(dist_sq));
count++;
} }
} }
if (dists.size() >= 5)
if (count > 0) {
// Drop highest value to reduce influence of outliers
auto max_it = std::max_element(dists.begin(), dists.end());
dists.erase(max_it);
}
if (dists.size() > 0)
{ {
// Check if the average joint distance is close enough // Check if the average joint distance is close enough
float avg_dist = dist_sum / count; float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0);
avg_dist /= static_cast<float>(dists.size());
if (avg_dist < max_joint_avg_dist && avg_dist < best_dist) if (avg_dist < max_joint_avg_dist && avg_dist < best_dist)
{ {
best_dist = avg_dist; best_dist = avg_dist;
@ -1310,7 +1314,113 @@ std::vector<std::tuple<cv::Point3f, cv::Mat, std::vector<int>>> TriangulatorInte
} }
} }
return groups; // Merge close groups
// Depending on the initial group creation, one or more groups can be created that in the end
// share the same persons, even if they had a larger distance at the beginning.
// So merge them, similar to the group assignment before.
std::vector<std::tuple<cv::Point3f, cv::Mat, std::vector<int>>> merged_groups;
for (size_t i = 0; i < groups.size(); ++i)
{
size_t num_joints = std::get<1>(groups[i]).rows;
auto &group = groups[i];
auto &group_visible_counts = per_group_visible_counts[i];
float best_dist = std::numeric_limits<float>::infinity();
int best_group = -1;
for (size_t j = 0; j < merged_groups.size(); ++j)
{
auto &merged_group = merged_groups[j];
// Calculate average joint distance
std::vector<float> dists;
for (size_t row = 0; row < num_joints; ++row)
{
const float *group_pose_ptr = std::get<1>(group).ptr<float>(row);
const float *merged_pose_ptr = std::get<1>(merged_group).ptr<float>(row);
float score1 = group_pose_ptr[3];
float score2 = merged_pose_ptr[3];
if (score1 > min_score && score2 > min_score)
{
float dx = group_pose_ptr[0] - merged_pose_ptr[0];
float dy = group_pose_ptr[1] - merged_pose_ptr[1];
float dz = group_pose_ptr[2] - merged_pose_ptr[2];
float dist_sq = dx * dx + dy * dy + dz * dz;
dists.push_back(std::sqrt(dist_sq));
}
}
if (dists.size() >= 5)
{
// Drop highest value to reduce influence of outliers
auto max_it = std::max_element(dists.begin(), dists.end());
dists.erase(max_it);
}
if (dists.size() > 0)
{
// Check if the average joint distance is close enough
float avg_dist = std::accumulate(dists.begin(), dists.end(), 0.0);
avg_dist /= static_cast<float>(dists.size());
if (avg_dist < max_joint_avg_dist && avg_dist < best_dist)
{
best_dist = avg_dist;
best_group = static_cast<int>(j);
}
}
}
if (best_group == -1)
{
// Create a new group
merged_groups.push_back(group);
}
else
{
// Update existing group
auto &merged_group = merged_groups[best_group];
cv::Point3f &merged_center = std::get<0>(merged_group);
cv::Mat &merged_group_pose = std::get<1>(merged_group);
std::vector<int> &merged_group_indices = std::get<2>(merged_group);
float n_elems1 = static_cast<float>(merged_group_indices.size());
float n_elems2 = static_cast<float>(std::get<2>(group).size());
float inv1 = n_elems1 / (n_elems1 + n_elems2);
float inv2 = n_elems2 / (n_elems1 + n_elems2);
// Update group center
merged_center.x = (merged_center.x * inv1 + std::get<0>(group).x * inv2);
merged_center.y = (merged_center.y * inv1 + std::get<0>(group).y * inv2);
merged_center.z = (merged_center.z * inv1 + std::get<0>(group).z * inv2);
// Update group pose
for (size_t row = 0; row < num_joints; ++row)
{
const float *group_pose_ptr = std::get<1>(group).ptr<float>(row);
float *merged_pose_ptr = merged_group_pose.ptr<float>(row);
if (group_pose_ptr[3] > min_score)
{
float j_elems1 = static_cast<float>(group_visible_counts[row]);
float j_elems2 = static_cast<float>(per_group_visible_counts[best_group][row]);
float inv1 = j_elems1 / (j_elems1 + j_elems2);
float inv2 = j_elems2 / (j_elems1 + j_elems2);
merged_pose_ptr[0] = (merged_pose_ptr[0] * inv1 + group_pose_ptr[0] * inv2);
merged_pose_ptr[1] = (merged_pose_ptr[1] * inv1 + group_pose_ptr[1] * inv2);
merged_pose_ptr[2] = (merged_pose_ptr[2] * inv1 + group_pose_ptr[2] * inv2);
merged_pose_ptr[3] = (merged_pose_ptr[3] * inv1 + group_pose_ptr[3] * inv2);
group_visible_counts[row]++;
}
}
// Merge indices
merged_group_indices.insert(
merged_group_indices.end(), std::get<2>(group).begin(), std::get<2>(group).end());
}
}
return merged_groups;
} }
// ================================================================================================= // =================================================================================================