diff --git a/media/RESULTS.md b/media/RESULTS.md index 823c8c3..632ed24 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -286,16 +286,16 @@ Results of the model in various experiments on different datasets. "person_nums": { "total_frames": 301, "total_labels": 477, - "total_preds": 888, + "total_preds": 814, "considered_empty": 0, "valid_preds": 477, - "invalid_preds": 411, + "invalid_preds": 337, "missing": 0, - "invalid_fraction": 0.46284, - "precision": 0.53716, + "invalid_fraction": 0.414, + "precision": 0.586, "recall": 1.0, - "f1": 0.6989, - "non_empty": 888 + "f1": 0.73896, + "non_empty": 814 }, "mpjpe": { "count": 477, @@ -313,11 +313,11 @@ Results of the model in various experiments on different datasets. "recall-0.5": 1.0, "num_labels": 477, "ap-0.025": 0.0, - "ap-0.05": 0.340331, - "ap-0.1": 0.693196, - "ap-0.15": 0.704408, - "ap-0.25": 0.704408, - "ap-0.5": 0.704408 + "ap-0.05": 0.345013, + "ap-0.1": 0.702867, + "ap-0.15": 0.714372, + "ap-0.25": 0.714372, + "ap-0.5": 0.714372 }, "head": { "count": 477, @@ -1569,56 +1569,56 @@ Results of the model in various experiments on different datasets. (10 cameras) ```json { - "avg_time_2d": 0.20488240776992425, - "avg_time_3d": 0.0016675780459148128, - "avg_fps": 4.841443082410108 + "avg_time_2d": 0.20779247690991656, + "avg_time_3d": 0.0016487220438515268, + "avg_fps": 4.774609794994247 } { "person_nums": { "total_frames": 420, "total_labels": 1466, - "total_preds": 1735, + "total_preds": 1527, "considered_empty": 0, "valid_preds": 1465, - "invalid_preds": 270, + "invalid_preds": 62, "missing": 1, - "invalid_fraction": 0.15562, - "precision": 0.84438, + "invalid_fraction": 0.0406, + "precision": 0.9594, "recall": 0.99932, - "f1": 0.91534, - "non_empty": 1735 + "f1": 0.97895, + "non_empty": 1527 }, "mpjpe": { "count": 1465, - "mean": 0.037085, - "median": 0.032344, - "std": 0.017223, - "sem": 0.00045, + "mean": 0.037082, + "median": 0.032321, + "std": 0.017242, + "sem": 0.000451, "min": 0.013848, "max": 0.136363, - "recall-0.025": 0.186903, - "recall-0.05": 0.85266, + "recall-0.025": 0.188267, + "recall-0.05": 0.851978, "recall-0.1": 0.989086, "recall-0.15": 0.999318, "recall-0.25": 0.999318, "recall-0.5": 0.999318, "num_labels": 1466, - "ap-0.025": 0.085997, - "ap-0.05": 0.802543, - "ap-0.1": 0.967268, - "ap-0.15": 0.978621, - "ap-0.25": 0.978621, - "ap-0.5": 0.978621 + "ap-0.025": 0.087846, + "ap-0.05": 0.807698, + "ap-0.1": 0.975275, + "ap-0.15": 0.986743, + "ap-0.25": 0.986743, + "ap-0.5": 0.986743 }, "nose": { "count": 1462, - "mean": 0.012376, - "median": 0.010938, - "std": 0.007779, + "mean": 0.012384, + "median": 0.010912, + "std": 0.007797, "sem": 0.000204, "min": 0.001275, "max": 0.124831, - "recall-0.025": 0.957621, + "recall-0.025": 0.956254, "recall-0.05": 0.993165, "recall-0.1": 0.998633, "recall-0.15": 0.999316, @@ -1628,13 +1628,13 @@ Results of the model in various experiments on different datasets. }, "shoulder_left": { "count": 1465, - "mean": 0.018742, - "median": 0.016661, - "std": 0.00976, - "sem": 0.000255, + "mean": 0.018746, + "median": 0.016641, + "std": 0.009717, + "sem": 0.000254, "min": 0.001103, "max": 0.11138, - "recall-0.025": 0.800136, + "recall-0.025": 0.798772, "recall-0.05": 0.991132, "recall-0.1": 0.998636, "recall-0.15": 0.999318, @@ -1644,13 +1644,13 @@ Results of the model in various experiments on different datasets. }, "shoulder_right": { "count": 1464, - "mean": 0.02051, + "mean": 0.020464, "median": 0.019186, - "std": 0.008943, - "sem": 0.000234, + "std": 0.008812, + "sem": 0.00023, "min": 0.002013, - "max": 0.093441, - "recall-0.025": 0.768601, + "max": 0.087963, + "recall-0.025": 0.769966, "recall-0.05": 0.990444, "recall-0.1": 0.999317, "recall-0.15": 0.999317, @@ -1660,15 +1660,15 @@ Results of the model in various experiments on different datasets. }, "elbow_left": { "count": 1464, - "mean": 0.020864, - "median": 0.018441, - "std": 0.011938, - "sem": 0.000312, + "mean": 0.021041, + "median": 0.018392, + "std": 0.012619, + "sem": 0.00033, "min": 0.002104, - "max": 0.095363, - "recall-0.025": 0.711945, - "recall-0.05": 0.974744, - "recall-0.1": 0.999317, + "max": 0.147682, + "recall-0.025": 0.709215, + "recall-0.05": 0.972696, + "recall-0.1": 0.998635, "recall-0.15": 0.999317, "recall-0.25": 0.999317, "recall-0.5": 0.999317, @@ -1676,14 +1676,14 @@ Results of the model in various experiments on different datasets. }, "elbow_right": { "count": 1462, - "mean": 0.019564, - "median": 0.017384, - "std": 0.011529, - "sem": 0.000302, + "mean": 0.019468, + "median": 0.017347, + "std": 0.011368, + "sem": 0.000297, "min": 0.001927, "max": 0.132247, - "recall-0.025": 0.802461, - "recall-0.05": 0.970608, + "recall-0.025": 0.805878, + "recall-0.05": 0.971975, "recall-0.1": 0.997949, "recall-0.15": 0.999316, "recall-0.25": 0.999316, @@ -1692,13 +1692,13 @@ Results of the model in various experiments on different datasets. }, "wrist_left": { "count": 1433, - "mean": 0.020859, + "mean": 0.020901, "median": 0.015088, - "std": 0.021069, - "sem": 0.000557, + "std": 0.021121, + "sem": 0.000558, "min": 0.001506, "max": 0.194344, - "recall-0.025": 0.767085, + "recall-0.025": 0.763598, "recall-0.05": 0.937936, "recall-0.1": 0.982566, "recall-0.15": 0.994421, @@ -1708,14 +1708,14 @@ Results of the model in various experiments on different datasets. }, "wrist_right": { "count": 1456, - "mean": 0.020392, - "median": 0.0137, - "std": 0.020859, - "sem": 0.000547, + "mean": 0.020303, + "median": 0.013717, + "std": 0.020686, + "sem": 0.000542, "min": 0.000284, "max": 0.212342, - "recall-0.025": 0.770604, - "recall-0.05": 0.933379, + "recall-0.025": 0.773352, + "recall-0.05": 0.934753, "recall-0.1": 0.984203, "recall-0.15": 0.997253, "recall-0.25": 1.0, @@ -1724,15 +1724,15 @@ Results of the model in various experiments on different datasets. }, "hip_left": { "count": 1464, - "mean": 0.050256, - "median": 0.048397, - "std": 0.01907, - "sem": 0.000499, + "mean": 0.050265, + "median": 0.048338, + "std": 0.019008, + "sem": 0.000497, "min": 0.008094, "max": 0.148516, - "recall-0.025": 0.049829, - "recall-0.05": 0.546075, - "recall-0.1": 0.975427, + "recall-0.025": 0.049147, + "recall-0.05": 0.54744, + "recall-0.1": 0.976109, "recall-0.15": 0.999317, "recall-0.25": 0.999317, "recall-0.5": 0.999317, @@ -1740,14 +1740,14 @@ Results of the model in various experiments on different datasets. }, "hip_right": { "count": 1465, - "mean": 0.050045, - "median": 0.048856, - "std": 0.016859, - "sem": 0.000441, + "mean": 0.050041, + "median": 0.048863, + "std": 0.016842, + "sem": 0.00044, "min": 0.007258, "max": 0.138747, - "recall-0.025": 0.051842, - "recall-0.05": 0.521146, + "recall-0.025": 0.05116, + "recall-0.05": 0.520464, "recall-0.1": 0.988404, "recall-0.15": 0.999318, "recall-0.25": 0.999318, @@ -1756,13 +1756,13 @@ Results of the model in various experiments on different datasets. }, "knee_left": { "count": 1464, - "mean": 0.038364, - "median": 0.032286, - "std": 0.027217, - "sem": 0.000712, + "mean": 0.038353, + "median": 0.032352, + "std": 0.027187, + "sem": 0.000711, "min": 0.002051, "max": 0.275419, - "recall-0.025": 0.333106, + "recall-0.025": 0.331058, "recall-0.05": 0.759727, "recall-0.1": 0.970648, "recall-0.15": 0.990444, @@ -1772,59 +1772,59 @@ Results of the model in various experiments on different datasets. }, "knee_right": { "count": 1458, - "mean": 0.041819, - "median": 0.035897, - "std": 0.026627, - "sem": 0.000698, + "mean": 0.041659, + "median": 0.035863, + "std": 0.026298, + "sem": 0.000689, "min": 0.004598, "max": 0.242773, - "recall-0.025": 0.242632, - "recall-0.05": 0.731323, - "recall-0.1": 0.963674, - "recall-0.15": 0.989034, + "recall-0.025": 0.243317, + "recall-0.05": 0.732008, + "recall-0.1": 0.964359, + "recall-0.15": 0.990404, "recall-0.25": 0.999315, "recall-0.5": 0.999315, "num_labels": 1459 }, "ankle_left": { "count": 1457, - "mean": 0.085272, - "median": 0.043075, - "std": 0.101395, + "mean": 0.085318, + "median": 0.04279, + "std": 0.101384, "sem": 0.002657, "min": 0.000814, "max": 0.494931, - "recall-0.025": 0.346548, - "recall-0.05": 0.545455, - "recall-0.1": 0.712919, - "recall-0.15": 0.828435, + "recall-0.025": 0.347915, + "recall-0.05": 0.546138, + "recall-0.1": 0.712235, + "recall-0.15": 0.827751, "recall-0.25": 0.917293, "recall-0.5": 0.995899, "num_labels": 1463 }, "ankle_right": { "count": 1447, - "mean": 0.077062, - "median": 0.042773, - "std": 0.096671, - "sem": 0.002542, + "mean": 0.077143, + "median": 0.042829, + "std": 0.096759, + "sem": 0.002545, "min": 0.001957, "max": 0.49866, - "recall-0.025": 0.315753, + "recall-0.025": 0.315068, "recall-0.05": 0.560274, - "recall-0.1": 0.793151, - "recall-0.15": 0.850685, - "recall-0.25": 0.908219, + "recall-0.1": 0.791781, + "recall-0.15": 0.85137, + "recall-0.25": 0.908904, "recall-0.5": 0.991096, "num_labels": 1460 }, "joint_recalls": { "num_labels": 18990, - "recall-0.025": 0.53133, - "recall-0.05": 0.80379, - "recall-0.1": 0.95082, + "recall-0.025": 0.53102, + "recall-0.05": 0.80395, + "recall-0.1": 0.95061, "recall-0.15": 0.97241, - "recall-0.25": 0.98562, + "recall-0.25": 0.98568, "recall-0.5": 0.99816 } } @@ -2119,23 +2119,23 @@ Results of the model in various experiments on different datasets. "person_nums": { "total_frames": 210, "total_labels": 630, - "total_preds": 632, + "total_preds": 631, "considered_empty": 0, "valid_preds": 630, - "invalid_preds": 2, + "invalid_preds": 1, "missing": 0, - "invalid_fraction": 0.00316, - "precision": 0.99684, + "invalid_fraction": 0.00158, + "precision": 0.99842, "recall": 1.0, - "f1": 0.99842, - "non_empty": 632 + "f1": 0.99921, + "non_empty": 631 }, "mpjpe": { "count": 630, - "mean": 0.056116, + "mean": 0.056111, "median": 0.051456, - "std": 0.018382, - "sem": 0.000733, + "std": 0.018368, + "sem": 0.000732, "min": 0.028965, "max": 0.14306, "recall-0.025": 0.0, @@ -2146,18 +2146,18 @@ Results of the model in various experiments on different datasets. "recall-0.5": 1.0, "num_labels": 630, "ap-0.025": 0.0, - "ap-0.05": 0.223916, - "ap-0.1": 0.928562, + "ap-0.05": 0.223683, + "ap-0.1": 0.928267, "ap-0.15": 0.999816, "ap-0.25": 0.999816, "ap-0.5": 0.999816 }, "head": { "count": 598, - "mean": 0.040794, + "mean": 0.040764, "median": 0.039496, - "std": 0.01381, - "sem": 0.000565, + "std": 0.01374, + "sem": 0.000562, "min": 0.011364, "max": 0.102955, "recall-0.025": 0.125418, @@ -2170,15 +2170,15 @@ Results of the model in various experiments on different datasets. }, "shoulder_left": { "count": 630, - "mean": 0.062854, + "mean": 0.062839, "median": 0.060457, - "std": 0.01943, - "sem": 0.000775, + "std": 0.019404, + "sem": 0.000774, "min": 0.018922, "max": 0.132634, "recall-0.025": 0.003175, "recall-0.05": 0.273016, - "recall-0.1": 0.974603, + "recall-0.1": 0.97619, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -2202,10 +2202,10 @@ Results of the model in various experiments on different datasets. }, "elbow_left": { "count": 630, - "mean": 0.052412, + "mean": 0.05237, "median": 0.049508, - "std": 0.020244, - "sem": 0.000807, + "std": 0.020206, + "sem": 0.000806, "min": 0.010131, "max": 0.140634, "recall-0.025": 0.05873, @@ -2234,10 +2234,10 @@ Results of the model in various experiments on different datasets. }, "wrist_left": { "count": 630, - "mean": 0.048082, + "mean": 0.048071, "median": 0.041989, - "std": 0.026744, - "sem": 0.001066, + "std": 0.026751, + "sem": 0.001067, "min": 0.007895, "max": 0.191578, "recall-0.025": 0.134921, @@ -2250,7 +2250,7 @@ Results of the model in various experiments on different datasets. }, "wrist_right": { "count": 625, - "mean": 0.05271, + "mean": 0.052705, "median": 0.047416, "std": 0.025887, "sem": 0.001036, @@ -2266,10 +2266,10 @@ Results of the model in various experiments on different datasets. }, "hip_left": { "count": 630, - "mean": 0.057316, + "mean": 0.057311, "median": 0.054171, - "std": 0.020591, - "sem": 0.000821, + "std": 0.020577, + "sem": 0.00082, "min": 0.014001, "max": 0.17071, "recall-0.025": 0.025397, @@ -2282,9 +2282,9 @@ Results of the model in various experiments on different datasets. }, "hip_right": { "count": 629, - "mean": 0.055242, + "mean": 0.055245, "median": 0.050996, - "std": 0.02309, + "std": 0.023089, "sem": 0.000921, "min": 0.004999, "max": 0.145424, @@ -2298,9 +2298,9 @@ Results of the model in various experiments on different datasets. }, "knee_left": { "count": 628, - "mean": 0.045694, + "mean": 0.045693, "median": 0.034743, - "std": 0.04608, + "std": 0.046075, "sem": 0.00184, "min": 0.003593, "max": 0.364064, @@ -2314,14 +2314,14 @@ Results of the model in various experiments on different datasets. }, "knee_right": { "count": 629, - "mean": 0.053707, + "mean": 0.053711, "median": 0.036065, - "std": 0.072302, + "std": 0.072301, "sem": 0.002885, "min": 0.002669, "max": 0.496679, "recall-0.025": 0.252782, - "recall-0.05": 0.73132, + "recall-0.05": 0.72973, "recall-0.1": 0.934817, "recall-0.15": 0.952305, "recall-0.25": 0.958665, @@ -2330,10 +2330,10 @@ Results of the model in various experiments on different datasets. }, "ankle_left": { "count": 619, - "mean": 0.065828, + "mean": 0.065864, "median": 0.050491, - "std": 0.072587, - "sem": 0.00292, + "std": 0.072779, + "sem": 0.002928, "min": 0.012793, "max": 0.493666, "recall-0.025": 0.035541, @@ -2346,7 +2346,7 @@ Results of the model in various experiments on different datasets. }, "ankle_right": { "count": 601, - "mean": 0.054042, + "mean": 0.054043, "median": 0.047411, "std": 0.042559, "sem": 0.001737, @@ -2363,8 +2363,8 @@ Results of the model in various experiments on different datasets. "joint_recalls": { "num_labels": 8129, "recall-0.025": 0.09583, - "recall-0.05": 0.52749, - "recall-0.1": 0.95079, + "recall-0.05": 0.52737, + "recall-0.1": 0.95092, "recall-0.15": 0.98155, "recall-0.25": 0.98905, "recall-0.5": 0.99742 diff --git a/scripts/test_skelda_dataset.py b/scripts/test_skelda_dataset.py index 0c743ba..55d8053 100644 --- a/scripts/test_skelda_dataset.py +++ b/scripts/test_skelda_dataset.py @@ -333,13 +333,22 @@ def main(): "koarob": 0.91, } minscore = minscores.get(dataset_use, 0.95) + min_group_sizes = { + # If the number of cameras is high, and the views are not occluded, use a higher value + "panoptic": 1, + "shelf": 2, + "tsinghua": 2, + } + min_group_size = min_group_sizes.get(dataset_use, 1) + if dataset_use == "panoptic" and len(datasets["panoptic"]["cams"]) == 10: + min_group_size = 5 print("\nRunning predictions ...") all_poses = [] all_ids = [] all_paths = [] times = [] - triangulator = spt.Triangulator(min_score=minscore) + triangulator = spt.Triangulator(min_score=minscore, min_group_size=min_group_size) old_scene = "" for label in tqdm.tqdm(labels): images_2d = [] diff --git a/spt/interface.cpp b/spt/interface.cpp index 0ac6fe6..83036d4 100644 --- a/spt/interface.cpp +++ b/spt/interface.cpp @@ -4,9 +4,9 @@ // ================================================================================================= // ================================================================================================= -Triangulator::Triangulator(float min_score) +Triangulator::Triangulator(float min_score, size_t min_group_size) { - this->triangulator = new TriangulatorInternal(min_score); + this->triangulator = new TriangulatorInternal(min_score, min_group_size); } // ================================================================================================= diff --git a/spt/interface.hpp b/spt/interface.hpp index 447d3a8..1020e0c 100644 --- a/spt/interface.hpp +++ b/spt/interface.hpp @@ -20,9 +20,11 @@ public: * * * @param min_score Minimum score to consider a triangulated joint as valid. + * @param min_group_size Minimum number of camera pairs that need to see a person. */ Triangulator( - float min_score = 0.95); + float min_score = 0.95, + size_t min_group_size = 1); /** * Calculate a triangulation. diff --git a/spt/triangulator.cpp b/spt/triangulator.cpp index e510a30..d6aa6cd 100644 --- a/spt/triangulator.cpp +++ b/spt/triangulator.cpp @@ -102,9 +102,10 @@ void CameraInternal::update_projection_matrix() // ================================================================================================= // ================================================================================================= -TriangulatorInternal::TriangulatorInternal(float min_score) +TriangulatorInternal::TriangulatorInternal(float min_score, size_t min_group_size) { this->min_score = min_score; + this->min_group_size = min_group_size; } // ================================================================================================= @@ -443,20 +444,13 @@ std::vector>> TriangulatorInternal::triangulate stime = std::chrono::high_resolution_clock::now(); // Drop low scoring poses - std::vector drop_indices; - for (size_t i = 0; i < all_scored_poses.size(); ++i) + size_t num_poses = all_scored_poses.size(); + for (size_t i = num_poses; i > 0; --i) { - if (all_scored_poses[i].second < min_score) + if (all_scored_poses[i - 1].second < min_score) { - drop_indices.push_back(i); - } - } - if (!drop_indices.empty()) - { - for (size_t i = drop_indices.size(); i > 0; --i) - { - all_scored_poses.erase(all_scored_poses.begin() + drop_indices[i - 1]); - all_pairs.erase(all_pairs.begin() + drop_indices[i - 1]); + all_scored_poses.erase(all_scored_poses.begin() + i - 1); + all_pairs.erase(all_pairs.begin() + i - 1); } } @@ -464,6 +458,16 @@ std::vector>> TriangulatorInternal::triangulate std::vector>> groups; groups = calc_grouping(all_pairs, all_scored_poses, min_score); + // Drop groups with too few matches + size_t num_groups = groups.size(); + for (size_t i = num_groups; i > 0; --i) + { + if (std::get<2>(groups[i - 1]).size() < this->min_group_size) + { + groups.erase(groups.begin() + i - 1); + } + } + elapsed = std::chrono::high_resolution_clock::now() - stime; grouping_time += elapsed.count(); stime = std::chrono::high_resolution_clock::now(); diff --git a/spt/triangulator.hpp b/spt/triangulator.hpp index 4818d31..f569da8 100644 --- a/spt/triangulator.hpp +++ b/spt/triangulator.hpp @@ -31,7 +31,7 @@ public: class TriangulatorInternal { public: - TriangulatorInternal(float min_score); + TriangulatorInternal(float min_score, size_t min_group_size); std::vector>> triangulate_poses( const std::vector>>> &poses_2d, @@ -44,6 +44,8 @@ public: private: float min_score; + float min_group_size; + const std::vector core_joints = { "shoulder_left", "shoulder_right",