diff --git a/media/RESULTS.md b/media/RESULTS.md index 8280cc0..0e78bac 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -278,24 +278,24 @@ Results of the model in various experiments on different datasets. (duration 00:00:56) ```json { - "avg_time_2d": 0.1016948051059369, - "avg_time_3d": 0.0012720597978310077, - "avg_fps": 9.711862169782414 + "avg_time_2d": 0.09936108867737026, + "avg_time_3d": 0.0006794069231170969, + "avg_fps": 9.995952079181109 } { "person_nums": { "total_frames": 301, "total_labels": 477, - "total_preds": 889, - "considered_empty": 2, + "total_preds": 886, + "considered_empty": 0, "valid_preds": 477, - "invalid_preds": 410, + "invalid_preds": 409, "missing": 0, - "invalid_fraction": 0.46223, - "precision": 0.53777, + "invalid_fraction": 0.46163, + "precision": 0.53837, "recall": 1.0, - "f1": 0.69941, - "non_empty": 887 + "f1": 0.69993, + "non_empty": 886 }, "mpjpe": { "count": 477, diff --git a/scripts/test_skelda_dataset.py b/scripts/test_skelda_dataset.py index 3f37856..35cd3d4 100644 --- a/scripts/test_skelda_dataset.py +++ b/scripts/test_skelda_dataset.py @@ -373,11 +373,6 @@ def main(): cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1]) images_2d[i] = cv2.resize(img, (1000, 1000)) - roomparams = { - "room_size": label["room_size"], - "room_center": label["room_center"], - } - start = time.time() poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d) poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d) @@ -386,36 +381,18 @@ def main(): start = time.time() if sum(np.sum(p) for p in poses_2d) == 0: - poses3D = np.zeros([1, len(joint_names_3d), 4]) - poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3]) + poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist() else: - cameras = spt.convert_cameras(label["cameras"]) - roomp = [roomparams["room_center"], roomparams["room_size"]] - poses_3d = triangulator.triangulate_poses( - poses_2d, cameras, roomp, joint_names_2d + spt_cameras = spt.convert_cameras(label["cameras"]) + roomparams = [label["room_center"], label["room_size"]] + poses3D = triangulator.triangulate_poses( + poses_2d, spt_cameras, roomparams, joint_names_2d ) - poses3D = np.array(poses_3d) - if len(poses3D) == 0: - poses3D = np.zeros([1, len(joint_names_3d), 4]) - - poses2D = [] - for cam in label["cameras"]: - poses_2d, _ = utils_pose.project_poses(poses3D, cam) - poses2D.append(poses_2d) - poses3D, poses2D = add_extra_joints(poses3D, poses2D, joint_names_3d) - poses3D, poses2D = test_triangulate.filter_poses( - poses3D, - poses2D, - roomparams, - joint_names_3d, - drop_few_limbs=(dataset_use != "mvor"), - ) - poses3D = add_missing_joints(poses3D, joint_names_3d) time_3d = time.time() - start print("3D time:", time_3d) - all_poses.append(poses3D) + all_poses.append(np.array(poses3D)) all_ids.append(label["id"]) all_paths.append(label["imgpaths"]) times.append((time_2d, time_3d)) diff --git a/spt/triangulator.cpp b/spt/triangulator.cpp index 3f8ad18..c506ba2 100644 --- a/spt/triangulator.cpp +++ b/spt/triangulator.cpp @@ -470,14 +470,20 @@ std::vector>> TriangulatorInternal::triangulate auto merged_pose = merge_group(poses, min_score); all_merged_poses[i] = (merged_pose); } - last_poses_3d = all_merged_poses; + + // Run post-processing steps + std::vector final_poses_3d = all_merged_poses; + add_extra_joints(final_poses_3d, joint_names); + filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx, true); + add_missing_joints(final_poses_3d, joint_names); + last_poses_3d = final_poses_3d; // Convert to output format std::vector>> poses_3d; - poses_3d.reserve(all_merged_poses.size()); - for (size_t i = 0; i < all_merged_poses.size(); ++i) + poses_3d.reserve(final_poses_3d.size()); + for (size_t i = 0; i < final_poses_3d.size(); ++i) { - const auto &mat = all_merged_poses[i]; + const auto &mat = final_poses_3d[i]; std::vector> pose; size_t num_joints = mat.rows; pose.reserve(num_joints); @@ -1370,3 +1376,409 @@ cv::Mat TriangulatorInternal::merge_group(const std::vector &poses_3d, return final_pose_3d; } + +// ================================================================================================= + +void TriangulatorInternal::add_extra_joints( + std::vector &poses, const std::vector &joint_names) +{ + // Find indices of "head", "ear_left", and "ear_right" + auto it_head = std::find(joint_names.begin(), joint_names.end(), "head"); + auto it_ear_left = std::find(joint_names.begin(), joint_names.end(), "ear_left"); + auto it_ear_right = std::find(joint_names.begin(), joint_names.end(), "ear_right"); + int idx_h = std::distance(joint_names.begin(), it_head); + int idx_el = std::distance(joint_names.begin(), it_ear_left); + int idx_er = std::distance(joint_names.begin(), it_ear_right); + + // If the confidence score of "head" is zero compute it as the average of the ears + for (size_t i = 0; i < poses.size(); ++i) + { + cv::Mat &pose = poses[i]; + float *pose_ptr_h = pose.ptr(idx_h); + + if (pose_ptr_h[3] == 0.0f) + { + float *pose_ptr_el = pose.ptr(idx_el); + float *pose_ptr_er = pose.ptr(idx_er); + + if (pose_ptr_el[3] > 0.0f && pose_ptr_er[3] > 0.0f) + { + pose_ptr_h[0] = (pose_ptr_el[0] + pose_ptr_er[0]) * 0.5f; + pose_ptr_h[1] = (pose_ptr_el[1] + pose_ptr_er[1]) * 0.5f; + pose_ptr_h[2] = (pose_ptr_el[2] + pose_ptr_er[2]) * 0.5f; + pose_ptr_h[3] = std::min(pose_ptr_el[3], pose_ptr_er[3]); + } + } + } +} + +// ================================================================================================= + +void TriangulatorInternal::filter_poses( + std::vector &poses, + const std::array, 2> &roomparams, + const std::vector &core_joint_idx, + const std::vector> &core_limbs_idx, + bool drop_few_limbs = true) +{ + const float min_score = 0.1; + std::vector drop_indices; + + for (size_t i = 0; i < poses.size(); ++i) + { + cv::Mat &pose = poses[i]; + const size_t num_joints = pose.rows; + + // Collect valid joint indices + std::vector valid_joint_idx; + for (size_t j = 0; j < num_joints; ++j) + { + float *pose_ptr = pose.ptr(j); + if (pose_ptr[3] > min_score) + { + valid_joint_idx.push_back(j); + } + } + + // Drop poses with too few joints + if (valid_joint_idx.size() < 5) + { + drop_indices.push_back(i); + continue; + } + + // Compute min, max and mean coordinates + std::array mean = {0.0, 0.0, 0.0}; + std::array mins = { + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max()}; + std::array maxs = { + std::numeric_limits::lowest(), + std::numeric_limits::lowest(), + std::numeric_limits::lowest()}; + for (size_t j = 0; j < valid_joint_idx.size(); ++j) + { + float *joint_ptr = pose.ptr(valid_joint_idx[j]); + for (size_t k = 0; k < 3; ++k) + { + mins[k] = std::min(mins[k], joint_ptr[k]); + maxs[k] = std::max(maxs[k], joint_ptr[k]); + mean[k] += joint_ptr[k]; + } + } + for (size_t k = 0; k < 3; ++k) + { + mean[k] /= static_cast(valid_joint_idx.size()); + } + + // Drop poses that are too large or too small or too flat + float min_flatness = 0.2f; + float max_size = 2.3f; + float min_size = 0.3f; + std::array diff; + for (int j = 0; j < 3; ++j) + { + diff[j] = maxs[j] - mins[j]; + } + if (diff[0] > max_size || diff[1] > max_size || diff[2] > max_size) + { + drop_indices.push_back(i); + continue; + } + if (diff[0] < min_size && diff[1] < min_size && diff[2] < min_size) + { + drop_indices.push_back(i); + continue; + } + if ((diff[0] < min_flatness && diff[1] < min_flatness) || + (diff[1] < min_flatness && diff[2] < min_flatness) || + (diff[2] < min_flatness && diff[0] < min_flatness)) + { + drop_indices.push_back(i); + continue; + } + + // Drop persons outside room + const float wdist = 0.1f; + const auto &room_size = roomparams[0]; + const auto &room_center = roomparams[1]; + const std::array room_half_size = { + room_size[0] / 2.0f, + room_size[1] / 2.0f, + room_size[2] / 2.0f}; + bool outside = false; + for (int j = 0; j < 3; ++j) + { + // Check if the mean position is outside the room boundaries + if (mean[j] > room_half_size[j] + room_center[j] || + mean[j] < -room_half_size[j] - room_center[j]) + { + outside = true; + break; + } + } + for (int j = 0; j < 3; ++j) + { + // Check if any limb is too far outside the room + if (maxs[j] > room_half_size[j] + room_center[j] + wdist || + mins[j] < -room_half_size[j] - room_center[j] - wdist) + { + outside = true; + break; + } + } + if (outside) + { + drop_indices.push_back(i); + continue; + } + + if (drop_few_limbs) + { + // Drop poses with less than 3 valid limbs + size_t found_limbs = 0; + for (size_t j = 0; j < core_limbs_idx.size(); ++j) + { + size_t start_idx = core_limbs_idx[j][0]; + size_t end_idx = core_limbs_idx[j][1]; + + float *joint_start_ptr = pose.ptr(start_idx); + float *joint_end_ptr = pose.ptr(end_idx); + + if (joint_start_ptr[3] > min_score && joint_end_ptr[3] > min_score) + { + found_limbs++; + } + } + if (found_limbs < 3) + { + drop_indices.push_back(i); + continue; + } + } + + // Calculate total limb length and average limb length + const float max_avg_length = 0.5f; + const float min_avg_length = 0.1f; + float total_length = 0.0f; + int total_limbs = 0; + for (size_t j = 0; j < core_limbs_idx.size(); ++j) + { + size_t start_idx = core_limbs_idx[j][0]; + size_t end_idx = core_limbs_idx[j][1]; + + float *joint_start_ptr = pose.ptr(start_idx); + float *joint_end_ptr = pose.ptr(end_idx); + + if (joint_start_ptr[3] < min_score || joint_end_ptr[3] < min_score) + { + continue; + } + + float dx = joint_end_ptr[0] - joint_start_ptr[0]; + float dy = joint_end_ptr[1] - joint_start_ptr[1]; + float dz = joint_end_ptr[2] - joint_start_ptr[2]; + float length = std::sqrt(dx * dx + dy * dy + dz * dz); + + total_length += length; + total_limbs++; + } + if (total_limbs == 0) + { + drop_indices.push_back(i); + continue; + } + float average_length = total_length / static_cast(total_limbs); + if (average_length < min_avg_length) + { + drop_indices.push_back(i); + continue; + } + if (total_limbs >= 5 && average_length > max_avg_length) + { + drop_indices.push_back(i); + continue; + } + } + + // Set confidences of invalid poses to a low value + for (size_t i = 0; i < drop_indices.size(); ++i) + { + cv::Mat &pose = poses[drop_indices[i]]; + for (int j = 0; j < pose.rows; ++j) + { + float *joint_ptr = pose.ptr(j); + joint_ptr[3] = 0.001f; + } + } +} + +// ================================================================================================= + +void TriangulatorInternal::add_missing_joints( + std::vector &poses, const std::vector &joint_names) +{ + // Map joint names to their indices for quick lookup + std::unordered_map joint_name_to_idx; + for (size_t idx = 0; idx < joint_names.size(); ++idx) + { + joint_name_to_idx[joint_names[idx]] = idx; + } + + // Adjacent mapping + std::unordered_map> adjacents = { + {"hip_right", {"hip_middle", "hip_left"}}, + {"hip_left", {"hip_middle", "hip_right"}}, + {"knee_right", {"hip_right", "knee_left"}}, + {"knee_left", {"hip_left", "knee_right"}}, + {"ankle_right", {"knee_right", "ankle_left"}}, + {"ankle_left", {"knee_left", "ankle_right"}}, + {"shoulder_right", {"shoulder_middle", "shoulder_left"}}, + {"shoulder_left", {"shoulder_middle", "shoulder_right"}}, + {"elbow_right", {"shoulder_right", "hip_right"}}, + {"elbow_left", {"shoulder_left", "hip_left"}}, + {"wrist_right", {"elbow_right"}}, + {"wrist_left", {"elbow_left"}}, + {"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}}, + {"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}}, + {"foot_*_left_*", {"ankle_left"}}, + {"foot_*_right_*", {"ankle_right"}}, + {"face_*", {"nose"}}, + {"hand_*_left_*", {"wrist_left"}}, + {"hand_*_right_*", {"wrist_right"}}}; + + for (size_t i = 0; i < poses.size(); ++i) + { + cv::Mat &pose = poses[i]; + const size_t num_joints = pose.rows; + + // Collect valid joint indices + std::vector valid_joint_idx; + for (size_t j = 0; j < num_joints; ++j) + { + float *pose_ptr = pose.ptr(j); + if (pose_ptr[3] > min_score) + { + valid_joint_idx.push_back(j); + } + } + + if (valid_joint_idx.empty()) + { + continue; + } + + // Compute body center as the mean of valid joints + std::array body_center = {0.0f, 0.0f, 0.0f}; + for (size_t idx : valid_joint_idx) + { + float *joint_ptr = pose.ptr(idx); + body_center[0] += joint_ptr[0]; + body_center[1] += joint_ptr[1]; + body_center[2] += joint_ptr[2]; + } + for (int j = 0; j < 3; ++j) + { + body_center[j] /= static_cast(valid_joint_idx.size()); + } + + // Iterate over each joint + for (size_t j = 0; j < joint_names.size(); ++j) + { + std::string adname = ""; + const std::string &jname = joint_names[j]; + + // Determine adjacent joint based on name patterns + if (jname.substr(0, 5) == "foot_" && jname.find("_left") != std::string::npos) + { + adname = "foot_*_left_*"; + } + else if (jname.substr(0, 5) == "foot_" && jname.find("_right") != std::string::npos) + { + adname = "foot_*_right_*"; + } + else if (jname.substr(0, 5) == "face_") + { + adname = "face_*"; + } + else if (jname.substr(0, 5) == "hand_" && jname.find("_left") != std::string::npos) + { + adname = "hand_*_left_*"; + } + else if (jname.substr(0, 5) == "hand_" && jname.find("_right") != std::string::npos) + { + adname = "hand_*_right_*"; + } + else if (adjacents.find(jname) != adjacents.end()) + { + adname = jname; + } + + if (adname == "") + { + // No adjacent joints defined for this joint + continue; + } + + float *joint_ptr = pose.ptr(j); + if (joint_ptr[3] == 0.0f) + { + if (adjacents.find(adname) != adjacents.end()) + { + // Joint is missing; attempt to estimate its position based on adjacent joints + std::array adjacent_position; + size_t adjacent_count = 0; + + const std::vector &adjacent_joint_names = adjacents[adname]; + for (const std::string &adj_name : adjacent_joint_names) + { + auto it = joint_name_to_idx.find(adj_name); + if (it != joint_name_to_idx.end()) + { + size_t adj_idx = it->second; + float *adj_joint_ptr = pose.ptr(adj_idx); + if (adj_joint_ptr[3] > 0.1f) + { + adjacent_position[0] += adj_joint_ptr[0]; + adjacent_position[1] += adj_joint_ptr[1]; + adjacent_position[2] += adj_joint_ptr[2]; + adjacent_count++; + } + } + } + + if (adjacent_count > 0) + { + for (size_t k = 0; k < 3; ++k) + { + adjacent_position[k] /= static_cast(adjacent_count); + } + + // Update the missing joint's position + joint_ptr[0] = adjacent_position[0]; + joint_ptr[1] = adjacent_position[1]; + joint_ptr[2] = adjacent_position[2]; + } + else + { + // No valid adjacent joints, use body center + joint_ptr[0] = body_center[0]; + joint_ptr[1] = body_center[1]; + joint_ptr[2] = body_center[2]; + } + } + else + { + // Adjacent joints not defined, use body center + joint_ptr[0] = body_center[0]; + joint_ptr[1] = body_center[1]; + joint_ptr[2] = body_center[2]; + } + + // Set a low confidence score + joint_ptr[3] = 0.1f; + } + } + } +} diff --git a/spt/triangulator.hpp b/spt/triangulator.hpp index 31dc48b..c0db3f4 100644 --- a/spt/triangulator.hpp +++ b/spt/triangulator.hpp @@ -67,7 +67,6 @@ private: {"shoulder_left", "elbow_left"}, {"shoulder_right", "elbow_right"}, }; - std::vector last_poses_3d; void undistort_poses(std::vector &poses, CameraInternal &icam); @@ -102,4 +101,16 @@ private: float min_score); cv::Mat merge_group(const std::vector &poses_3d, float min_score); + + void add_extra_joints(std::vector &poses, const std::vector &joint_names); + + void filter_poses( + std::vector &poses, + const std::array, 2> &roomparams, + const std::vector &core_joint_idx, + const std::vector> &core_limbs_idx, + bool drop_few_limbs); + + void add_missing_joints( + std::vector &poses, const std::vector &joint_names); };