Converted postprocessing steps to cpp as well.

This commit is contained in:
Daniel
2024-09-17 16:10:25 +02:00
parent 75840cf045
commit 8eba0ffa65
4 changed files with 444 additions and 44 deletions

View File

@ -278,24 +278,24 @@ Results of the model in various experiments on different datasets.
(duration 00:00:56)
```json
{
"avg_time_2d": 0.1016948051059369,
"avg_time_3d": 0.0012720597978310077,
"avg_fps": 9.711862169782414
"avg_time_2d": 0.09936108867737026,
"avg_time_3d": 0.0006794069231170969,
"avg_fps": 9.995952079181109
}
{
"person_nums": {
"total_frames": 301,
"total_labels": 477,
"total_preds": 889,
"considered_empty": 2,
"total_preds": 886,
"considered_empty": 0,
"valid_preds": 477,
"invalid_preds": 410,
"invalid_preds": 409,
"missing": 0,
"invalid_fraction": 0.46223,
"precision": 0.53777,
"invalid_fraction": 0.46163,
"precision": 0.53837,
"recall": 1.0,
"f1": 0.69941,
"non_empty": 887
"f1": 0.69993,
"non_empty": 886
},
"mpjpe": {
"count": 477,

View File

@ -373,11 +373,6 @@ def main():
cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
images_2d[i] = cv2.resize(img, (1000, 1000))
roomparams = {
"room_size": label["room_size"],
"room_center": label["room_center"],
}
start = time.time()
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
@ -386,36 +381,18 @@ def main():
start = time.time()
if sum(np.sum(p) for p in poses_2d) == 0:
poses3D = np.zeros([1, len(joint_names_3d), 4])
poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist()
else:
cameras = spt.convert_cameras(label["cameras"])
roomp = [roomparams["room_center"], roomparams["room_size"]]
poses_3d = triangulator.triangulate_poses(
poses_2d, cameras, roomp, joint_names_2d
spt_cameras = spt.convert_cameras(label["cameras"])
roomparams = [label["room_center"], label["room_size"]]
poses3D = triangulator.triangulate_poses(
poses_2d, spt_cameras, roomparams, joint_names_2d
)
poses3D = np.array(poses_3d)
if len(poses3D) == 0:
poses3D = np.zeros([1, len(joint_names_3d), 4])
poses2D = []
for cam in label["cameras"]:
poses_2d, _ = utils_pose.project_poses(poses3D, cam)
poses2D.append(poses_2d)
poses3D, poses2D = add_extra_joints(poses3D, poses2D, joint_names_3d)
poses3D, poses2D = test_triangulate.filter_poses(
poses3D,
poses2D,
roomparams,
joint_names_3d,
drop_few_limbs=(dataset_use != "mvor"),
)
poses3D = add_missing_joints(poses3D, joint_names_3d)
time_3d = time.time() - start
print("3D time:", time_3d)
all_poses.append(poses3D)
all_poses.append(np.array(poses3D))
all_ids.append(label["id"])
all_paths.append(label["imgpaths"])
times.append((time_2d, time_3d))

View File

@ -470,14 +470,20 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
auto merged_pose = merge_group(poses, min_score);
all_merged_poses[i] = (merged_pose);
}
last_poses_3d = all_merged_poses;
// Run post-processing steps
std::vector<cv::Mat> final_poses_3d = all_merged_poses;
add_extra_joints(final_poses_3d, joint_names);
filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx, true);
add_missing_joints(final_poses_3d, joint_names);
last_poses_3d = final_poses_3d;
// Convert to output format
std::vector<std::vector<std::array<float, 4>>> poses_3d;
poses_3d.reserve(all_merged_poses.size());
for (size_t i = 0; i < all_merged_poses.size(); ++i)
poses_3d.reserve(final_poses_3d.size());
for (size_t i = 0; i < final_poses_3d.size(); ++i)
{
const auto &mat = all_merged_poses[i];
const auto &mat = final_poses_3d[i];
std::vector<std::array<float, 4>> pose;
size_t num_joints = mat.rows;
pose.reserve(num_joints);
@ -1370,3 +1376,409 @@ cv::Mat TriangulatorInternal::merge_group(const std::vector<cv::Mat> &poses_3d,
return final_pose_3d;
}
// =================================================================================================
void TriangulatorInternal::add_extra_joints(
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names)
{
// Find indices of "head", "ear_left", and "ear_right"
auto it_head = std::find(joint_names.begin(), joint_names.end(), "head");
auto it_ear_left = std::find(joint_names.begin(), joint_names.end(), "ear_left");
auto it_ear_right = std::find(joint_names.begin(), joint_names.end(), "ear_right");
int idx_h = std::distance(joint_names.begin(), it_head);
int idx_el = std::distance(joint_names.begin(), it_ear_left);
int idx_er = std::distance(joint_names.begin(), it_ear_right);
// If the confidence score of "head" is zero compute it as the average of the ears
for (size_t i = 0; i < poses.size(); ++i)
{
cv::Mat &pose = poses[i];
float *pose_ptr_h = pose.ptr<float>(idx_h);
if (pose_ptr_h[3] == 0.0f)
{
float *pose_ptr_el = pose.ptr<float>(idx_el);
float *pose_ptr_er = pose.ptr<float>(idx_er);
if (pose_ptr_el[3] > 0.0f && pose_ptr_er[3] > 0.0f)
{
pose_ptr_h[0] = (pose_ptr_el[0] + pose_ptr_er[0]) * 0.5f;
pose_ptr_h[1] = (pose_ptr_el[1] + pose_ptr_er[1]) * 0.5f;
pose_ptr_h[2] = (pose_ptr_el[2] + pose_ptr_er[2]) * 0.5f;
pose_ptr_h[3] = std::min(pose_ptr_el[3], pose_ptr_er[3]);
}
}
}
}
// =================================================================================================
void TriangulatorInternal::filter_poses(
std::vector<cv::Mat> &poses,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<size_t> &core_joint_idx,
const std::vector<std::array<size_t, 2>> &core_limbs_idx,
bool drop_few_limbs = true)
{
const float min_score = 0.1;
std::vector<int> drop_indices;
for (size_t i = 0; i < poses.size(); ++i)
{
cv::Mat &pose = poses[i];
const size_t num_joints = pose.rows;
// Collect valid joint indices
std::vector<size_t> valid_joint_idx;
for (size_t j = 0; j < num_joints; ++j)
{
float *pose_ptr = pose.ptr<float>(j);
if (pose_ptr[3] > min_score)
{
valid_joint_idx.push_back(j);
}
}
// Drop poses with too few joints
if (valid_joint_idx.size() < 5)
{
drop_indices.push_back(i);
continue;
}
// Compute min, max and mean coordinates
std::array<float, 3> mean = {0.0, 0.0, 0.0};
std::array<float, 3> mins = {
std::numeric_limits<float>::max(),
std::numeric_limits<float>::max(),
std::numeric_limits<float>::max()};
std::array<float, 3> maxs = {
std::numeric_limits<float>::lowest(),
std::numeric_limits<float>::lowest(),
std::numeric_limits<float>::lowest()};
for (size_t j = 0; j < valid_joint_idx.size(); ++j)
{
float *joint_ptr = pose.ptr<float>(valid_joint_idx[j]);
for (size_t k = 0; k < 3; ++k)
{
mins[k] = std::min(mins[k], joint_ptr[k]);
maxs[k] = std::max(maxs[k], joint_ptr[k]);
mean[k] += joint_ptr[k];
}
}
for (size_t k = 0; k < 3; ++k)
{
mean[k] /= static_cast<float>(valid_joint_idx.size());
}
// Drop poses that are too large or too small or too flat
float min_flatness = 0.2f;
float max_size = 2.3f;
float min_size = 0.3f;
std::array<float, 3> diff;
for (int j = 0; j < 3; ++j)
{
diff[j] = maxs[j] - mins[j];
}
if (diff[0] > max_size || diff[1] > max_size || diff[2] > max_size)
{
drop_indices.push_back(i);
continue;
}
if (diff[0] < min_size && diff[1] < min_size && diff[2] < min_size)
{
drop_indices.push_back(i);
continue;
}
if ((diff[0] < min_flatness && diff[1] < min_flatness) ||
(diff[1] < min_flatness && diff[2] < min_flatness) ||
(diff[2] < min_flatness && diff[0] < min_flatness))
{
drop_indices.push_back(i);
continue;
}
// Drop persons outside room
const float wdist = 0.1f;
const auto &room_size = roomparams[0];
const auto &room_center = roomparams[1];
const std::array<float, 3> room_half_size = {
room_size[0] / 2.0f,
room_size[1] / 2.0f,
room_size[2] / 2.0f};
bool outside = false;
for (int j = 0; j < 3; ++j)
{
// Check if the mean position is outside the room boundaries
if (mean[j] > room_half_size[j] + room_center[j] ||
mean[j] < -room_half_size[j] - room_center[j])
{
outside = true;
break;
}
}
for (int j = 0; j < 3; ++j)
{
// Check if any limb is too far outside the room
if (maxs[j] > room_half_size[j] + room_center[j] + wdist ||
mins[j] < -room_half_size[j] - room_center[j] - wdist)
{
outside = true;
break;
}
}
if (outside)
{
drop_indices.push_back(i);
continue;
}
if (drop_few_limbs)
{
// Drop poses with less than 3 valid limbs
size_t found_limbs = 0;
for (size_t j = 0; j < core_limbs_idx.size(); ++j)
{
size_t start_idx = core_limbs_idx[j][0];
size_t end_idx = core_limbs_idx[j][1];
float *joint_start_ptr = pose.ptr<float>(start_idx);
float *joint_end_ptr = pose.ptr<float>(end_idx);
if (joint_start_ptr[3] > min_score && joint_end_ptr[3] > min_score)
{
found_limbs++;
}
}
if (found_limbs < 3)
{
drop_indices.push_back(i);
continue;
}
}
// Calculate total limb length and average limb length
const float max_avg_length = 0.5f;
const float min_avg_length = 0.1f;
float total_length = 0.0f;
int total_limbs = 0;
for (size_t j = 0; j < core_limbs_idx.size(); ++j)
{
size_t start_idx = core_limbs_idx[j][0];
size_t end_idx = core_limbs_idx[j][1];
float *joint_start_ptr = pose.ptr<float>(start_idx);
float *joint_end_ptr = pose.ptr<float>(end_idx);
if (joint_start_ptr[3] < min_score || joint_end_ptr[3] < min_score)
{
continue;
}
float dx = joint_end_ptr[0] - joint_start_ptr[0];
float dy = joint_end_ptr[1] - joint_start_ptr[1];
float dz = joint_end_ptr[2] - joint_start_ptr[2];
float length = std::sqrt(dx * dx + dy * dy + dz * dz);
total_length += length;
total_limbs++;
}
if (total_limbs == 0)
{
drop_indices.push_back(i);
continue;
}
float average_length = total_length / static_cast<float>(total_limbs);
if (average_length < min_avg_length)
{
drop_indices.push_back(i);
continue;
}
if (total_limbs >= 5 && average_length > max_avg_length)
{
drop_indices.push_back(i);
continue;
}
}
// Set confidences of invalid poses to a low value
for (size_t i = 0; i < drop_indices.size(); ++i)
{
cv::Mat &pose = poses[drop_indices[i]];
for (int j = 0; j < pose.rows; ++j)
{
float *joint_ptr = pose.ptr<float>(j);
joint_ptr[3] = 0.001f;
}
}
}
// =================================================================================================
void TriangulatorInternal::add_missing_joints(
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names)
{
// Map joint names to their indices for quick lookup
std::unordered_map<std::string, size_t> joint_name_to_idx;
for (size_t idx = 0; idx < joint_names.size(); ++idx)
{
joint_name_to_idx[joint_names[idx]] = idx;
}
// Adjacent mapping
std::unordered_map<std::string, std::vector<std::string>> adjacents = {
{"hip_right", {"hip_middle", "hip_left"}},
{"hip_left", {"hip_middle", "hip_right"}},
{"knee_right", {"hip_right", "knee_left"}},
{"knee_left", {"hip_left", "knee_right"}},
{"ankle_right", {"knee_right", "ankle_left"}},
{"ankle_left", {"knee_left", "ankle_right"}},
{"shoulder_right", {"shoulder_middle", "shoulder_left"}},
{"shoulder_left", {"shoulder_middle", "shoulder_right"}},
{"elbow_right", {"shoulder_right", "hip_right"}},
{"elbow_left", {"shoulder_left", "hip_left"}},
{"wrist_right", {"elbow_right"}},
{"wrist_left", {"elbow_left"}},
{"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
{"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
{"foot_*_left_*", {"ankle_left"}},
{"foot_*_right_*", {"ankle_right"}},
{"face_*", {"nose"}},
{"hand_*_left_*", {"wrist_left"}},
{"hand_*_right_*", {"wrist_right"}}};
for (size_t i = 0; i < poses.size(); ++i)
{
cv::Mat &pose = poses[i];
const size_t num_joints = pose.rows;
// Collect valid joint indices
std::vector<size_t> valid_joint_idx;
for (size_t j = 0; j < num_joints; ++j)
{
float *pose_ptr = pose.ptr<float>(j);
if (pose_ptr[3] > min_score)
{
valid_joint_idx.push_back(j);
}
}
if (valid_joint_idx.empty())
{
continue;
}
// Compute body center as the mean of valid joints
std::array<float, 3> body_center = {0.0f, 0.0f, 0.0f};
for (size_t idx : valid_joint_idx)
{
float *joint_ptr = pose.ptr<float>(idx);
body_center[0] += joint_ptr[0];
body_center[1] += joint_ptr[1];
body_center[2] += joint_ptr[2];
}
for (int j = 0; j < 3; ++j)
{
body_center[j] /= static_cast<float>(valid_joint_idx.size());
}
// Iterate over each joint
for (size_t j = 0; j < joint_names.size(); ++j)
{
std::string adname = "";
const std::string &jname = joint_names[j];
// Determine adjacent joint based on name patterns
if (jname.substr(0, 5) == "foot_" && jname.find("_left") != std::string::npos)
{
adname = "foot_*_left_*";
}
else if (jname.substr(0, 5) == "foot_" && jname.find("_right") != std::string::npos)
{
adname = "foot_*_right_*";
}
else if (jname.substr(0, 5) == "face_")
{
adname = "face_*";
}
else if (jname.substr(0, 5) == "hand_" && jname.find("_left") != std::string::npos)
{
adname = "hand_*_left_*";
}
else if (jname.substr(0, 5) == "hand_" && jname.find("_right") != std::string::npos)
{
adname = "hand_*_right_*";
}
else if (adjacents.find(jname) != adjacents.end())
{
adname = jname;
}
if (adname == "")
{
// No adjacent joints defined for this joint
continue;
}
float *joint_ptr = pose.ptr<float>(j);
if (joint_ptr[3] == 0.0f)
{
if (adjacents.find(adname) != adjacents.end())
{
// Joint is missing; attempt to estimate its position based on adjacent joints
std::array<float, 3> adjacent_position;
size_t adjacent_count = 0;
const std::vector<std::string> &adjacent_joint_names = adjacents[adname];
for (const std::string &adj_name : adjacent_joint_names)
{
auto it = joint_name_to_idx.find(adj_name);
if (it != joint_name_to_idx.end())
{
size_t adj_idx = it->second;
float *adj_joint_ptr = pose.ptr<float>(adj_idx);
if (adj_joint_ptr[3] > 0.1f)
{
adjacent_position[0] += adj_joint_ptr[0];
adjacent_position[1] += adj_joint_ptr[1];
adjacent_position[2] += adj_joint_ptr[2];
adjacent_count++;
}
}
}
if (adjacent_count > 0)
{
for (size_t k = 0; k < 3; ++k)
{
adjacent_position[k] /= static_cast<float>(adjacent_count);
}
// Update the missing joint's position
joint_ptr[0] = adjacent_position[0];
joint_ptr[1] = adjacent_position[1];
joint_ptr[2] = adjacent_position[2];
}
else
{
// No valid adjacent joints, use body center
joint_ptr[0] = body_center[0];
joint_ptr[1] = body_center[1];
joint_ptr[2] = body_center[2];
}
}
else
{
// Adjacent joints not defined, use body center
joint_ptr[0] = body_center[0];
joint_ptr[1] = body_center[1];
joint_ptr[2] = body_center[2];
}
// Set a low confidence score
joint_ptr[3] = 0.1f;
}
}
}
}

View File

@ -67,7 +67,6 @@ private:
{"shoulder_left", "elbow_left"},
{"shoulder_right", "elbow_right"},
};
std::vector<cv::Mat> last_poses_3d;
void undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam);
@ -102,4 +101,16 @@ private:
float min_score);
cv::Mat merge_group(const std::vector<cv::Mat> &poses_3d, float min_score);
void add_extra_joints(std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
void filter_poses(
std::vector<cv::Mat> &poses,
const std::array<std::array<float, 3>, 2> &roomparams,
const std::vector<size_t> &core_joint_idx,
const std::vector<std::array<size_t, 2>> &core_limbs_idx,
bool drop_few_limbs);
void add_missing_joints(
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
};