Converted postprocessing steps to cpp as well.
This commit is contained in:
@ -278,24 +278,24 @@ Results of the model in various experiments on different datasets.
|
||||
(duration 00:00:56)
|
||||
```json
|
||||
{
|
||||
"avg_time_2d": 0.1016948051059369,
|
||||
"avg_time_3d": 0.0012720597978310077,
|
||||
"avg_fps": 9.711862169782414
|
||||
"avg_time_2d": 0.09936108867737026,
|
||||
"avg_time_3d": 0.0006794069231170969,
|
||||
"avg_fps": 9.995952079181109
|
||||
}
|
||||
{
|
||||
"person_nums": {
|
||||
"total_frames": 301,
|
||||
"total_labels": 477,
|
||||
"total_preds": 889,
|
||||
"considered_empty": 2,
|
||||
"total_preds": 886,
|
||||
"considered_empty": 0,
|
||||
"valid_preds": 477,
|
||||
"invalid_preds": 410,
|
||||
"invalid_preds": 409,
|
||||
"missing": 0,
|
||||
"invalid_fraction": 0.46223,
|
||||
"precision": 0.53777,
|
||||
"invalid_fraction": 0.46163,
|
||||
"precision": 0.53837,
|
||||
"recall": 1.0,
|
||||
"f1": 0.69941,
|
||||
"non_empty": 887
|
||||
"f1": 0.69993,
|
||||
"non_empty": 886
|
||||
},
|
||||
"mpjpe": {
|
||||
"count": 477,
|
||||
|
||||
@ -373,11 +373,6 @@ def main():
|
||||
cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
|
||||
images_2d[i] = cv2.resize(img, (1000, 1000))
|
||||
|
||||
roomparams = {
|
||||
"room_size": label["room_size"],
|
||||
"room_center": label["room_center"],
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
|
||||
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
|
||||
@ -386,36 +381,18 @@ def main():
|
||||
|
||||
start = time.time()
|
||||
if sum(np.sum(p) for p in poses_2d) == 0:
|
||||
poses3D = np.zeros([1, len(joint_names_3d), 4])
|
||||
poses2D = np.zeros([len(images_2d), 1, len(joint_names_3d), 3])
|
||||
poses3D = np.zeros([1, len(joint_names_3d), 4]).tolist()
|
||||
else:
|
||||
cameras = spt.convert_cameras(label["cameras"])
|
||||
roomp = [roomparams["room_center"], roomparams["room_size"]]
|
||||
poses_3d = triangulator.triangulate_poses(
|
||||
poses_2d, cameras, roomp, joint_names_2d
|
||||
spt_cameras = spt.convert_cameras(label["cameras"])
|
||||
roomparams = [label["room_center"], label["room_size"]]
|
||||
poses3D = triangulator.triangulate_poses(
|
||||
poses_2d, spt_cameras, roomparams, joint_names_2d
|
||||
)
|
||||
poses3D = np.array(poses_3d)
|
||||
if len(poses3D) == 0:
|
||||
poses3D = np.zeros([1, len(joint_names_3d), 4])
|
||||
|
||||
poses2D = []
|
||||
for cam in label["cameras"]:
|
||||
poses_2d, _ = utils_pose.project_poses(poses3D, cam)
|
||||
poses2D.append(poses_2d)
|
||||
poses3D, poses2D = add_extra_joints(poses3D, poses2D, joint_names_3d)
|
||||
poses3D, poses2D = test_triangulate.filter_poses(
|
||||
poses3D,
|
||||
poses2D,
|
||||
roomparams,
|
||||
joint_names_3d,
|
||||
drop_few_limbs=(dataset_use != "mvor"),
|
||||
)
|
||||
poses3D = add_missing_joints(poses3D, joint_names_3d)
|
||||
|
||||
time_3d = time.time() - start
|
||||
print("3D time:", time_3d)
|
||||
|
||||
all_poses.append(poses3D)
|
||||
all_poses.append(np.array(poses3D))
|
||||
all_ids.append(label["id"])
|
||||
all_paths.append(label["imgpaths"])
|
||||
times.append((time_2d, time_3d))
|
||||
|
||||
@ -470,14 +470,20 @@ std::vector<std::vector<std::array<float, 4>>> TriangulatorInternal::triangulate
|
||||
auto merged_pose = merge_group(poses, min_score);
|
||||
all_merged_poses[i] = (merged_pose);
|
||||
}
|
||||
last_poses_3d = all_merged_poses;
|
||||
|
||||
// Run post-processing steps
|
||||
std::vector<cv::Mat> final_poses_3d = all_merged_poses;
|
||||
add_extra_joints(final_poses_3d, joint_names);
|
||||
filter_poses(final_poses_3d, roomparams, core_joint_idx, core_limbs_idx, true);
|
||||
add_missing_joints(final_poses_3d, joint_names);
|
||||
last_poses_3d = final_poses_3d;
|
||||
|
||||
// Convert to output format
|
||||
std::vector<std::vector<std::array<float, 4>>> poses_3d;
|
||||
poses_3d.reserve(all_merged_poses.size());
|
||||
for (size_t i = 0; i < all_merged_poses.size(); ++i)
|
||||
poses_3d.reserve(final_poses_3d.size());
|
||||
for (size_t i = 0; i < final_poses_3d.size(); ++i)
|
||||
{
|
||||
const auto &mat = all_merged_poses[i];
|
||||
const auto &mat = final_poses_3d[i];
|
||||
std::vector<std::array<float, 4>> pose;
|
||||
size_t num_joints = mat.rows;
|
||||
pose.reserve(num_joints);
|
||||
@ -1370,3 +1376,409 @@ cv::Mat TriangulatorInternal::merge_group(const std::vector<cv::Mat> &poses_3d,
|
||||
|
||||
return final_pose_3d;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
void TriangulatorInternal::add_extra_joints(
|
||||
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names)
|
||||
{
|
||||
// Find indices of "head", "ear_left", and "ear_right"
|
||||
auto it_head = std::find(joint_names.begin(), joint_names.end(), "head");
|
||||
auto it_ear_left = std::find(joint_names.begin(), joint_names.end(), "ear_left");
|
||||
auto it_ear_right = std::find(joint_names.begin(), joint_names.end(), "ear_right");
|
||||
int idx_h = std::distance(joint_names.begin(), it_head);
|
||||
int idx_el = std::distance(joint_names.begin(), it_ear_left);
|
||||
int idx_er = std::distance(joint_names.begin(), it_ear_right);
|
||||
|
||||
// If the confidence score of "head" is zero compute it as the average of the ears
|
||||
for (size_t i = 0; i < poses.size(); ++i)
|
||||
{
|
||||
cv::Mat &pose = poses[i];
|
||||
float *pose_ptr_h = pose.ptr<float>(idx_h);
|
||||
|
||||
if (pose_ptr_h[3] == 0.0f)
|
||||
{
|
||||
float *pose_ptr_el = pose.ptr<float>(idx_el);
|
||||
float *pose_ptr_er = pose.ptr<float>(idx_er);
|
||||
|
||||
if (pose_ptr_el[3] > 0.0f && pose_ptr_er[3] > 0.0f)
|
||||
{
|
||||
pose_ptr_h[0] = (pose_ptr_el[0] + pose_ptr_er[0]) * 0.5f;
|
||||
pose_ptr_h[1] = (pose_ptr_el[1] + pose_ptr_er[1]) * 0.5f;
|
||||
pose_ptr_h[2] = (pose_ptr_el[2] + pose_ptr_er[2]) * 0.5f;
|
||||
pose_ptr_h[3] = std::min(pose_ptr_el[3], pose_ptr_er[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
void TriangulatorInternal::filter_poses(
|
||||
std::vector<cv::Mat> &poses,
|
||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||
const std::vector<size_t> &core_joint_idx,
|
||||
const std::vector<std::array<size_t, 2>> &core_limbs_idx,
|
||||
bool drop_few_limbs = true)
|
||||
{
|
||||
const float min_score = 0.1;
|
||||
std::vector<int> drop_indices;
|
||||
|
||||
for (size_t i = 0; i < poses.size(); ++i)
|
||||
{
|
||||
cv::Mat &pose = poses[i];
|
||||
const size_t num_joints = pose.rows;
|
||||
|
||||
// Collect valid joint indices
|
||||
std::vector<size_t> valid_joint_idx;
|
||||
for (size_t j = 0; j < num_joints; ++j)
|
||||
{
|
||||
float *pose_ptr = pose.ptr<float>(j);
|
||||
if (pose_ptr[3] > min_score)
|
||||
{
|
||||
valid_joint_idx.push_back(j);
|
||||
}
|
||||
}
|
||||
|
||||
// Drop poses with too few joints
|
||||
if (valid_joint_idx.size() < 5)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compute min, max and mean coordinates
|
||||
std::array<float, 3> mean = {0.0, 0.0, 0.0};
|
||||
std::array<float, 3> mins = {
|
||||
std::numeric_limits<float>::max(),
|
||||
std::numeric_limits<float>::max(),
|
||||
std::numeric_limits<float>::max()};
|
||||
std::array<float, 3> maxs = {
|
||||
std::numeric_limits<float>::lowest(),
|
||||
std::numeric_limits<float>::lowest(),
|
||||
std::numeric_limits<float>::lowest()};
|
||||
for (size_t j = 0; j < valid_joint_idx.size(); ++j)
|
||||
{
|
||||
float *joint_ptr = pose.ptr<float>(valid_joint_idx[j]);
|
||||
for (size_t k = 0; k < 3; ++k)
|
||||
{
|
||||
mins[k] = std::min(mins[k], joint_ptr[k]);
|
||||
maxs[k] = std::max(maxs[k], joint_ptr[k]);
|
||||
mean[k] += joint_ptr[k];
|
||||
}
|
||||
}
|
||||
for (size_t k = 0; k < 3; ++k)
|
||||
{
|
||||
mean[k] /= static_cast<float>(valid_joint_idx.size());
|
||||
}
|
||||
|
||||
// Drop poses that are too large or too small or too flat
|
||||
float min_flatness = 0.2f;
|
||||
float max_size = 2.3f;
|
||||
float min_size = 0.3f;
|
||||
std::array<float, 3> diff;
|
||||
for (int j = 0; j < 3; ++j)
|
||||
{
|
||||
diff[j] = maxs[j] - mins[j];
|
||||
}
|
||||
if (diff[0] > max_size || diff[1] > max_size || diff[2] > max_size)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (diff[0] < min_size && diff[1] < min_size && diff[2] < min_size)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if ((diff[0] < min_flatness && diff[1] < min_flatness) ||
|
||||
(diff[1] < min_flatness && diff[2] < min_flatness) ||
|
||||
(diff[2] < min_flatness && diff[0] < min_flatness))
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Drop persons outside room
|
||||
const float wdist = 0.1f;
|
||||
const auto &room_size = roomparams[0];
|
||||
const auto &room_center = roomparams[1];
|
||||
const std::array<float, 3> room_half_size = {
|
||||
room_size[0] / 2.0f,
|
||||
room_size[1] / 2.0f,
|
||||
room_size[2] / 2.0f};
|
||||
bool outside = false;
|
||||
for (int j = 0; j < 3; ++j)
|
||||
{
|
||||
// Check if the mean position is outside the room boundaries
|
||||
if (mean[j] > room_half_size[j] + room_center[j] ||
|
||||
mean[j] < -room_half_size[j] - room_center[j])
|
||||
{
|
||||
outside = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < 3; ++j)
|
||||
{
|
||||
// Check if any limb is too far outside the room
|
||||
if (maxs[j] > room_half_size[j] + room_center[j] + wdist ||
|
||||
mins[j] < -room_half_size[j] - room_center[j] - wdist)
|
||||
{
|
||||
outside = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (outside)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (drop_few_limbs)
|
||||
{
|
||||
// Drop poses with less than 3 valid limbs
|
||||
size_t found_limbs = 0;
|
||||
for (size_t j = 0; j < core_limbs_idx.size(); ++j)
|
||||
{
|
||||
size_t start_idx = core_limbs_idx[j][0];
|
||||
size_t end_idx = core_limbs_idx[j][1];
|
||||
|
||||
float *joint_start_ptr = pose.ptr<float>(start_idx);
|
||||
float *joint_end_ptr = pose.ptr<float>(end_idx);
|
||||
|
||||
if (joint_start_ptr[3] > min_score && joint_end_ptr[3] > min_score)
|
||||
{
|
||||
found_limbs++;
|
||||
}
|
||||
}
|
||||
if (found_limbs < 3)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate total limb length and average limb length
|
||||
const float max_avg_length = 0.5f;
|
||||
const float min_avg_length = 0.1f;
|
||||
float total_length = 0.0f;
|
||||
int total_limbs = 0;
|
||||
for (size_t j = 0; j < core_limbs_idx.size(); ++j)
|
||||
{
|
||||
size_t start_idx = core_limbs_idx[j][0];
|
||||
size_t end_idx = core_limbs_idx[j][1];
|
||||
|
||||
float *joint_start_ptr = pose.ptr<float>(start_idx);
|
||||
float *joint_end_ptr = pose.ptr<float>(end_idx);
|
||||
|
||||
if (joint_start_ptr[3] < min_score || joint_end_ptr[3] < min_score)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
float dx = joint_end_ptr[0] - joint_start_ptr[0];
|
||||
float dy = joint_end_ptr[1] - joint_start_ptr[1];
|
||||
float dz = joint_end_ptr[2] - joint_start_ptr[2];
|
||||
float length = std::sqrt(dx * dx + dy * dy + dz * dz);
|
||||
|
||||
total_length += length;
|
||||
total_limbs++;
|
||||
}
|
||||
if (total_limbs == 0)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
float average_length = total_length / static_cast<float>(total_limbs);
|
||||
if (average_length < min_avg_length)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (total_limbs >= 5 && average_length > max_avg_length)
|
||||
{
|
||||
drop_indices.push_back(i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Set confidences of invalid poses to a low value
|
||||
for (size_t i = 0; i < drop_indices.size(); ++i)
|
||||
{
|
||||
cv::Mat &pose = poses[drop_indices[i]];
|
||||
for (int j = 0; j < pose.rows; ++j)
|
||||
{
|
||||
float *joint_ptr = pose.ptr<float>(j);
|
||||
joint_ptr[3] = 0.001f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
void TriangulatorInternal::add_missing_joints(
|
||||
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names)
|
||||
{
|
||||
// Map joint names to their indices for quick lookup
|
||||
std::unordered_map<std::string, size_t> joint_name_to_idx;
|
||||
for (size_t idx = 0; idx < joint_names.size(); ++idx)
|
||||
{
|
||||
joint_name_to_idx[joint_names[idx]] = idx;
|
||||
}
|
||||
|
||||
// Adjacent mapping
|
||||
std::unordered_map<std::string, std::vector<std::string>> adjacents = {
|
||||
{"hip_right", {"hip_middle", "hip_left"}},
|
||||
{"hip_left", {"hip_middle", "hip_right"}},
|
||||
{"knee_right", {"hip_right", "knee_left"}},
|
||||
{"knee_left", {"hip_left", "knee_right"}},
|
||||
{"ankle_right", {"knee_right", "ankle_left"}},
|
||||
{"ankle_left", {"knee_left", "ankle_right"}},
|
||||
{"shoulder_right", {"shoulder_middle", "shoulder_left"}},
|
||||
{"shoulder_left", {"shoulder_middle", "shoulder_right"}},
|
||||
{"elbow_right", {"shoulder_right", "hip_right"}},
|
||||
{"elbow_left", {"shoulder_left", "hip_left"}},
|
||||
{"wrist_right", {"elbow_right"}},
|
||||
{"wrist_left", {"elbow_left"}},
|
||||
{"nose", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
|
||||
{"head", {"shoulder_middle", "shoulder_right", "shoulder_left"}},
|
||||
{"foot_*_left_*", {"ankle_left"}},
|
||||
{"foot_*_right_*", {"ankle_right"}},
|
||||
{"face_*", {"nose"}},
|
||||
{"hand_*_left_*", {"wrist_left"}},
|
||||
{"hand_*_right_*", {"wrist_right"}}};
|
||||
|
||||
for (size_t i = 0; i < poses.size(); ++i)
|
||||
{
|
||||
cv::Mat &pose = poses[i];
|
||||
const size_t num_joints = pose.rows;
|
||||
|
||||
// Collect valid joint indices
|
||||
std::vector<size_t> valid_joint_idx;
|
||||
for (size_t j = 0; j < num_joints; ++j)
|
||||
{
|
||||
float *pose_ptr = pose.ptr<float>(j);
|
||||
if (pose_ptr[3] > min_score)
|
||||
{
|
||||
valid_joint_idx.push_back(j);
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_joint_idx.empty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compute body center as the mean of valid joints
|
||||
std::array<float, 3> body_center = {0.0f, 0.0f, 0.0f};
|
||||
for (size_t idx : valid_joint_idx)
|
||||
{
|
||||
float *joint_ptr = pose.ptr<float>(idx);
|
||||
body_center[0] += joint_ptr[0];
|
||||
body_center[1] += joint_ptr[1];
|
||||
body_center[2] += joint_ptr[2];
|
||||
}
|
||||
for (int j = 0; j < 3; ++j)
|
||||
{
|
||||
body_center[j] /= static_cast<float>(valid_joint_idx.size());
|
||||
}
|
||||
|
||||
// Iterate over each joint
|
||||
for (size_t j = 0; j < joint_names.size(); ++j)
|
||||
{
|
||||
std::string adname = "";
|
||||
const std::string &jname = joint_names[j];
|
||||
|
||||
// Determine adjacent joint based on name patterns
|
||||
if (jname.substr(0, 5) == "foot_" && jname.find("_left") != std::string::npos)
|
||||
{
|
||||
adname = "foot_*_left_*";
|
||||
}
|
||||
else if (jname.substr(0, 5) == "foot_" && jname.find("_right") != std::string::npos)
|
||||
{
|
||||
adname = "foot_*_right_*";
|
||||
}
|
||||
else if (jname.substr(0, 5) == "face_")
|
||||
{
|
||||
adname = "face_*";
|
||||
}
|
||||
else if (jname.substr(0, 5) == "hand_" && jname.find("_left") != std::string::npos)
|
||||
{
|
||||
adname = "hand_*_left_*";
|
||||
}
|
||||
else if (jname.substr(0, 5) == "hand_" && jname.find("_right") != std::string::npos)
|
||||
{
|
||||
adname = "hand_*_right_*";
|
||||
}
|
||||
else if (adjacents.find(jname) != adjacents.end())
|
||||
{
|
||||
adname = jname;
|
||||
}
|
||||
|
||||
if (adname == "")
|
||||
{
|
||||
// No adjacent joints defined for this joint
|
||||
continue;
|
||||
}
|
||||
|
||||
float *joint_ptr = pose.ptr<float>(j);
|
||||
if (joint_ptr[3] == 0.0f)
|
||||
{
|
||||
if (adjacents.find(adname) != adjacents.end())
|
||||
{
|
||||
// Joint is missing; attempt to estimate its position based on adjacent joints
|
||||
std::array<float, 3> adjacent_position;
|
||||
size_t adjacent_count = 0;
|
||||
|
||||
const std::vector<std::string> &adjacent_joint_names = adjacents[adname];
|
||||
for (const std::string &adj_name : adjacent_joint_names)
|
||||
{
|
||||
auto it = joint_name_to_idx.find(adj_name);
|
||||
if (it != joint_name_to_idx.end())
|
||||
{
|
||||
size_t adj_idx = it->second;
|
||||
float *adj_joint_ptr = pose.ptr<float>(adj_idx);
|
||||
if (adj_joint_ptr[3] > 0.1f)
|
||||
{
|
||||
adjacent_position[0] += adj_joint_ptr[0];
|
||||
adjacent_position[1] += adj_joint_ptr[1];
|
||||
adjacent_position[2] += adj_joint_ptr[2];
|
||||
adjacent_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (adjacent_count > 0)
|
||||
{
|
||||
for (size_t k = 0; k < 3; ++k)
|
||||
{
|
||||
adjacent_position[k] /= static_cast<float>(adjacent_count);
|
||||
}
|
||||
|
||||
// Update the missing joint's position
|
||||
joint_ptr[0] = adjacent_position[0];
|
||||
joint_ptr[1] = adjacent_position[1];
|
||||
joint_ptr[2] = adjacent_position[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
// No valid adjacent joints, use body center
|
||||
joint_ptr[0] = body_center[0];
|
||||
joint_ptr[1] = body_center[1];
|
||||
joint_ptr[2] = body_center[2];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Adjacent joints not defined, use body center
|
||||
joint_ptr[0] = body_center[0];
|
||||
joint_ptr[1] = body_center[1];
|
||||
joint_ptr[2] = body_center[2];
|
||||
}
|
||||
|
||||
// Set a low confidence score
|
||||
joint_ptr[3] = 0.1f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -67,7 +67,6 @@ private:
|
||||
{"shoulder_left", "elbow_left"},
|
||||
{"shoulder_right", "elbow_right"},
|
||||
};
|
||||
|
||||
std::vector<cv::Mat> last_poses_3d;
|
||||
|
||||
void undistort_poses(std::vector<cv::Mat> &poses, CameraInternal &icam);
|
||||
@ -102,4 +101,16 @@ private:
|
||||
float min_score);
|
||||
|
||||
cv::Mat merge_group(const std::vector<cv::Mat> &poses_3d, float min_score);
|
||||
|
||||
void add_extra_joints(std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
|
||||
|
||||
void filter_poses(
|
||||
std::vector<cv::Mat> &poses,
|
||||
const std::array<std::array<float, 3>, 2> &roomparams,
|
||||
const std::vector<size_t> &core_joint_idx,
|
||||
const std::vector<std::array<size_t, 2>> &core_limbs_idx,
|
||||
bool drop_few_limbs);
|
||||
|
||||
void add_missing_joints(
|
||||
std::vector<cv::Mat> &poses, const std::vector<std::string> &joint_names);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user