RapidPoseTriangulation/scripts/test_triangulate.py

import copy
import json
import os
import sys
import time
from typing import List

import cv2
import matplotlib
import numpy as np

import draw_utils
import utils_2d_pose
from skelda import utils_pose

sys.path.append("/SimplePoseTriangulation/swig/")
import spt

# ==================================================================================================

# Folder of this script (with trailing slash) and the sample data folder relative to it
filepath = f"{os.path.dirname(os.path.realpath(__file__))}/"
test_img_dir = f"{filepath}../data/"

# Switches for the optional keypoint groups that extend the 17 body joints
whole_body = dict(foots=False, face=False, hands=False)

# Body joints: the nose, followed by left/right pairs
joint_names_2d = ["nose"] + [
    f"{part}_{side}"
    for part in ("eye", "ear", "shoulder", "elbow", "wrist", "hip", "knee", "ankle")
    for side in ("left", "right")
]

if whole_body["foots"]:
    # Three foot joints per side, left foot first
    joint_names_2d += [
        f"foot_{part}_{side}"
        for side in ("left", "right")
        for part in ("toe_big", "toe_small", "heel")
    ]

if whole_body["face"]:
    # Jaw line: eight joints on the right, the middle one, eight on the left
    joint_names_2d += [f"face_jaw_right_{num}" for num in range(1, 9)]
    joint_names_2d += ["face_jaw_middle"]
    joint_names_2d += [f"face_jaw_left_{num}" for num in range(1, 9)]
    # Remaining face parts are numbered runs starting at 1
    joint_names_2d += [
        f"face_{part}_{num}"
        for part, count in (
            ("eyebrow_right", 5),
            ("eyebrow_left", 5),
            ("nose", 9),
            ("eye_right", 6),
            ("eye_left", 6),
            ("mouth", 20),
        )
        for num in range(1, count + 1)
    ]

if whole_body["hands"]:
    # Per hand: the wrist followed by four joints for each of the five fingers
    joint_names_2d += [
        name
        for side in ("left", "right")
        for name in (
            f"hand_wrist_{side}",
            *(
                f"hand_finger_{finger}_{side}_{num}"
                for finger in ("thumb", "index", "middle", "ring", "pinky")
                for num in range(1, 5)
            ),
        )
    ]

# Extra joints that close the list (update_keypoints() appends matching entries to each body)
joint_names_2d += ["hip_middle", "shoulder_middle", "head"]

# The 3D poses use the same joint layout, kept as an independent list
joint_names_3d = joint_names_2d.copy()

# Arm and leg segments as (start joint, end joint) name pairs, used by filter_poses()
main_limbs = [
    (f"{start}_{side}", f"{end}_{side}")
    for chain in (("shoulder", "elbow", "wrist"), ("hip", "knee", "ankle"))
    for side in ("left", "right")
    for start, end in zip(chain, chain[1:])
]

# ==================================================================================================


def update_sample(sample, new_dir=""):
    """Return a deep copy of ``sample`` with relocated image paths and extra keys.

    Every entry of ``imgpaths`` is reduced to its file name and re-rooted in
    ``new_dir``. ``cameras_color`` and ``imgpaths_color`` are set to the
    ``cameras`` and (relocated) ``imgpaths`` entries, and ``cameras_depth`` to an
    empty list. The input mapping is left unmodified.
    """
    updated = copy.deepcopy(sample)

    # Point every image at the same file name inside new_dir
    relocated = []
    for old_path in updated["imgpaths"]:
        relocated.append(os.path.join(new_dir, os.path.basename(old_path)))
    updated["imgpaths"] = relocated

    # Fill in the color/depth keys expected further down the pipeline
    updated.update(
        cameras_color=updated["cameras"],
        imgpaths_color=relocated,
        cameras_depth=[],
    )
    return updated


# ==================================================================================================


def load_image(path: str):
    """Read an image file and return it as a float32 RGB array.

    Args:
        path: Location of the image file.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB, as a
        ``numpy`` array of dtype ``float32``.

    Raises:
        OSError: If OpenCV could not read or decode the file.
    """
    # Flag value 3 is kept as in the original call
    # NOTE(review): presumably IMREAD_COLOR | IMREAD_ANYDEPTH - confirm against the OpenCV docs
    image = cv2.imread(path, 3)
    if image is None:
        # imread signals failure with None instead of raising; fail here with the
        # offending path rather than with an opaque error inside cvtColor
        raise OSError(f"Could not read image: {path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return np.array(image, dtype=np.float32)


# ==================================================================================================


def _is_implausible_pose(pose, roomparams, joint_names, drop_few_limbs):
    """Return True if a single 3D pose (rows of x, y, z, score) should be rejected."""
    visible = pose[..., -1] > 0.1

    # Too few confidently detected joints
    if np.sum(visible) < 5:
        return True

    valid_joints = pose[visible]
    mins = np.min(valid_joints, axis=0)
    maxs = np.max(valid_joints, axis=0)

    # Person size is judged on the spatial axes only; the score column must not
    # take part, otherwise a wide confidence spread hides a too-small person
    extent = (maxs - mins)[:3]
    if np.any(extent > 2.3):
        return True
    if np.all(extent < 0.3):
        return True
    if np.count_nonzero(extent < 0.2) >= 2:
        # Thin along at least two axes
        return True

    # Room limits per axis
    mean = np.mean(valid_joints, axis=0)
    rsize = [r / 2 for r in roomparams["room_size"]]
    rcent = roomparams["room_center"]
    for axis in range(3):
        upper = rsize[axis] + rcent[axis]
        lower = -rsize[axis] + rcent[axis]
        # Center of mass outside room
        if mean[axis] > upper or mean[axis] < lower:
            return True
        # One limb more than 10cm outside room
        if maxs[axis] > upper + 0.1 or mins[axis] < lower - 0.1:
            return True

    # Lengths of all main limbs whose two end joints are both visible
    limb_lengths = []
    for start_name, end_name in main_limbs:
        start = pose[joint_names.index(start_name)]
        end = pose[joint_names.index(end_name)]
        if start[-1] > 0.1 and end[-1] > 0.1:
            limb_lengths.append(np.linalg.norm(end[:3] - start[:3]))

    # Persons with less than 3 limbs
    if drop_few_limbs and len(limb_lengths) < 3:
        return True

    # Persons with too small or too high average limb length
    if not limb_lengths:
        return True
    average_length = sum(limb_lengths) / len(limb_lengths)
    if average_length < 0.1:
        return True
    if len(limb_lengths) > 4 and average_length > 0.5:
        return True

    return False


def filter_poses(poses3D, poses2D, roomparams, joint_names, drop_few_limbs=True):
    """Suppress implausible 3D poses together with their per-view 2D poses.

    A pose is rejected when it has fewer than five visible joints (score > 0.1),
    an implausible spatial extent, lies outside the room, has too few visible main
    limbs (only if ``drop_few_limbs``), or an implausible average limb length.
    Rejected poses are not removed: they are copied and their last column (the
    score) is set to 0.001, in the 3D result and in every view of the 2D result.
    Poses whose joint count differs from ``len(joint_names)`` are left out of the
    result entirely, without being analysed.

    Args:
        poses3D: Sequence of poses, each a sequence of ``[x, y, z, score]`` rows.
        poses2D: One sequence per view; ``poses2D[v][i]`` belongs to ``poses3D[i]``
            and stores the score in its last column.
        roomparams: Mapping with ``"room_size"`` and ``"room_center"`` (three
            values each are read).
        joint_names: Joint name per pose row; must contain the ``main_limbs`` joints.
        drop_few_limbs: Also reject poses with fewer than three visible main limbs.

    Returns:
        Tuple ``(new_poses3D, new_poses2D)`` of numpy arrays. If no pose remains,
        all-zero arrays of shape ``(1, J, 4)`` and ``(views, 1, J, 3)`` are returned,
        with ``J = len(joint_names)``.
    """
    num_joints = len(joint_names)

    new_poses3D = []
    new_poses2D = [[] for _ in poses2D]
    for i, raw_pose in enumerate(poses3D):
        if len(raw_pose) != num_joints:
            # Sometimes some joints of a poor detection are missing; such poses
            # must be skipped before any joint is looked up by index
            continue

        pose = np.array(raw_pose)
        if _is_implausible_pose(pose, roomparams, joint_names, drop_few_limbs):
            # Keep the slot but mark the pose as practically invisible
            # (np.array() above made a copy, so the input stays untouched)
            pose[..., -1] = 0.001
            new_poses3D.append(pose)
            for j, view in enumerate(poses2D):
                muted = np.array(view[i])
                muted[..., -1] = 0.001
                new_poses2D[j].append(muted)
        else:
            new_poses3D.append(raw_pose)
            for j, view in enumerate(poses2D):
                new_poses2D[j].append(view[i])

    new_poses3D = np.array(new_poses3D)
    new_poses2D = np.array(new_poses2D)
    if new_poses3D.size == 0:
        # Always hand back at least one (empty) pose
        new_poses3D = np.zeros([1, num_joints, 4])
        new_poses2D = np.zeros([len(poses2D), 1, num_joints, 3])

    return new_poses3D, new_poses2D


# ==================================================================================================


def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
    """Select the enabled keypoint groups and append three derived joints per body.

    For every body the first 17 keypoints are kept, followed by the foot, face
    and hand keypoints whose ``whole_body`` flag is set. Afterwards the midpoints
    of the hips, the shoulders and the ears are appended (in that order), each
    carrying the lower score of its two source joints.

    Args:
        poses_2d: One sequence of bodies per view. Each body must provide
            ``tolist()`` and yield ``[x, y, score]`` rows.
            NOTE(review): assumes the detector orders its keypoints as 17 body,
            6 foot, 68 face, 42 hand, matching the name groups in
            ``joint_names_2d`` - confirm against the 2D pose model.
        joint_names: Names of the joints of the resulting bodies; used to find
            the hip, shoulder and ear rows.

    Returns:
        Nested lists ``views -> bodies -> joints -> [x, y, score]``.
    """
    # Source row range of every optional group. The ranges have to match the
    # number of names per group (6 / 68 / 42), otherwise the joints no longer
    # line up with joint_names when only some groups are enabled.
    optional_groups = (
        ("foots", slice(17, 23)),
        ("face", slice(23, 91)),
        ("hands", slice(91, None)),
    )

    # Row indices of the joint pairs whose midpoint is appended
    midpoint_pairs = [
        (joint_names.index(left_name), joint_names.index(right_name))
        for left_name, right_name in (
            ("hip_left", "hip_right"),
            ("shoulder_left", "shoulder_right"),
            ("ear_left", "ear_right"),
        )
    ]

    new_views = []
    for view in poses_2d:
        new_bodies = []
        for body in view:
            keypoints = body.tolist()

            new_body = keypoints[:17]
            for group, rows in optional_groups:
                if whole_body[group]:
                    new_body.extend(keypoints[rows])

            for left_idx, right_idx in midpoint_pairs:
                left, right = new_body[left_idx], new_body[right_idx]
                new_body.append(
                    [
                        float((left[0] + right[0]) / 2.0),
                        float((left[1] + right[1]) / 2.0),
                        min(left[2], right[2]),
                    ]
                )

            new_bodies.append(new_body)
        new_views.append(new_bodies)

    return new_views


# ==================================================================================================


def main():
    """Detect, triangulate and plot the poses of every sample below ``test_img_dir``.

    Only sub-directories with a two-character name starting with ``p`` or ``h``
    are processed. For each of them the images listed in ``sample.json`` are
    loaded, 2D poses are detected and plotted to ``2d-k.png``. Samples with more
    than one image are additionally triangulated; the filtered 3D poses and
    their reprojections are plotted to ``3d-p.png`` and ``2d-p.png``.
    """
    # Any enabled whole-body group selects the whole-body model
    load_kpt_model = (
        utils_2d_pose.load_wb_model
        if any(whole_body.values())
        else utils_2d_pose.load_model
    )
    kpt_model = load_kpt_model()

    # Manually set matplotlib backend
    matplotlib.use("TkAgg")

    for dirname in sorted(os.listdir(test_img_dir)):
        dirpath = os.path.join(test_img_dir, dirname)

        # Sample folders are directories named "p?" or "h?"
        is_sample_dir = (
            os.path.isdir(dirpath) and len(dirname) == 2 and dirname[0] in ("p", "h")
        )
        if not is_sample_dir:
            continue

        # Load sample infos
        print("\n" + dirpath)
        with open(os.path.join(dirpath, "sample.json"), "r", encoding="utf-8") as file:
            raw_sample = json.load(file)
        sample = update_sample(raw_sample, dirpath)

        camparams = sample["cameras_color"]
        roomparams = {key: sample[key] for key in ("room_size", "room_center")}

        # Load one color image per camera
        images_2d = [
            load_image(sample["imgpaths_color"][cam_idx])
            for cam_idx in range(len(camparams))
        ]

        # Get 2D poses
        start_2d = time.time()
        detections = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
        poses_2d = update_keypoints(detections, joint_names_2d)
        print("2D time:", time.time() - start_2d)

        fig1 = draw_utils.show_poses2d(
            poses_2d, np.array(images_2d), joint_names_2d, "2D detections"
        )
        fig1.savefig(os.path.join(dirpath, "2d-k.png"), dpi=fig1.dpi)

        # A single view cannot be triangulated
        if len(images_2d) == 1:
            draw_utils.utils_view.show_plots()
            continue

        # Get 3D poses
        num_joints = len(joint_names_3d)
        if sum(np.sum(view) for view in poses_2d) != 0:
            cameras = spt.convert_cameras(camparams)
            roomp = [roomparams["room_center"], roomparams["room_size"]]
            triangulator = spt.Triangulator(min_score=0.95)

            start_3d = time.time()
            triangulated = triangulator.triangulate_poses(
                poses_2d, cameras, roomp, joint_names_2d
            )
            poses3D = np.array(triangulated)
            if len(poses3D) == 0:
                poses3D = np.zeros([1, num_joints, 4])
            print("3D time:", time.time() - start_3d)

            # Reproject the 3D poses into every camera, then filter both together
            poses2D = []
            for cam in camparams:
                reprojected, _ = utils_pose.project_poses(poses3D, cam)
                poses2D.append(reprojected)
            poses3D, poses2D = filter_poses(
                poses3D,
                poses2D,
                roomparams,
                joint_names_3d,
            )
        else:
            # Nothing detected in any view: use empty placeholder poses
            poses3D = np.zeros([1, num_joints, 4])
            poses2D = np.zeros([len(images_2d), 1, num_joints, 3])

        print(poses3D)

        fig2 = draw_utils.utils_view.show_poses3d(
            poses3D, joint_names_3d, roomparams, camparams
        )
        fig3 = draw_utils.show_poses2d(
            poses2D, np.array(images_2d), joint_names_3d, "2D reprojections"
        )
        fig2.savefig(os.path.join(dirpath, "3d-p.png"), dpi=fig2.dpi)
        fig3.savefig(os.path.join(dirpath, "2d-p.png"), dpi=fig3.dpi)
        draw_utils.utils_view.show_plots()


# ==================================================================================================

# Run the pipeline only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()